ateetvatan commited on
Commit
2289445
·
1 Parent(s): 994963c

openchat-llm init

Browse files
Files changed (4) hide show
  1. .gitignore +368 -0
  2. app.py +83 -0
  3. model_loader.py +14 -0
  4. requirements.txt +17 -0
.gitignore ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be added to the global gitignore or merged into this project gitignore. For a PyCharm
158
+ # project, it is recommended to include the following files:
159
+ # .idea/
160
+ # *.iml
161
+ # *.ipr
162
+ # *.iws
163
+ .idea/
164
+ *.iml
165
+ *.ipr
166
+ *.iws
167
+
168
+ # VS Code
169
+ .vscode/
170
+ *.code-workspace
171
+
172
+ # Sublime Text
173
+ *.sublime-project
174
+ *.sublime-workspace
175
+
176
+ # Vim
177
+ *.swp
178
+ *.swo
179
+ *~
180
+
181
+ # Emacs
182
+ *~
183
+ \#*\#
184
+ /.emacs.desktop
185
+ /.emacs.desktop.lock
186
+ *.elc
187
+ auto-save-list
188
+ tramp
189
+ .\#*
190
+
191
+ # macOS
192
+ .DS_Store
193
+ .AppleDouble
194
+ .LSOverride
195
+ Icon
196
+ ._*
197
+ .DocumentRevisions-V100
198
+ .fseventsd
199
+ .Spotlight-V100
200
+ .TemporaryItems
201
+ .Trashes
202
+ .VolumeIcon.icns
203
+ .com.apple.timemachine.donotpresent
204
+ .AppleDB
205
+ .AppleDesktop
206
+ Network Trash Folder
207
+ Temporary Items
208
+ .apdisk
209
+
210
+ # Windows
211
+ Thumbs.db
212
+ Thumbs.db:encryptable
213
+ ehthumbs.db
214
+ ehthumbs_vista.db
215
+ *.tmp
216
+ *.temp
217
+ Desktop.ini
218
+ $RECYCLE.BIN/
219
+ *.cab
220
+ *.msi
221
+ *.msix
222
+ *.msm
223
+ *.msp
224
+ *.lnk
225
+
226
+ # Linux
227
+ *~
228
+ .fuse_hidden*
229
+ .directory
230
+ .Trash-*
231
+ .nfs*
232
+
233
+ # ===== MACHINE LEARNING SPECIFIC =====
234
+
235
+ # Model files and checkpoints
236
+ *.pth
237
+ *.pt
238
+ *.ckpt
239
+ *.safetensors
240
+ *.bin
241
+ *.h5
242
+ *.hdf5
243
+ *.pb
244
+ *.onnx
245
+ *.tflite
246
+ *.tfl
247
+ *.lite
248
+ *.mlmodel
249
+ *.mlpackage
250
+
251
+ # Model directories
252
+ models/
253
+ checkpoints/
254
+ saved_models/
255
+ pretrained_models/
256
+ model_cache/
257
+ transformers_cache/
258
+ huggingface_cache/
259
+
260
+ # Hugging Face specific
261
+ .cache/
262
+ .huggingface/
263
+
264
+ # TensorBoard logs
265
+ runs/
266
+ logs/
267
+ tensorboard_logs/
268
+ *.tfevents.*
269
+
270
+ # Weights & Biases
271
+ wandb/
272
+
273
+ # MLflow
274
+ mlruns/
275
+ mlflow.db
276
+
277
+ # Jupyter notebook checkpoints
278
+ .ipynb_checkpoints/
279
+
280
+ # ===== FASTAPI SPECIFIC =====
281
+
282
+ # FastAPI generated files
283
+ .openapi.json
284
+ openapi.json
285
+
286
+ # API documentation builds
287
+ docs/_build/
288
+ site/
289
+
290
+ # ===== LOGS AND TEMPORARY FILES =====
291
+
292
+ # Application logs
293
+ *.log
294
+ logs/
295
+ log/
296
+
297
+ # Temporary files
298
+ tmp/
299
+ temp/
300
+ *.tmp
301
+ *.temp
302
+
303
+ # Cache directories
304
+ .cache/
305
+ cache/
306
+ __pycache__/
307
+
308
+ # ===== CONFIGURATION FILES =====
309
+
310
+ # Environment variables
311
+ .env
312
+ .env.local
313
+ .env.development
314
+ .env.test
315
+ .env.production
316
+
317
+ # Configuration files with sensitive data
318
+ config.ini
319
+ config.yaml
320
+ config.yml
321
+ secrets.json
322
+ credentials.json
323
+
324
+ # ===== DATABASE FILES =====
325
+
326
+ # SQLite databases
327
+ *.db
328
+ *.sqlite
329
+ *.sqlite3
330
+
331
+ # ===== DOCKER =====
332
+
333
+ # Docker files
334
+ .dockerignore
335
+ docker-compose.override.yml
336
+
337
+ # ===== MONITORING AND METRICS =====
338
+
339
+ # Prometheus metrics
340
+ *.prom
341
+
342
+ # Grafana dashboards
343
+ grafana/
344
+
345
+ # ===== SECURITY =====
346
+
347
+ # SSH keys
348
+ *.pem
349
+ *.key
350
+ *.crt
351
+ *.csr
352
+ *.p12
353
+ *.pfx
354
+
355
+ # ===== BACKUP FILES =====
356
+
357
+ # Backup files
358
+ *.bak
359
+ *.backup
360
+ *.old
361
+ *.orig
362
+
363
+ # ===== PROJECT SPECIFIC =====
364
+
365
+ # MASX specific
366
+ masx_cache/
367
+ masx_logs/
368
+ masx_models/
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This is the main file for the OpenChat-3.5 LLM API.
3
+ -model_loader.py file to load the model and tokenizer.
4
+ """
5
+
6
+ import logging
7
+ from fastapi import FastAPI, HTTPException
8
+ from pydantic import BaseModel
9
+ from model_loader import tokenizer, model
10
+ import uvicorn
11
+ import torch
12
+
13
# Module-level logger; basicConfig here configures the root logger for the
# whole process at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Initialize FastAPI app
app = FastAPI(
    title="masx-openchat-llm",
    description="MASX AI service exposing the OpenChat-3.5 LLM as an inference endpoint",
    version="1.0.0"
)
22
+
23
# Request schema
class PromptRequest(BaseModel):
    """Request body for /chat: the prompt plus generation controls."""
    prompt: str
    # Upper bound on newly generated tokens (transformers max_new_tokens)
    max_tokens: int = 256
    temperature: float = 0.0  # Deterministic by default
28
+
29
# Response schema
class ChatResponse(BaseModel):
    """Response body for /chat: the generated text with the prompt stripped."""
    response: str
32
+
33
@app.get("/status")
async def status():
    """Report model health and the maximum context length it supports."""
    try:
        # max_position_embeddings is absent on some configs; fall back gracefully.
        context_limit = getattr(model.config, "max_position_embeddings", "unknown")
        payload = {
            "status": "ok",
            "model": model.name_or_path,
            "max_context_tokens": context_limit,
        }
    except Exception as e:
        logger.error("Status error: %s", str(e))
        raise HTTPException(status_code=500, detail=str(e))
    return payload
42
+
43
@app.post("/chat", response_model=ChatResponse)
async def chat(req: PromptRequest):
    """Run OpenChat-3.5 inference on the given prompt.

    Returns only the newly generated text: the prompt is stripped at the
    token level rather than by string slicing, which is robust to tokenizer
    decode normalization. Raises HTTP 500 on any inference failure.
    """
    try:
        logger.info("Received prompt: %s", req.prompt)

        # Dynamically choose device at request time
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info("Using device: %s", device)

        # Move model to device if not already there
        if next(model.parameters()).device != device:
            logger.info("Moving model to %s", device)
            model.to(device)

        # Tokenize input
        inputs = tokenizer(req.prompt, return_tensors="pt").to(device)

        # Generation parameters. Only pass sampling knobs when sampling is
        # enabled — transformers warns when temperature is set alongside
        # do_sample=False.
        gen_kwargs = {"max_new_tokens": req.max_tokens}
        if req.temperature > 0:
            gen_kwargs["do_sample"] = True
            gen_kwargs["temperature"] = req.temperature
        else:
            gen_kwargs["do_sample"] = False

        # Generate output
        outputs = model.generate(**inputs, **gen_kwargs)

        # BUGFIX: slicing the decoded string by len(req.prompt) is fragile —
        # decode can normalize whitespace/special characters, so the echoed
        # prompt text need not match the raw prompt byte-for-byte, leaving
        # prompt fragments in the response. Decode only the tokens generated
        # after the prompt instead.
        prompt_len = inputs["input_ids"].shape[1]
        new_tokens = outputs[0][prompt_len:]
        response_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        logger.info("Generated response: %s", response_text)
        return ChatResponse(response=response_text)

    except Exception as e:
        logger.error("Inference failed: %s", str(e), exc_info=True)
        raise HTTPException(status_code=500, detail="Inference failure: " + str(e))
81
+
82
if __name__ == "__main__":
    # Import-string form ("app:app") lets uvicorn manage the app lifecycle
    # (e.g. reload/workers) rather than passing the live object.
    uvicorn.run("app:app", host="0.0.0.0", port=8080, log_level="info")
model_loader.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# model_loader.py
#
# Loads the tokenizer and model exactly once at import time so that app.py
# (and any other importer) shares a single instance. NOTE: importing this
# module triggers a potentially large model download/load as a side effect.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch, os
from dotenv import load_dotenv

# Load environment variables from a local .env file, if present
load_dotenv()
# Hugging Face Hub model id; overridable via the MODEL_NAME env var
MODEL_NAME = os.getenv("MODEL_NAME", "openchat/openchat-3.5-1210")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load model initially on CPU; the serving code moves it to GPU later if one
# is available at request time
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to("cpu")
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core FastAPI dependencies
2
+ fastapi>=0.104.0
3
+ uvicorn[standard]>=0.24.0
4
+ pydantic>=2.5.0
5
+
6
+ # Machine Learning and Transformers
7
+ torch>=2.1.0
8
+ transformers>=4.36.0
9
+ accelerate>=0.25.0
10
+
11
+ # Additional utilities
12
+ numpy>=1.24.0
13
+ requests>=2.31.0
14
+
15
+ # Optional: For better performance and monitoring
16
+ # tensorboard>=2.15.0 # Uncomment if you need training monitoring
17
+ # wandb>=0.16.0 # Uncomment if you need experiment tracking