ateetvatan commited on
Commit
2289445
·
1 Parent(s): 994963c

openchat-llm init

Browse files
Files changed (4) hide show
  1. .gitignore +368 -0
  2. app.py +83 -0
  3. model_loader.py +14 -0
  4. requirements.txt +17 -0
.gitignore ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be added to the global gitignore or merged into this project gitignore. For a PyCharm
158
+ # project, it is recommended to include the following files:
159
+ # .idea/
160
+ # *.iml
161
+ # *.ipr
162
+ # *.iws
163
+ .idea/
164
+ *.iml
165
+ *.ipr
166
+ *.iws
167
+
168
+ # VS Code
169
+ .vscode/
170
+ *.code-workspace
171
+
172
+ # Sublime Text
173
+ *.sublime-project
174
+ *.sublime-workspace
175
+
176
+ # Vim
177
+ *.swp
178
+ *.swo
179
+ *~
180
+
181
+ # Emacs
182
+ *~
183
+ \#*\#
184
+ /.emacs.desktop
185
+ /.emacs.desktop.lock
186
+ *.elc
187
+ auto-save-list
188
+ tramp
189
+ .\#*
190
+
191
+ # macOS
192
+ .DS_Store
193
+ .AppleDouble
194
+ .LSOverride
195
+ Icon
196
+ ._*
197
+ .DocumentRevisions-V100
198
+ .fseventsd
199
+ .Spotlight-V100
200
+ .TemporaryItems
201
+ .Trashes
202
+ .VolumeIcon.icns
203
+ .com.apple.timemachine.donotpresent
204
+ .AppleDB
205
+ .AppleDesktop
206
+ Network Trash Folder
207
+ Temporary Items
208
+ .apdisk
209
+
210
+ # Windows
211
+ Thumbs.db
212
+ Thumbs.db:encryptable
213
+ ehthumbs.db
214
+ ehthumbs_vista.db
215
+ *.tmp
216
+ *.temp
217
+ Desktop.ini
218
+ $RECYCLE.BIN/
219
+ *.cab
220
+ *.msi
221
+ *.msix
222
+ *.msm
223
+ *.msp
224
+ *.lnk
225
+
226
+ # Linux
227
+ *~
228
+ .fuse_hidden*
229
+ .directory
230
+ .Trash-*
231
+ .nfs*
232
+
233
+ # ===== MACHINE LEARNING SPECIFIC =====
234
+
235
+ # Model files and checkpoints
236
+ *.pth
237
+ *.pt
238
+ *.ckpt
239
+ *.safetensors
240
+ *.bin
241
+ *.h5
242
+ *.hdf5
243
+ *.pb
244
+ *.onnx
245
+ *.tflite
246
+ *.tfl
247
+ *.lite
248
+ *.mlmodel
249
+ *.mlpackage
250
+
251
+ # Model directories
252
+ models/
253
+ checkpoints/
254
+ saved_models/
255
+ pretrained_models/
256
+ model_cache/
257
+ transformers_cache/
258
+ huggingface_cache/
259
+
260
+ # Hugging Face specific
261
+ .cache/
262
+ .huggingface/
263
+
264
+ # TensorBoard logs
265
+ runs/
266
+ logs/
267
+ tensorboard_logs/
268
+ *.tfevents.*
269
+
270
+ # Weights & Biases
271
+ wandb/
272
+
273
+ # MLflow
274
+ mlruns/
275
+ mlflow.db
276
+
277
+ # Jupyter notebook checkpoints
278
+ .ipynb_checkpoints/
279
+
280
+ # ===== FASTAPI SPECIFIC =====
281
+
282
+ # FastAPI generated files
283
+ .openapi.json
284
+ openapi.json
285
+
286
+ # API documentation builds
287
+ docs/_build/
288
+ site/
289
+
290
+ # ===== LOGS AND TEMPORARY FILES =====
291
+
292
+ # Application logs
293
+ *.log
294
+ logs/
295
+ log/
296
+
297
+ # Temporary files
298
+ tmp/
299
+ temp/
300
+ *.tmp
301
+ *.temp
302
+
303
+ # Cache directories
304
+ .cache/
305
+ cache/
306
+ __pycache__/
307
+
308
+ # ===== CONFIGURATION FILES =====
309
+
310
+ # Environment variables
311
+ .env
312
+ .env.local
313
+ .env.development
314
+ .env.test
315
+ .env.production
316
+
317
+ # Configuration files with sensitive data
318
+ config.ini
319
+ config.yaml
320
+ config.yml
321
+ secrets.json
322
+ credentials.json
323
+
324
+ # ===== DATABASE FILES =====
325
+
326
+ # SQLite databases
327
+ *.db
328
+ *.sqlite
329
+ *.sqlite3
330
+
331
+ # ===== DOCKER =====
332
+
333
+ # Docker files
334
+ .dockerignore
335
+ docker-compose.override.yml
336
+
337
+ # ===== MONITORING AND METRICS =====
338
+
339
+ # Prometheus metrics
340
+ *.prom
341
+
342
+ # Grafana dashboards
343
+ grafana/
344
+
345
+ # ===== SECURITY =====
346
+
347
+ # SSH keys
348
+ *.pem
349
+ *.key
350
+ *.crt
351
+ *.csr
352
+ *.p12
353
+ *.pfx
354
+
355
+ # ===== BACKUP FILES =====
356
+
357
+ # Backup files
358
+ *.bak
359
+ *.backup
360
+ *.old
361
+ *.orig
362
+
363
+ # ===== PROJECT SPECIFIC =====
364
+
365
+ # MASX specific
366
+ masx_cache/
367
+ masx_logs/
368
+ masx_models/
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This is the main file for the OpenChat-3.5 LLM API.
3
+ -model_loader.py file to load the model and tokenizer.
4
+ """
5
+
6
+ import logging
7
+ from fastapi import FastAPI, HTTPException
8
+ from pydantic import BaseModel
9
+ from model_loader import tokenizer, model
10
+ import uvicorn
11
+ import torch
12
+
13
# Module-level logger; basicConfig here configures the root logger for the
# whole process at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Initialize FastAPI app
app = FastAPI(
    title="masx-openchat-llm",
    description="MASX AI service exposing the OpenChat-3.5 LLM as an inference endpoint",
    version="1.0.0"
)
22
+
23
# Request schema
class PromptRequest(BaseModel):
    """Request body for /chat: the prompt plus generation controls."""
    prompt: str
    # Upper bound on newly generated tokens (transformers max_new_tokens)
    max_tokens: int = 256
    temperature: float = 0.0  # Deterministic by default
28
+
29
# Response schema
class ChatResponse(BaseModel):
    """Response body for /chat: the generated text with the prompt stripped."""
    response: str
32
+
33
@app.get("/status")
async def status():
    """Report model health and the maximum context length it supports."""
    try:
        # max_position_embeddings is absent on some configs; fall back gracefully.
        context_limit = getattr(model.config, "max_position_embeddings", "unknown")
        payload = {
            "status": "ok",
            "model": model.name_or_path,
            "max_context_tokens": context_limit,
        }
    except Exception as e:
        logger.error("Status error: %s", str(e))
        raise HTTPException(status_code=500, detail=str(e))
    return payload
42
+
43
@app.post("/chat", response_model=ChatResponse)
async def chat(req: PromptRequest):
    """Run OpenChat-3.5 inference on the given prompt.

    Returns only the newly generated text: the prompt is stripped at the
    token level rather than by string slicing, which is robust to tokenizer
    decode normalization. Raises HTTP 500 on any inference failure.
    """
    try:
        logger.info("Received prompt: %s", req.prompt)

        # Dynamically choose device at request time
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info("Using device: %s", device)

        # Move model to device if not already there
        if next(model.parameters()).device != device:
            logger.info("Moving model to %s", device)
            model.to(device)

        # Tokenize input
        inputs = tokenizer(req.prompt, return_tensors="pt").to(device)

        # Generation parameters. Only pass sampling knobs when sampling is
        # enabled — transformers warns when temperature is set alongside
        # do_sample=False.
        gen_kwargs = {"max_new_tokens": req.max_tokens}
        if req.temperature > 0:
            gen_kwargs["do_sample"] = True
            gen_kwargs["temperature"] = req.temperature
        else:
            gen_kwargs["do_sample"] = False

        # Generate output
        outputs = model.generate(**inputs, **gen_kwargs)

        # BUGFIX: slicing the decoded string by len(req.prompt) is fragile —
        # decode can normalize whitespace/special characters, so the echoed
        # prompt text need not match the raw prompt byte-for-byte, leaving
        # prompt fragments in the response. Decode only the tokens generated
        # after the prompt instead.
        prompt_len = inputs["input_ids"].shape[1]
        new_tokens = outputs[0][prompt_len:]
        response_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        logger.info("Generated response: %s", response_text)
        return ChatResponse(response=response_text)

    except Exception as e:
        logger.error("Inference failed: %s", str(e), exc_info=True)
        raise HTTPException(status_code=500, detail="Inference failure: " + str(e))
81
+
82
if __name__ == "__main__":
    # Import-string form ("app:app") lets uvicorn manage the app lifecycle
    # (e.g. reload/workers) rather than passing the live object.
    uvicorn.run("app:app", host="0.0.0.0", port=8080, log_level="info")
model_loader.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# model_loader.py
#
# Loads the tokenizer and model exactly once at import time so that app.py
# (and any other importer) shares a single instance. NOTE: importing this
# module triggers a potentially large model download/load as a side effect.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch, os
from dotenv import load_dotenv

# Load environment variables from a local .env file, if present
load_dotenv()
# Hugging Face Hub model id; overridable via the MODEL_NAME env var
MODEL_NAME = os.getenv("MODEL_NAME", "openchat/openchat-3.5-1210")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load model initially on CPU; the serving code moves it to GPU later if one
# is available at request time
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to("cpu")
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core FastAPI dependencies
2
+ fastapi>=0.104.0
3
+ uvicorn[standard]>=0.24.0
4
+ pydantic>=2.5.0
5
+
6
+ # Machine Learning and Transformers
7
+ torch>=2.1.0
8
+ transformers>=4.36.0
9
+ accelerate>=0.25.0
10
+
11
+ # Additional utilities
12
+ numpy>=1.24.0
13
+ requests>=2.31.0
14
+
15
+ # Optional: For better performance and monitoring
16
+ # tensorboard>=2.15.0 # Uncomment if you need training monitoring
17
+ # wandb>=0.16.0 # Uncomment if you need experiment tracking