```python
from fastapi import FastAPI, Request
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

app = FastAPI()

# === Model Config ===
REPO_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
FILENAME = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
MODEL_DIR = "models"
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)

# === Download only if not already present ===
if not os.path.exists(MODEL_PATH):
    print(f"Downloading model {FILENAME} from Hugging Face...")
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        cache_dir=MODEL_DIR,
        local_dir=MODEL_DIR,
        local_dir_use_symlinks=False
    )
else:
    print(f"Model already exists at: {MODEL_PATH}")
    model_path = MODEL_PATH

# === Load LLM ===
llm = Llama(
    model_path=model_path,
    n_ctx=1024,
    n_threads=4  # Adjust for your CPU
)

# === Routes ===
@app.get("/")
def root():
    return {"message": "Mistral API is live!"}

@app.post("/generate")
async def generate(request: Request):
    data = await request.json()
    prompt = data.get("prompt", "")
    response = llm(prompt, max_tokens=128, temperature=0.7)
    return {"response": response["choices"][0]["text"]}
```