from fastapi import FastAPI, Request
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

app = FastAPI()

# === Model Config ===
REPO_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
FILENAME = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
MODEL_DIR = "models"
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)

# === Download only if not already present ===
if not os.path.exists(MODEL_PATH):
    print(f"Downloading model {FILENAME} from Hugging Face...")
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        local_dir=MODEL_DIR,             # place the file at models/<FILENAME>
        local_dir_use_symlinks=False,    # store a real copy, not a cache symlink
    )
else:
    print(f"Model already exists at: {MODEL_PATH}")
    model_path = MODEL_PATH

# === Load LLM ===
llm = Llama(
    model_path=model_path,
    n_ctx=1024,     # context window size in tokens
    n_threads=4,    # adjust for your CPU core count
)

@app.get("/")
def root():
    return {"message": "Mistral API is live!"}

@app.post("/generate")
async def generate(request: Request):
    data = await request.json()
    prompt = data.get("prompt", "")
    # Run a single completion; tune max_tokens/temperature as needed
    response = llm(prompt, max_tokens=128, temperature=0.7)
    return {"response": response["choices"][0]["text"]}
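
# === Example usage (a minimal sketch) ===
# Assumes this script is saved as main.py; the filename and port are
# illustrative, not prescribed by the code above.
#
# Start the server:
#   uvicorn main:app --host 127.0.0.1 --port 8000
#
# Send a prompt to the /generate endpoint:
#   curl -X POST http://127.0.0.1:8000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Explain quantization in one sentence."}'
#
# The response is JSON of the form {"response": "<generated text>"}.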