from fastapi import FastAPI, Request
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

app = FastAPI()
# === Model Config ===
REPO_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
FILENAME = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
MODEL_DIR = "models"
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
# === Download the model only if it is not already present ===
if not os.path.exists(MODEL_PATH):
    print(f"Downloading model {FILENAME} from Hugging Face...")
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        local_dir=MODEL_DIR,
        local_dir_use_symlinks=False  # store the real file in models/, not a symlink into the HF cache
    )
else:
    print(f"Model already exists at: {MODEL_PATH}")
    model_path = MODEL_PATH
# === Load the LLM ===
llm = Llama(
    model_path=model_path,
    n_ctx=1024,   # context window size in tokens
    n_threads=4   # adjust to the number of CPU cores available
)
@app.get("/")
def root():
    return {"message": "Mistral API is live!"}
@app.post("/generate")
async def generate(request: Request):
    data = await request.json()
    prompt = data.get("prompt", "")
    response = llm(prompt, max_tokens=128, temperature=0.7)
    return {"response": response["choices"][0]["text"]}