File size: 1,186 Bytes
39290f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os

from fastapi import FastAPI, Request
from fastapi.concurrency import run_in_threadpool
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI()

# === Model Config ===
# Quantized (Q4_K_M) Mistral-7B-Instruct GGUF weights, fetched from the
# Hugging Face Hub on first run and cached on disk afterwards.
REPO_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
FILENAME = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
MODEL_DIR = "models"  # local directory holding the .gguf file
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)

# === Download only if not already present ===
if not os.path.exists(MODEL_PATH):
    print(f"Downloading model {FILENAME} from Hugging Face...")
    # NOTE(review): passing both cache_dir and local_dir looks redundant —
    # local_dir alone determines where the file lands; confirm against the
    # installed huggingface_hub version. local_dir_use_symlinks is
    # deprecated/ignored in newer huggingface_hub releases — verify.
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        cache_dir=MODEL_DIR,
        local_dir=MODEL_DIR,
        local_dir_use_symlinks=False
    )
else:
    print(f"Model already exists at: {MODEL_PATH}")
    model_path = MODEL_PATH

# === Load LLM ===
# Loaded once at import time; all requests share this single Llama instance.
llm = Llama(
    model_path=model_path,
    n_ctx=1024,  # context window size in tokens
    n_threads=4  # Adjust for your CPU
)

@app.get("/")
def root():
    """Health-check endpoint: confirms the API process is up."""
    status_message = "Mistral API is live!"
    return {"message": status_message}

@app.post("/generate")
async def generate(request: Request):
    """Generate a completion for the JSON body's ``prompt`` field.

    Expects a body like ``{"prompt": "<text>"}``.
    Returns ``{"response": "<generated text>"}`` on success, or
    ``{"error": "<reason>"}`` for a malformed body or empty prompt.
    """
    # A non-JSON body would otherwise raise and surface as a 500.
    try:
        data = await request.json()
    except Exception:
        return {"error": "Request body must be valid JSON."}

    prompt = data.get("prompt", "")
    if not isinstance(prompt, str) or not prompt.strip():
        return {"error": "Field 'prompt' must be a non-empty string."}

    # llama-cpp inference is CPU-bound and blocking; run it in the
    # threadpool so it does not stall the asyncio event loop for the
    # entire generation (several seconds per request).
    response = await run_in_threadpool(
        llm, prompt, max_tokens=128, temperature=0.7
    )
    return {"response": response["choices"][0]["text"]}