import os

from fastapi import FastAPI, Request
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI()

# GGUF weights to serve: repo on the Hugging Face Hub and the local path they are stored at.
REPO_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
FILENAME = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
MODEL_DIR = "models"
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)

# Download the quantized weights on the first run; later runs reuse the local copy.
if not os.path.exists(MODEL_PATH):
    print(f"Downloading model {FILENAME} from Hugging Face...")
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        cache_dir=MODEL_DIR,
        local_dir=MODEL_DIR,
        local_dir_use_symlinks=False,
    )
else:
    print(f"Model already exists at: {MODEL_PATH}")
    model_path = MODEL_PATH

# Load the model once at startup so every request reuses the same llama.cpp instance.
# n_ctx is the context window in tokens; n_threads is the number of CPU threads used for inference.
llm = Llama(
    model_path=model_path,
    n_ctx=1024,
    n_threads=4,
)

# Health-check endpoint.
@app.get("/")
def root():
    return {"message": "Mistral API is live!"}

# Text generation endpoint: expects a JSON body like {"prompt": "..."} and returns one completion.
@app.post("/generate")
async def generate(request: Request):
    data = await request.json()
    prompt = data.get("prompt", "")
    # max_tokens caps the length of the reply; temperature controls sampling randomness.
    response = llm(prompt, max_tokens=128, temperature=0.7)
    return {"response": response["choices"][0]["text"]}
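
# Example request (assumes the server is running locally on port 8000; adjust host/port to your setup):
#   curl -X POST http://localhost:8000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Explain what a GGUF file is in one sentence."}'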
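
# Minimal sketch for launching the server directly; assumes uvicorn is installed
# (you could instead run `uvicorn <module_name>:app --host 0.0.0.0 --port 8000` from the shell).
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)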