# JBAIP / app.py
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List, Optional
from llama_cpp import Llama
import os
import time
import uuid

app = FastAPI()

# Global llama.cpp model handle, loaded once at application startup.
llm = None

class Message(BaseModel):
    role: str
    content: str


class ChatRequest(BaseModel):
    model: str
    messages: List[Message]
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 256
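
# Illustrative request body for /chat, matching the models above (the model
# name shown is only a placeholder; the server does not validate it):
#   {
#     "model": "local-gguf",
#     "messages": [{"role": "user", "content": "Hello!"}],
#     "temperature": 0.7,
#     "max_tokens": 128
#   }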


@app.on_event("startup")
def load_model():
    global llm
    # The GGUF model path is expected in /tmp/model_path.txt, presumably
    # written by a separate setup step before the API starts.
    model_path_file = "/tmp/model_path.txt"
    if not os.path.exists(model_path_file):
        raise RuntimeError(f"Model path file not found: {model_path_file}")
    with open(model_path_file, "r") as f:
        model_path = f.read().strip()
    if not os.path.exists(model_path):
        raise RuntimeError(f"Model not found at path: {model_path}")
    llm = Llama(model_path=model_path)
@app.get("/")
async def root():
return {"message": "API is running"}
@app.get("/api/tags")
async def api_tags():
return []
@app.post("/chat")
async def chat(req: ChatRequest):
global llm
if llm is None:
return {"error": "Model not initialized."}
# Build prompt from messages, Ollama uses system/user/assistant roles
prompt = ""
for m in req.messages:
prompt += f"{m.role}: {m.content}\n"
prompt += "assistant:"
output = llm(
prompt,
max_tokens=req.max_tokens,
temperature=req.temperature,
stop=["user:", "assistant:"]
)
text = output.get("choices", [{}])[0].get("text", "").strip()
response = {
"id": str(uuid.uuid4()),
"model": req.model,
"choices": [
{
"message": {"role": "assistant", "content": text},
"finish_reason": "stop"
}
]
}
return response
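

# Example client call (a minimal sketch, not part of the app): it assumes the
# server is run locally with `uvicorn app:app --port 8000`; adjust the URL to
# match the actual deployment.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8000/chat",
#       json={
#           "model": "local-gguf",
#           "messages": [{"role": "user", "content": "Hello!"}],
#           "max_tokens": 128,
#       },
#   )
#   print(resp.json()["choices"][0]["message"]["content"])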