Update app.py
app.py CHANGED
@@ -1,4 +1,4 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import List, Optional
 from llama_cpp import Llama
@@ -10,6 +10,7 @@ app = FastAPI()
 
 llm = None
 
+# Models
 class Message(BaseModel):
     role: str
     content: str
@@ -20,6 +21,17 @@ class ChatRequest(BaseModel):
     temperature: Optional[float] = 0.7
     max_tokens: Optional[int] = 256
 
+class ModelInfo(BaseModel):
+    id: str
+    name: str
+    description: str
+
+# Load your models info here or dynamically from disk/config
+AVAILABLE_MODELS = [
+    ModelInfo(id="llama2", name="Llama 2", description="Meta Llama 2 model"),
+    # Add more models if you want
+]
+
 @app.on_event("startup")
 def load_model():
     global llm
@@ -32,13 +44,21 @@ def load_model():
         raise RuntimeError(f"Model not found at path: {model_path}")
     llm = Llama(model_path=model_path)
 
-@app.get("/")
-async def
-    return {"
+@app.get("/health")
+async def health_check():
+    return {"status": "ok"}
 
-@app.get("/
-async def
-
+@app.get("/models")
+async def list_models():
+    # Return available models info
+    return [model.dict() for model in AVAILABLE_MODELS]
+
+@app.get("/models/{model_id}")
+async def get_model(model_id: str):
+    for model in AVAILABLE_MODELS:
+        if model.id == model_id:
+            return model.dict()
+    raise HTTPException(status_code=404, detail="Model not found")
 
 @app.post("/chat")
 async def chat(req: ChatRequest):
@@ -46,7 +66,11 @@ async def chat(req: ChatRequest):
     if llm is None:
         return {"error": "Model not initialized."}
 
-    #
+    # Validate model - simple check
+    if req.model not in [m.id for m in AVAILABLE_MODELS]:
+        raise HTTPException(status_code=400, detail="Unsupported model")
+
+    # Construct prompt from messages
     prompt = ""
     for m in req.messages:
         prompt += f"{m.role}: {m.content}\n"
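
For reference, a minimal client sketch against the endpoints this commit adds (/health, /models, /models/{model_id}, and the stricter /chat validation). The base URL, the port, and the use of the requests library are assumptions for illustration, not part of the commit; the ChatRequest field names (model, messages, temperature, max_tokens) are taken from the diff.

# Hypothetical client for the endpoints added above.
# Assumes the app is running locally, e.g. `uvicorn app:app --port 8000`.
import requests

BASE_URL = "http://localhost:8000"  # placeholder address, adjust as needed

# New health probe: GET /health -> {"status": "ok"}
print(requests.get(f"{BASE_URL}/health").json())

# New model listing: GET /models returns the entries of AVAILABLE_MODELS
models = requests.get(f"{BASE_URL}/models").json()
print(models)  # e.g. [{"id": "llama2", "name": "Llama 2", "description": "Meta Llama 2 model"}]

# Chat request; an unknown model id now fails with HTTP 400 ("Unsupported model")
payload = {
    "model": models[0]["id"],
    "messages": [{"role": "user", "content": "Hello!"}],
    "temperature": 0.7,
    "max_tokens": 128,
}
resp = requests.post(f"{BASE_URL}/chat", json=payload)
print(resp.status_code, resp.json())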