Diamanta committed on
Commit 9d9d39a · verified · 1 Parent(s): 2cdd46e

Update app.py

Files changed (1)
  1. app.py +32 -8
app.py CHANGED
@@ -1,4 +1,4 @@
- from fastapi import FastAPI
+ from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel
  from typing import List, Optional
  from llama_cpp import Llama
@@ -10,6 +10,7 @@ app = FastAPI()
 
  llm = None
 
+ # Models
  class Message(BaseModel):
      role: str
      content: str
@@ -20,6 +21,17 @@ class ChatRequest(BaseModel):
      temperature: Optional[float] = 0.7
      max_tokens: Optional[int] = 256
 
+ class ModelInfo(BaseModel):
+     id: str
+     name: str
+     description: str
+
+ # Load your models info here or dynamically from disk/config
+ AVAILABLE_MODELS = [
+     ModelInfo(id="llama2", name="Llama 2", description="Meta Llama 2 model"),
+     # Add more models if you want
+ ]
+
  @app.on_event("startup")
  def load_model():
      global llm
@@ -32,13 +44,21 @@ def load_model():
          raise RuntimeError(f"Model not found at path: {model_path}")
      llm = Llama(model_path=model_path)
 
- @app.get("/")
- async def root():
-     return {"message": "API is running"}
+ @app.get("/health")
+ async def health_check():
+     return {"status": "ok"}
 
- @app.get("/api/tags")
- async def api_tags():
-     return []
+ @app.get("/models")
+ async def list_models():
+     # Return available models info
+     return [model.dict() for model in AVAILABLE_MODELS]
+
+ @app.get("/models/{model_id}")
+ async def get_model(model_id: str):
+     for model in AVAILABLE_MODELS:
+         if model.id == model_id:
+             return model.dict()
+     raise HTTPException(status_code=404, detail="Model not found")
 
  @app.post("/chat")
  async def chat(req: ChatRequest):
@@ -46,7 +66,11 @@ async def chat(req: ChatRequest):
      if llm is None:
          return {"error": "Model not initialized."}
 
-     # Build prompt from messages, Ollama uses system/user/assistant roles
+     # Validate model - simple check
+     if req.model not in [m.id for m in AVAILABLE_MODELS]:
+         raise HTTPException(status_code=400, detail="Unsupported model")
+
+     # Construct prompt from messages
      prompt = ""
      for m in req.messages:
          prompt += f"{m.role}: {m.content}\n"
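
For context, a minimal sketch of how a client could exercise the endpoints added in this commit. It assumes the app is being served locally (e.g. with uvicorn on port 8000) and uses the requests library; the host, port, and requests dependency are assumptions, not part of the commit.

# Minimal client sketch for the endpoints added above (assumed setup:
# the server was started with something like `uvicorn app:app --port 8000`).
import requests

BASE_URL = "http://localhost:8000"  # assumed host/port

# New health-check endpoint
print(requests.get(f"{BASE_URL}/health").json())  # expected: {"status": "ok"}

# List the registered models, then fetch one by id
print(requests.get(f"{BASE_URL}/models").json())
print(requests.get(f"{BASE_URL}/models/llama2").json())

# Chat request; an unknown model id now yields HTTP 400 instead of silently running
payload = {
    "model": "llama2",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    "temperature": 0.7,
    "max_tokens": 256,
}
resp = requests.post(f"{BASE_URL}/chat", json=payload)
print(resp.status_code, resp.json())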