Diamanta committed on
Commit 45c840a · verified · 1 Parent(s): 7357a31

Update app.py

Files changed (1)
  1. app.py +18 -5
app.py CHANGED
@@ -2,14 +2,11 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import List
 from llama_cpp import Llama
+import os
 
 app = FastAPI()
 
-llm = Llama(
-    model_path="phi-2.Q4_K_M.gguf",
-    n_ctx=2048,
-    n_threads=2
-)
+llm = None  # Will initialize on startup
 
 class Message(BaseModel):
     role: str
@@ -21,8 +18,24 @@ class ChatRequest(BaseModel):
     temperature: float = 0.7
     max_tokens: int = 256
 
+@app.on_event("startup")
+def load_model():
+    global llm
+    model_path = "phi-2.Q4_K_M.gguf"
+    if not os.path.exists(model_path):
+        raise RuntimeError(f"Model not found: {model_path}")
+    llm = Llama(
+        model_path=model_path,
+        n_ctx=2048,
+        n_threads=2
+    )
+
 @app.post("/v1/chat/completions")
 async def chat_completions(req: ChatRequest):
+    global llm
+    if llm is None:
+        return {"error": "Model not initialized."}
+
     prompt = "\n".join([f"{m.role}: {m.content}" for m in req.messages]) + "\nassistant:"
     output = llm(
         prompt,
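
For reference, a minimal client call against the updated endpoint might look like the sketch below. It assumes the app is served locally on port 8000 (e.g. via uvicorn app:app --port 8000) and that ChatRequest exposes the messages, temperature, and max_tokens fields visible in this diff; the exact shape of the returned dict depends on the parts of app.py not shown in these hunks.

import requests

# Minimal smoke test for the /v1/chat/completions route changed above.
payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ],
    "temperature": 0.7,
    "max_tokens": 64,
}

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",  # assumed local address
    json=payload,
    timeout=120,
)
resp.raise_for_status()

# If the startup hook has not finished loading the model yet, the handler
# returns {"error": "Model not initialized."} instead of a completion.
print(resp.json())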