Diamanta committed
Commit ec4633f · verified · 1 Parent(s): 362b5db

Update app.py

Files changed (1):
  app.py  +37 -18
app.py CHANGED
@@ -1,13 +1,15 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
-from typing import List
+from typing import List, Optional
 from llama_cpp import Llama
 import os
+import time
 
 app = FastAPI()
 
-llm = None  # Will initialize on startup
+llm = None
 
+# Request models
 class Message(BaseModel):
     role: str
     content: str
@@ -15,38 +17,55 @@ class Message(BaseModel):
 class ChatRequest(BaseModel):
     model: str
     messages: List[Message]
-    temperature: float = 0.7
-    max_tokens: int = 256
+    temperature: Optional[float] = 0.7
+    max_tokens: Optional[int] = 256
 
+# Startup event to load the model
 @app.on_event("startup")
 def load_model():
-    with open("/tmp/model_path.txt", "r") as f:
+    global llm
+    model_path_file = "/tmp/model_path.txt"
+    if not os.path.exists(model_path_file):
+        raise RuntimeError(f"Model path file not found: {model_path_file}")
+    with open(model_path_file, "r") as f:
         model_path = f.read().strip()
-
     if not os.path.exists(model_path):
-        raise RuntimeError(f"Model not found: {model_path}")
+        raise RuntimeError(f"Model not found at path: {model_path}")
+    llm = Llama(model_path=model_path)
 
-@app.post("/v1/chat/completions")
+# LM Studio style chat completion endpoint
+@app.post("/chat/completions")
 async def chat_completions(req: ChatRequest):
     global llm
     if llm is None:
         return {"error": "Model not initialized."}
 
-    prompt = "\n".join([f"{m.role}: {m.content}" for m in req.messages]) + "\nassistant:"
+    # Construct prompt from messages
+    # LM Studio usually concatenates messages with role tags
+    prompt = ""
+    for msg in req.messages:
+        prompt += f"{msg.role}: {msg.content}\n"
+    prompt += "assistant:"
+
    output = llm(
         prompt,
         max_tokens=req.max_tokens,
         temperature=req.temperature,
         stop=["user:", "assistant:"]
     )
-    text = output["choices"][0]["text"]
-    return {
-        "id": "chatcmpl-123",
+    text = output.get("choices", [{}])[0].get("text", "").strip()
+
+    response = {
+        "id": f"chatcmpl-{int(time.time())}",
         "object": "chat.completion",
-        "choices": [{
-            "index": 0,
-            "message": {"role": "assistant", "content": text},
-            "finish_reason": "stop"
-        }],
-        "model": req.model
+        "created": int(time.time()),
+        "model": req.model,
+        "choices": [
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": text},
+                "finish_reason": "stop"
+            }
+        ]
     }
+    return response
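
After this change, load_model() reads the GGUF model path from /tmp/model_path.txt at startup and the chat endpoint is served at /chat/completions. The sketch below shows one way to prepare that file and launch the app; it is not part of the commit, and the model path, host, and port are placeholder assumptions.

# serve.py, a hypothetical helper script (not in the repository).
# Writes the model location that load_model() expects, then starts the
# FastAPI app from app.py with uvicorn on an assumed host and port.
import uvicorn

MODEL_PATH = "/models/example-model.Q4_K_M.gguf"  # placeholder GGUF path, adjust to your setup

if __name__ == "__main__":
    # load_model() reads this file when the app starts up
    with open("/tmp/model_path.txt", "w") as f:
        f.write(MODEL_PATH)
    uvicorn.run("app:app", host="127.0.0.1", port=8000)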
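
A minimal client sketch for exercising the updated endpoint, assuming the server is reachable on localhost:8000; the model name "local-model" is illustrative, since the server only echoes it back in the response.

# Hypothetical client example (assumes the server from this commit is running
# on localhost:8000; "local-model" is a placeholder model name).
import requests

payload = {
    "model": "local-model",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ],
    "temperature": 0.7,   # optional, server default is 0.7
    "max_tokens": 64,     # optional, server default is 256
}

resp = requests.post("http://localhost:8000/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()

# The response mirrors the chat.completion shape built in chat_completions():
# id, object, created, model, and a single choice holding the assistant message.
print(data["choices"][0]["message"]["content"])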