Diamanta committed
Commit c0132d6 · verified · 1 Parent(s): ec4633f

Update app.py

Files changed (1):
  1. app.py +7 -13
app.py CHANGED
@@ -4,12 +4,12 @@ from typing import List, Optional
 from llama_cpp import Llama
 import os
 import time
+import uuid
 
 app = FastAPI()
 
 llm = None
 
-# Request models
 class Message(BaseModel):
     role: str
     content: str
@@ -20,7 +20,6 @@ class ChatRequest(BaseModel):
     temperature: Optional[float] = 0.7
     max_tokens: Optional[int] = 256
 
-# Startup event to load the model
 @app.on_event("startup")
 def load_model():
     global llm
@@ -33,18 +32,16 @@ def load_model():
         raise RuntimeError(f"Model not found at path: {model_path}")
     llm = Llama(model_path=model_path)
 
-# LM Studio style chat completion endpoint
-@app.post("/chat/completions")
-async def chat_completions(req: ChatRequest):
+@app.post("/chat")
+async def chat(req: ChatRequest):
     global llm
     if llm is None:
         return {"error": "Model not initialized."}
 
-    # Construct prompt from messages
-    # LM Studio usually concatenates messages with role tags
+    # Build prompt from messages, Ollama uses system/user/assistant roles
     prompt = ""
-    for msg in req.messages:
-        prompt += f"{msg.role}: {msg.content}\n"
+    for m in req.messages:
+        prompt += f"{m.role}: {m.content}\n"
     prompt += "assistant:"
 
     output = llm(
@@ -56,13 +53,10 @@ async def chat_completions(req: ChatRequest):
     text = output.get("choices", [{}])[0].get("text", "").strip()
 
     response = {
-        "id": f"chatcmpl-{int(time.time())}",
-        "object": "chat.completion",
-        "created": int(time.time()),
+        "id": str(uuid.uuid4()),
         "model": req.model,
         "choices": [
             {
-                "index": 0,
                 "message": {"role": "assistant", "content": text},
                 "finish_reason": "stop"
             }
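
For reference, a minimal client sketch against the renamed endpoint. This is not part of the commit; it assumes the app is served locally (for example with "uvicorn app:app" on port 8000) and that the requests package is installed. The model name is a placeholder, since the server loads its own model at startup:

    # Hypothetical client for POST /chat; not part of this commit.
    import requests

    payload = {
        "model": "local-model",  # placeholder; only echoed back in the response
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ],
        "temperature": 0.7,   # matches the ChatRequest defaults
        "max_tokens": 256,
    }

    resp = requests.post("http://localhost:8000/chat", json=payload)
    print(resp.json())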
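Given the handler above, the prompt built for that payload would be "system: You are a helpful assistant.\nuser: Hello!\nassistant:", and the response body is flatter than the OpenAI-style shape the old endpoint returned. Sketched from the handler (the closing brackets fall outside the diff context):

    {
        "id": "<uuid4 string>",
        "model": "local-model",
        "choices": [
            {
                "message": {"role": "assistant", "content": "<generated text>"},
                "finish_reason": "stop"
            }
        ]
    }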