Spaces:

Diamanta
/

JBAIP

Sleeping

App Files Community

Diamanta committed on Jun 1

Commit

ec4633f

verified ·

1 Parent(s): 362b5db

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -18

app.py CHANGED Viewed

@@ -1,13 +1,15 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
-from typing import List
 from llama_cpp import Llama
 import os
 app = FastAPI()
-llm = None  # Will initialize on startup
 class Message(BaseModel):
     role: str
     content: str
@@ -15,38 +17,55 @@ class Message(BaseModel):
 class ChatRequest(BaseModel):
     model: str
     messages: List[Message]
-    temperature: float = 0.7
-    max_tokens: int = 256
 @app.on_event("startup")
 def load_model():
-    with open("/tmp/model_path.txt", "r") as f:
         model_path = f.read().strip()
     if not os.path.exists(model_path):
-        raise RuntimeError(f"Model not found: {model_path}")
-@app.post("/v1/chat/completions")
 async def chat_completions(req: ChatRequest):
     global llm
     if llm is None:
         return {"error": "Model not initialized."}
-    prompt = "\n".join([f"{m.role}: {m.content}" for m in req.messages]) + "\nassistant:"
     output = llm(
         prompt,
         max_tokens=req.max_tokens,
         temperature=req.temperature,
         stop=["user:", "assistant:"]
     )
-    text = output["choices"][0]["text"]
-    return {
-        "id": "chatcmpl-123",
         "object": "chat.completion",
-        "choices": [{
-            "index": 0,
-            "message": {"role": "assistant", "content": text},
-            "finish_reason": "stop"
-        }],
-        "model": req.model
     }

 from fastapi import FastAPI
 from pydantic import BaseModel
+from typing import List, Optional
 from llama_cpp import Llama
 import os
+import time
 app = FastAPI()
+llm = None
+# Request models
 class Message(BaseModel):
     role: str
     content: str
 class ChatRequest(BaseModel):
     model: str
     messages: List[Message]
+    temperature: Optional[float] = 0.7
+    max_tokens: Optional[int] = 256
+# Startup event to load the model
 @app.on_event("startup")
 def load_model():
+    global llm
+    model_path_file = "/tmp/model_path.txt"
+    if not os.path.exists(model_path_file):
+        raise RuntimeError(f"Model path file not found: {model_path_file}")
+    with open(model_path_file, "r") as f:
         model_path = f.read().strip()
     if not os.path.exists(model_path):
+        raise RuntimeError(f"Model not found at path: {model_path}")
+    llm = Llama(model_path=model_path)
+# LM Studio style chat completion endpoint
+@app.post("/chat/completions")
 async def chat_completions(req: ChatRequest):
     global llm
     if llm is None:
         return {"error": "Model not initialized."}
+    # Construct prompt from messages
+    # LM Studio usually concatenates messages with role tags
+    prompt = ""
+    for msg in req.messages:
+        prompt += f"{msg.role}: {msg.content}\n"
+    prompt += "assistant:"
     output = llm(
         prompt,
         max_tokens=req.max_tokens,
         temperature=req.temperature,
         stop=["user:", "assistant:"]
     )
+    text = output.get("choices", [{}])[0].get("text", "").strip()
+    response = {
+        "id": f"chatcmpl-{int(time.time())}",
         "object": "chat.completion",
+        "created": int(time.time()),
+        "model": req.model,
+        "choices": [
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": text},
+                "finish_reason": "stop"
+            }
+        ]
     }
+    return response