Create main.py
main.py
ADDED
@@ -0,0 +1,48 @@
from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel
from typing import List
import uvicorn

app = FastAPI()

# Load a small GGUF model (e.g., Phi-2 or DeepSeek) via llama-cpp-python.
llm = Llama(model_path="phi-2.Q4_K_M.gguf", n_ctx=2048, n_threads=2)

class Message(BaseModel):
    role: str
    content: str

class ChatRequest(BaseModel):
    # Mirrors the shape of OpenAI's chat-completions request body.
    model: str
    messages: List[Message]
    temperature: float = 0.7
    max_tokens: int = 256
    stream: bool = False  # accepted for compatibility; streaming is not implemented

@app.post("/v1/chat/completions")
async def chat_completions(req: ChatRequest):
    # Flatten the chat history into a plain-text prompt and cue the assistant's turn.
    prompt = "\n".join(f"{m.role}: {m.content}" for m in req.messages) + "\nassistant:"
    output = llm(
        prompt,
        max_tokens=req.max_tokens,
        temperature=req.temperature,
        stop=["user:", "assistant:"],  # stop before the model starts another turn
    )
    text = output["choices"][0]["text"]
    # Wrap the completion in an OpenAI-style response envelope.
    return {
        "id": "chatcmpl-123",  # static placeholder id
        "object": "chat.completion",
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": text},
            "finish_reason": "stop",
        }],
        "model": req.model,
    }

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
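
For reference, a minimal client sketch, not part of this commit: it assumes the server above is running on localhost:8000 and sends a payload matching the ChatRequest schema defined in main.py. The requests dependency and the example messages are illustrative assumptions.

import requests

# Hypothetical smoke test for the endpoint above (assumes the server is running).
response = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "phi-2",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Say hello in one sentence."},
        ],
        "temperature": 0.7,
        "max_tokens": 64,
    },
)
print(response.json()["choices"][0]["message"]["content"])

Because the response envelope follows OpenAI's chat-completions shape, OpenAI-compatible clients pointed at http://localhost:8000/v1 should also be able to talk to this server.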