Diamanta committed on
Commit
cdbdba1
·
verified ·
1 Parent(s): 95ecbaa

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +45 -0
main.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time
import uuid
from typing import List

import uvicorn
from fastapi import FastAPI, Request
from llama_cpp import Llama
from pydantic import BaseModel
6
+
7
+ app = FastAPI()
8
+
9
+ # Load small model (e.g., Phi-2 or DeepSeek)
10
+ llm = Llama(model_path="phi-2.Q4_K_M.gguf", n_ctx=2048, n_threads=2)
11
+
12
+ class Message(BaseModel):
13
+ role: str
14
+ content: str
15
+
16
+ class ChatRequest(BaseModel):
17
+ model: str
18
+ messages: List[Message]
19
+ temperature: float = 0.7
20
+ max_tokens: int = 256
21
+ stream: bool = False
22
+
23
+ @app.post("/v1/chat/completions")
24
+ async def chat_completions(req: ChatRequest):
25
+ prompt = "\n".join([f"{m.role}: {m.content}" for m in req.messages]) + "\nassistant:"
26
+ output = llm(
27
+ prompt,
28
+ max_tokens=req.max_tokens,
29
+ temperature=req.temperature,
30
+ stop=["user:", "assistant:"]
31
+ )
32
+ text = output["choices"][0]["text"]
33
+ return {
34
+ "id": "chatcmpl-123",
35
+ "object": "chat.completion",
36
+ "choices": [{
37
+ "index": 0,
38
+ "message": {"role": "assistant", "content": text},
39
+ "finish_reason": "stop"
40
+ }],
41
+ "model": req.model
42
+ }
43
+
44
+ if __name__ == "__main__":
45
+ uvicorn.run(app, host="0.0.0.0", port=8000)