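"""Minimal OpenAI-compatible chat completions server backed by llama-cpp-python.

Exposes POST /v1/chat/completions so OpenAI-style client code can point at
this Space and get completions from a local phi-2 GGUF model.
"""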
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
from llama_cpp import Llama
app = FastAPI()
# Load the GGUF model once at startup; phi-2 quantized to Q4_K_M is small
# enough to run on modest CPU hardware.
llm = Llama(
    model_path="phi-2.Q4_K_M.gguf",
    n_ctx=2048,   # context window in tokens
    n_threads=2,  # CPU threads used for inference
)
class Message(BaseModel):
    role: str
    content: str
class ChatRequest(BaseModel):
    model: str
    messages: List[Message]
    temperature: float = 0.7
    max_tokens: int = 256
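# Example request body this schema accepts (the `model` value is only echoed
# back in the response, so any string works):
# {
#   "model": "phi-2",
#   "messages": [{"role": "user", "content": "Hello!"}],
#   "temperature": 0.7,
#   "max_tokens": 256
# }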
@app.post("/v1/chat/completions")
def chat_completions(req: ChatRequest):
    # Plain `def` rather than `async def`: llama.cpp inference blocks, so
    # FastAPI runs this handler in its threadpool instead of stalling the
    # event loop.
    # Flatten the chat history into a role-prefixed transcript and cue the
    # model to answer as the assistant.
    prompt = "\n".join(f"{m.role}: {m.content}" for m in req.messages) + "\nassistant:"
    output = llm(
        prompt,
        max_tokens=req.max_tokens,
        temperature=req.temperature,
        stop=["user:", "assistant:"],  # cut generation off at the next turn marker
    )
    text = output["choices"][0]["text"]
    # Minimal OpenAI-compatible response; the id is a static placeholder.
    return {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": text},
            "finish_reason": "stop",
        }],
        "model": req.model,
    }
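
# Local entrypoint sketch (assumption: the file is run directly; Hugging Face
# Spaces expects the server to listen on port 7860).
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example call once the server is up (hypothetical host/port):
#   curl http://localhost:7860/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "phi-2", "messages": [{"role": "user", "content": "Hi"}]}'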