Spaces:

Diamanta
/

JBAIP

Sleeping

JBAIP / app.py

Update app.py

45c840a verified 3 months ago

1.37 kB

	import os
	from typing import List

	from fastapi import FastAPI
	from fastapi.responses import JSONResponse
	from llama_cpp import Llama
	from pydantic import BaseModel

	# ASGI application object; the startup hook and the /v1/chat/completions
	# route defined in this file are registered on it.
	app = FastAPI()

	# Shared model handle. It stays None until load_model() assigns the loaded
	# Llama instance during application startup.
	llm = None # Will initialize on startup

	class Message(BaseModel):
	    # One turn of the conversation as supplied by the client.

	    # Speaker label; used as the "<role>: " prefix when the prompt is built.
	    role: str
	    # Text of the turn.
	    content: str

	class ChatRequest(BaseModel):
	    # JSON body accepted by POST /v1/chat/completions.

	    # Echoed back in the response; it does not choose which model file runs.
	    model: str
	    # Conversation turns, joined into one text prompt in list order.
	    messages: List[Message]
	    # Forwarded as `temperature` to the model call.
	    temperature: float = 0.7
	    # Forwarded as `max_tokens` to the model call.
	    max_tokens: int = 256

	@app.on_event("startup")
	def load_model():
	    """Load the GGUF model into the module-level ``llm`` at application startup.

	    The model file defaults to ``phi-2.Q4_K_M.gguf``, resolved against the
	    current working directory. Set the ``MODEL_PATH`` environment variable to
	    point at a different file; an empty value is treated as unset.

	    Raises:
	        RuntimeError: If the resolved path is not an existing regular file.
	    """
	    # NOTE(review): newer FastAPI releases deprecate on_event in favour of a
	    # lifespan handler. Migrating would change how `app` is constructed, so
	    # it is left for a separate change.
	    global llm
	    model_path = os.environ.get("MODEL_PATH") or "phi-2.Q4_K_M.gguf"
	    # isfile rather than exists: a directory with this name should fail here
	    # with a clear message instead of somewhere inside the model loader.
	    if not os.path.isfile(model_path):
	        raise RuntimeError(f"Model not found: {model_path}")
	    llm = Llama(
	        model_path=model_path,
	        n_ctx=2048,
	        n_threads=2,
	    )

	@app.post("/v1/chat/completions")
	async def chat_completions(req: ChatRequest):
	    """Answer a chat-completion request using the locally loaded model.

	    The messages are flattened into a plain-text transcript
	    ("<role>: <content>" per line) that ends with "assistant:", and the model
	    continues that transcript.

	    Args:
	        req: Parsed request body (model name, messages, sampling settings).

	    Returns:
	        A dict with "id", "object", "choices" and "model", plus "created" and
	        "usage" when the model output provides them. If the model has not
	        been loaded, a 503 JSON response whose body is
	        {"error": "Model not initialized."}.
	    """
	    if llm is None:
	        # Same body as before, but with an error status so clients do not
	        # treat it as a successful completion.
	        return JSONResponse(
	            status_code=503,
	            content={"error": "Model not initialized."},
	        )

	    # NOTE(review): llm(...) is a blocking call inside an async handler, so
	    # the event loop is stalled while tokens are generated. Moving it to a
	    # worker thread would presumably require serializing access to the
	    # shared model; confirm before changing.
	    prompt = "\n".join(f"{m.role}: {m.content}" for m in req.messages) + "\nassistant:"
	    output = llm(
	        prompt,
	        max_tokens=req.max_tokens,
	        temperature=req.temperature,
	        stop=["user:", "assistant:"],
	    )
	    choice = output["choices"][0]

	    # Assumes the completion id looks like "cmpl-<uuid>", so that prefixing
	    # "chat" yields a unique "chatcmpl-..." id; TODO confirm against the
	    # installed llama-cpp-python. Falls back to the previous constant.
	    upstream_id = output.get("id")
	    response = {
	        "id": f"chat{upstream_id}" if upstream_id else "chatcmpl-123",
	        "object": "chat.completion",
	        "choices": [{
	            "index": 0,
	            # The prompt ends with "assistant:", so the continuation usually
	            # starts with whitespace; strip it from the reply.
	            "message": {"role": "assistant", "content": choice["text"].strip()},
	            # Report the model's own reason (e.g. cut off by max_tokens)
	            # instead of always claiming a natural stop.
	            "finish_reason": choice.get("finish_reason") or "stop",
	        }],
	        "model": req.model,
	    }
	    # Pass through optional metadata only when the model output supplies it.
	    for key in ("created", "usage"):
	        if key in output:
	            response[key] = output[key]
	    return response