from fastapi import FastAPI
from pydantic import BaseModel
from typing import List, Optional
from llama_cpp import Llama
import os
import uuid

app = FastAPI()

# Loaded by load_model(); stays None until the model is initialized.
llm = None

class Message(BaseModel):
    role: str
    content: str


class ChatRequest(BaseModel):
    model: str
    messages: List[Message]
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 256
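
# Illustrative request body for ChatRequest (the model name is a placeholder;
# this server echoes it back without validating it):
#
#   {
#     "model": "local-gguf",
#     "messages": [
#       {"role": "system", "content": "You are a helpful assistant."},
#       {"role": "user", "content": "Hello!"}
#     ],
#     "temperature": 0.7,
#     "max_tokens": 256
#   }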

def load_model():
    """Load the GGUF model whose path is stored in /tmp/model_path.txt."""
    global llm
    model_path_file = "/tmp/model_path.txt"
    if not os.path.exists(model_path_file):
        raise RuntimeError(f"Model path file not found: {model_path_file}")
    with open(model_path_file, "r") as f:
        model_path = f.read().strip()
    if not os.path.exists(model_path):
        raise RuntimeError(f"Model not found at path: {model_path}")
    llm = Llama(model_path=model_path)
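
# Assumed wiring (a minimal sketch): initialize the model once at application
# startup so the chat handler finds a ready llm.
@app.on_event("startup")
async def startup_event():
    load_model()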

# Route paths are assumed from the handler names; "/api/tags" mirrors
# Ollama's model-listing endpoint.
@app.get("/")
async def root():
    return {"message": "API is running"}


@app.get("/api/tags")
async def api_tags():
    return []


# Assumed chat route; the handler accepts an Ollama/OpenAI-style message list.
@app.post("/api/chat")
async def chat(req: ChatRequest):
    global llm
    if llm is None:
        return {"error": "Model not initialized."}
    # Flatten the conversation into a plain-text prompt with role prefixes,
    # e.g. "user: Hello\nassistant:".
    prompt = ""
    for m in req.messages:
        prompt += f"{m.role}: {m.content}\n"
    prompt += "assistant:"
    output = llm(
        prompt,
        max_tokens=req.max_tokens,
        temperature=req.temperature,
        stop=["user:", "assistant:"],
    )
    text = output.get("choices", [{}])[0].get("text", "").strip()
    response = {
        "id": str(uuid.uuid4()),
        "model": req.model,
        "choices": [
            {
                "message": {"role": "assistant", "content": text},
                "finish_reason": "stop",
            }
        ],
    }
    return response
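
# Minimal usage sketch. The port, module name ("app.py"), and model name are
# placeholders/assumptions, not values defined above.
#
#   uvicorn app:app --host 0.0.0.0 --port 8000
#
#   curl -X POST http://localhost:8000/api/chat \
#     -H "Content-Type: application/json" \
#     -d '{"model": "local-gguf", "messages": [{"role": "user", "content": "Hello!"}]}'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)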