File size: 1,790 Bytes
43a49a4
cdbdba1
ec4633f
43a49a4
45c840a
ec4633f
c0132d6
cdbdba1
 
 
ec4633f
cdbdba1
 
 
 
 
 
 
 
ec4633f
 
cdbdba1
45c840a
 
ec4633f
 
 
 
 
ddfcea6
45c840a
ec4633f
 
45c840a
2cdd46e
 
 
 
 
 
 
 
c0132d6
 
45c840a
 
 
 
c0132d6
ec4633f
c0132d6
 
ec4633f
 
cdbdba1
 
 
 
 
 
ec4633f
 
 
c0132d6
ec4633f
 
 
 
 
 
 
cdbdba1
ec4633f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List, Optional
from llama_cpp import Llama
import os
import time
import uuid

# FastAPI application instance; the route handlers below attach to it.
app = FastAPI()

# Global llama.cpp model handle; populated by load_model() at startup,
# None until then (chat() checks for this before generating).
llm = None

class Message(BaseModel):
    """One chat message in an OpenAI/Ollama-style conversation history."""
    # Speaker role; the prompt builder in chat() expects values such as
    # "system", "user" or "assistant" — TODO confirm callers send only these.
    role: str
    # The message text.
    content: str

class ChatRequest(BaseModel):
    """Request body for POST /chat."""
    # Model name echoed back in the response; the actual weights used are
    # whatever load_model() loaded, regardless of this field.
    model: str
    # Full conversation history, flattened into a prompt by chat().
    messages: List[Message]
    # Sampling temperature passed straight to llama.cpp.
    temperature: Optional[float] = 0.7
    # Generation cap passed straight to llama.cpp.
    max_tokens: Optional[int] = 256

@app.on_event("startup")
def load_model():
    """Load the llama.cpp model when the application starts.

    Reads the model file path from /tmp/model_path.txt and initialises the
    module-level ``llm`` handle.

    Raises:
        RuntimeError: if the path file is missing or empty, or the model
            file it points at does not exist.
    """
    global llm
    model_path_file = "/tmp/model_path.txt"
    # EAFP: open the file directly instead of exists()+open(), which is
    # racy (TOCTOU) — the file could vanish between the check and the read.
    try:
        with open(model_path_file, "r") as f:
            model_path = f.read().strip()
    except FileNotFoundError as err:
        raise RuntimeError(f"Model path file not found: {model_path_file}") from err
    # An empty path file would otherwise fall through to a confusing
    # "Model not found at path: " error.
    if not model_path:
        raise RuntimeError(f"Model path file is empty: {model_path_file}")
    if not os.path.exists(model_path):
        raise RuntimeError(f"Model not found at path: {model_path}")
    llm = Llama(model_path=model_path)

@app.get("/")
async def root():
    """Liveness endpoint: reports that the API process is up."""
    status = {"message": "API is running"}
    return status

@app.get("/api/tags")
async def api_tags():
    """Ollama-compatible tag listing; advertises no models.

    NOTE(review): returns a bare list — verify Ollama clients accept this
    rather than a ``{"models": [...]}`` envelope.
    """
    models = []
    return models

@app.post("/chat")
async def chat(req: ChatRequest):
    """Run one chat completion against the loaded llama.cpp model.

    Flattens the message history into a role-prefixed plain-text prompt,
    samples a completion, and returns an OpenAI-style response envelope.
    Returns an ``{"error": ...}`` dict (not an HTTP error) when the model
    has not been loaded yet.
    """
    global llm
    if llm is None:
        return {"error": "Model not initialized."}

    # Build prompt from messages, Ollama uses system/user/assistant roles.
    # join() avoids the quadratic behaviour of repeated string +=.
    prompt = "".join(f"{m.role}: {m.content}\n" for m in req.messages) + "assistant:"

    # NOTE(review): llm() is a blocking, CPU-bound call inside an async
    # handler, so it stalls the event loop for the whole generation;
    # consider loop.run_in_executor(...) if concurrent requests matter.
    output = llm(
        prompt,
        max_tokens=req.max_tokens,
        temperature=req.temperature,
        stop=["user:", "assistant:"]
    )
    # Guard against an empty choices list: .get's default only applies when
    # the key is missing entirely, so `output.get("choices", [{}])[0]`
    # would raise IndexError on `{"choices": []}`.
    choices = output.get("choices") or [{}]
    text = choices[0].get("text", "").strip()

    response = {
        "id": str(uuid.uuid4()),
        "model": req.model,
        "choices": [
            {
                "message": {"role": "assistant", "content": text},
                "finish_reason": "stop"
            }
        ]
    }
    return response