from fastapi import FastAPI, HTTPException
from fastapi.responses import PlainTextResponse, JSONResponse
from pydantic import BaseModel
from typing import List, Optional
from llama_cpp import Llama
import os
import uuid

app = FastAPI()
llm = None


# Request/response models
class Message(BaseModel):
    role: str
    content: str


class ChatRequest(BaseModel):
    model: str
    messages: List[Message]
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 256


class ModelInfo(BaseModel):
    id: str
    name: str
    description: str


# Load your models info here or dynamically from disk/config
AVAILABLE_MODELS = [
    ModelInfo(id="llama2", name="Llama 2", description="Meta Llama 2 model"),
    # Add more models if you want
]


@app.on_event("startup")
def load_model():
    """Read the model path from /tmp/model_path.txt and load the GGUF model."""
    global llm
    model_path_file = "/tmp/model_path.txt"
    if not os.path.exists(model_path_file):
        raise RuntimeError(f"Model path file not found: {model_path_file}")
    with open(model_path_file, "r") as f:
        model_path = f.read().strip()
    if not os.path.exists(model_path):
        raise RuntimeError(f"Model not found at path: {model_path}")
    llm = Llama(model_path=model_path)


@app.get("/", response_class=PlainTextResponse)
async def root():
    return "Ollama is running"


@app.get("/health")
async def health_check():
    return {"status": "ok"}


@app.get("/api/tags")
async def api_tags():
    # Static placeholder payload mimicking Ollama's /api/tags response shape
    return JSONResponse(content={
        "models": [
            {
                "name": "phi-2",
                "modified_at": "2025-06-01T00:00:00Z",
                "size": 2147483648,
                "digest": "sha256:placeholderdigest",
                "details": {
                    "format": "gguf",
                    "family": "phi",
                    "families": ["phi"]
                }
            }
        ]
    })


@app.get("/models")
async def list_models():
    # Return available models info
    return [model.dict() for model in AVAILABLE_MODELS]


@app.get("/models/{model_id}")
async def get_model(model_id: str):
    for model in AVAILABLE_MODELS:
        if model.id == model_id:
            return model.dict()
    raise HTTPException(status_code=404, detail="Model not found")


@app.post("/chat")
async def chat(req: ChatRequest):
    if llm is None:
        raise HTTPException(status_code=503, detail="Model not initialized")

    # Validate model - simple check against the configured model list
    if req.model not in [m.id for m in AVAILABLE_MODELS]:
        raise HTTPException(status_code=400, detail="Unsupported model")

    # Construct a plain-text prompt from the chat messages
    prompt = ""
    for m in req.messages:
        prompt += f"{m.role}: {m.content}\n"
    prompt += "assistant:"

    output = llm(
        prompt,
        max_tokens=req.max_tokens,
        temperature=req.temperature,
        stop=["user:", "assistant:"]
    )
    text = output.get("choices", [{}])[0].get("text", "").strip()

    response = {
        "id": str(uuid.uuid4()),
        "model": req.model,
        "choices": [
            {
                "message": {"role": "assistant", "content": text},
                "finish_reason": "stop"
            }
        ]
    }
    return response
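

# --- Usage sketch (not part of the original service) -----------------------
# A minimal way to launch the app directly, assuming uvicorn is installed and
# that the Ollama-style default port 11434 is desired; both the module name
# and the port are assumptions, adjust as needed. An illustrative request
# against /chat follows in the comment below the runner.
if __name__ == "__main__":
    import uvicorn

    # Serve the FastAPI app; reload, workers, etc. are left at their defaults.
    uvicorn.run(app, host="0.0.0.0", port=11434)

# Example request (illustrative payload; "llama2" must match AVAILABLE_MODELS):
#
#   curl http://localhost:11434/chat \
#     -H "Content-Type: application/json" \
#     -d '{"model": "llama2", "messages": [{"role": "user", "content": "Hello"}]}'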