import os
from pathlib import Path

from fastapi import FastAPI, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Set up the Hugging Face cache directory
cache_dir = os.getenv("TRANSFORMERS_CACHE", "/cache")
os.makedirs(cache_dir, exist_ok=True)

# Optional token (needed for gated/private models such as Mistral)
hf_token = os.getenv("HF_TOKEN")

# Load the model and tokenizer
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, cache_dir=cache_dir)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    cache_dir=cache_dir,
    device_map="auto",       # place weights on GPU(s) if available, else CPU
    torch_dtype="auto",      # use the dtype stored in the checkpoint
)

# Build the generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,          # required for temperature/top_p to take effect
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    return_full_text=False,  # return only the completion, not the echoed prompt
)

# Initialize the FastAPI app
app = FastAPI()

# Serve static files such as script.js
app.mount("/static", StaticFiles(directory="."), name="static")


# Route: serve index.html at the root
@app.get("/", response_class=HTMLResponse)
async def serve_home():
    html_path = Path("index.html")
    return HTMLResponse(content=html_path.read_text(), status_code=200)


# Route: chat API
@app.post("/api")
async def ask_ai(request: Request):
    data = await request.json()
    question = data.get("question", "").strip()
    if not question:
        return JSONResponse(content={"answer": "❗ Please enter a valid question."})

    # Mistral-Instruct expects the [INST] ... [/INST] prompt format
    prompt = f"[INST] {question} [/INST]"
    try:
        # Generation is compute-bound; run it in a worker thread so it
        # doesn't block the event loop while the model is busy
        result = await run_in_threadpool(pipe, prompt)
        output = result[0]["generated_text"]
        return JSONResponse(content={"answer": output.strip()})
    except Exception as e:
        return JSONResponse(content={"answer": f"⚠️ Error: {e}"})


# Optional: serve script.js directly if the HTML doesn't use the /static path
@app.get("/script.js")
async def serve_script():
    return FileResponse("script.js", media_type="application/javascript")
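
# --- Optional local entry point: a minimal sketch for running the server
# directly. It assumes uvicorn is installed and that this file is named
# app.py (adjust "app" below to match your module name). You can equally
# run `uvicorn app:app --host 0.0.0.0 --port 8000` from the shell instead.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request once the server is up (question text is illustrative):
#   curl -X POST http://localhost:8000/api \
#        -H "Content-Type: application/json" \
#        -d '{"question": "What is FastAPI?"}'
# The endpoint responds with JSON of the shape: {"answer": "..."}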