from pathlib import Path

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Model: Falcon-rw-1b (decoder-only, not instruction-tuned)
model_id = "tiiuae/falcon-rw-1b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Init FastAPI app
app = FastAPI()

# Mount static directory (serves files from the current working directory)
app.mount("/static", StaticFiles(directory="."), name="static")


# Serve HTML UI
@app.get("/", response_class=HTMLResponse)
async def serve_page():
    return HTMLResponse(Path("index.html").read_text())


# Chat API
@app.post("/api")
async def ask_ai(request: Request):
    data = await request.json()
    question = data.get("question", "").strip()

    if not question:
        return JSONResponse(content={"answer": "❗ Please enter a valid question."})

    try:
        # Falcon works with plain prompts (no chat template needed)
        output = pipe(question, max_new_tokens=256, return_full_text=False)[0]["generated_text"]
        return JSONResponse(content={"answer": output.strip()})
    except Exception as e:
        return JSONResponse(content={"answer": f"⚠️ Error: {str(e)}"})
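
# A minimal sketch for running and smoke-testing the app locally; the file
# name "app.py" and the httpx client below are assumptions, not part of the
# original code. Start the server with:
#   uvicorn app:app --reload
# then POST a question to the /api endpoint, e.g.:
#   import httpx
#   resp = httpx.post("http://127.0.0.1:8000/api", json={"question": "What is FastAPI?"})
#   print(resp.json()["answer"])
if __name__ == "__main__":
    # Convenience entry point (assumes uvicorn is installed).
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)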