# NOTE(review): removed pasted console output ("Spaces:" / "Runtime error" x2)
# that preceded the code — it was not Python and broke the file at import time.
# Standard library
import os
from pathlib import Path

# Third-party
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Model: Falcon-rw-1b (decoder-only, not instruction-tuned)
# BUG FIX: the previous id was "TheBloke/Mistral-7B-Instruct-v0.2-GGUF" — a GGUF
# quantization repo that AutoTokenizer/AutoModelForCausalLM cannot load (GGUF is
# a llama.cpp format, not transformers weights), and it contradicted the comment
# above. Restored the Falcon model the comments and prompting style assume.
model_id = "tiiuae/falcon-rw-1b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Shared text-generation pipeline used by the /ask handler below.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Application object; route handlers below attach to it.
app = FastAPI()

# Expose files under /static.
# NOTE(review): directory="." serves the *current working directory*, which
# exposes this source file as well — consider pointing at a dedicated
# ./static folder.
app.mount("/static", StaticFiles(directory="."), name="static")
# Serve HTML UI
# BUG FIX: the handler had no route decorator, so FastAPI never registered it
# and the page was unreachable. "/" is the conventional path for the UI —
# confirm against how the app is linked/deployed.
@app.get("/", response_class=HTMLResponse)
async def serve_page():
    """Return the chat UI (index.html, read from the working directory)."""
    # Explicit encoding so the page renders the same regardless of locale.
    return HTMLResponse(Path("index.html").read_text(encoding="utf-8"))
# Chat API
# BUG FIX: the handler had no route decorator, so it was never registered as an
# endpoint. NOTE(review): path "/ask" inferred from the function name — confirm
# against the fetch URL in index.html.
@app.post("/ask")
async def ask_ai(request: Request):
    """Answer a question using the local text-generation pipeline.

    Expects a JSON body like ``{"question": "..."}``; always responds with
    ``{"answer": "..."}`` (errors are reported in-band rather than as HTTP 500s).
    """
    data = await request.json()
    question = data.get("question", "").strip()
    # Guard: empty/missing question — tell the user instead of prompting the model.
    if not question:
        return JSONResponse(content={"answer": "❗ Please enter a valid question."})
    try:
        # Falcon works with plain prompts (no chat template needed).
        output = pipe(question, max_new_tokens=256, return_full_text=False)[0]["generated_text"]
        return JSONResponse(content={"answer": output.strip()})
    except Exception as e:
        # Deliberate best-effort boundary: surface the failure to the UI.
        return JSONResponse(content={"answer": f"⚠️ Error: {str(e)}"})