"""FastAPI service that answers semantic queries over PDF chunks in Weaviate.

A query string is embedded with a SentenceTransformer model and the resulting
vector is used for a nearest-neighbour search in the ``PdfChunk`` collection.
"""

import logging
import os
from contextlib import asynccontextmanager
from uuid import uuid4

from fastapi import FastAPI
from fastapi.responses import FileResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field
from sentence_transformers import SentenceTransformer
import weaviate
from weaviate.classes.init import Auth

logger = logging.getLogger(__name__)

WEAVIATE_URL = os.getenv(
    "WEAVIATE_URL",
    "https://hrdhwtqlrkqmc8sfizwvpq.c0.asia-southeast1.gcp.weaviate.cloud",
)

# The API key is read from the environment only: credentials must never be
# committed to source control. NOTE(review): a key was previously embedded
# here as the default value; treat it as compromised and rotate it.
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
if not WEAVIATE_API_KEY:
    raise RuntimeError("WEAVIATE_API_KEY environment variable must be set")

CLASS_NAME = "PdfChunk"


@asynccontextmanager
async def lifespan(_app: FastAPI):
    """Close the Weaviate connection when the application shuts down."""
    try:
        yield
    finally:
        # ``client`` is created at import time further down; it is resolved
        # here at shutdown, by which point it is guaranteed to exist.
        client.close()


app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

app.mount("/static", StaticFiles(directory="static"), name="static")

# Both are created once at import time and shared by every request.
model = SentenceTransformer("all-MiniLM-L6-v2")

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
)


class QueryRequest(BaseModel):
    """Request body for the query endpoint."""

    # Text to embed and search for; must be non-empty.
    text: str = Field(..., min_length=1)
    # Maximum number of chunks to return (1-100).
    top_k: int = Field(default=7, ge=1, le=100)


@app.get("/")
def root():
    """Return a short status message describing how to use the API."""
    return {"message": "PDF RAG API is running. Use POST / to query."}


@app.get("/openapi.yaml")
def serve_openapi_yaml():
    """Serve the OpenAPI description stored at ``static/openapi.yaml``."""
    return FileResponse("static/openapi.yaml", media_type="text/yaml")


@app.post("/")
def query_weaviate(q: QueryRequest):
    """Embed ``q.text`` and return the nearest chunks from Weaviate.

    On success the response is ``{"query": ..., "results": [...]}`` where each
    result carries the ``text`` and ``source`` properties of a matched object
    (empty string when a property is absent).

    On failure the response keeps the ``{"error": ...}`` body but is sent with
    HTTP status 500, so clients and monitoring can tell failures from
    successful queries; the full traceback is written to the log.
    """
    try:
        query_vector = model.encode(q.text).tolist()
        collection = client.collections.get(CLASS_NAME)
        results = collection.query.near_vector(
            near_vector=query_vector,
            limit=q.top_k,
        )
    except Exception as e:
        # Top-level request boundary: record the failure and report it with
        # an error status instead of a 200 response.
        logger.exception("Weaviate query failed")
        return JSONResponse(status_code=500, content={"error": str(e)})

    return {
        "query": q.text,
        "results": [
            {
                "text": obj.properties.get("text", ""),
                "source": obj.properties.get("source", ""),
            }
            for obj in results.objects
        ],
    }