diff --git a/Dockerfile b/Dockerfile
index 648a05564fc889b4745a43fff32c8e2589c35bfa..b6b0c8235ed3c55a9db411334c365918f7f50062 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,18 @@
 FROM python:3.9-slim
 WORKDIR /app
+
 COPY requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt
 RUN echo "Installed required Python packages."
-COPY frontend/ ./frontend
+COPY buffalo_rag/ /app/buffalo_rag
+COPY main.py /app/main.py
+COPY data/ /app/data
+COPY start.sh /app/start.sh
+
+RUN chmod +x /app/start.sh
+
+
 EXPOSE 7860
-CMD ["python", "frontend/flask_app.py"]
\ No newline at end of file
+EXPOSE 8000
+
+CMD ["/bin/bash", "/app/start.sh"]
diff --git a/buffalo_rag/api/main.py b/buffalo_rag/api/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d03effe1f7d4a7cf90522aba4351c11f09b58c7
--- /dev/null
+++ b/buffalo_rag/api/main.py
@@ -0,0 +1,211 @@
+"""FastAPI application for the BuffaloRAG assistant.
+
+Exposes the JSON endpoints under ``/api`` and serves the bundled frontend
+from the ``static`` directory located next to this module.
+"""
+import os
+import json
+import logging
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+
+from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends, Query
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+
+from buffalo_rag.scraper.scraper import BuffaloScraper
+from buffalo_rag.embeddings.chunker import DocumentChunker
+from buffalo_rag.vector_store.db import VectorStore
+from buffalo_rag.model.rag import BuffaloRAG
+
+logger = logging.getLogger(__name__)
+
+# Analytics log; the relative path is resolved against the working directory.
+QUERY_LOG_PATH = "data/query_log.jsonl"
+
+# Initialize FastAPI app
+app = FastAPI(
+    title="BuffaloRAG API",
+    description="API for BuffaloRAG - AI Assistant for International Students at University at Buffalo",
+    version="1.0.0"
+)
+
+# Add CORS middleware
+# NOTE(review): FastAPI's CORS docs say allow_origins=["*"] must not be
+# combined with allow_credentials=True. Left as-is so existing clients keep
+# working; list explicit origins once the real callers are known.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Initialize components
+vector_store = VectorStore()
+rag = BuffaloRAG(vector_store=vector_store)
+
+# Pydantic models
+class QueryRequest(BaseModel):
+    """Body of ``POST /api/ask``."""
+    query: str
+    k: int = 5
+    categories: Optional[List[str]] = None
+
+class QueryResponse(BaseModel):
+    """Payload returned by ``POST /api/ask``."""
+    query: str
+    response: str
+    sources: List[Dict[str, Any]]
+    timestamp: str
+
+class ScrapeRequest(BaseModel):
+    """Body of ``POST /api/scrape``."""
+    seed_url: str = "https://www.buffalo.edu/international-student-services.html"
+    max_pages: int = 100
+
+class ScrapeResponse(BaseModel):
+    """Acknowledgement sent once a background job has been queued."""
+    status: str
+    message: str
+
+# Background tasks
+def run_scraper(seed_url: str, max_pages: int):
+    """Scrape starting at *seed_url*, then rebuild the index.
+
+    Args:
+        seed_url: URL handed to ``BuffaloScraper``.
+        max_pages: Limit passed to ``BuffaloScraper.scrape``.
+    """
+    scraper = BuffaloScraper(seed_url=seed_url)
+    scraper.scrape(max_pages=max_pages)
+
+    # The follow-up steps are exactly the refresh job, so reuse it.
+    refresh_index()
+
+def refresh_index():
+    """Rebuild chunks and embeddings, then reload the store and RAG model."""
+    global vector_store, rag
+
+    chunker = DocumentChunker()
+    chunks = chunker.create_chunks()
+    chunker.create_embeddings(chunks)
+
+    # Construct both replacements before rebinding the globals, so a failure
+    # while building the model leaves the previous pair in place.
+    new_store = VectorStore()
+    new_rag = BuffaloRAG(vector_store=new_store)
+    vector_store, rag = new_store, new_rag
+
+def _log_query(record: Dict[str, Any]) -> None:
+    """Append *record* to the analytics log as one JSON line.
+
+    Best-effort: I/O and serialization errors are logged and swallowed so
+    that they cannot fail a request whose answer was already produced.
+    """
+    try:
+        os.makedirs(os.path.dirname(QUERY_LOG_PATH), exist_ok=True)
+        with open(QUERY_LOG_PATH, "a", encoding="utf-8") as f:
+            f.write(json.dumps(record) + "\n")
+    except (OSError, TypeError, ValueError):
+        logger.exception("Could not append to %s", QUERY_LOG_PATH)
+
+# Setup static files directory
+static_dir = os.path.join(os.path.dirname(__file__), "static")
+os.makedirs(static_dir, exist_ok=True)
+
+# Serve the frontend build's assets under /static
+# NOTE(review): asset-manifest.json references /static/js/...; with this mount
+# that URL maps to <static_dir>/js/... - confirm the build output matches.
+app.mount("/static", StaticFiles(directory=static_dir), name="static")
+
+def _resolve_static_file(path: str) -> Optional[str]:
+    """Return the real path of *path* if it is a file inside ``static_dir``.
+
+    Returns ``None`` for missing files and for anything that resolves outside
+    the directory (``..`` segments, absolute paths, escaping symlinks).
+    """
+    try:
+        root = os.path.realpath(static_dir)
+        candidate = os.path.realpath(os.path.join(root, path))
+        if os.path.commonpath([root, candidate]) != root:
+            return None
+    except ValueError:
+        # Embedded NUL byte, or the two paths are on different drives.
+        return None
+    return candidate if os.path.isfile(candidate) else None
+
+# API endpoints
+@app.post("/api/ask", response_model=QueryResponse)
+async def ask(request: QueryRequest):
+    """Ask a question to the RAG system.
+
+    Raises:
+        HTTPException: 500 carrying the error text when answering fails.
+    """
+    try:
+        response = rag.answer(
+            query=request.query,
+            k=request.k,
+            filter_categories=request.categories
+        )
+
+        # Add timestamp
+        response['timestamp'] = datetime.now().isoformat()
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+    # Log the query for analytics (best-effort, see _log_query)
+    _log_query(response)
+
+    return response
+
+@app.post("/api/scrape", response_model=ScrapeResponse)
+async def scrape(request: ScrapeRequest, background_tasks: BackgroundTasks):
+    """Queue a background scrape followed by a rebuild of the index."""
+    try:
+        background_tasks.add_task(run_scraper, request.seed_url, request.max_pages)
+        return {
+            "status": "success",
+            "message": f"Started scraping from {request.seed_url} (max {request.max_pages} pages)"
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+@app.post("/api/refresh-index", response_model=ScrapeResponse)
+async def refresh(background_tasks: BackgroundTasks):
+    """Queue a background rebuild of the vector index."""
+    try:
+        background_tasks.add_task(refresh_index)
+        return {
+            "status": "success",
+            "message": "Started refreshing the vector index"
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+# Add a route to serve the React app
+@app.get("/", include_in_schema=False)
+async def serve_frontend():
+    """Serve the frontend entry page."""
+    return FileResponse(os.path.join(static_dir, "index.html"))
+
+@app.get("/{path:path}", include_in_schema=False)
+async def serve_frontend_paths(path: str):
+    """Serve a static file, or ``index.html`` for client-side routes."""
+    # Only files that really live inside static_dir may be served; joining the
+    # raw URL path would let "../" or an absolute path read arbitrary files.
+    file_path = _resolve_static_file(path)
+    if file_path is not None:
+        return FileResponse(file_path)
+
+    # Otherwise, return index.html for client-side routing
+    return FileResponse(os.path.join(static_dir, "index.html"))
+
+# Run the API server
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("buffalo_rag.api.main:app", host="0.0.0.0", port=8000, reload=True)
\ No newline at end of file
diff --git a/buffalo_rag/api/static/asset-manifest.json b/buffalo_rag/api/static/asset-manifest.json
new file mode 100644
index 0000000000000000000000000000000000000000..200022cd90e26967130a37576e60b7b4c9a120e4
--- /dev/null
+++ b/buffalo_rag/api/static/asset-manifest.json
@@ -0,0 +1,10 @@
+{
+  "files": {
+    "main.js": "/static/js/main.55953463.js",
+    "index.html": "/index.html",
+    "main.55953463.js.map": "/static/js/main.55953463.js.map"
+  },
+  "entrypoints": [
+    "static/js/main.55953463.js"
+  ]
+}
\ No newline at end of file
diff --git a/buffalo_rag/api/static/index.html b/buffalo_rag/api/static/index.html
new file mode 100644
index 0000000000000000000000000000000000000000..e5375e1514f040a891ef02493cb610fc5d99c657
--- /dev/null
+++ b/buffalo_rag/api/static/index.html
@@ -0,0 +1 @@
+