"""Command-line entry point for the BuffaloRAG pipeline.

Each flag parsed in ``main`` triggers one stage: creating the project
folders, scraping, building embeddings, or starting the API and Flask
frontend servers.
"""
import os
import argparse
import subprocess
import time

import requests

RAW_DATA_DIR = "data/raw"
# Scraping is skipped once more than this many files exist in RAW_DATA_DIR;
# the same number is handed to the scraper as ``max_pages``.
# NOTE(review): a run that writes exactly this many files will not satisfy
# the "more than" check on the next invocation - confirm that is intended.
SCRAPE_PAGE_LIMIT = 100

API_PORT = 8000
FLASK_PORT = 7860

# Single source of truth for the server commands, used both by the blocking
# runners (run_api / run_flask_frontend) and by the background launch in main.
_API_COMMAND = [
    "uvicorn", "buffalo_rag.api.main:app",
    "--host", "0.0.0.0", "--port", str(API_PORT), "--reload",
]
_FLASK_COMMAND = ["flask", "run", "--host=0.0.0.0", f"--port={FLASK_PORT}"]
_FLASK_APP = "buffalo_rag/frontend/flask_app.py"

_FRONTEND_DIR = "frontend"
_FRONTEND_BUILD_DIR = "frontend/build"
_STATIC_DIR = os.path.join("buffalo_rag", "api", "static")

# Seconds to wait for a terminated child process before killing it.
_SHUTDOWN_GRACE_SECONDS = 10


def _flask_env():
    """Return the current environment with FLASK_APP pointing at the frontend."""
    return {**os.environ, "FLASK_APP": _FLASK_APP}


def _count_raw_files():
    """Return the number of regular files in RAW_DATA_DIR (0 if it is missing)."""
    try:
        entries = os.listdir(RAW_DATA_DIR)
    except FileNotFoundError:
        return 0
    return sum(
        1 for name in entries
        if os.path.isfile(os.path.join(RAW_DATA_DIR, name))
    )


def run_scraper():
    """Run the web scraper to collect data.

    The scrape is skipped when RAW_DATA_DIR already holds more than
    SCRAPE_PAGE_LIMIT files. A missing directory counts as zero files, so
    the scraper still runs in that case.
    """
    from buffalo_rag.scraper.scraper import BuffaloScraper

    num_pages = _count_raw_files()
    if num_pages > SCRAPE_PAGE_LIMIT:
        print(f"{num_pages} scraped data files found under data/raw. Skipping data scraper stage.")
        return

    print("Starting web scraper...")
    scraper = BuffaloScraper()
    scraper.scrape(max_pages=SCRAPE_PAGE_LIMIT)
    print("Scraping completed!")


def build_embeddings():
    """Process documents and create embeddings."""
    from buffalo_rag.embeddings.chunker import DocumentChunker

    print("Creating document chunks and embeddings...")
    chunker = DocumentChunker()
    chunks = chunker.create_chunks()
    chunker.create_embeddings(chunks)
    print("Embeddings created!")


def run_api():
    """Run the FastAPI backend server (blocks until uvicorn exits)."""
    print("Starting API server...")
    subprocess.run(_API_COMMAND)


def run_flask_frontend():
    """Run the Flask frontend (blocks until the flask process exits)."""
    print("Starting Flask frontend...")
    subprocess.run(_FLASK_COMMAND, env=_flask_env())


def create_project_structure():
    """Create the project folder structure."""
    directories = [
        # Package directories
        "buffalo_rag",
        "buffalo_rag/scraper",
        "buffalo_rag/embeddings",
        "buffalo_rag/vector_store",
        "buffalo_rag/model",
        "buffalo_rag/api",
        "buffalo_rag/frontend",
        "buffalo_rag/utils",
        # Data directories
        "data",
        "data/raw",
        "data/processed",
        "data/embeddings",
    ]

    for directory in directories:
        os.makedirs(directory, exist_ok=True)

    print("Project structure created!")


def _copy_frontend_build():
    """Copy the React build output into the API's static directory.

    Returns:
        True when the build directory exists and ``cp`` exited with status 0,
        False otherwise.
    """
    os.makedirs(_STATIC_DIR, exist_ok=True)

    if not os.path.isdir(_FRONTEND_BUILD_DIR):
        print("Frontend build directory not found. Please build the frontend manually.")
        return False

    # The arguments are passed without a shell, so a "*" wildcard would reach
    # cp unexpanded; the trailing "/." copies the directory's contents
    # instead. Relies on a ``cp`` executable being available (macOS/Linux).
    result = subprocess.run(["cp", "-r", _FRONTEND_BUILD_DIR + "/.", _STATIC_DIR])
    return result.returncode == 0


def build_react_frontend():
    """Build the React frontend and copy the result to the static directory."""
    print("Building React frontend...")
    subprocess.run(["npm", "run", "build"], cwd=_FRONTEND_DIR)

    if _copy_frontend_build():
        print("React frontend built successfully!")


def setup_react_frontend():
    """Install dependencies and build the React frontend."""
    print("Setting up React frontend...")

    if not os.path.exists(_FRONTEND_DIR):
        # Run the setup script
        subprocess.run(["bash", "setup_frontend.sh"])

    subprocess.run(["npm", "install"], cwd=_FRONTEND_DIR)
    subprocess.run(["npm", "run", "build"], cwd=_FRONTEND_DIR)

    _copy_frontend_build()

    print("React frontend setup completed!")


def wait_for_server(url, timeout=30, interval=1):
    """Wait for a server at the given URL to be reachable.

    Args:
        url: Address to poll with HTTP GET requests.
        timeout: Maximum number of seconds to keep polling.
        interval: Seconds to sleep between attempts; also used as the
            per-request timeout.

    Returns:
        True as soon as a response with a status code below 500 arrives,
        False if the timeout elapses first.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    print(f"Waiting for server at {url} to be ready...")

    while time.monotonic() < deadline:
        try:
            response = requests.get(url, timeout=interval)
        except requests.exceptions.RequestException:
            # Server is not accepting requests yet; retry after the pause.
            pass
        else:
            # 5xx responses are treated as "not ready yet".
            if response.status_code < 500:
                print(f"Server at {url} is ready.")
                return True
        time.sleep(interval)

    print(f"Timeout waiting for server at {url}.")
    return False


def _stop_process(process):
    """Terminate a child process if it is still running, killing it if needed."""
    if process is None or process.poll() is not None:
        return
    process.terminate()
    try:
        process.wait(timeout=_SHUTDOWN_GRACE_SECONDS)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()


def _run_servers():
    """Start the API and Flask servers in the background until Enter is pressed."""
    api_process = subprocess.Popen(_API_COMMAND)
    flask_process = None
    try:
        if not wait_for_server(f"http://localhost:{API_PORT}/", timeout=60):
            return

        time.sleep(3)

        flask_process = subprocess.Popen(_FLASK_COMMAND, env=_flask_env())

        # Open the browser on the port the Flask frontend is actually
        # started on.
        import webbrowser
        webbrowser.open(f"http://localhost:{FLASK_PORT}")

        input("Press Enter to stop the server and exit...\n")
    finally:
        # Runs on normal exit and when input() raises (EOF, Ctrl+C), so the
        # child servers are not left running.
        _stop_process(flask_process)
        _stop_process(api_process)


def main():
    """Parse the command-line flags and run the requested pipeline stages."""
    parser = argparse.ArgumentParser(description="BuffaloRAG - AI Assistant for UB International Students")
    parser.add_argument("--setup", action="store_true", help="Create project structure")
    parser.add_argument("--flask-setup", action="store_true", help="Setup Flask frontend")
    parser.add_argument("--scrape", action="store_true", help="Run web scraper")
    parser.add_argument("--build", action="store_true", help="Build embeddings")
    parser.add_argument("--api", action="store_true", help="Run API server")
    parser.add_argument("--frontend", action="store_true", help="Run Flask frontend")
    parser.add_argument("--all", action="store_true", help="Run the complete pipeline")
    parser.add_argument("--run", action="store_true", help="Run frontend & backend servers")

    args = parser.parse_args()

    if args.setup or args.all:
        create_project_structure()

    if args.flask_setup or args.all or args.run:
        # Run the Flask setup script
        from setup_flask_templates import setup_flask_templates
        setup_flask_templates()

    if args.scrape or args.all:
        run_scraper()

    if args.build or args.all:
        build_embeddings()

    if args.all or args.run:
        # Both servers run as background processes.
        _run_servers()
    elif args.api:
        # API only: run it in the foreground.
        run_api()

    if args.frontend:
        run_flask_frontend()


if __name__ == "__main__":
    main()