ubva / main.py
sivakum4's picture
Feat: HF Inference API
9108a9a
import os
import argparse
import subprocess
import time
import requests
def run_scraper():
    """Run the web scraper unless enough scraped files already exist.

    Counts the regular files directly under ``data/raw``. When more than 100
    are present the scraping stage is skipped; in every other case (including
    a missing ``data/raw`` directory) ``BuffaloScraper.scrape(max_pages=100)``
    is executed.
    """
    raw_dir = "data/raw"

    # A missing directory simply means nothing has been scraped yet.
    num_pages = 0
    if os.path.isdir(raw_dir):
        num_pages = sum(
            1
            for name in os.listdir(raw_dir)
            if os.path.isfile(os.path.join(raw_dir, name))
        )

    # NOTE(review): the skip threshold is "> 100" while the scrape below is
    # capped at max_pages=100 -- confirm how many files the scraper writes,
    # otherwise a completed scrape may never satisfy the skip condition.
    if num_pages > 100:
        print(f"{num_pages} scraped data files found under data/raw. Skipping data scraper stage.")
        return

    # Imported lazily so the skip path neither pays for nor requires the
    # scraper package and its dependencies.
    from buffalo_rag.scraper.scraper import BuffaloScraper

    print("Starting web scraper...")
    scraper = BuffaloScraper()
    scraper.scrape(max_pages=100)
    print("Scraping completed!")
def build_embeddings():
    """Chunk the collected documents and create embeddings for those chunks."""
    # Local import: the embedding stack is only needed for this stage.
    from buffalo_rag.embeddings.chunker import DocumentChunker

    print("Creating document chunks and embeddings...")
    document_chunker = DocumentChunker()
    document_chunks = document_chunker.create_chunks()
    document_chunker.create_embeddings(document_chunks)
    print("Embeddings created!")
def run_api():
    """Launch the FastAPI backend through uvicorn and wait for it to exit."""
    print("Starting API server...")
    uvicorn_command = [
        "uvicorn",
        "buffalo_rag.api.main:app",
        "--host", "0.0.0.0",
        "--port", "8000",
        "--reload",
    ]
    subprocess.run(uvicorn_command)
def run_flask_frontend():
    """Launch the Flask frontend on port 7860 and wait for it to exit."""
    print("Starting Flask frontend...")
    # Inherit the current environment and point the Flask CLI at the app module.
    flask_env = dict(os.environ)
    flask_env["FLASK_APP"] = "buffalo_rag/frontend/flask_app.py"
    subprocess.run(
        ["flask", "run", "--host=0.0.0.0", "--port=7860"],
        env=flask_env,
    )
def create_project_structure():
    """Create the package and data directory tree relative to the working directory.

    Directories that already exist are left untouched.
    """
    package_subdirs = (
        "scraper",
        "embeddings",
        "vector_store",
        "model",
        "api",
        "frontend",
        "utils",
    )
    data_subdirs = ("raw", "processed", "embeddings")

    # Parents are listed before their children, mirroring the on-disk layout.
    layout = ["buffalo_rag"]
    layout += [f"buffalo_rag/{sub}" for sub in package_subdirs]
    layout.append("data")
    layout += [f"data/{sub}" for sub in data_subdirs]

    for path in layout:
        os.makedirs(path, exist_ok=True)

    print("Project structure created!")
def build_react_frontend():
    """Build the React frontend and copy the bundle into the API static directory.

    Runs ``npm run build`` inside ``frontend``. When the build succeeds, the
    contents of ``frontend/build`` are copied into ``buffalo_rag/api/static``
    (created if missing). A failing step is reported and stops the function
    instead of being followed by a success message.
    """
    print("Building React frontend...")
    build_result = subprocess.run(["npm", "run", "build"], cwd="frontend")
    if build_result.returncode != 0:
        print("React frontend build failed.")
        return

    static_dir = os.path.join("buffalo_rag", "api", "static")
    os.makedirs(static_dir, exist_ok=True)

    # No shell is involved here, so a "frontend/build/*" pattern would reach cp
    # as a literal file name and never match. "frontend/build/." copies the
    # directory's contents instead (cp is available on macOS/Linux only).
    copy_result = subprocess.run(["cp", "-r", "frontend/build/.", static_dir])
    if copy_result.returncode != 0:
        print("Failed to copy the React build files to the static directory.")
        return

    print("React frontend built successfully!")
def setup_react_frontend():
    """Bootstrap, install, build, and publish the React frontend.

    Runs ``setup_frontend.sh`` when no ``frontend`` directory exists, then
    ``npm install`` and ``npm run build`` inside ``frontend``, and finally
    copies ``frontend/build`` into ``buffalo_rag/api/static`` if the build
    directory is present.
    """
    print("Setting up React frontend...")

    # Bootstrap the project only when it has not been created yet.
    if not os.path.exists("frontend"):
        subprocess.run(["bash", "setup_frontend.sh"])

    # Install dependencies first, then produce the production build.
    for npm_command in (["npm", "install"], ["npm", "run", "build"]):
        subprocess.run(npm_command, cwd="frontend")

    # Make sure the destination for the built assets exists.
    target_dir = os.path.join("buffalo_rag", "api", "static")
    os.makedirs(target_dir, exist_ok=True)

    if not os.path.exists("frontend/build"):
        print("Frontend build directory not found. Please build the frontend manually.")
    else:
        # Relies on cp, i.e. macOS/Linux.
        subprocess.run(["cp", "-r", "frontend/build/.", target_dir])

    print("React frontend setup completed!")
def wait_for_server(url, timeout=30, interval=1):
    """Poll *url* until the server responds or *timeout* seconds have elapsed.

    Args:
        url: Address to probe with an HTTP GET (e.g. the root or a health
            check endpoint).
        timeout: Maximum number of seconds to keep polling.
        interval: Seconds to wait between attempts; also used as the
            per-request timeout.

    Returns:
        True as soon as a response with a status code below 500 is received,
        False if the timeout expires first.
    """
    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    print(f"Waiting for server at {url} to be ready...")

    while time.monotonic() < deadline:
        try:
            response = requests.get(url, timeout=interval)
        except requests.exceptions.RequestException:
            # Connection refused / timed out: the server is not up yet.
            pass
        else:
            # 5xx means the server is reachable but not ready to serve.
            if response.status_code < 500:
                print(f"Server at {url} is ready.")
                return True

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(interval, remaining))

    print(f"Timeout waiting for server at {url}.")
    return False
def _stop_process(process):
    """Terminate *process* if it was started and is still running."""
    if process is not None and process.poll() is None:
        process.terminate()


def _run_servers():
    """Start the API and Flask servers, open the browser, and stop both on exit."""
    api_process = subprocess.Popen(
        ["uvicorn", "buffalo_rag.api.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
    )
    flask_process = None
    try:
        # Without a reachable API there is no point in starting the frontend.
        if not wait_for_server('http://localhost:8000/', timeout=60):
            return

        # Short extra pause after the API first responds, before starting Flask.
        time.sleep(3)
        flask_process = subprocess.Popen(
            ["flask", "run", "--host=0.0.0.0", "--port=7860"],
            env={**os.environ, "FLASK_APP": "buffalo_rag/frontend/flask_app.py"},
        )

        # Open the browser on the port the Flask frontend was started on.
        import webbrowser
        webbrowser.open('http://localhost:7860')

        # Keep both servers alive until the user asks to quit.
        input("Press Enter to stop the server and exit...\n")
    finally:
        # Runs on normal exit as well as on Ctrl-C / EOF, so no child
        # process is left behind.
        _stop_process(flask_process)
        _stop_process(api_process)


def main():
    """Parse the command-line flags and run the selected pipeline stages.

    Stages run in a fixed order: project setup, Flask template setup,
    scraping, embedding creation, then the servers. ``--all`` and ``--run``
    start the API and the Flask frontend as background processes and stop
    them when the user presses Enter; ``--api`` alone runs the API in the
    foreground, and ``--frontend`` runs the Flask frontend in the foreground.
    """
    parser = argparse.ArgumentParser(description="BuffaloRAG - AI Assistant for UB International Students")
    parser.add_argument("--setup", action="store_true", help="Create project structure")
    parser.add_argument("--flask-setup", action="store_true", help="Setup Flask frontend")
    parser.add_argument("--scrape", action="store_true", help="Run web scraper")
    parser.add_argument("--build", action="store_true", help="Build embeddings")
    parser.add_argument("--api", action="store_true", help="Run API server")
    parser.add_argument("--frontend", action="store_true", help="Run Flask frontend")
    parser.add_argument("--all", action="store_true", help="Run the complete pipeline")
    parser.add_argument("--run", action="store_true", help="Run frontend & backend servers")
    args = parser.parse_args()

    if args.setup or args.all:
        create_project_structure()

    if args.flask_setup or args.all or args.run:
        # Imported lazily: the setup script is only needed for this stage.
        from setup_flask_templates import setup_flask_templates
        setup_flask_templates()

    if args.scrape or args.all:
        run_scraper()

    if args.build or args.all:
        build_embeddings()

    if args.all or args.run:
        _run_servers()
    elif args.api:
        run_api()

    if args.frontend:
        run_flask_frontend()
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()