# File size: 6,712 Bytes
# 9108a9a
import os
import argparse
import subprocess
import time
import requests
def run_scraper():
    """Run the web scraper unless enough raw pages were already collected.

    Counts the regular files directly under ``data/raw``. When more than 100
    are present the scrape is skipped; otherwise (including when the
    directory does not exist yet) ``BuffaloScraper().scrape(max_pages=100)``
    is run.
    """
    raw_dir = "data/raw"

    # A missing directory just means nothing has been scraped yet; it must
    # not silently disable the scraper.
    num_pages = 0
    if os.path.isdir(raw_dir):
        num_pages = sum(
            1
            for name in os.listdir(raw_dir)
            if os.path.isfile(os.path.join(raw_dir, name))
        )

    if num_pages > 100:
        print(f"{num_pages} scraped data files found under data/raw. Skipping data scraper stage.")
        return

    # Imported lazily so the skip path does not require the scraper package
    # to be importable.
    from buffalo_rag.scraper.scraper import BuffaloScraper

    print("Starting web scraper...")
    scraper = BuffaloScraper()
    scraper.scrape(max_pages=100)
    print("Scraping completed!")
def build_embeddings():
    """Chunk the documents and create embeddings for the resulting chunks."""
    from buffalo_rag.embeddings.chunker import DocumentChunker

    print("Creating document chunks and embeddings...")
    doc_chunker = DocumentChunker()
    # Chunks are produced first and handed straight to the embedding step.
    doc_chunker.create_embeddings(doc_chunker.create_chunks())
    print("Embeddings created!")
def run_api():
    """Launch the FastAPI backend under uvicorn and block until it exits."""
    print("Starting API server...")
    uvicorn_cmd = [
        "uvicorn",
        "buffalo_rag.api.main:app",
        "--host", "0.0.0.0",
        "--port", "8000",
        "--reload",
    ]
    subprocess.run(uvicorn_cmd)
def run_flask_frontend():
    """Launch the Flask frontend via ``flask run`` and block until it exits."""
    print("Starting Flask frontend...")
    # Copy the current environment and point Flask at the frontend app.
    flask_env = dict(os.environ)
    flask_env["FLASK_APP"] = "buffalo_rag/frontend/flask_app.py"
    flask_cmd = ["flask", "run", "--host=0.0.0.0", "--port=7860"]
    subprocess.run(flask_cmd, env=flask_env)
def create_project_structure():
    """Create the package and data directories, leaving existing ones alone."""
    package_dirs = (
        "buffalo_rag",
        "buffalo_rag/scraper",
        "buffalo_rag/embeddings",
        "buffalo_rag/vector_store",
        "buffalo_rag/model",
        "buffalo_rag/api",
        "buffalo_rag/frontend",
        "buffalo_rag/utils",
    )
    data_dirs = (
        "data",
        "data/raw",
        "data/processed",
        "data/embeddings",
    )
    # exist_ok=True makes repeated runs harmless.
    for path in package_dirs + data_dirs:
        os.makedirs(path, exist_ok=True)
    print("Project structure created!")
def build_react_frontend():
    """Build the React frontend and copy the output into the API static directory.

    Runs ``npm run build`` inside ``frontend`` and copies the contents of
    ``frontend/build`` into ``buffalo_rag/api/static`` (created if missing).
    A failure at any step is reported on stdout and stops the function; the
    success message is printed only when every step succeeded.
    """
    print("Building React frontend...")
    build_result = subprocess.run(["npm", "run", "build"], cwd="frontend")
    if build_result.returncode != 0:
        print(f"React frontend build failed (exit code {build_result.returncode}).")
        return

    if not os.path.exists("frontend/build"):
        print("Frontend build directory not found. Please build the frontend manually.")
        return

    static_dir = os.path.join("buffalo_rag", "api", "static")
    os.makedirs(static_dir, exist_ok=True)

    # subprocess.run() without a shell performs no glob expansion, so a
    # "frontend/build/*" argument would reach cp literally and fail.
    # "frontend/build/." copies the directory's contents instead
    # (cp is available on macOS/Linux only).
    copy_result = subprocess.run(["cp", "-r", "frontend/build/.", static_dir])
    if copy_result.returncode != 0:
        print(f"Copying the frontend build to {static_dir} failed (exit code {copy_result.returncode}).")
        return

    print("React frontend built successfully!")
def setup_react_frontend():
    """Set up, install, build and publish the React frontend."""
    print("Setting up React frontend...")

    # Run the setup script only when no frontend directory exists yet.
    frontend_missing = not os.path.exists("frontend")
    if frontend_missing:
        subprocess.run(["bash", "setup_frontend.sh"])

    # Install dependencies, then build, both from inside the frontend directory.
    for npm_command in (["npm", "install"], ["npm", "run", "build"]):
        subprocess.run(npm_command, cwd="frontend")

    # Make sure the static directory exists before copying into it.
    target_dir = os.path.join("buffalo_rag", "api", "static")
    os.makedirs(target_dir, exist_ok=True)

    if not os.path.exists("frontend/build"):
        print("Frontend build directory not found. Please build the frontend manually.")
    else:
        # "build/." copies the directory contents; cp is macOS/Linux only.
        subprocess.run(["cp", "-r", "frontend/build/.", target_dir])

    print("React frontend setup completed!")
def wait_for_server(url, timeout=30, interval=1):
    """Poll *url* until the server answers or *timeout* seconds elapse.

    Args:
        url: Address to request with ``requests.get``.
        timeout: Maximum total time to keep polling, in seconds.
        interval: Pause between attempts, in seconds; also used as the
            per-request timeout.

    Returns:
        True as soon as a response with a status code below 500 is received,
        False if the deadline passes first.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    print(f"Waiting for server at {url} to be ready...")
    while time.monotonic() < deadline:
        try:
            # Make a simple request (e.g., to the root or a health check endpoint)
            response = requests.get(url, timeout=interval)
        except requests.exceptions.RequestException:
            # Server is not ready yet, continue waiting
            pass
        else:
            if response.status_code < 500:  # Consider 5xx errors as not ready
                print(f"Server at {url} is ready.")
                return True

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(interval, remaining))

    print(f"Timeout waiting for server at {url}.")
    return False
def _terminate_if_running(process):
    """Ask *process* to terminate unless it is None or has already exited."""
    if process is not None and process.poll() is None:
        process.terminate()


def _run_servers():
    """Start the API and Flask servers, open the browser, and stop both on Enter."""
    api_process = subprocess.Popen(
        ["uvicorn", "buffalo_rag.api.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
    )
    flask_process = None
    try:
        # Wait for API to start; without it the frontend is not launched.
        if wait_for_server('http://localhost:8000/', timeout=60):
            time.sleep(3)
            # Start Flask in another process
            flask_process = subprocess.Popen(
                ["flask", "run", "--host=0.0.0.0", "--port=7860"],
                env={**os.environ, "FLASK_APP": "buffalo_rag/frontend/flask_app.py"},
            )
            # Flask is launched on port 7860, so that is the address to wait
            # for and to open in the browser.
            frontend_url = 'http://localhost:7860'
            if wait_for_server(frontend_url, timeout=30):
                import webbrowser
                webbrowser.open(frontend_url)
            # Wait for user to quit
            input("Press Enter to stop the server and exit...\n")
    finally:
        # Runs on normal exit, on a failed API start, and when input() is
        # interrupted, so no child process is left behind.
        _terminate_if_running(flask_process)
        _terminate_if_running(api_process)


def main():
    """Parse the command-line flags and run the selected stages in order.

    Stages run in a fixed order: project structure, Flask template setup,
    scraping, embedding build, then the servers. ``--all`` and ``--run``
    start the API and the Flask frontend as child processes and stop them
    when the user presses Enter; ``--api`` alone runs the API in the
    foreground; ``--frontend`` runs the Flask frontend in the foreground.
    """
    parser = argparse.ArgumentParser(description="BuffaloRAG - AI Assistant for UB International Students")
    parser.add_argument("--setup", action="store_true", help="Create project structure")
    parser.add_argument("--flask-setup", action="store_true", help="Setup Flask frontend")
    parser.add_argument("--scrape", action="store_true", help="Run web scraper")
    parser.add_argument("--build", action="store_true", help="Build embeddings")
    parser.add_argument("--api", action="store_true", help="Run API server")
    parser.add_argument("--frontend", action="store_true", help="Run Flask frontend")
    parser.add_argument("--all", action="store_true", help="Run the complete pipeline")
    parser.add_argument("--run", action="store_true", help="Run frontend & backend servers")
    args = parser.parse_args()

    if args.setup or args.all:
        create_project_structure()

    if args.flask_setup or args.all or args.run:
        # Run the Flask setup script
        from setup_flask_templates import setup_flask_templates
        setup_flask_templates()

    if args.scrape or args.all:
        run_scraper()

    if args.build or args.all:
        build_embeddings()

    if args.all or args.run:
        # Both servers as child processes, stopped together on Enter.
        _run_servers()
    elif args.api:
        run_api()

    if args.frontend:
        run_flask_frontend()
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()