# File size: 6,712 Bytes
# Revision: 9108a9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import os
import argparse
import subprocess
import time
import requests

def run_scraper():
    """Run the web scraper unless enough scraped files already exist.

    Counts the regular files directly under ``data/raw``. When more than
    100 are present the scraper stage is skipped; otherwise (including when
    the directory does not exist yet) ``BuffaloScraper().scrape`` is called
    with ``max_pages=100``.
    """
    raw_dir = "data/raw"
    page_limit = 100

    # A missing directory means nothing has been scraped yet. Previously this
    # case fell through without scraping at all, so a fresh checkout never
    # collected any data.
    if os.path.isdir(raw_dir):
        num_pages = sum(
            1
            for name in os.listdir(raw_dir)
            if os.path.isfile(os.path.join(raw_dir, name))
        )
    else:
        num_pages = 0

    if num_pages > page_limit:
        print(f"{num_pages} scraped data files found under data/raw. Skipping data scraper stage.")
        return

    # Imported only when scraping is actually needed, so the skip path does
    # not pay for (or depend on) loading the scraper package.
    from buffalo_rag.scraper.scraper import BuffaloScraper

    print("Starting web scraper...")
    scraper = BuffaloScraper()
    scraper.scrape(max_pages=page_limit)
    print("Scraping completed!")

def build_embeddings():
    """Chunk the documents and create embeddings for the resulting chunks."""
    from buffalo_rag.embeddings.chunker import DocumentChunker

    print("Creating document chunks and embeddings...")
    doc_chunker = DocumentChunker()
    # The chunks produced by the first step are fed straight into the second.
    doc_chunker.create_embeddings(doc_chunker.create_chunks())
    print("Embeddings created!")

def run_api():
    """Launch the FastAPI backend through uvicorn and block until it exits."""
    print("Starting API server...")
    uvicorn_cmd = [
        "uvicorn",
        "buffalo_rag.api.main:app",
        "--host", "0.0.0.0",
        "--port", "8000",
        "--reload",
    ]
    subprocess.run(uvicorn_cmd)

def run_flask_frontend():
    """Launch the Flask frontend with ``flask run`` and block until it exits."""
    print("Starting Flask frontend...")
    flask_cmd = ["flask", "run", "--host=0.0.0.0", "--port=7860"]
    # Inherit the current environment and point the Flask CLI at the app file.
    flask_env = dict(os.environ)
    flask_env["FLASK_APP"] = "buffalo_rag/frontend/flask_app.py"
    subprocess.run(flask_cmd, env=flask_env)

def create_project_structure():
    """Create the package and data directories relative to the working directory."""
    package_root = "buffalo_rag"
    package_parts = (
        "scraper",
        "embeddings",
        "vector_store",
        "model",
        "api",
        "frontend",
        "utils",
    )
    data_root = "data"
    data_parts = ("raw", "processed", "embeddings")

    # Each root is listed before its children, followed by the children.
    layout = [package_root]
    layout += [f"{package_root}/{part}" for part in package_parts]
    layout.append(data_root)
    layout += [f"{data_root}/{part}" for part in data_parts]

    for path in layout:
        # exist_ok keeps the call safe to repeat on an existing tree.
        os.makedirs(path, exist_ok=True)

    print("Project structure created!")

def build_react_frontend():
    """Build the React frontend and copy the bundle into the API static directory.

    Runs ``npm run build`` inside ``frontend`` and copies the contents of
    ``frontend/build`` into ``buffalo_rag/api/static`` (created if missing).
    The success message is printed only when both steps exit with status 0;
    otherwise a failure message is printed and the function returns early.
    """
    print("Building React frontend...")
    build = subprocess.run(["npm", "run", "build"], cwd="frontend")
    if build.returncode != 0:
        print("React frontend build failed.")
        return

    static_dir = os.path.join("buffalo_rag", "api", "static")
    os.makedirs(static_dir, exist_ok=True)

    # An argument list is executed without a shell, so a "frontend/build/*"
    # pattern would reach cp as a literal file name and never match anything.
    # "frontend/build/." copies the directory contents without needing a glob.
    copy = subprocess.run(["cp", "-r", "frontend/build/.", static_dir])
    if copy.returncode != 0:
        print("Copying frontend build files failed.")
        return

    print("React frontend built successfully!")

def setup_react_frontend():
    """Set up the React frontend: install, build, and copy it into the API static directory."""
    frontend_dir = "frontend"
    print("Setting up React frontend...")

    # The setup script is only run when there is no frontend directory yet.
    if not os.path.exists(frontend_dir):
        subprocess.run(["bash", "setup_frontend.sh"])

    # Install dependencies first, then produce the build.
    for npm_args in (["install"], ["run", "build"]):
        subprocess.run(["npm", *npm_args], cwd=frontend_dir)

    # Make sure the copy destination exists.
    static_dir = os.path.join("buffalo_rag", "api", "static")
    os.makedirs(static_dir, exist_ok=True)

    if not os.path.exists("frontend/build"):
        print("Frontend build directory not found. Please build the frontend manually.")
    else:
        # Relies on the cp utility (macOS/Linux).
        subprocess.run(["cp", "-r", "frontend/build/.", static_dir])

    print("React frontend setup completed!")

def wait_for_server(url, timeout=30, interval=1):
    """Poll ``url`` until it responds or ``timeout`` seconds have elapsed.

    Args:
        url: Address requested with an HTTP GET on every attempt.
        timeout: Overall number of seconds to keep polling.
        interval: Seconds to sleep between attempts; also used as the
            per-request timeout.

    Returns:
        True as soon as a response with a status code below 500 is received,
        False when the timeout elapses first.
    """
    began = time.time()
    print(f"Waiting for server at {url} to be ready...")
    while time.time() - began < timeout:
        try:
            reply = requests.get(url, timeout=interval)
        except requests.exceptions.RequestException:
            # The server is not reachable yet; retry after the pause below.
            reply = None
        # A 5xx answer counts as "not ready".
        if reply is not None and reply.status_code < 500:
            print(f"Server at {url} is ready.")
            return True
        time.sleep(interval)
    print(f"Timeout waiting for server at {url}.")
    return False

def _terminate(process):
    """Terminate ``process`` if it was started and is still running."""
    if process is not None and process.poll() is None:
        process.terminate()


def _run_servers():
    """Start the API and Flask servers, open the browser, and wait for Enter.

    Both child processes are terminated when the function exits, including
    when the wait is interrupted (for example by Ctrl-C or a closed stdin).
    """
    flask_port = "7860"
    api_process = subprocess.Popen(
        ["uvicorn", "buffalo_rag.api.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
    )
    flask_process = None
    try:
        # Do not start the frontend until the API answers.
        if not wait_for_server('http://localhost:8000/', timeout=60):
            return

        time.sleep(3)
        flask_process = subprocess.Popen(
            ["flask", "run", "--host=0.0.0.0", f"--port={flask_port}"],
            env={**os.environ, "FLASK_APP": "buffalo_rag/frontend/flask_app.py"},
        )

        # Open the port Flask was actually started on; the URL previously
        # pointed at port 5000, where nothing is listening.
        import webbrowser
        webbrowser.open(f"http://localhost:{flask_port}")

        input("Press Enter to stop the server and exit...\n")
    finally:
        _terminate(flask_process)
        _terminate(api_process)


def main(argv=None):
    """Parse the command-line flags and run the selected pipeline stages.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv``.

    Stages run in this order when their flag (or a combined flag) is given:
    project structure (``--setup``/``--all``), Flask templates
    (``--flask-setup``/``--all``/``--run``), scraper (``--scrape``/``--all``),
    embeddings (``--build``/``--all``), servers (``--api`` runs the API in
    the foreground; ``--all``/``--run`` start API and Flask together), and
    the Flask frontend alone (``--frontend``).
    """
    parser = argparse.ArgumentParser(description="BuffaloRAG - AI Assistant for UB International Students")
    parser.add_argument("--setup", action="store_true", help="Create project structure")
    parser.add_argument("--flask-setup", action="store_true", help="Setup Flask frontend")
    parser.add_argument("--scrape", action="store_true", help="Run web scraper")
    parser.add_argument("--build", action="store_true", help="Build embeddings")
    parser.add_argument("--api", action="store_true", help="Run API server")
    parser.add_argument("--frontend", action="store_true", help="Run Flask frontend")
    parser.add_argument("--all", action="store_true", help="Run the complete pipeline")
    parser.add_argument("--run", action="store_true", help="Run frontend & backend servers")

    args = parser.parse_args(argv)

    if args.setup or args.all:
        create_project_structure()

    if args.flask_setup or args.all or args.run:
        # Imported lazily so the other stages do not need the setup script.
        from setup_flask_templates import setup_flask_templates
        setup_flask_templates()

    if args.scrape or args.all:
        run_scraper()

    if args.build or args.all:
        build_embeddings()

    if args.api or args.all or args.run:
        if args.all or args.run:
            # Run both servers as child processes.
            _run_servers()
        else:
            run_api()

    if args.frontend:
        run_flask_frontend()

# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()