Spaces:

broadfield-dev
/

bible-app

Sleeping

App Files Files Community

broadfield-dev committed 10 days ago

Commit

04eac3c

verified ·

1 Parent(s): 1dde6a2

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -43

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import sys
 import subprocess
-from flask import Flask, render_template, request, flash, redirect, url_for
 import torch
 from transformers import AutoTokenizer, AutoModel
 import os
@@ -19,6 +19,7 @@ CHROMA_PATH = "chroma_db"
 COLLECTION_NAME = "bible_verses"
 MODEL_NAME = "google/embeddinggemma-300m"
 DATASET_REPO = "broadfield-dev/bible-chromadb-gemma"
 # --- Global variables for resources ---
 chroma_collection = None
@@ -26,83 +27,88 @@ tokenizer = None
 embedding_model = None
 def load_resources():
-    """
-    Downloads the DB from the Hub if not present, then loads it and the model.
-    """
     global chroma_collection, tokenizer, embedding_model
     if chroma_collection and embedding_model:
         return True
     print("Attempting to load resources...")
     try:
-        # 1. Download the ChromaDB files from the Hugging Face Hub
-        # This will only download if the folder doesn't already exist.
-        print(f"Ensuring database is available locally from '{DATASET_REPO}'...")
-        snapshot_download(
-            repo_id=DATASET_REPO,
-            repo_type="dataset",
-            local_dir=CHROMA_PATH,
-            local_dir_use_symlinks=False # Recommended for Spaces
-        )
-        print("Database files are present locally.")
-        # 2. Initialize ChromaDB client from the downloaded files
         client = chromadb.PersistentClient(path=CHROMA_PATH)
         collection = client.get_collection(name=COLLECTION_NAME)
         if collection.count() == 0:
-            print(f"Warning: Database collection is empty.")
             return False
         chroma_collection = collection
         print(f"Successfully connected to DB with {collection.count()} items.")
-        # 3. Load the embedding model
         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
         embedding_model = AutoModel.from_pretrained(MODEL_NAME)
         print(f"Embedding model '{MODEL_NAME}' loaded successfully.")
         return True
     except Exception as e:
-        print(f"Could not load resources. The database may not be built yet.")
         print(f"Error: {e}")
         return False
-# Try to load resources on startup.
 resources_loaded = load_resources()
-# --- 3. Define App Routes (Unchanged from previous ChromaDB version) ---
 @app.route('/')
 def home():
-    if not resources_loaded:
-        flash(f"Welcome! Database not ready. Use the admin panel to build it.", "warning")
     return render_template('index.html')
 @app.route('/build-rag', methods=['POST'])
 def build_rag_route():
-    print("Vector database build process requested.")
     try:
-        process = subprocess.Popen(
-            [sys.executable, "build_rag.py"],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True
-        )
-        print(f"Started build process with PID: {process.pid}")
-        flash("Database build & push initiated! This can take several minutes. Check logs for progress. The app will be ready when it completes.", "info")
     except Exception as e:
-        print(f"Failed to start build process: {e}")
-        flash(f"An error occurred: {e}", "error")
-    return redirect(url_for('home'))
 @app.route('/search', methods=['POST'])
 def search():
     global resources_loaded
     if not resources_loaded:
-        print("Reloading resources for search...")
         resources_loaded = load_resources()
         if not resources_loaded:
-            flash("Database not ready. Please wait for the build process to finish.", "error")
             return redirect(url_for('home'))
     user_query = request.form['query']
@@ -120,9 +126,7 @@ def search():
     )
     results_list = []
-    documents = search_results['documents'][0]
-    metadatas = search_results['metadatas'][0]
-    distances = search_results['distances'][0]
     for i in range(len(documents)):
         results_list.append({

 import sys
 import subprocess
+from flask import Flask, render_template, request, flash, redirect, url_for, jsonify
 import torch
 from transformers import AutoTokenizer, AutoModel
 import os
 COLLECTION_NAME = "bible_verses"
 MODEL_NAME = "google/embeddinggemma-300m"
 DATASET_REPO = "broadfield-dev/bible-chromadb-gemma"
+STATUS_FILE = "build_status.log"  # File to track build status
 # --- Global variables for resources ---
 chroma_collection = None
 embedding_model = None
 def load_resources():
+    """Load the ChromaDB collection and the embedding model into module globals.
+
+    Returns True immediately when both ``chroma_collection`` and
+    ``embedding_model`` are already set. Otherwise, if ``CHROMA_PATH`` is
+    missing or empty, the dataset repo ``DATASET_REPO`` is downloaded into it
+    with ``snapshot_download``; the collection ``COLLECTION_NAME`` is then
+    opened and the tokenizer and model for ``MODEL_NAME`` are loaded.
+
+    Returns:
+        True when everything loaded; False when the collection is empty or
+        any step raised (the exception is printed, not re-raised).
+    """
     global chroma_collection, tokenizer, embedding_model
     # Both resources are already set by an earlier successful call.
     if chroma_collection and embedding_model:
         return True
     print("Attempting to load resources...")
     try:
         # Download only when the local DB directory is missing or empty.
+        if not os.path.exists(CHROMA_PATH) or not os.listdir(CHROMA_PATH):
+            print(f"Local DB not found. Downloading from '{DATASET_REPO}'...")
+            snapshot_download(
+                repo_id=DATASET_REPO,
+                repo_type="dataset",
+                local_dir=CHROMA_PATH,
+                local_dir_use_symlinks=False
+            )
+            print("Database files downloaded.")
+        else:
+            print("Local database files found.")
         client = chromadb.PersistentClient(path=CHROMA_PATH)
         collection = client.get_collection(name=COLLECTION_NAME)
         if collection.count() == 0:
+            print("Warning: Database collection is empty.")
             return False
         # The global is assigned before the model loads; if the model load
         # below raises, the guard at the top still fails on embedding_model,
         # so the next call retries the whole sequence.
         chroma_collection = collection
         print(f"Successfully connected to DB with {collection.count()} items.")
         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
         embedding_model = AutoModel.from_pretrained(MODEL_NAME)
         print(f"Embedding model '{MODEL_NAME}' loaded successfully.")
         return True
     # Any failure is printed and reported to the caller as False.
     except Exception as e:
+        print(f"Could not load resources. The database may not be built yet or the repo is empty.")
         print(f"Error: {e}")
         return False
 resources_loaded = load_resources()
+# --- 3. Define App Routes ---
 @app.route('/')
 def home():
     """Render the ``index.html`` template for the root URL."""
     return render_template('index.html')
 @app.route('/build-rag', methods=['POST'])
 def build_rag_route():
+    """Start ``build_rag.py`` as a child process and respond without waiting.
+
+    Overwrites ``STATUS_FILE`` with an ``IN_PROGRESS`` line, launches the
+    script with the current interpreter, and returns ``{"status": "started"}``.
+    If that raises, a ``FAILED`` line is written to ``STATUS_FILE`` and a JSON
+    error body is returned with HTTP 500.
+    """
     try:
+        # Overwrite any previous status with the "in progress" marker.
+        with open(STATUS_FILE, "w") as f:
+            f.write("IN_PROGRESS: Starting build process...")
+        # Popen returns as soon as the child is spawned; the handle is not kept.
+        subprocess.Popen([sys.executable, "build_rag.py"])
+        return jsonify({"status": "started"})
     except Exception as e:
         # NOTE(review): this write is not itself guarded; if opening
         # STATUS_FILE is what failed above, it will raise again here.
+        with open(STATUS_FILE, "w") as f:
+            f.write(f"FAILED: Could not start process - {e}")
+        return jsonify({"status": "error", "message": str(e)}), 500
+@app.route('/status')
+def status():
+    """Report the build status recorded in ``STATUS_FILE`` as JSON.
+
+    Returns ``{"status": "NOT_STARTED"}`` when the file does not exist.
+    Otherwise the file's stripped contents are split at the first ``': '``
+    into ``status`` (before) and ``message`` (after).
+    """
+    if not os.path.exists(STATUS_FILE):
+        return jsonify({"status": "NOT_STARTED"})
+    with open(STATUS_FILE, "r") as f:
+        status_line = f.read().strip()
+    # If ': ' is absent, partition puts the whole text in status_code and
+    # leaves message as an empty string.
+    status_code, _, message = status_line.partition(': ')
+    return jsonify({"status": status_code, "message": message})
 @app.route('/search', methods=['POST'])
 def search():
     global resources_loaded
     if not resources_loaded:
         resources_loaded = load_resources()
         if not resources_loaded:
+            flash("Database not ready. Please wait for the build process to finish and then refresh the page.", "error")
             return redirect(url_for('home'))
     user_query = request.form['query']
     )
     results_list = []
+    documents, metadatas, distances = search_results['documents'][0], search_results['metadatas'][0], search_results['distances'][0]
     for i in range(len(documents)):
         results_list.append({