Spaces:

ankanghosh
/

anveshak

Sleeping

App Files Community

ankanghosh committed on Mar 19

Commit

38deecc

verified ·

1 Parent(s): 40ca1f2

Update rag_engine.py

Browse files

Files changed (1) hide show

rag_engine.py +85 -52

rag_engine.py CHANGED Viewed

@@ -12,13 +12,15 @@ import unicodedata
 import streamlit as st
 from utils import setup_gcp_auth, setup_openai_auth
-# Initialize session state for model and tokenizer
 if 'model' not in st.session_state:
     st.session_state.model = None
 if 'tokenizer' not in st.session_state:
     st.session_state.tokenizer = None
 if 'device' not in st.session_state:
-    st.session_state.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     print(f"Using device: {st.session_state.device}")
 # Load GCP authentication from utility function
@@ -58,58 +60,86 @@ def load_model():
             # Force model to CPU - more stable than GPU for this use case
             os.environ["CUDA_VISIBLE_DEVICES"] = ""
-            print("Loading tokenizer...")
-            tokenizer = AutoTokenizer.from_pretrained("intfloat/e5-small-v2")
-            print("Loading model...")
-            model = AutoModel.from_pretrained(
-                "intfloat/e5-small-v2",
-                torch_dtype=torch.float16,  # Use half precision
-                low_cpu_mem_usage=True,
-                device_map="auto"  # Let transformers decide
-            )
-            model.eval()
-            torch.set_grad_enabled(False)
-            st.session_state.tokenizer = tokenizer
-            st.session_state.model = model
-            print("✅ Model loaded successfully")
         return st.session_state.tokenizer, st.session_state.model
     except Exception as e:
         print(f"❌ Error loading model: {str(e)}")
         raise
 def download_file_from_gcs(gcs_path, local_path):
     """Download a file from GCS to local storage."""
-    blob = bucket.blob(gcs_path)
-    blob.download_to_filename(local_path)
-    print(f"✅ Downloaded {gcs_path} → {local_path}")
-# Download necessary files
-download_file_from_gcs(faiss_index_file_gcs, local_faiss_index_file)
-download_file_from_gcs(text_chunks_file_gcs, local_text_chunks_file)
-download_file_from_gcs(metadata_file_gcs, local_metadata_file)
-# Load FAISS index
-faiss_index = faiss.read_index(local_faiss_index_file)
-# Load text chunks
-text_chunks = {}  # {ID -> (Title, Author, Text)}
-with open(local_text_chunks_file, "r", encoding="utf-8") as f:
-    for line in f:
-        parts = line.strip().split("\t")
-        if len(parts) == 4:
-            text_chunks[int(parts[0])] = (parts[1], parts[2], parts[3])
-# Load metadata.jsonl for publisher information
-metadata_dict = {}
-with open(local_metadata_file, "r", encoding="utf-8") as f:
-    for line in f:
-        item = json.loads(line)
-        metadata_dict[item["Title"]] = item  # Store for easy lookup
 print(f"✅ FAISS index and text chunks loaded. {len(text_chunks)} passages available.")
@@ -155,7 +185,8 @@ def get_embedding(text):
         return embeddings
     except Exception as e:
         print(f"❌ Embedding error: {str(e)}")
-        return np.zeros((1, 1024), dtype=np.float32)
 def retrieve_passages(query, top_k=5, similarity_threshold=0.5):
     """Retrieve top-k most relevant passages using FAISS with metadata."""
@@ -198,6 +229,7 @@ def retrieve_passages(query, top_k=5, similarity_threshold=0.5):
         return retrieved_passages, retrieved_sources
     except Exception as e:
         print(f"❌ Error in retrieve_passages: {str(e)}")
         return [], []
 def answer_with_llm(query, context=None, word_limit=100):
@@ -265,8 +297,13 @@ def answer_with_llm(query, context=None, word_limit=100):
     except Exception as e:
         print(f"❌ LLM API error: {str(e)}")
         return "I apologize, but I'm unable to answer at the moment."
 def process_query(query, top_k=5, word_limit=100):
     """Process a query through the RAG pipeline with proper formatting."""
     print(f"\n🔍 Processing query: {query}")
@@ -280,8 +317,4 @@ def process_query(query, top_k=5, word_limit=100):
     else:
         llm_answer_with_rag = "⚠️ No relevant context found."
-    return {"query": query, "answer_with_rag": llm_answer_with_rag, "citations": sources}
-def format_citations(sources):
-    """Format citations to display each one on a new line."""
-    return "\n".join([f"📚 {title} by {author}, Published by {publisher}" for title, author, publisher in sources])

 import streamlit as st
 from utils import setup_gcp_auth, setup_openai_auth
+# Initialize session state for model and tokenizer FIRST - before any usage
 if 'model' not in st.session_state:
     st.session_state.model = None
+    print("Initialized st.session_state.model to None")
 if 'tokenizer' not in st.session_state:
     st.session_state.tokenizer = None
+    print("Initialized st.session_state.tokenizer to None")
 if 'device' not in st.session_state:
+    st.session_state.device = torch.device("cpu")  # Force CPU for stability
     print(f"Using device: {st.session_state.device}")
 # Load GCP authentication from utility function
             # Force model to CPU - more stable than GPU for this use case
             os.environ["CUDA_VISIBLE_DEVICES"] = ""
+            with st.spinner("Loading tokenizer and model... This may take a minute."):
+                print("Loading tokenizer...")
+                tokenizer = AutoTokenizer.from_pretrained("intfloat/e5-small-v2")
+                print("Loading model...")
+                model = AutoModel.from_pretrained(
+                    "intfloat/e5-small-v2",
+                    torch_dtype=torch.float16,  # Use half precision
+                    low_cpu_mem_usage=True,
+                    # Remove device_map - it requires accelerate and causes issues
+                )
+                model.eval()
+                torch.set_grad_enabled(False)
+                st.session_state.tokenizer = tokenizer
+                st.session_state.model = model
+                print("✅ Model loaded successfully")
         return st.session_state.tokenizer, st.session_state.model
     except Exception as e:
         print(f"❌ Error loading model: {str(e)}")
+        st.error(f"Error loading model: {str(e)}")
         raise
 def download_file_from_gcs(gcs_path, local_path):
     """Download a file from GCS to local storage."""
+    try:
+        blob = bucket.blob(gcs_path)
+        blob.download_to_filename(local_path)
+        print(f"✅ Downloaded {gcs_path} → {local_path}")
+    except Exception as e:
+        print(f"❌ Error downloading {gcs_path}: {str(e)}")
+        st.error(f"Error downloading {gcs_path}: {str(e)}")
+        raise
+# Add error handling around file downloads
+try:
+    # Download necessary files with a spinner to show progress
+    with st.spinner("Downloading necessary files..."):
+        download_file_from_gcs(faiss_index_file_gcs, local_faiss_index_file)
+        download_file_from_gcs(text_chunks_file_gcs, local_text_chunks_file)
+        download_file_from_gcs(metadata_file_gcs, local_metadata_file)
+except Exception as e:
+    st.error(f"Error setting up data files: {str(e)}")
+    raise
+# Load FAISS index with error handling
+try:
+    faiss_index = faiss.read_index(local_faiss_index_file)
+except Exception as e:
+    print(f"❌ Error loading FAISS index: {str(e)}")
+    st.error(f"Error loading FAISS index: {str(e)}")
+    raise
+# Load text chunks with error handling
+try:
+    text_chunks = {}  # {ID -> (Title, Author, Text)}
+    with open(local_text_chunks_file, "r", encoding="utf-8") as f:
+        for line in f:
+            parts = line.strip().split("\t")
+            if len(parts) == 4:
+                text_chunks[int(parts[0])] = (parts[1], parts[2], parts[3])
+except Exception as e:
+    print(f"❌ Error loading text chunks: {str(e)}")
+    st.error(f"Error loading text chunks: {str(e)}")
+    raise
+# Load metadata.jsonl for publisher information with error handling
+try:
+    metadata_dict = {}
+    with open(local_metadata_file, "r", encoding="utf-8") as f:
+        for line in f:
+            item = json.loads(line)
+            metadata_dict[item["Title"]] = item  # Store for easy lookup
+except Exception as e:
+    print(f"❌ Error loading metadata: {str(e)}")
+    st.error(f"Error loading metadata: {str(e)}")
+    raise
 print(f"✅ FAISS index and text chunks loaded. {len(text_chunks)} passages available.")
         return embeddings
     except Exception as e:
         print(f"❌ Embedding error: {str(e)}")
+        st.error(f"Embedding error: {str(e)}")
+        return np.zeros((1, 384), dtype=np.float32)  # Changed from 1024 to 384 for e5-small-v2
 def retrieve_passages(query, top_k=5, similarity_threshold=0.5):
     """Retrieve top-k most relevant passages using FAISS with metadata."""
         return retrieved_passages, retrieved_sources
     except Exception as e:
         print(f"❌ Error in retrieve_passages: {str(e)}")
+        st.error(f"Error in retrieve_passages: {str(e)}")
         return [], []
 def answer_with_llm(query, context=None, word_limit=100):
     except Exception as e:
         print(f"❌ LLM API error: {str(e)}")
+        st.error(f"LLM API error: {str(e)}")
         return "I apologize, but I'm unable to answer at the moment."
+def format_citations(sources):
+    """Format citations to display each one on a new line."""
+    return "\n".join([f"📚 {title} by {author}, Published by {publisher}" for title, author, publisher in sources])
 def process_query(query, top_k=5, word_limit=100):
     """Process a query through the RAG pipeline with proper formatting."""
     print(f"\n🔍 Processing query: {query}")
     else:
         llm_answer_with_rag = "⚠️ No relevant context found."
+    return {"query": query, "answer_with_rag": llm_answer_with_rag, "citations": sources}