Update rag_engine.py
rag_engine.py  CHANGED  (+9 -9)
@@ -232,7 +232,7 @@ def get_embedding(text):
         return np.zeros((1, 384), dtype=np.float32)
 
 @st.cache_data(ttl=900)
-def retrieve_passages(query, faiss_index, text_chunks, metadata_dict, top_k=5, similarity_threshold=0.5):
+def retrieve_passages(query, _faiss_index, _text_chunks, _metadata_dict, top_k=5, similarity_threshold=0.5):
     """Retrieve top-k most relevant passages using FAISS with metadata."""
     try:
         print(f"\n🔍 Retrieving passages for query: {query}")
@@ -241,7 +241,7 @@ def retrieve_passages(query, faiss_index, text_chunks, metadata_dict, top_k=5, s
         query_embedding = get_embedding(query)
 
         # Search in FAISS index
-        distances, indices = faiss_index.search(query_embedding, top_k * 2)
+        distances, indices = _faiss_index.search(query_embedding, top_k * 2)
 
         print(f"Found {len(distances[0])} potential matches")
         retrieved_passages = []
@@ -251,8 +251,8 @@ def retrieve_passages(query, faiss_index, text_chunks, metadata_dict, top_k=5, s
         # Process results
         for dist, idx in zip(distances[0], indices[0]):
             print(f"Distance: {dist:.4f}, Index: {idx}")
-            if idx in text_chunks and dist >= similarity_threshold:
-                title_with_txt, author, text = text_chunks[idx]
+            if idx in _text_chunks and dist >= similarity_threshold:
+                title_with_txt, author, text = _text_chunks[idx]
 
                 # Clean title
                 clean_title = title_with_txt.replace(".txt", "") if title_with_txt.endswith(".txt") else title_with_txt
@@ -263,7 +263,7 @@ def retrieve_passages(query, faiss_index, text_chunks, metadata_dict, top_k=5, s
                     continue
 
                 # Get metadata
-                metadata_entry = metadata_dict.get(clean_title, {})
+                metadata_entry = _metadata_dict.get(clean_title, {})
                 author = metadata_entry.get("Author", "Unknown")
                 publisher = metadata_entry.get("Publisher", "Unknown")
 
@@ -389,9 +389,9 @@ def process_query(query, top_k=5, word_limit=100):
     # Get relevant passages
     retrieved_context, retrieved_sources = retrieve_passages(
         query,
-        faiss_index,
-        text_chunks,
-        metadata_dict,
+        _faiss_index=faiss_index,
+        _text_chunks=text_chunks,
+        _metadata_dict=metadata_dict,
         top_k=top_k
     )
 
@@ -405,7 +405,7 @@ def process_query(query, top_k=5, word_limit=100):
     else:
         llm_answer_with_rag = "⚠️ No relevant context found."
 
-    # Clean up
+    # Clean up
     del retrieved_context, retrieved_sources
     gc.collect()
 
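A note on the pattern used here: Streamlit's @st.cache_data builds its cache key by hashing the decorated function's arguments, and it skips any parameter whose name starts with an underscore. Renaming faiss_index, text_chunks, and metadata_dict to _faiss_index, _text_chunks, and _metadata_dict therefore stops the decorator from trying to hash the FAISS index and the large chunk/metadata objects (which would otherwise raise UnhashableParamError), while query and top_k still drive the cache key. The sketch below shows the same convention in isolation; it is an illustrative example, not code from this repo, and the names cached_search and demo_index are assumptions.

import faiss
import numpy as np
import streamlit as st

@st.cache_data(ttl=900)
def cached_search(query, _index, top_k=5):
    # Only `query` and `top_k` are hashed into the cache key; `_index` is
    # excluded because of the leading underscore, so an unhashable FAISS
    # index can be passed in without errors.
    query_vec = np.zeros((1, 384), dtype=np.float32)  # placeholder for a real get_embedding(query)
    distances, indices = _index.search(query_vec, top_k)
    return distances, indices

# Usage: pass the unhashable object via the underscored name, as the updated
# call site in process_query now does.
demo_index = faiss.IndexFlatL2(384)
demo_index.add(np.random.rand(100, 384).astype(np.float32))
dists, ids = cached_search("example query", _index=demo_index)

One trade-off to keep in mind: because the underscored arguments are ignored by the cache, a rebuilt index or updated chunks will not invalidate existing entries on their own; here the ttl=900 on the decorator bounds how stale a cached result can get.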