SearchGPT

Running

App Files Community

Shreyas094 committed on Aug 4, 2024

Commit

b2ea3aa

verified ·

1 Parent(s): 3fb0e1b

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -4

app.py CHANGED Viewed

@@ -461,8 +461,9 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
         return
     all_filtered_docs = []
-    k_per_doc = max(5, 20 // len(selected_docs))  # Adjust this value as needed
     for doc_name in selected_docs:
         logging.info(f"Retrieving documents for: {doc_name}")
         try:
@@ -474,6 +475,16 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
         except Exception as e:
             logging.error(f"Error retrieving documents for {doc_name}: {str(e)}")
     logging.info(f"Total number of filtered documents: {len(all_filtered_docs)}")
     if not all_filtered_docs:
@@ -483,14 +494,14 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
     for doc in all_filtered_docs:
         try:
-            source = doc.metadata['source'] if isinstance(doc, Document) else doc.get('metadata', {}).get('source', 'Unknown')
-            content = doc.page_content if isinstance(doc, Document) else doc.get('page_content', '')
             logging.info(f"Document source: {source}")
             logging.info(f"Document content preview: {content[:100]}...")  # Log first 100 characters of each document
         except Exception as e:
             logging.error(f"Error processing document: {str(e)}")
-    context_str = "\n".join([doc.page_content if isinstance(doc, Document) else doc.get('page_content', '') for doc in all_filtered_docs])
     logging.info(f"Total context length: {len(context_str)}")
     if model == "@cf/meta/llama-3.1-8b-instruct":

         return
     all_filtered_docs = []
+    k_per_doc = max(10, 30 // len(selected_docs))  # Increased k_per_doc
+    # First, try to retrieve documents with filtering
     for doc_name in selected_docs:
         logging.info(f"Retrieving documents for: {doc_name}")
         try:
         except Exception as e:
             logging.error(f"Error retrieving documents for {doc_name}: {str(e)}")
+    # If no documents are found, try retrieving without filtering
+    if not all_filtered_docs:
+        logging.warning("No documents found with filtering. Attempting retrieval without filters.")
+        try:
+            retriever = database.as_retriever(search_kwargs={"k": k_per_doc * len(selected_docs)})
+            all_filtered_docs = retriever.get_relevant_documents(query)
+            logging.info(f"Retrieved {len(all_filtered_docs)} documents without filtering")
+        except Exception as e:
+            logging.error(f"Error retrieving documents without filtering: {str(e)}")
     logging.info(f"Total number of filtered documents: {len(all_filtered_docs)}")
     if not all_filtered_docs:
     for doc in all_filtered_docs:
         try:
+            source = doc.metadata['source'] if hasattr(doc, 'metadata') else doc.get('metadata', {}).get('source', 'Unknown')
+            content = doc.page_content if hasattr(doc, 'page_content') else doc.get('page_content', '')
             logging.info(f"Document source: {source}")
             logging.info(f"Document content preview: {content[:100]}...")  # Log first 100 characters of each document
         except Exception as e:
             logging.error(f"Error processing document: {str(e)}")
+    context_str = "\n".join([doc.page_content if hasattr(doc, 'page_content') else doc.get('page_content', '') for doc in all_filtered_docs])
     logging.info(f"Total context length: {len(context_str)}")
     if model == "@cf/meta/llama-3.1-8b-instruct":