Spaces: Running
Update app.py
Browse files
app.py
CHANGED
@@ -460,14 +460,28 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
|
|
460 |
yield "No documents available. Please upload PDF documents to answer questions."
|
461 |
return
|
462 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
463 |
all_filtered_docs = []
|
464 |
-
k_per_doc = max(
|
465 |
|
466 |
-
# First, try to retrieve documents with filtering
|
467 |
for doc_name in selected_docs:
|
468 |
logging.info(f"Retrieving documents for: {doc_name}")
|
469 |
try:
|
470 |
-
doc_filter = lambda doc:
|
471 |
doc_retriever = database.as_retriever(search_kwargs={"k": k_per_doc, "filter": doc_filter})
|
472 |
relevant_docs = doc_retriever.get_relevant_documents(query)
|
473 |
all_filtered_docs.extend(relevant_docs)
|
@@ -475,16 +489,6 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
|
|
475 |
except Exception as e:
|
476 |
logging.error(f"Error retrieving documents for {doc_name}: {str(e)}")
|
477 |
|
478 |
-
# If no documents are found, try retrieving without filtering
|
479 |
-
if not all_filtered_docs:
|
480 |
-
logging.warning("No documents found with filtering. Attempting retrieval without filters.")
|
481 |
-
try:
|
482 |
-
retriever = database.as_retriever(search_kwargs={"k": k_per_doc * len(selected_docs)})
|
483 |
-
all_filtered_docs = retriever.get_relevant_documents(query)
|
484 |
-
logging.info(f"Retrieved {len(all_filtered_docs)} documents without filtering")
|
485 |
-
except Exception as e:
|
486 |
-
logging.error(f"Error retrieving documents without filtering: {str(e)}")
|
487 |
-
|
488 |
logging.info(f"Total number of filtered documents: {len(all_filtered_docs)}")
|
489 |
|
490 |
if not all_filtered_docs:
|
@@ -494,14 +498,14 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
|
|
494 |
|
495 |
for doc in all_filtered_docs:
|
496 |
try:
|
497 |
-
source = doc.metadata['source']
|
498 |
-
content = doc.page_content
|
499 |
logging.info(f"Document source: {source}")
|
500 |
logging.info(f"Document content preview: {content[:100]}...") # Log first 100 characters of each document
|
501 |
except Exception as e:
|
502 |
logging.error(f"Error processing document: {str(e)}")
|
503 |
|
504 |
-
context_str = "\n".join([doc.page_content
|
505 |
logging.info(f"Total context length: {len(context_str)}")
|
506 |
|
507 |
if model == "@cf/meta/llama-3.1-8b-instruct":
|
|
|
460 |
yield "No documents available. Please upload PDF documents to answer questions."
|
461 |
return
|
462 |
|
463 |
+
# Log the total number of documents in the database
|
464 |
+
doc_count = len(database.docstore._dict)
|
465 |
+
logging.info(f"Total documents in the database: {doc_count}")
|
466 |
+
|
467 |
+
# Print out some sample documents
|
468 |
+
sample_docs = list(database.docstore._dict.values())[:5] # Get first 5 documents
|
469 |
+
for i, doc in enumerate(sample_docs):
|
470 |
+
logging.info(f"Sample document {i}:")
|
471 |
+
logging.info(f" Type: {type(doc)}")
|
472 |
+
logging.info(f" Attributes: {dir(doc)}")
|
473 |
+
if hasattr(doc, 'metadata'):
|
474 |
+
logging.info(f" Metadata: {doc.metadata}")
|
475 |
+
if hasattr(doc, 'page_content'):
|
476 |
+
logging.info(f" Content preview: {doc.page_content[:100]}...")
|
477 |
+
|
478 |
all_filtered_docs = []
|
479 |
+
k_per_doc = max(20, 50 // len(selected_docs)) # Increased k_per_doc
|
480 |
|
|
|
481 |
for doc_name in selected_docs:
|
482 |
logging.info(f"Retrieving documents for: {doc_name}")
|
483 |
try:
|
484 |
+
doc_filter = lambda doc: doc.metadata['source'] == doc_name
|
485 |
doc_retriever = database.as_retriever(search_kwargs={"k": k_per_doc, "filter": doc_filter})
|
486 |
relevant_docs = doc_retriever.get_relevant_documents(query)
|
487 |
all_filtered_docs.extend(relevant_docs)
|
|
|
489 |
except Exception as e:
|
490 |
logging.error(f"Error retrieving documents for {doc_name}: {str(e)}")
|
491 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
492 |
logging.info(f"Total number of filtered documents: {len(all_filtered_docs)}")
|
493 |
|
494 |
if not all_filtered_docs:
|
|
|
498 |
|
499 |
for doc in all_filtered_docs:
|
500 |
try:
|
501 |
+
source = doc.metadata['source']
|
502 |
+
content = doc.page_content
|
503 |
logging.info(f"Document source: {source}")
|
504 |
logging.info(f"Document content preview: {content[:100]}...") # Log first 100 characters of each document
|
505 |
except Exception as e:
|
506 |
logging.error(f"Error processing document: {str(e)}")
|
507 |
|
508 |
+
context_str = "\n".join([doc.page_content for doc in all_filtered_docs])
|
509 |
logging.info(f"Total context length: {len(context_str)}")
|
510 |
|
511 |
if model == "@cf/meta/llama-3.1-8b-instruct":
|