Shreyas094 committed on
Commit
28c1fd5
·
verified ·
1 Parent(s): 0a9aa92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -16
app.py CHANGED
@@ -460,14 +460,28 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
460
  yield "No documents available. Please upload PDF documents to answer questions."
461
  return
462
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  all_filtered_docs = []
464
- k_per_doc = max(5, 10 // len(selected_docs)) # Increased k_per_doc
465
 
466
- # First, try to retrieve documents with filtering
467
  for doc_name in selected_docs:
468
  logging.info(f"Retrieving documents for: {doc_name}")
469
  try:
470
- doc_filter = lambda doc: isinstance(doc, dict) and doc.get('metadata', {}).get('source') == doc_name
471
  doc_retriever = database.as_retriever(search_kwargs={"k": k_per_doc, "filter": doc_filter})
472
  relevant_docs = doc_retriever.get_relevant_documents(query)
473
  all_filtered_docs.extend(relevant_docs)
@@ -475,16 +489,6 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
475
  except Exception as e:
476
  logging.error(f"Error retrieving documents for {doc_name}: {str(e)}")
477
 
478
- # If no documents are found, try retrieving without filtering
479
- if not all_filtered_docs:
480
- logging.warning("No documents found with filtering. Attempting retrieval without filters.")
481
- try:
482
- retriever = database.as_retriever(search_kwargs={"k": k_per_doc * len(selected_docs)})
483
- all_filtered_docs = retriever.get_relevant_documents(query)
484
- logging.info(f"Retrieved {len(all_filtered_docs)} documents without filtering")
485
- except Exception as e:
486
- logging.error(f"Error retrieving documents without filtering: {str(e)}")
487
-
488
  logging.info(f"Total number of filtered documents: {len(all_filtered_docs)}")
489
 
490
  if not all_filtered_docs:
@@ -494,14 +498,14 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
494
 
495
  for doc in all_filtered_docs:
496
  try:
497
- source = doc.metadata['source'] if hasattr(doc, 'metadata') else doc.get('metadata', {}).get('source', 'Unknown')
498
- content = doc.page_content if hasattr(doc, 'page_content') else doc.get('page_content', '')
499
  logging.info(f"Document source: {source}")
500
  logging.info(f"Document content preview: {content[:100]}...") # Log first 100 characters of each document
501
  except Exception as e:
502
  logging.error(f"Error processing document: {str(e)}")
503
 
504
- context_str = "\n".join([doc.page_content if hasattr(doc, 'page_content') else doc.get('page_content', '') for doc in all_filtered_docs])
505
  logging.info(f"Total context length: {len(context_str)}")
506
 
507
  if model == "@cf/meta/llama-3.1-8b-instruct":
 
460
  yield "No documents available. Please upload PDF documents to answer questions."
461
  return
462
 
463
+ # Log the total number of documents in the database
464
+ doc_count = len(database.docstore._dict)
465
+ logging.info(f"Total documents in the database: {doc_count}")
466
+
467
+ # Print out some sample documents
468
+ sample_docs = list(database.docstore._dict.values())[:5] # Get first 5 documents
469
+ for i, doc in enumerate(sample_docs):
470
+ logging.info(f"Sample document {i}:")
471
+ logging.info(f" Type: {type(doc)}")
472
+ logging.info(f" Attributes: {dir(doc)}")
473
+ if hasattr(doc, 'metadata'):
474
+ logging.info(f" Metadata: {doc.metadata}")
475
+ if hasattr(doc, 'page_content'):
476
+ logging.info(f" Content preview: {doc.page_content[:100]}...")
477
+
478
  all_filtered_docs = []
479
+ k_per_doc = max(20, 50 // len(selected_docs)) # Increased k_per_doc
480
 
 
481
  for doc_name in selected_docs:
482
  logging.info(f"Retrieving documents for: {doc_name}")
483
  try:
484
+ doc_filter = lambda doc: doc.metadata['source'] == doc_name
485
  doc_retriever = database.as_retriever(search_kwargs={"k": k_per_doc, "filter": doc_filter})
486
  relevant_docs = doc_retriever.get_relevant_documents(query)
487
  all_filtered_docs.extend(relevant_docs)
 
489
  except Exception as e:
490
  logging.error(f"Error retrieving documents for {doc_name}: {str(e)}")
491
 
 
 
 
 
 
 
 
 
 
 
492
  logging.info(f"Total number of filtered documents: {len(all_filtered_docs)}")
493
 
494
  if not all_filtered_docs:
 
498
 
499
  for doc in all_filtered_docs:
500
  try:
501
+ source = doc.metadata['source']
502
+ content = doc.page_content
503
  logging.info(f"Document source: {source}")
504
  logging.info(f"Document content preview: {content[:100]}...") # Log first 100 characters of each document
505
  except Exception as e:
506
  logging.error(f"Error processing document: {str(e)}")
507
 
508
+ context_str = "\n".join([doc.page_content for doc in all_filtered_docs])
509
  logging.info(f"Total context length: {len(context_str)}")
510
 
511
  if model == "@cf/meta/llama-3.1-8b-instruct":