Shreyas094 committed on
Commit
b0cb50b
·
verified ·
1 Parent(s): 28c1fd5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -40
app.py CHANGED
@@ -460,52 +460,25 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
460
  yield "No documents available. Please upload PDF documents to answer questions."
461
  return
462
 
463
- # Log the total number of documents in the database
464
- doc_count = len(database.docstore._dict)
465
- logging.info(f"Total documents in the database: {doc_count}")
466
-
467
- # Print out some sample documents
468
- sample_docs = list(database.docstore._dict.values())[:5] # Get first 5 documents
469
- for i, doc in enumerate(sample_docs):
470
- logging.info(f"Sample document {i}:")
471
- logging.info(f" Type: {type(doc)}")
472
- logging.info(f" Attributes: {dir(doc)}")
473
- if hasattr(doc, 'metadata'):
474
- logging.info(f" Metadata: {doc.metadata}")
475
- if hasattr(doc, 'page_content'):
476
- logging.info(f" Content preview: {doc.page_content[:100]}...")
477
-
478
- all_filtered_docs = []
479
- k_per_doc = max(20, 50 // len(selected_docs)) # Increased k_per_doc
480
-
481
- for doc_name in selected_docs:
482
- logging.info(f"Retrieving documents for: {doc_name}")
483
- try:
484
- doc_filter = lambda doc: doc.metadata['source'] == doc_name
485
- doc_retriever = database.as_retriever(search_kwargs={"k": k_per_doc, "filter": doc_filter})
486
- relevant_docs = doc_retriever.get_relevant_documents(query)
487
- all_filtered_docs.extend(relevant_docs)
488
- logging.info(f"Retrieved {len(relevant_docs)} documents for {doc_name}")
489
- except Exception as e:
490
- logging.error(f"Error retrieving documents for {doc_name}: {str(e)}")
491
-
492
- logging.info(f"Total number of filtered documents: {len(all_filtered_docs)}")
493
 
494
- if not all_filtered_docs:
 
 
 
 
495
  logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
496
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
497
  return
498
 
499
- for doc in all_filtered_docs:
500
- try:
501
- source = doc.metadata['source']
502
- content = doc.page_content
503
- logging.info(f"Document source: {source}")
504
- logging.info(f"Document content preview: {content[:100]}...") # Log first 100 characters of each document
505
- except Exception as e:
506
- logging.error(f"Error processing document: {str(e)}")
507
 
508
- context_str = "\n".join([doc.page_content for doc in all_filtered_docs])
509
  logging.info(f"Total context length: {len(context_str)}")
510
 
511
  if model == "@cf/meta/llama-3.1-8b-instruct":
 
460
  yield "No documents available. Please upload PDF documents to answer questions."
461
  return
462
 
463
+ retriever = database.as_retriever(search_kwargs={"k": 10})
464
+ logging.info(f"Retrieving relevant documents for query: {query}")
465
+ relevant_docs = retriever.get_relevant_documents(query)
466
+ logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
 
468
+ # Filter relevant_docs based on selected documents
469
+ filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
470
+ logging.info(f"Number of filtered documents: {len(filtered_docs)}")
471
+
472
+ if not filtered_docs:
473
  logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
474
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
475
  return
476
 
477
+ for doc in filtered_docs:
478
+ logging.info(f"Document source: {doc.metadata['source']}")
479
+ logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
 
 
 
 
 
480
 
481
+ context_str = "\n".join([doc.page_content for doc in filtered_docs])
482
  logging.info(f"Total context length: {len(context_str)}")
483
 
484
  if model == "@cf/meta/llama-3.1-8b-instruct":