Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -372,12 +372,42 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
|
|
| 372 |
|
| 373 |
logging.basicConfig(level=logging.DEBUG)
|
| 374 |
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
|
| 382 |
def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
|
| 383 |
headers = {
|
|
@@ -465,9 +495,12 @@ After writing the document, please provide a list of sources used in your respon
|
|
| 465 |
main_content += chunk
|
| 466 |
yield main_content, "" # Yield partial main content without sources
|
| 467 |
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
|
|
|
|
|
|
|
|
|
| 471 |
|
| 472 |
def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
|
| 473 |
logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
|
|
@@ -557,43 +590,6 @@ css = """
|
|
| 557 |
}
|
| 558 |
"""
|
| 559 |
|
| 560 |
-
def get_context_for_summary(selected_docs):
|
| 561 |
-
embed = get_embeddings()
|
| 562 |
-
if os.path.exists("faiss_database"):
|
| 563 |
-
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
| 564 |
-
retriever = database.as_retriever(search_kwargs={"k": 5}) # Retrieve top 5 most relevant chunks
|
| 565 |
-
|
| 566 |
-
# Create a generic query that covers common financial summary topics
|
| 567 |
-
generic_query = "financial performance revenue profit assets liabilities cash flow key metrics highlights"
|
| 568 |
-
|
| 569 |
-
relevant_docs = retriever.get_relevant_documents(generic_query)
|
| 570 |
-
filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
|
| 571 |
-
|
| 572 |
-
if not filtered_docs:
|
| 573 |
-
return "No relevant information found in the selected documents for summary generation."
|
| 574 |
-
|
| 575 |
-
context_str = "\n".join([doc.page_content for doc in filtered_docs])
|
| 576 |
-
return context_str
|
| 577 |
-
else:
|
| 578 |
-
return "No documents available for summary generation."
|
| 579 |
-
|
| 580 |
-
def get_context_for_query(query, selected_docs):
|
| 581 |
-
embed = get_embeddings()
|
| 582 |
-
if os.path.exists("faiss_database"):
|
| 583 |
-
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
| 584 |
-
retriever = database.as_retriever(search_kwargs={"k": 3}) # Retrieve top 3 most relevant chunks
|
| 585 |
-
|
| 586 |
-
relevant_docs = retriever.get_relevant_documents(query)
|
| 587 |
-
filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
|
| 588 |
-
|
| 589 |
-
if not filtered_docs:
|
| 590 |
-
return "No relevant information found in the selected documents for the given query."
|
| 591 |
-
|
| 592 |
-
context_str = "\n".join([doc.page_content for doc in filtered_docs])
|
| 593 |
-
return context_str
|
| 594 |
-
else:
|
| 595 |
-
return "No documents available to answer the query."
|
| 596 |
-
|
| 597 |
uploaded_documents = []
|
| 598 |
|
| 599 |
def display_documents():
|
|
@@ -603,24 +599,23 @@ def display_documents():
|
|
| 603 |
label="Select documents to query or delete"
|
| 604 |
)
|
| 605 |
|
| 606 |
-
# Add this new function
|
| 607 |
-
def refresh_documents():
|
| 608 |
-
global uploaded_documents
|
| 609 |
-
uploaded_documents = load_documents()
|
| 610 |
-
return display_documents()
|
| 611 |
-
|
| 612 |
def initial_conversation():
|
| 613 |
return [
|
| 614 |
(None, "Welcome! I'm your AI assistant for web search and PDF analysis. Here's how you can use me:\n\n"
|
| 615 |
"1. Set the toggle for Web Search and PDF Search from the checkbox in Additional Inputs drop down window\n"
|
| 616 |
"2. Use web search to find information\n"
|
| 617 |
-
"3.
|
| 618 |
-
"4.
|
| 619 |
"To get started, upload some PDFs or ask me a question!")
|
| 620 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
|
| 622 |
# Define the checkbox outside the demo block
|
| 623 |
-
document_selector =
|
| 624 |
|
| 625 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
| 626 |
|
|
@@ -678,7 +673,7 @@ demo = gr.ChatInterface(
|
|
| 678 |
|
| 679 |
# Add file upload functionality
|
| 680 |
with demo:
|
| 681 |
-
gr.Markdown("## Upload PDF Documents")
|
| 682 |
|
| 683 |
with gr.Row():
|
| 684 |
file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
|
|
|
|
| 372 |
|
| 373 |
logging.basicConfig(level=logging.DEBUG)
|
| 374 |
|
| 375 |
+
def get_context_for_summary(selected_docs):
    """Build the context string used to summarize the selected documents.

    Queries the local FAISS index stored at ``faiss_database`` with a fixed,
    finance-oriented query, keeps only the retrieved chunks whose ``source``
    metadata is one of ``selected_docs``, and joins their text with newlines.

    Args:
        selected_docs: Collection of document source identifiers to keep.

    Returns:
        The newline-joined page content of the matching chunks, or a
        human-readable message when the index does not exist or when no
        retrieved chunk belongs to the selected documents.
    """
    # Check for the index first so a missing database never pays for (or
    # fails on) loading the embedding model.
    if not os.path.exists("faiss_database"):
        return "No documents available for summary generation."

    # With nothing selected no chunk can match; skip the retrieval entirely.
    # This also keeps a ``None`` selection from raising in the ``in`` test.
    if not selected_docs:
        return "No relevant information found in the selected documents for summary generation."

    embed = get_embeddings()
    # NOTE(review): allow_dangerous_deserialization=True lets the loader
    # unpickle the stored index. That is only safe if the ``faiss_database``
    # directory is written exclusively by this application - confirm.
    database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
    retriever = database.as_retriever(search_kwargs={"k": 5})  # Retrieve top 5 most relevant chunks

    # Create a generic query that covers common financial summary topics
    generic_query = "financial performance revenue profit assets liabilities cash flow key metrics highlights"
    relevant_docs = retriever.get_relevant_documents(generic_query)

    # NOTE(review): the selection filter runs after the top-k cut, so chunks
    # from selected documents are lost when other documents rank higher.
    # Chunks without a "source" entry are skipped instead of raising KeyError.
    filtered_docs = [
        doc
        for doc in relevant_docs
        if "source" in doc.metadata and doc.metadata["source"] in selected_docs
    ]

    if not filtered_docs:
        return "No relevant information found in the selected documents for summary generation."

    return "\n".join(doc.page_content for doc in filtered_docs)
|
| 394 |
+
|
| 395 |
+
def get_context_for_query(query, selected_docs):
    """Build the context string used to answer ``query`` from selected documents.

    Queries the local FAISS index stored at ``faiss_database`` with ``query``,
    keeps only the retrieved chunks whose ``source`` metadata is one of
    ``selected_docs``, and joins their text with newlines.

    Args:
        query: The user question passed to the retriever.
        selected_docs: Collection of document source identifiers to keep.

    Returns:
        The newline-joined page content of the matching chunks, or a
        human-readable message when the index does not exist or when no
        retrieved chunk belongs to the selected documents.
    """
    # Check for the index first so a missing database never pays for (or
    # fails on) loading the embedding model.
    if not os.path.exists("faiss_database"):
        return "No documents available to answer the query."

    # With nothing selected no chunk can match; skip the retrieval entirely.
    # This also keeps a ``None`` selection from raising in the ``in`` test.
    if not selected_docs:
        return "No relevant information found in the selected documents for the given query."

    embed = get_embeddings()
    # NOTE(review): allow_dangerous_deserialization=True lets the loader
    # unpickle the stored index. That is only safe if the ``faiss_database``
    # directory is written exclusively by this application - confirm.
    database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
    retriever = database.as_retriever(search_kwargs={"k": 3})  # Retrieve top 3 most relevant chunks

    relevant_docs = retriever.get_relevant_documents(query)

    # NOTE(review): the selection filter runs after the top-k cut, so chunks
    # from selected documents are lost when other documents rank higher.
    # Chunks without a "source" entry are skipped instead of raising KeyError.
    filtered_docs = [
        doc
        for doc in relevant_docs
        if "source" in doc.metadata and doc.metadata["source"] in selected_docs
    ]

    if not filtered_docs:
        return "No relevant information found in the selected documents for the given query."

    return "\n".join(doc.page_content for doc in filtered_docs)
|
| 411 |
|
| 412 |
def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
|
| 413 |
headers = {
|
|
|
|
| 495 |
main_content += chunk
|
| 496 |
yield main_content, "" # Yield partial main content without sources
|
| 497 |
|
| 498 |
+
# Summary instruction text keyed by company category.
INSTRUCTION_PROMPTS = {
    "Asset Managers": (
        "Summarize the key financial metrics, assets under management, "
        "and performance highlights for this asset management company."
    ),
    "Consumer Finance Companies": (
        "Provide a summary of the company's loan portfolio, interest income, "
        "credit quality, and key operational metrics."
    ),
    "Mortgage REITs": (
        "Summarize the REIT's mortgage-backed securities portfolio, "
        "net interest income, book value per share, and dividend yield."
    ),
    # Add more instruction prompts as needed
}
|
| 504 |
|
| 505 |
def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
|
| 506 |
logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
|
|
|
|
| 590 |
}
|
| 591 |
"""
|
| 592 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
uploaded_documents = []
|
| 594 |
|
| 595 |
def display_documents():
|
|
|
|
| 599 |
label="Select documents to query or delete"
|
| 600 |
)
|
| 601 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
def initial_conversation():
    """Return the initial conversation: a single welcome message.

    Returns:
        A one-element list holding a ``(None, message)`` tuple, where
        ``message`` is the welcome text followed by numbered usage steps and
        a closing prompt.
    """
    intro = "Welcome! I'm your AI assistant for web search and PDF analysis. Here's how you can use me:\n\n"
    steps = [
        "1. Set the toggle for Web Search and PDF Search from the checkbox in Additional Inputs drop down window",
        "2. Use web search to find information",
        "3. Upload the documents and ask questions about uploaded PDF documents by selecting your respective document",
        "4. For any queries feel free to reach out @[email protected] or discord - shreyas094",
    ]
    closing = "To get started, upload some PDFs or ask me a question!"

    # Steps are newline-separated; a blank line sets the closing prompt apart.
    welcome_text = intro + "\n".join(steps) + "\n\n" + closing
    return [(None, welcome_text)]
|
| 611 |
+
# Reload uploaded_documents and return the updated display_documents() result.
|
| 612 |
+
def refresh_documents():
    """Reload the document list and return the refreshed display.

    Rebinds the module-level ``uploaded_documents`` to the result of
    ``load_documents()`` and then returns whatever ``display_documents()``
    produces.
    """
    global uploaded_documents
    uploaded_documents = load_documents()
    refreshed_view = display_documents()
    return refreshed_view
|
| 616 |
|
| 617 |
# Define the checkbox outside the demo block
|
| 618 |
+
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
| 619 |
|
| 620 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
| 621 |
|
|
|
|
| 673 |
|
| 674 |
# Add file upload functionality
|
| 675 |
with demo:
|
| 676 |
+
gr.Markdown("## Upload and Manage PDF Documents")
|
| 677 |
|
| 678 |
with gr.Row():
|
| 679 |
file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
|