Update app.py
app.py CHANGED
@@ -471,6 +471,75 @@ def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2)
 
     logging.info("Finished generating response for Excel data")
 
+def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1, temperature=0.2):
+    logging.info(f"Getting response from Llama using model: {model}")
+
+    # Initialize the Hugging Face client
+    client = InferenceClient(model, token=huggingface_token)
+
+    if file_type == "excel":
+        # Excel functionality
+        system_instruction = """You are a highly specialized Python programmer with deep expertise in data analysis and visualization using Excel spreadsheets.
+        Your primary goal is to generate accurate and efficient Python code to perform calculations or create visualizations based on the user's requests.
+        Strictly use the data provided to write code that identifies key metrics, trends, and significant details relevant to the query.
+        Do not make assumptions or include any information that is not explicitly supported by the dataset.
+        If the user requests a calculation, provide the appropriate Python code to execute it, and if a visualization is needed, generate code using the matplotlib library to create the chart."""
+
+        # Get the context from selected Excel documents
+        embed = get_embeddings()
+        office_database = FAISS.load_local("office_faiss_database", embed, allow_dangerous_deserialization=True)
+        retriever = office_database.as_retriever(search_kwargs={"k": 10})
+        relevant_docs = retriever.get_relevant_documents(query)
+        context = "\n".join([doc.page_content for doc in relevant_docs if doc.metadata["source"] in selected_docs])
+
+        messages = [
+            {"role": "system", "content": system_instruction},
+            {"role": "user", "content": f"Based on the following data extracted from Excel spreadsheets:\n{context}\n\nPlease provide the Python code needed to execute the following task: '{query}'. Ensure that the code is derived directly from the dataset. If a chart is requested, use the matplotlib library to generate the appropriate visualization."}
+        ]
+
+    elif file_type == "pdf":
+        # PDF functionality
+        embed = get_embeddings()
+        pdf_database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+
+        retriever = pdf_database.as_retriever(search_kwargs={"k": 10})
+        relevant_docs = retriever.get_relevant_documents(query)
+
+        context_str = "\n".join([doc.page_content for doc in relevant_docs if doc.metadata["source"] in selected_docs])
+
+        system_instruction = """You are a highly specialized financial analyst assistant with expertise in analyzing and summarizing financial documents.
+        Your goal is to provide accurate, detailed, and precise summaries based on the context provided.
+        Avoid making assumptions or adding information that is not explicitly supported by the context from the PDF documents."""
+
+        messages = [
+            {"role": "system", "content": system_instruction},
+            {"role": "user", "content": f"Using the following context from the PDF documents:\n{context_str}\n\nPlease generate step-by-step reasoning before arriving at a comprehensive and accurate summary addressing the following question: '{query}'. Ensure your response is strictly based on the provided context, highlighting key metrics, trends, and significant details relevant to the query. Avoid any speculative or unverified information."}
+        ]
+
+    else:
+        raise ValueError("Invalid file type. Use 'excel' or 'pdf'.")
+
+    full_response = ""
+    for _ in range(num_calls):
+        try:
+            # Generate content with streaming enabled
+            for message in client.chat_completion(
+                messages=messages,
+                max_tokens=20000,
+                temperature=temperature,
+                stream=True,
+                top_p=0.8,
+            ):
+                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                    chunk = message.choices[0].delta.content
+                    full_response += chunk
+                    yield full_response  # Yield the accumulated response so far
+        except Exception as e:
+            yield f"An error occurred with the Llama model: {str(e)}. Please try again."
+
+    if not full_response:
+        yield "No response generated from the Llama model."
+
 # Modify the existing respond function to handle both PDF and web search
 def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
     logging.info(f"User Query: {message}")
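The new get_response_from_llama is a generator that re-yields the accumulated text after every streamed delta, which fits incremental chat UIs. As a quick illustration, here is a minimal, hypothetical driver for it; it assumes the surrounding app state (huggingface_token, get_embeddings, the FAISS indexes) is already in place, and the model id and document name are placeholders, not values from this commit:

    # Hypothetical usage sketch, not part of the commit.
    last = ""
    for partial in get_response_from_llama(
        query="Summarize the key revenue trends",
        model="meta-llama/Meta-Llama-3.1-8B-Instruct",  # placeholder model id
        selected_docs=["report.pdf"],                   # placeholder document name
        file_type="pdf",
        num_calls=1,
        temperature=0.2,
    ):
        print(partial[len(last):], end="", flush=True)  # print only the newly added suffix
        last = partial

Because each yield carries the full response so far, a consumer that needs only the new tokens must diff against the previous snapshot, as above.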
@@ -500,7 +569,6 @@ def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
             yield final_summary
         else:
             yield "Unable to generate a response. Please try a different query."
-
     else:
         try:
             embed = get_embeddings()
@@ -548,6 +616,9 @@ def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
             if model == "gemini-1.5-flash":
                 for chunk in get_response_from_gemini(message, model, selected_docs, file_type, num_calls, temperature):
                     yield chunk
+            elif "llama" in model.lower():
+                for chunk in get_response_from_llama(message, model, selected_docs, file_type, num_calls, temperature):
+                    yield chunk
             else:
                 for response in get_response_from_excel(message, model, excel_context, num_calls, temperature):
                     yield response
@@ -561,6 +632,9 @@ def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
             elif model == "@cf/meta/llama-3.1-8b-instruct":
                 for response in get_response_from_cloudflare(prompt="", context=other_context, query=message, num_calls=num_calls, temperature=temperature, search_type="document"):
                     yield response
+            elif "llama" in model.lower():
+                for chunk in get_response_from_llama(message, model, selected_docs, file_type, num_calls, temperature):
+                    yield chunk
             else:
                 for response in get_response_from_pdf(message, model, selected_docs, num_calls, temperature):
                     yield response
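One detail worth noting in both routing hunks: the exact-match branch for "@cf/meta/llama-3.1-8b-instruct" stays ahead of the new generic '"llama" in model.lower()' branch, and that ordering is load-bearing, because the Cloudflare model id itself contains "llama". A small standalone sketch of the dispatch rule (the function name and return labels are illustrative, not names from app.py):

    def pick_backend(model: str) -> str:
        # Illustrative only: mirrors the elif ordering in respond().
        if model == "@cf/meta/llama-3.1-8b-instruct":
            return "cloudflare"           # exact id must be tested first
        if "llama" in model.lower():
            return "huggingface-llama"    # any other Llama model
        return "default"

    assert pick_backend("@cf/meta/llama-3.1-8b-instruct") == "cloudflare"
    assert pick_backend("meta-llama/Meta-Llama-3.1-70B-Instruct") == "huggingface-llama"

Swapping the two elif branches would silently reroute the Cloudflare model through the Hugging Face InferenceClient path.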