Update app.py

app.py CHANGED
@@ -1,16 +1,22 @@
 #!/usr/bin/env python
 import os
 import shutil
+import tempfile
 import json
 import torch
 import re
 import requests
 import transformers
 import chardet
+import deepeval
+import difflib
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.models.llama.configuration_llama import LlamaConfig
 from huggingface_hub import hf_hub_download
+from typing import List, Dict, Any
 import gradio as gr
+from pathlib import Path
+
 
 # Solve permission issues
 os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
@@ -24,6 +30,44 @@ os.environ["GRADIO_FLAGGING_DIR"] = "/tmp/flagged"
 os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/sentence_transformers"
 os.environ["HF_HUB_CACHE"] = "/tmp/huggingface/hf_cache"
 os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "60"
+os.environ["LANGCHAIN_TRACING_V2"] = "true"
+os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
+os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")
+
+# Set environment variables so AutoGen can write to a temp directory
+os.environ["AUTOGEN_WORKSPACE"] = "/tmp/autogen_workspace"
+os.makedirs("/tmp/autogen_workspace", exist_ok=True)
+os.chmod("/tmp/autogen_workspace", 0o777)  # make sure the directory is writable
+
+# OpenAI API environment variables
+os.environ["OPENAI_API_TYPE"] = "open_ai"  # if you are using the OpenAI API
+
+
+
+# ✅ Set up a safe temp area
+os.environ["DEEPEVAL_TELEMETRY_OPT_OUT"] = "YES"
+os.environ["DEEPEVAL_RESULTS_FOLDER"] = "/tmp/deepeval_results"
+os.makedirs("/tmp/deepeval_results", exist_ok=True)
+
+# ✅ Point Python's tempdir base at /tmp (so it never writes to the home directory)
+import tempfile
+tempfile.tempdir = "/tmp"
+# Monkey-patch DeepEval here so the working directory is not changed globally
+original_evaluate = deepeval.evaluate
+
+def patched_evaluate(*args, **kwargs):
+    current_dir = os.getcwd()
+    try:
+        os.chdir("/tmp")
+        return original_evaluate(*args, **kwargs)
+    finally:
+        os.chdir(current_dir)
+
+deepeval.evaluate = patched_evaluate
+
+
+SHOW_EVAL = os.getenv("SHOW_EVAL", "false").lower() == "true"
+
 
 # Load Required Modules
 from langchain.embeddings import HuggingFaceEmbeddings
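The `patched_evaluate` wrapper in the hunk above pins DeepEval's relative-path writes to `/tmp` by swapping the working directory around each call and restoring it in a `finally`. The same guard generalises to any library that writes relative to the current directory; a minimal standalone sketch of the pattern (standard library only; `noisy_tool` is a hypothetical stand-in):

```python
import contextlib
import os

@contextlib.contextmanager
def run_in(path):
    """Temporarily switch the working directory, restoring it even on error."""
    previous = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(previous)

# Usage, mirroring what patched_evaluate does around deepeval.evaluate:
# with run_in("/tmp"):
#     noisy_tool()  # hypothetical call that writes files relative to os.getcwd()
```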
@@ -53,7 +97,26 @@ from crewai.tools import tool
 from geopy.geocoders import Nominatim
 from timezonefinder import TimezoneFinder
 from langchain_experimental.agents import create_pandas_dataframe_agent
-
+from langsmith import traceable
+from deepeval import evaluate
+from deepeval.metrics import AnswerRelevancyMetric
+from deepeval.test_case import LLMTestCase
+# from langgraph.graph import Graph
+from langgraph.graph import StateGraph
+from langchain_core.runnables import RunnableLambda
+from langchain.chains import LLMChain
+from langchain.chains.combine_documents.stuff import StuffDocumentsChain
+from sentence_transformers import SentenceTransformer
+# === AutoGen for multi-intent collaboration ===
+from autogen import AssistantAgent, UserProxyAgent, GroupChat, GroupChatManager
+
+
+
+try:
+    from phoenix.trace.langchain import LangChainInstrumentor
+    LangChainInstrumentor().instrument()
+except Exception as e:
+    print(f"[WARNING] Failed to load Phoenix LangChain trace: {e}")
 
 session_retriever = None
 session_qa_chain = None
@@ -193,7 +256,8 @@ def get_file_path(file):
     else:
         return None
 
-# Original functionalities (Tabs 1-4) functions
+# Original functionalities (Tabs 1-4) functions
+@traceable(name="Biden LLaMA QA")
 def rag_llama_qa(query):
     output = RetrievalQA.from_chain_type(
         llm=llm_local,
@@ -206,9 +270,30 @@ def rag_llama_qa(query):
     idx = lower_text.find("answer:")
     return output[idx + len("answer:"):].strip() if idx != -1 else output
 
+@traceable(name="GPT-4 Document QA")
 def rag_gpt4_qa(query):
-
+    raw_answer = qa_gpt.run(query)
+
+    if SHOW_EVAL:
+        try:
+            top_docs = retriever.get_relevant_documents(query)
+            test_case = LLMTestCase(
+                input=query,
+                actual_output=raw_answer,
+                expected_output=raw_answer,
+                context=[doc.page_content for doc in top_docs[:3]]
+            )
+            metric = AnswerRelevancyMetric(model="gpt-4o-mini")
+            results = evaluate([test_case], [metric])
+            result = results[0]
+            print(f"[DeepEval Tab4] Input: {query}")
+            print(f"[DeepEval Tab4] Passed: {result.passed}, Score: {result.score:.2f}, Reason: {result.reason}")
+        except Exception as e:
+            print(f"[DeepEval Tab4] Evaluation failed: {e}")
+
+    return raw_answer
 
+@traceable(name="Upload Document QA")
 def upload_and_chat(file, query):
     file_path = get_file_path(file)
     if file_path is None:
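Both DeepEval blocks above build an `LLMTestCase` and score it with `AnswerRelevancyMetric`. Note that setting `expected_output=raw_answer` compares the answer to itself, so only relevancy is really exercised, not correctness. A minimal sketch of the same check using the single-metric `measure` API (requires `OPENAI_API_KEY`; result fields can differ between deepeval versions):

```python
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

test_case = LLMTestCase(
    input="What colour is the sky?",
    actual_output="The sky appears blue on a clear day.",
    retrieval_context=["Rayleigh scattering makes the clear daytime sky look blue."],
)

metric = AnswerRelevancyMetric(model="gpt-4o-mini", threshold=0.7)
metric.measure(test_case)  # runs an LLM-judged relevancy evaluation
print(metric.score, metric.reason)
```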
@@ -230,8 +315,26 @@ def upload_and_chat(file, query):
         return_source_documents=False,
         chain_type_kwargs={"prompt": custom_prompt}
     )
-
+    raw_answer = qa_temp.run(query)
+    if SHOW_EVAL:
+        try:
+            test_case = LLMTestCase(
+                input=query,
+                actual_output=raw_answer,
+                expected_output=raw_answer,
+                context=[d.page_content for d in chunks[:3]]
+            )
+            metric = AnswerRelevancyMetric(model="gpt-4o-mini")  # default is GPT-4o
+            results = evaluate([test_case], [metric])
+            result = results[0]
+            print(f"[DeepEval QA] Input: {query}")
+            print(f"[DeepEval QA] Passed: {result.passed}, Score: {result.score:.2f}, Reason: {result.reason}")
+        except Exception as e:
+            print(f"[DeepEval QA] Evaluation failed: {e}")
 
+    return raw_answer
+
+
 initial_prompt = PromptTemplate(
     input_variables=["text"],
     template="""Write a concise and structured summary of the following content. Focus on capturing the main ideas and key details:
@@ -254,6 +357,7 @@ Refine the summary based on the new content below. Add or update information onl
 """
 )
 
+@traceable(name="Document Summarise")
 def document_summarize(file):
     file_path = get_file_path(file)
     if file_path is None:
@@ -453,7 +557,7 @@ def time_tool(query: str) -> str:
         location = "London"
 
     location_key = location.lower()
-    tz_str =
+    tz_str = location_to_timezone(location)
    now = datetime.now(ZoneInfo(tz_str))
 
    # return time or date
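The new `tz_str = location_to_timezone(location)` call resolves a place name to an IANA timezone string, but that helper is defined outside this hunk. One plausible implementation using the `geopy` and `timezonefinder` packages already imported above (the `user_agent` string and the fallback defaults here are assumptions, not the author's code):

```python
from geopy.geocoders import Nominatim
from timezonefinder import TimezoneFinder

_geolocator = Nominatim(user_agent="smart-rag-assistant")  # hypothetical agent name
_tf = TimezoneFinder()

def location_to_timezone(location: str) -> str:
    """Geocode a place name, then map its coordinates to an IANA timezone."""
    place = _geolocator.geocode(location)
    if place is None:
        return "Europe/London"  # assumed fallback for unresolvable names
    return _tf.timezone_at(lat=place.latitude, lng=place.longitude) or "UTC"
```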
@@ -982,6 +1086,49 @@ crew = Crew(
     llm=crew_llm
 )
 
+# test qa
+def build_langgraph_doc_qa_chain(llm, retriever, memory, prompt):
+    def retrieve_step(state):
+        docs = state['retriever'].get_relevant_documents(state['query'])
+        return {"docs": docs, **state}
+
+    def answer_step(state):
+        prompt = state["prompt"]
+        llm = state["llm"]
+        docs = state["docs"]
+
+        llm_chain = LLMChain(llm=llm, prompt=prompt)
+        doc_chain = StuffDocumentsChain(
+            llm_chain=llm_chain,
+            document_variable_name="context"
+        )
+        # Run it exactly once, passing in everything it needs
+        answer = doc_chain.run({
+            "input_documents": docs,
+            "question": state["query"]
+        })
+        return {"answer": answer, **state}
+
+    builder = StateGraph(dict)
+    builder.add_node("Retrieve", retrieve_step)
+    builder.add_node("Answer", answer_step)
+    builder.set_entry_point("Retrieve")
+    builder.add_edge("Retrieve", "Answer")
+    builder.set_finish_point("Answer")
+
+    compiled = builder.compile()
+
+    def run(query):
+        return compiled.invoke({
+            "query": query,
+            "retriever": retriever,
+            "llm": llm,
+            "prompt": prompt
+        })
+
+    return run
+
+@traceable(name="Multi-Agent Chat")
 def multi_agent_chat_advanced(query: str, file=None) -> str:
     global session_retriever, session_qa_chain, csv_dataframe
 
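`build_langgraph_doc_qa_chain` above threads a plain `dict` through a two-node Retrieve then Answer `StateGraph`. A stripped-down sketch of the same wiring with stub steps, so it runs standalone without an LLM or retriever:

```python
from langgraph.graph import StateGraph

def retrieve_step(state: dict) -> dict:
    # Stand-in for retriever.get_relevant_documents(state["query"])
    return {**state, "docs": [f"doc about {state['query']}"]}

def answer_step(state: dict) -> dict:
    # Stand-in for the StuffDocumentsChain call
    return {**state, "answer": f"Answered from {len(state['docs'])} doc(s)"}

builder = StateGraph(dict)
builder.add_node("Retrieve", retrieve_step)
builder.add_node("Answer", answer_step)
builder.set_entry_point("Retrieve")
builder.add_edge("Retrieve", "Answer")
builder.set_finish_point("Answer")
graph = builder.compile()

print(graph.invoke({"query": "LangGraph"}))  # final state carries docs and answer
```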
@@ -1060,7 +1207,28 @@ def multi_agent_chat_advanced(query: str, file=None) -> str:
     # If using QA Chain is appropriate
     if use_file_chain:
         try:
-
+            answer = session_qa_chain.run(query)
+            #session_graph_chain = build_langgraph_doc_qa_chain(llm_gpt4, session_retriever, memory, custom_prompt)
+            #answer = session_graph_chain(query)["answer"]
+
+            # ✅ Only run the DeepEval check for Tab 1 document QA
+            if SHOW_EVAL:
+                try:
+                    test_case = LLMTestCase(
+                        input=query,
+                        actual_output=answer,
+                        expected_output=answer,
+                        context=[d.page_content for d in session_retriever.get_relevant_documents(query)[:3]]
+                    )
+                    metric = AnswerRelevancyMetric(model="gpt-4o-mini")
+                    results = evaluate([test_case], [metric])
+                    result = results[0]
+                    print(f"[DeepEval Tab1] Input: {query}")
+                    print(f"[DeepEval Tab1] Passed: {result.passed}, Score: {result.score:.2f}, Reason: {result.reason}")
+                except Exception as e:
+                    print(f"[DeepEval Tab1] Evaluation failed: {e}")
+
+            return answer
         except Exception as e:
             return f"Document QA Error: {e}"
 
@@ -1084,6 +1252,471 @@ def multi_agent_chat_advanced(query: str, file=None) -> str:
     except Exception as e:
         return f"Multi-Agent Error: {e}"
 
+
+
+# Node functions used by LangGraph (these follow on from the Crew agents)
+# Initialise the embedding model
+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+
+# Embedding-based intent classification (file names supported)
+INTENT_LABELS = {
+    "DocQA": ["document", "file", "paper", "cb", "proposal", "project"],
+    "Summarise": ["summarise", "summary", "abstract", "key points", "overview", "main points"],
+    "General": ["who are you", "tell me something", "what can you do", "fun fact"],
+}
+
+def parse_query(query: str) -> dict:
+    prompt = """Analyze the following query and determine required subtasks. Return a JSON object containing:
+- summarize_files: list of document indices to summarize
+- qa_pairs: list of QA objects [{"question": "question", "doc_indices": [relevant doc indices]}]
+- compare_files: list of document index pairs to compare [[doc1_idx, doc2_idx]]
+- find_relations: boolean, whether to analyze document relationships
+
+For example, query "What are the differences between document A and B, and summarize A" should return:
+{
+    "summarize_files": [0],
+    "qa_pairs": [],
+    "compare_files": [[0, 1]],
+    "find_relations": false
+}
+
+Query: """ + query
+
+    response = llm_gpt4.invoke(prompt)
+    try:
+        return json.loads(response.content)
+    except:
+        return {
+            "summarize_files": [],
+            "qa_pairs": [{"question": query, "doc_indices": [0]}],
+            "compare_files": [],
+            "find_relations": False
+        }
+
+
+def autogen_multi_document_analysis(query: str, docs: list, file_names: list) -> str:
+    try:
+        # Create an absolute-path temp directory and make sure it exists
+        import tempfile
+        import os
+
+        # Create a temporary working directory
+        temp_dir = tempfile.mkdtemp(dir="/tmp")
+        os.environ["OPENAI_CACHE_DIR"] = temp_dir
+
+        # Point AutoGen's working directory at it
+        os.environ["AUTOGEN_CACHE_PATH"] = temp_dir
+        os.environ["AUTOGEN_CACHEDIR"] = temp_dir
+        os.environ["OPENAI_CACHE_PATH"] = temp_dir
+
+        # Force AutoGen to use our temp directory rather than ./.cache
+        import autogen
+        if hasattr(autogen, "set_cache_dir"):
+            autogen.set_cache_dir(temp_dir)
+
+        # Prepare the document context
+        context = "\n\n".join(
+            f"Document {name}:\n{doc[:2000]}..."
+            for name, doc in zip(file_names, docs)
+        )
+
+        # Configure the LLM
+        config_list = [{
+            "model": "gpt-4o-mini",
+            "api_key": openai_api_key
+        }]
+
+        # Base configuration - no cache-related parameters
+        llm_config = {
+            "config_list": config_list,
+            "temperature": 0
+        }
+
+        # Switch into the temp directory before any AutoGen processing
+        original_dir = os.getcwd()
+        os.chdir(temp_dir)
+
+        try:
+            # The AutoGen processing itself
+            user_proxy = UserProxyAgent(
+                name="User",
+                system_message="A user seeking information from multiple documents.",
+                human_input_mode="NEVER",
+                code_execution_config={"use_docker": False},
+                llm_config=llm_config
+            )
+
+
+
+            # Define the document-analysis expert
+            doc_analyzer = AssistantAgent(
+                name="DocumentAnalyzer",
+                system_message="""You are an expert at analyzing and comparing documents. Focus on:
+1. Key similarities and differences
+2. Main themes and topics
+3. Relationships between documents
+4. Evidence-based analysis""",
+                llm_config=llm_config
+            )
+
+            # Define the question-answering expert
+            qa_expert = AssistantAgent(
+                name="QAExpert",
+                system_message="""You are an expert at extracting specific information. Focus on:
+1. Finding relevant details
+2. Answering specific questions
+3. Cross-referencing information
+4. Providing evidence""",
+                llm_config=llm_config
+            )
+
+            # Define the summarisation expert
+            summarizer = AssistantAgent(
+                name="Summarizer",
+                system_message="""You are an expert at summarizing content. Focus on:
+1. Key points and findings
+2. Important relationships
+3. Critical conclusions
+4. Comprehensive overview""",
+                llm_config=llm_config
+            )
+
+            # Create the group chat
+            groupchat = GroupChat(
+                agents=[user_proxy, doc_analyzer, qa_expert, summarizer],
+                messages=[],
+                max_round=5
+            )
+
+            # Create the manager
+            manager = GroupChatManager(
+                groupchat=groupchat,
+                llm_config=llm_config
+            )
+
+            # Prepare the task prompt
+            task_prompt = f"""Analyze these documents and answer the query:
+
+Query: {query}
+
+Documents Context:
+{context}
+
+Requirements:
+1. Provide a direct and clear answer
+2. Support all claims with evidence from the documents
+3. Consider relationships between all documents
+4. If comparing, analyze all relevant aspects
+5. If summarizing, cover all important points
+6. If looking for specific content, search thoroughly
+7. If analyzing relationships, consider all connections
+
+Please provide a comprehensive and well-structured answer."""
+
+            # Run the group discussion
+            user_proxy.initiate_chat(manager, message=task_prompt)
+            return user_proxy.last_message()["content"]
+        finally:
+            # Switch back to the original directory when done
+            os.chdir(original_dir)
+
+        return result
+
+    except Exception as e:
+        print(f"ERROR in AutoGen processing: {str(e)}")
+        return f"Error analyzing documents: {str(e)}"
+
+
+
+
+
+
+# === AutoGen multi-agent collaboration logic ===
+
+
+def detect_intent_embedding(query, file_names=[]):
+    query_emb = embedding_model.encode(query, normalize_embeddings=True)
+    best_label = None
+    best_score = -1
+    all_phrases = INTENT_LABELS.copy()
+    if file_names:
+        all_phrases["DocQA"] += [name.lower() for name in file_names]
+    for label, examples in all_phrases.items():
+        for example in examples:
+            example_emb = embedding_model.encode(example, normalize_embeddings=True)
+            score = float(query_emb @ example_emb.T)
+            if score > best_score:
+                best_score = score
+                best_label = label
+    return best_label if best_label else "General"
+
+def decide_next(state):
+    query = state.get("query", "")
+    file_names = state.get("file_names", [])
+    label = detect_intent_embedding(query, file_names)
+    return label
+
+# === Define the Task objects ===
+docqa_task = Task(
+    description="Document QA Task: Answer questions based on the uploaded document.",
+    expected_output="Answer from Document QA Agent.",
+    agent=document_qa_agent,
+    input_variables=["query"]
+)
+
+general_task = Task(
+    description="General Chat Task: Answer general queries.",
+    expected_output="Answer from General Agent.",
+    agent=general_agent,
+    input_variables=["query"]
+)
+
+summariser_task = Task(
+    description="Summarisation Task: Summarise document content.",
+    expected_output="Summary output.",
+    agent=summarizer_agent,  # note: this name must match the agent definition (spelled with a z)
+    input_variables=["query"]
+)
+
+search_task = Task(
+    description="Search Task: Retrieve information from the web.",
+    expected_output="Answer from Search Agent.",
+    agent=search_agent,
+    input_variables=["query"]
+)
+
+# === LangGraph node functions ===
+
+def general_run(state):
+    """Answer with the LLM directly instead of the General Agent"""
+    try:
+        prompt = f"""You are a helpful AI assistant. Please answer the following question:
+{state["query"]}
+
+Provide a clear and informative answer."""
+
+        response = llm_gpt4.invoke(prompt)
+        answer = response.content if hasattr(response, 'content') else str(response)
+        return {"answer": answer}
+    except Exception as e:
+        print(f"ERROR in general_run: {str(e)}")
+        return {"answer": "I apologize, but I'm having trouble processing your request."}
+
+
+def docqa_run(state):
+    """Document question answering"""
+    try:
+        # Use the retriever when one is available
+        if "retriever" in state:
+            relevant_docs = state["retriever"].get_relevant_documents(state["query"])
+            context = "\n".join(d.page_content for d in relevant_docs)
+        else:
+            context = "\n".join(state["docs"])
+
+        prompt = f"""Based on the following context, please answer the question:
+Question: {state["query"]}
+
+Context:
+{context[:3000]}
+
+Provide a detailed and accurate answer based on the context."""
+
+        response = llm_gpt4.invoke(prompt)
+        return {"answer": response.content if hasattr(response, 'content') else str(response)}
+    except Exception as e:
+        print(f"ERROR in docqa_run: {str(e)}")
+        return general_run(state)
+
+
+def summariser_run(state):
+    """Document summarisation"""
+    try:
+        context = "\n".join(state["docs"])
+        prompt = f"""Please provide a comprehensive summary of the following document:
+{context[:3000]}
+
+Focus on:
+1. Main topics and key points
+2. Important findings or conclusions
+3. Significant details"""
+
+        response = llm_gpt4.invoke(prompt)
+        return {"summary": response.content if hasattr(response, 'content') else str(response)}
+    except Exception as e:
+        print(f"ERROR in summariser_run: {str(e)}")
+        return {"summary": "Error generating summary."}
+
+# === LangGraph definition ===
+def build_langgraph_pipeline():
+    graph = StateGraph(dict)
+    graph.add_node("Router", lambda state: state)  # the Router only passes the state through
+    graph.add_node("DocQA", docqa_run)
+    graph.add_node("General", general_run)
+    graph.add_node("Summarise", summariser_run)
+    graph.set_entry_point("Router")
+    graph.add_conditional_edges("Router", decide_next, {
+        "DocQA": "DocQA",
+        "General": "General",
+        "Summarise": "Summarise",
+    })
+    graph.set_finish_point("DocQA")
+    graph.set_finish_point("General")
+    graph.set_finish_point("Summarise")
+    return graph.compile()
+
+from tempfile import mkdtemp
+
+def get_file_path_tab6(file):
+    if isinstance(file, str):
+        print("DEBUG: File is a string:", file)
+        if os.path.exists(file):
+            print("DEBUG: File exists:", file)
+            return file
+        else:
+            print("DEBUG: File does not exist:", file)
+            return None
+    elif isinstance(file, dict):
+        print("DEBUG: File is a dict:", file)
+        data = file.get("data")
+        name = file.get("name")
+        print("DEBUG: Data:", data, "Name:", name)
+        if data:
+            if isinstance(data, str) and os.path.exists(data):
+                print("DEBUG: Data is a valid file path:", data)
+                return data
+            else:
+                temp_dir = mkdtemp()
+                file_path = os.path.join(temp_dir, name if name else "uploaded_file")
+                print("DEBUG: Writing data to temporary file:", file_path)
+                with open(file_path, "wb") as f:
+                    if isinstance(data, str):
+                        f.write(data.encode("utf-8"))
+                    else:
+                        f.write(data)
+                if os.path.exists(file_path):
+                    print("DEBUG: Temporary file created:", file_path)
+                    return file_path
+                else:
+                    print("ERROR: Temporary file not created:", file_path)
+                    return None
+        else:
+            print("DEBUG: No data in dict, returning None")
+            return None
+    elif hasattr(file, "save"):
+        print("DEBUG: File has save attribute")
+        temp_dir = mkdtemp()
+        file_path = os.path.join(temp_dir, file.name)
+        file.save(file_path)
+        if os.path.exists(file_path):
+            print("DEBUG: File saved to:", file_path)
+            return file_path
+        else:
+            print("ERROR: File not saved properly:", file_path)
+            return None
+    else:
+        print("DEBUG: File type unrecognized")
+        if hasattr(file, "name"):
+            if os.path.exists(file.name):
+                return file.name
+        return None
+
+def langgraph_tab6_main(query: str, file=None):
+    try:
+        print(f"DEBUG: Starting processing with query: {query}")
+
+        # With no file, answer directly via general_run
+        if not file:
+            return general_run({"query": query})["answer"]
+
+        # Normalise to a list of files
+        files = file if isinstance(file, list) else [file]
+        all_docs = []
+        file_names = []
+        docs_by_file = []
+
+        # Process the uploaded files
+        for f in files:
+            try:
+                path = get_file_path_tab6(f)
+                if not path:
+                    continue
+
+                file_names.append(os.path.basename(path))
+
+                # Pick a loader based on the file type
+                if path.lower().endswith('.pdf'):
+                    loader = PyPDFLoader(path)
+                elif path.lower().endswith('.docx'):
+                    loader = UnstructuredWordDocumentLoader(path)
+                else:
+                    loader = TextLoader(path)
+
+                docs = loader.load()
+                if docs:
+                    text = "\n".join(doc.page_content for doc in docs if hasattr(doc, 'page_content'))
+                    docs_by_file.append(text)
+                    all_docs.extend(docs)
+            except Exception as e:
+                print(f"ERROR processing file: {str(e)}")
+                continue
+
+        if not docs_by_file:
+            return general_run({"query": query})["answer"]
+
+        # Build the retriever
+        try:
+            chunks = RecursiveCharacterTextSplitter(
+                chunk_size=500,
+                chunk_overlap=50
+            ).split_documents(all_docs)
+
+            db = FAISS.from_documents(chunks, embeddings)
+            retriever = db.as_retriever(search_kwargs={"k": 5})
+
+            global session_retriever, session_qa_chain
+            session_retriever = retriever
+            session_qa_chain = ConversationalRetrievalChain.from_llm(
+                llm=llm_gpt4,
+                retriever=retriever,
+                memory=ConversationBufferMemory(
+                    memory_key="chat_history",
+                    return_messages=True
+                ),
+            )
+        except Exception as e:
+            print(f"ERROR setting up retriever: {str(e)}")
+            retriever = None
+
+        # Detect a multi-document query
+        # Detect a multi-document or otherwise complex query
+        if len(docs_by_file) > 1 or "compare" in query.lower() or "relation" in query.lower():
+            return autogen_multi_document_analysis(query, docs_by_file, file_names)
+
+        # Use LangGraph for single-document queries
+        state = {
+            "query": query,
+            "file_names": file_names,
+            "docs": docs_by_file,
+            "retriever": retriever
+        }
+
+        # Route by the detected query intent
+        intent = detect_intent_embedding(query, file_names)
+        if intent == "Summarise":
+            return summariser_run(state)["summary"]
+        elif intent == "DocQA":
+            return docqa_run(state)["answer"]
+        else:
+            return general_run(state)["answer"]
+
+    except Exception as e:
+        print(f"ERROR in main function: {str(e)}")
+        return f"I apologize, but I encountered an error: {str(e)}"
+
+
+
+
 # Gradio Interface Settings
 demo_description = """
 **Context**:
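`detect_intent_embedding` above routes by nearest example phrase: with `normalize_embeddings=True` the dot product `query_emb @ example_emb.T` is cosine similarity, and the best-scoring label wins. Re-encoding every example phrase on each call is the slow part; a sketch that precomputes the phrase matrix once and scores with a single matmul (same model, illustrative labels):

```python
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

INTENT_LABELS = {
    "DocQA": ["document", "file", "proposal"],
    "Summarise": ["summarise", "summary", "key points"],
    "General": ["who are you", "fun fact"],
}

# Encode all example phrases once, keeping a parallel list of labels.
labels = [lab for lab, phrases in INTENT_LABELS.items() for _ in phrases]
phrases = [p for ps in INTENT_LABELS.values() for p in ps]
phrase_mat = model.encode(phrases, normalize_embeddings=True)

def detect_intent(query: str) -> str:
    q = model.encode(query, normalize_embeddings=True)
    scores = phrase_mat @ q  # cosine similarities in one matmul
    return labels[int(np.argmax(scores))]

print(detect_intent("give me the key points of this file"))  # e.g. "Summarise"
```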
@@ -1167,14 +1800,52 @@ This demo presents a GPT-style Multi-Agent AI Assistant, built with **LangChain,
 
 Feel free to upload a document and ask related questions, or just type a question directly—no file upload required. *Note: CSV file analysis and auto visualisation is coming soon.*
 """
+demo_description6 = """
+**Context**:
+This is a **smart multi-document reasoning assistant**, powered by **LangGraph**, **CrewAI**, and **AutoGen**.
+Upload zero to multiple files and ask anything — the system uses **embedding-based intent detection** to decide whether to summarise, extract, compare, or analyse relationships.
+
+For complex multi-file tasks, it triggers a **collaborative AutoGen team** to deeply reason across documents and generate contextual, evidence-based answers.
+
+**Supported Features**:
+- 📄 Multi-document support (PDF, DOCX, TXT)
+- 🔍 Embedding-based intent detection and semantic routing
+- 🤖 Agents: Summariser, QA Agent, General Agent, Search Agent
+- 🔀 Orchestrated by LangGraph + AutoGen (fallbacks + task handoff)
+- 🧠 AutoGen multi-agent collaboration for cross-file reasoning
+- 🌐 Online search fallback when none of the other agents can handle the task
+
+**Sample Questions**:
+1. Who are you? | What is GPT4? *(→ General Chat Agent)*
+2. Summarise the document/file/your_doc_name. *(→ Summarisation Agent)*
+3. What is LangChain used for? | What are the latest trends in AI startups in 2025? | Tell me the most recent breakthrough in quantum computing. *(→ Online Rag Agent)*
+4. What's the title in the document? | What are the key ideas mentioned in this file? *(→ RAG QA Agent)*
+5. Compare the proposals in DocA and DocB. | Summarise all files. | Is DocA one of the projects in DocB or DocC? | Which argument is stronger across these files? | Do these documents mention similar policies? | What's the difference between the files? *(→ AutoGen)*
+6. What is LangChain used for? | What are the latest trends in AI startups in 2025? | Tell me the most recent breakthrough in quantum computing. *(→ Online Rag Agent)*
+
+> Built for users who need clear, explainable, and context-aware answers — whether you’re working on documents in law, finance, research, or tech.
+"""
+
+
 
 demo = gr.TabbedInterface(
     interface_list=[
+        gr.Interface(
+            fn=langgraph_tab6_main,
+            inputs=[
+                gr.Textbox(label="Ask anything"),
+                gr.File(label="Upload one or more files", file_types=[".pdf", ".txt", ".docx"], file_count="multiple")
+            ],
+            outputs="text",
+            title="Smart Multi-Doc QA (LangGraph + AutoGen)",
+            allow_flagging="never",
+            description=demo_description6
+        ),
         gr.Interface(
             fn=multi_agent_chat_advanced,
             inputs=[
                 gr.Textbox(label="Enter your query"),
-                gr.File(label="Upload file (CSV, PDF, TXT, DOCX)", file_count="single")
+                gr.File(label="Upload file (CSV, PDF, TXT, DOCX)", file_types=[".pdf", ".txt", ".docx"], file_count="single")
             ],
             outputs="text",
             title="Multi-Agent AI Assistant",
@@ -1183,7 +1854,7 @@ demo = gr.TabbedInterface(
         ),
         gr.Interface(
             fn=document_summarize,
-            inputs=[gr.File(label="Upload PDF, TXT, or DOCX")],
+            inputs=[gr.File(label="Upload PDF, TXT, or DOCX", file_types=[".pdf", ".txt", ".docx"])],
             outputs="text",
             title="Document Summarisation",
             allow_flagging="never",
@@ -1191,7 +1862,7 @@ demo = gr.TabbedInterface(
         ),
         gr.Interface(
             fn=upload_and_chat,
-            inputs=[gr.File(label="Upload PDF, TXT, or DOCX"), gr.Textbox(label="Ask a question")],
+            inputs=[gr.File(label="Upload PDF, TXT, or DOCX", file_types=[".pdf", ".txt", ".docx"]), gr.Textbox(label="Ask a question")],
             outputs="text",
             title="Your Docs Q&A (Upload + GPT-4 RAG)",
             allow_flagging="never",
@@ -1213,13 +1884,16 @@ demo = gr.TabbedInterface(
             allow_flagging="never",
             description=demo_description
         ),
+
     ],
     tab_names=[
+        "Multi-Doc QA",
         "Multi-Agent AI Assistant",
         "Document Summarisation",
         "Your Docs Q&A (Upload + GPT-4 RAG)",
         "Biden Q&A (GPT-4 RAG)",
-        "Biden Q&A (LLaMA RAG)"
+        "Biden Q&A (LLaMA RAG)",
+
     ],
     title="Smart RAG + Multi-Agent Assistant (with Web + Document AI)"
 )