ChienChung committed on
Commit
fdf059e
·
verified ·
1 Parent(s): b017e92

Update app.py

Files changed (1)
  1. app.py +684 -10
app.py CHANGED
@@ -1,16 +1,22 @@
1
  #!/usr/bin/env python
2
  import os
3
  import shutil
 
4
  import json
5
  import torch
6
  import re
7
  import requests
8
  import transformers
9
  import chardet
 
 
10
  from transformers import AutoModelForCausalLM, AutoTokenizer
11
  from transformers.models.llama.configuration_llama import LlamaConfig
12
  from huggingface_hub import hf_hub_download
 
13
  import gradio as gr
 
 
14
 
15
  # Solve permission issues
16
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
@@ -24,6 +30,44 @@ os.environ["GRADIO_FLAGGING_DIR"] = "/tmp/flagged"
24
  os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/sentence_transformers"
25
  os.environ["HF_HUB_CACHE"] = "/tmp/huggingface/hf_cache"
26
  os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "60"
27
 
28
  # Load Required Modules
29
  from langchain.embeddings import HuggingFaceEmbeddings
@@ -53,7 +97,26 @@ from crewai.tools import tool
53
  from geopy.geocoders import Nominatim
54
  from timezonefinder import TimezoneFinder
55
  from langchain_experimental.agents import create_pandas_dataframe_agent
56
-
57
 
58
  session_retriever = None
59
  session_qa_chain = None
@@ -193,7 +256,8 @@ def get_file_path(file):
193
  else:
194
  return None
195
 
196
- # Original functionalities (Tabs 1-4) functions
 
197
  def rag_llama_qa(query):
198
  output = RetrievalQA.from_chain_type(
199
  llm=llm_local,
@@ -206,9 +270,30 @@ def rag_llama_qa(query):
206
  idx = lower_text.find("answer:")
207
  return output[idx + len("answer:"):].strip() if idx != -1 else output
208
 
 
209
  def rag_gpt4_qa(query):
210
- return qa_gpt.run(query)
211
 
 
212
  def upload_and_chat(file, query):
213
  file_path = get_file_path(file)
214
  if file_path is None:
@@ -230,8 +315,26 @@ def upload_and_chat(file, query):
230
  return_source_documents=False,
231
  chain_type_kwargs={"prompt": custom_prompt}
232
  )
233
- return qa_temp.run(query)
234
 
 
 
 
235
  initial_prompt = PromptTemplate(
236
  input_variables=["text"],
237
  template="""Write a concise and structured summary of the following content. Focus on capturing the main ideas and key details:
@@ -254,6 +357,7 @@ Refine the summary based on the new content below. Add or update information onl
254
  """
255
  )
256
 
 
257
  def document_summarize(file):
258
  file_path = get_file_path(file)
259
  if file_path is None:
@@ -453,7 +557,7 @@ def time_tool(query: str) -> str:
453
  location = "London"
454
 
455
  location_key = location.lower()
456
- tz_str = zone_map.get(location_key, "Europe/London")
457
  now = datetime.now(ZoneInfo(tz_str))
458
 
459
  # return time or date
@@ -982,6 +1086,49 @@ crew = Crew(
982
  llm=crew_llm
983
  )
984
 
985
  def multi_agent_chat_advanced(query: str, file=None) -> str:
986
  global session_retriever, session_qa_chain, csv_dataframe
987
 
@@ -1060,7 +1207,28 @@ def multi_agent_chat_advanced(query: str, file=None) -> str:
1060
  # If using QA Chain is appropriate
1061
  if use_file_chain:
1062
  try:
1063
- return session_qa_chain.run(query)
1064
  except Exception as e:
1065
  return f"Document QA Error: {e}"
1066
 
@@ -1084,6 +1252,471 @@ def multi_agent_chat_advanced(query: str, file=None) -> str:
1084
  except Exception as e:
1085
  return f"Multi-Agent Error: {e}"
1086
 
1087
  # Gradio Interface Settings
1088
  demo_description = """
1089
  **Context**:
@@ -1167,14 +1800,52 @@ This demo presents a GPT-style Multi-Agent AI Assistant, built with **LangChain,
1167
 
1168
  Feel free to upload a document and ask related questions, or just type a question directly—no file upload required. *Note: CSV file analysis and auto visualisation is coming soon.*
1169
  """
1170
 
1171
  demo = gr.TabbedInterface(
1172
  interface_list=[
  gr.Interface(
1174
  fn=multi_agent_chat_advanced,
1175
  inputs=[
1176
  gr.Textbox(label="Enter your query"),
1177
- gr.File(label="Upload file (CSV, PDF, TXT, DOCX)", file_count="single")
1178
  ],
1179
  outputs="text",
1180
  title="Multi-Agent AI Assistant",
@@ -1183,7 +1854,7 @@ demo = gr.TabbedInterface(
1183
  ),
1184
  gr.Interface(
1185
  fn=document_summarize,
1186
- inputs=[gr.File(label="Upload PDF, TXT, or DOCX")],
1187
  outputs="text",
1188
  title="Document Summarisation",
1189
  allow_flagging="never",
@@ -1191,7 +1862,7 @@ demo = gr.TabbedInterface(
1191
  ),
1192
  gr.Interface(
1193
  fn=upload_and_chat,
1194
- inputs=[gr.File(label="Upload PDF, TXT, or DOCX"), gr.Textbox(label="Ask a question")],
1195
  outputs="text",
1196
  title="Your Docs Q&A (Upload + GPT-4 RAG)",
1197
  allow_flagging="never",
@@ -1213,13 +1884,16 @@ demo = gr.TabbedInterface(
1213
  allow_flagging="never",
1214
  description=demo_description
1215
  ),
 
1216
  ],
1217
  tab_names=[
 
1218
  "Multi-Agent AI Assistant",
1219
  "Document Summarisation",
1220
  "Your Docs Q&A (Upload + GPT-4 RAG)",
1221
  "Biden Q&A (GPT-4 RAG)",
1222
- "Biden Q&A (LLaMA RAG)"
 
1223
  ],
1224
  title="Smart RAG + Multi-Agent Assistant (with Web + Document AI)"
1225
  )
 
1
  #!/usr/bin/env python
2
  import os
3
  import shutil
4
+ import tempfile
5
  import json
6
  import torch
7
  import re
8
  import requests
9
  import transformers
10
  import chardet
11
+ import deepeval
12
+ import difflib
13
  from transformers import AutoModelForCausalLM, AutoTokenizer
14
  from transformers.models.llama.configuration_llama import LlamaConfig
15
  from huggingface_hub import hf_hub_download
16
+ from typing import List, Dict, Any
17
  import gradio as gr
18
+ from pathlib import Path
19
+
20
 
21
  # Solve permission issues
22
  os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
 
30
  os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/sentence_transformers"
31
  os.environ["HF_HUB_CACHE"] = "/tmp/huggingface/hf_cache"
32
  os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "60"
33
+ os.environ["LANGCHAIN_TRACING_V2"] = "true"
34
+ os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY", "")  # default to "" so a missing key does not raise
35
+ os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT", "")
36
+
37
+ # Set environment variables so AutoGen can write to a temporary directory
38
+ os.environ["AUTOGEN_WORKSPACE"] = "/tmp/autogen_workspace"
39
+ os.makedirs("/tmp/autogen_workspace", exist_ok=True)
40
+ os.chmod("/tmp/autogen_workspace", 0o777)  # make sure the directory is writable
41
+
42
+ # Set OpenAI API related environment variables
43
+ os.environ["OPENAI_API_TYPE"] = "open_ai"  # if you are using the OpenAI API
44
+
45
+
46
+
47
+ # ✅ Set up a safe temp area
48
+ os.environ["DEEPEVAL_TELEMETRY_OPT_OUT"] = "YES"
49
+ os.environ["DEEPEVAL_RESULTS_FOLDER"] = "/tmp/deepeval_results"
50
+ os.makedirs("/tmp/deepeval_results", exist_ok=True)
51
+
52
+ # ✅ Point Python's tempdir base at /tmp (so it does not write to the home directory)
53
+ import tempfile
54
+ tempfile.tempdir = "/tmp"
55
+ # Monkey-patch DeepEval here so it does not change the working directory globally
56
+ original_evaluate = deepeval.evaluate
57
+
58
+ def patched_evaluate(*args, **kwargs):
59
+ current_dir = os.getcwd()
60
+ try:
61
+ os.chdir("/tmp")
62
+ return original_evaluate(*args, **kwargs)
63
+ finally:
64
+ os.chdir(current_dir)
65
+
66
+ deepeval.evaluate = patched_evaluate
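# Illustrative sketch of the patched entry point (uses the deepeval classes imported further
# down in this file); evaluate() now runs with /tmp as the working directory, so result files
# land somewhere writable and the caller's cwd is restored afterwards:
#   metric = AnswerRelevancyMetric(model="gpt-4o-mini")
#   case = LLMTestCase(input="q", actual_output="a", expected_output="a", context=["ctx"])
#   deepeval.evaluate([case], [metric])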
67
+
68
+
69
+ SHOW_EVAL = os.getenv("SHOW_EVAL", "false").lower() == "true"
70
+
71
 
72
  # Load Required Modules
73
  from langchain.embeddings import HuggingFaceEmbeddings
 
97
  from geopy.geocoders import Nominatim
98
  from timezonefinder import TimezoneFinder
99
  from langchain_experimental.agents import create_pandas_dataframe_agent
100
+ from langsmith import traceable
101
+ from deepeval import evaluate
102
+ from deepeval.metrics import AnswerRelevancyMetric
103
+ from deepeval.test_case import LLMTestCase
104
+ # from langgraph.graph import Graph
105
+ from langgraph.graph import StateGraph
106
+ from langchain_core.runnables import RunnableLambda
107
+ from langchain.chains import LLMChain
108
+ from langchain.chains.combine_documents.stuff import StuffDocumentsChain
109
+ from sentence_transformers import SentenceTransformer
110
+ # === AutoGen for multi-intent collaboration ===
111
+ from autogen import AssistantAgent, UserProxyAgent, GroupChat, GroupChatManager
112
+
113
+
114
+
115
+ try:
116
+ from phoenix.trace.langchain import LangChainInstrumentor
117
+ LangChainInstrumentor().instrument()
118
+ except Exception as e:
119
+ print(f"[WARNING] Failed to load Phoenix LangChain trace: {e}")
120
 
121
  session_retriever = None
122
  session_qa_chain = None
 
256
  else:
257
  return None
258
 
259
+ # Original functionalities (Tabs 1-4) functions
260
+ @traceable(name="Biden LLaMA QA")
261
  def rag_llama_qa(query):
262
  output = RetrievalQA.from_chain_type(
263
  llm=llm_local,
 
270
  idx = lower_text.find("answer:")
271
  return output[idx + len("answer:"):].strip() if idx != -1 else output
272
 
273
+ @traceable(name="GPT-4 Document QA")
274
  def rag_gpt4_qa(query):
275
+ raw_answer = qa_gpt.run(query)
276
+
277
+ if SHOW_EVAL:
278
+ try:
279
+ top_docs = retriever.get_relevant_documents(query)
280
+ test_case = LLMTestCase(
281
+ input=query,
282
+ actual_output=raw_answer,
283
+ expected_output=raw_answer,
284
+ context=[doc.page_content for doc in top_docs[:3]]
285
+ )
286
+ metric = AnswerRelevancyMetric(model="gpt-4o-mini")
287
+ results = evaluate([test_case], [metric])
288
+ result = results[0]
289
+ print(f"[DeepEval Tab4] Input: {query}")
290
+ print(f"[DeepEval Tab4] Passed: {result.passed}, Score: {result.score:.2f}, Reason: {result.reason}")
291
+ except Exception as e:
292
+ print(f"[DeepEval Tab4] Evaluation failed: {e}")
293
+
294
+ return raw_answer
295
 
296
+ @traceable(name="Upload Document QA")
297
  def upload_and_chat(file, query):
298
  file_path = get_file_path(file)
299
  if file_path is None:
 
315
  return_source_documents=False,
316
  chain_type_kwargs={"prompt": custom_prompt}
317
  )
318
+ raw_answer = qa_temp.run(query)
319
+ if SHOW_EVAL:
320
+ try:
321
+ test_case = LLMTestCase(
322
+ input=query,
323
+ actual_output=raw_answer,
324
+ expected_output=raw_answer,
325
+ context=[d.page_content for d in chunks[:3]]
326
+ )
327
+ metric = AnswerRelevancyMetric(model="gpt-4o-mini") # default is GPT-4o
328
+ results = evaluate([test_case], [metric])
329
+ result = results[0]
330
+ print(f"[DeepEval QA] Input: {query}")
331
+ print(f"[DeepEval QA] Passed: {result.passed}, Score: {result.score:.2f}, Reason: {result.reason}")
332
+ except Exception as e:
333
+ print(f"[DeepEval QA] Evaluation failed: {e}")
334
 
335
+ return raw_answer
336
+
337
+
338
  initial_prompt = PromptTemplate(
339
  input_variables=["text"],
340
  template="""Write a concise and structured summary of the following content. Focus on capturing the main ideas and key details:
 
357
  """
358
  )
359
 
360
+ @traceable(name="Document Summarise")
361
  def document_summarize(file):
362
  file_path = get_file_path(file)
363
  if file_path is None:
 
557
  location = "London"
558
 
559
  location_key = location.lower()
560
+ tz_str = location_to_timezone(location)
561
  now = datetime.now(ZoneInfo(tz_str))
562
 
563
  # return time or date
 
1086
  llm=crew_llm
1087
  )
1088
 
1089
+ # test qa
1090
+ def build_langgraph_doc_qa_chain(llm, retriever, memory, prompt):
1091
+ def retrieve_step(state):
1092
+ docs = state['retriever'].get_relevant_documents(state['query'])
1093
+ return {"docs": docs, **state}
1094
+
1095
+ def answer_step(state):
1096
+ prompt = state["prompt"]
1097
+ llm = state["llm"]
1098
+ docs = state["docs"]
1099
+
1100
+ llm_chain = LLMChain(llm=llm, prompt=prompt)
1101
+ doc_chain = StuffDocumentsChain(
1102
+ llm_chain=llm_chain,
1103
+ document_variable_name="context"
1104
+ )
1105
+ # Run only once, passing in all required parameters
1106
+ answer = doc_chain.run({
1107
+ "input_documents": docs,
1108
+ "question": state["query"]
1109
+ })
1110
+ return {"answer": answer, **state}
1111
+
1112
+ builder = StateGraph(dict)
1113
+ builder.add_node("Retrieve", retrieve_step)
1114
+ builder.add_node("Answer", answer_step)
1115
+ builder.set_entry_point("Retrieve")
1116
+ builder.add_edge("Retrieve", "Answer")
1117
+ builder.set_finish_point("Answer")
1118
+
1119
+ compiled = builder.compile()
1120
+
1121
+ def run(query):
1122
+ return compiled.invoke({
1123
+ "query": query,
1124
+ "retriever": retriever,
1125
+ "llm": llm,
1126
+ "prompt": prompt
1127
+ })
1128
+
1129
+ return run
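# Usage sketch for the chain built above (same names as the commented-out call inside
# multi_agent_chat_advanced below; illustrative only, not wired in by default):
#   doc_qa = build_langgraph_doc_qa_chain(llm_gpt4, session_retriever, memory, custom_prompt)
#   final_state = doc_qa("What are the key findings?")   # runs Retrieve -> Answer
#   answer = final_state["answer"]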
1130
+
1131
+ @traceable(name="Multi-Agent Chat")
1132
  def multi_agent_chat_advanced(query: str, file=None) -> str:
1133
  global session_retriever, session_qa_chain, csv_dataframe
1134
 
 
1207
  # If using QA Chain is appropriate
1208
  if use_file_chain:
1209
  try:
1210
+ answer = session_qa_chain.run(query)
1211
+ #session_graph_chain = build_langgraph_doc_qa_chain(llm_gpt4, session_retriever, memory, custom_prompt)
1212
+ #answer = session_graph_chain(query)["answer"]
1213
+
1214
+ # ✅ DeepEval evaluation is triggered only for Tab 1 document QA
1215
+ if SHOW_EVAL:
1216
+ try:
1217
+ test_case = LLMTestCase(
1218
+ input=query,
1219
+ actual_output=answer,
1220
+ expected_output=answer,
1221
+ context=[d.page_content for d in session_retriever.get_relevant_documents(query)[:3]]
1222
+ )
1223
+ metric = AnswerRelevancyMetric(model="gpt-4o-mini")
1224
+ results = evaluate([test_case], [metric])
1225
+ result = results[0]
1226
+ print(f"[DeepEval Tab1] Input: {query}")
1227
+ print(f"[DeepEval Tab1] Passed: {result.passed}, Score: {result.score:.2f}, Reason: {result.reason}")
1228
+ except Exception as e:
1229
+ print(f"[DeepEval Tab1] Evaluation failed: {e}")
1230
+
1231
+ return answer
1232
  except Exception as e:
1233
  return f"Document QA Error: {e}"
1234
 
 
1252
  except Exception as e:
1253
  return f"Multi-Agent Error: {e}"
1254
 
1255
+
1256
+
1257
+ # Node functions used by LangGraph (these follow on from the Crew agents)
1258
+ # Initialise the embedding model
1259
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
1260
+
1261
+ # Embedding-based intent classification (supports file names)
1262
+ INTENT_LABELS = {
1263
+ "DocQA": ["document", "file", "paper", "cb", "proposal", "project"],
1264
+ "Summarise": ["summarise", "summary", "abstract", "key points", "overview", "main points"],
1265
+ "General": ["who are you", "tell me something", "what can you do", "fun fact"],
1266
+ }
1267
+
1268
+ def parse_query(query: str) -> dict:
1269
+ prompt = """Analyze the following query and determine required subtasks. Return a JSON object containing:
1270
+ - summarize_files: list of document indices to summarize
1271
+ - qa_pairs: list of QA objects [{"question": "question", "doc_indices": [relevant doc indices]}]
1272
+ - compare_files: list of document index pairs to compare [[doc1_idx, doc2_idx]]
1273
+ - find_relations: boolean, whether to analyze document relationships
1274
+
1275
+ For example, query "What are the differences between document A and B, and summarize A" should return:
1276
+ {
1277
+ "summarize_files": [0],
1278
+ "qa_pairs": [],
1279
+ "compare_files": [[0, 1]],
1280
+ "find_relations": false
1281
+ }
1282
+
1283
+ Query: """ + query
1284
+
1285
+ response = llm_gpt4.invoke(prompt)
1286
+ try:
1287
+ return json.loads(response.content)
1288
+ except Exception:
1289
+ return {
1290
+ "summarize_files": [],
1291
+ "qa_pairs": [{"question": query, "doc_indices": [0]}],
1292
+ "compare_files": [],
1293
+ "find_relations": False
1294
+ }
1295
+
1296
+
1297
+ def autogen_multi_document_analysis(query: str, docs: list, file_names: list) -> str:
1298
+ try:
1299
+ # Create an absolute-path temp directory and make sure it exists
1300
+ import tempfile
1301
+ import os
1302
+
1303
+ # Create a temporary working directory
1304
+ temp_dir = tempfile.mkdtemp(dir="/tmp")
1305
+ os.environ["OPENAI_CACHE_DIR"] = temp_dir
1306
+
1307
+ # Set AutoGen's working directory
1308
+ os.environ["AUTOGEN_CACHE_PATH"] = temp_dir
1309
+ os.environ["AUTOGEN_CACHEDIR"] = temp_dir
1310
+ os.environ["OPENAI_CACHE_PATH"] = temp_dir
1311
+
1312
+ # Force AutoGen to use our temp directory instead of ./.cache
1313
+ import autogen
1314
+ if hasattr(autogen, "set_cache_dir"):
1315
+ autogen.set_cache_dir(temp_dir)
1316
+
1317
+ # Prepare the document context
1318
+ context = "\n\n".join(
1319
+ f"Document {name}:\n{doc[:2000]}..."
1320
+ for name, doc in zip(file_names, docs)
1321
+ )
1322
+
1323
+ # Configure the LLM
1324
+ config_list = [{
1325
+ "model": "gpt-4o-mini",
1326
+ "api_key": openai_api_key
1327
+ }]
1328
+
1329
+ # Base configuration - no cache-related parameters
1330
+ llm_config = {
1331
+ "config_list": config_list,
1332
+ "temperature": 0
1333
+ }
1334
+
1335
+ # Switch to the temp directory before running AutoGen
1336
+ original_dir = os.getcwd()
1337
+ os.chdir(temp_dir)
1338
+
1339
+ try:
1340
+ # AutoGen processing code follows
1341
+ user_proxy = UserProxyAgent(
1342
+ name="User",
1343
+ system_message="A user seeking information from multiple documents.",
1344
+ human_input_mode="NEVER",
1345
+ code_execution_config={"use_docker": False},
1346
+ llm_config=llm_config
1347
+ )
1348
+
1349
+
1350
+
1351
+ # Define the document analysis expert
1352
+ doc_analyzer = AssistantAgent(
1353
+ name="DocumentAnalyzer",
1354
+ system_message="""You are an expert at analyzing and comparing documents. Focus on:
1355
+ 1. Key similarities and differences
1356
+ 2. Main themes and topics
1357
+ 3. Relationships between documents
1358
+ 4. Evidence-based analysis""",
1359
+ llm_config=llm_config
1360
+ )
1361
+
1362
+ # Define the question-answering expert
1363
+ qa_expert = AssistantAgent(
1364
+ name="QAExpert",
1365
+ system_message="""You are an expert at extracting specific information. Focus on:
1366
+ 1. Finding relevant details
1367
+ 2. Answering specific questions
1368
+ 3. Cross-referencing information
1369
+ 4. Providing evidence""",
1370
+ llm_config=llm_config
1371
+ )
1372
+
1373
+ # Define the summarisation expert
1374
+ summarizer = AssistantAgent(
1375
+ name="Summarizer",
1376
+ system_message="""You are an expert at summarizing content. Focus on:
1377
+ 1. Key points and findings
1378
+ 2. Important relationships
1379
+ 3. Critical conclusions
1380
+ 4. Comprehensive overview""",
1381
+ llm_config=llm_config
1382
+ )
1383
+
1384
+ # Create the group chat
1385
+ groupchat = GroupChat(
1386
+ agents=[user_proxy, doc_analyzer, qa_expert, summarizer],
1387
+ messages=[],
1388
+ max_round=5
1389
+ )
1390
+
1391
+ # Create the group chat manager
1392
+ manager = GroupChatManager(
1393
+ groupchat=groupchat,
1394
+ llm_config=llm_config
1395
+ )
1396
+
1397
+ # Prepare the task prompt
1398
+ task_prompt = f"""Analyze these documents and answer the query:
1399
+
1400
+ Query: {query}
1401
+
1402
+ Documents Context:
1403
+ {context}
1404
+
1405
+ Requirements:
1406
+ 1. Provide a direct and clear answer
1407
+ 2. Support all claims with evidence from the documents
1408
+ 3. Consider relationships between all documents
1409
+ 4. If comparing, analyze all relevant aspects
1410
+ 5. If summarizing, cover all important points
1411
+ 6. If looking for specific content, search thoroughly
1412
+ 7. If analyzing relationships, consider all connections
1413
+
1414
+ Please provide a comprehensive and well-structured answer."""
1415
+
1416
+ # Run the group discussion
1417
+ user_proxy.initiate_chat(manager, message=task_prompt)
1418
+ return user_proxy.last_message()["content"]
1419
+ finally:
1420
+ # Switch back to the original directory when done
1421
+ os.chdir(original_dir)
1422
+
1424
+
1425
+ except Exception as e:
1426
+ print(f"ERROR in AutoGen processing: {str(e)}")
1427
+ return f"Error analyzing documents: {str(e)}"
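# Usage sketch (mirrors the call made from langgraph_tab6_main below; the document texts and
# file names here are placeholders):
#   answer = autogen_multi_document_analysis(
#       "Compare the proposals in DocA and DocB",
#       docs=[doc_a_text, doc_b_text],
#       file_names=["DocA.pdf", "DocB.pdf"],
#   )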
1428
+
1429
+
1430
+
1431
+
1432
+
1433
+
1434
+ # === AutoGen multi-agent collaboration logic ===
1435
+
1436
+
1437
+ def detect_intent_embedding(query, file_names=[]):
1438
+ query_emb = embedding_model.encode(query, normalize_embeddings=True)
1439
+ best_label = None
1440
+ best_score = -1
1441
+ all_phrases = INTENT_LABELS.copy()
1442
+ if file_names:
1443
+ all_phrases["DocQA"] += [name.lower() for name in file_names]
1444
+ for label, examples in all_phrases.items():
1445
+ for example in examples:
1446
+ example_emb = embedding_model.encode(example, normalize_embeddings=True)
1447
+ score = float(query_emb @ example_emb.T)
1448
+ if score > best_score:
1449
+ best_score = score
1450
+ best_label = label
1451
+ return best_label if best_label else "General"
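# Illustrative routing examples (scores come from all-MiniLM-L6-v2 similarities, so the
# labels shown are indicative rather than guaranteed):
#   detect_intent_embedding("Give me an overview of this paper")               # likely "Summarise"
#   detect_intent_embedding("What does the proposal say?", ["proposal.pdf"])   # likely "DocQA"
#   detect_intent_embedding("Who are you?")                                    # likely "General"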
1452
+
1453
+ def decide_next(state):
1454
+ query = state.get("query", "")
1455
+ file_names = state.get("file_names", [])
1456
+ label = detect_intent_embedding(query, file_names)
1457
+ return label
1458
+
1459
+ # === Define the Task objects ===
1460
+ docqa_task = Task(
1461
+ description="Document QA Task: Answer questions based on the uploaded document.",
1462
+ expected_output="Answer from Document QA Agent.",
1463
+ agent=document_qa_agent,
1464
+ input_variables=["query"]
1465
+ )
1466
+
1467
+ general_task = Task(
1468
+ description="General Chat Task: Answer general queries.",
1469
+ expected_output="Answer from General Agent.",
1470
+ agent=general_agent,
1471
+ input_variables=["query"]
1472
+ )
1473
+
1474
+ summariser_task = Task(
1475
+ description="Summarisation Task: Summarise document content.",
1476
+ expected_output="Summary output.",
1477
+ agent=summarizer_agent,  # note: this name must match the agent definition (spelled with a 'z')
1478
+ input_variables=["query"]
1479
+ )
1480
+
1481
+ search_task = Task(
1482
+ description="Search Task: Retrieve information from the web.",
1483
+ expected_output="Answer from Search Agent.",
1484
+ agent=search_agent,
1485
+ input_variables=["query"]
1486
+ )
1487
+
1488
+ # === LangGraph node functions ===
1489
+
1490
+ def general_run(state):
1491
+ """Answer with the LLM directly instead of the General Agent"""
1492
+ try:
1493
+ prompt = f"""You are a helpful AI assistant. Please answer the following question:
1494
+ {state["query"]}
1495
+
1496
+ Provide a clear and informative answer."""
1497
+
1498
+ response = llm_gpt4.invoke(prompt)
1499
+ answer = response.content if hasattr(response, 'content') else str(response)
1500
+ return {"answer": answer}
1501
+ except Exception as e:
1502
+ print(f"ERROR in general_run: {str(e)}")
1503
+ return {"answer": "I apologize, but I'm having trouble processing your request."}
1504
+
1505
+
1506
+ def docqa_run(state):
1507
+ """Document question answering"""
1508
+ try:
1509
+ # Use the retriever if one is available
1510
+ if "retriever" in state:
1511
+ relevant_docs = state["retriever"].get_relevant_documents(state["query"])
1512
+ context = "\n".join(d.page_content for d in relevant_docs)
1513
+ else:
1514
+ context = "\n".join(state["docs"])
1515
+
1516
+ prompt = f"""Based on the following context, please answer the question:
1517
+ Question: {state["query"]}
1518
+
1519
+ Context:
1520
+ {context[:3000]}
1521
+
1522
+ Provide a detailed and accurate answer based on the context."""
1523
+
1524
+ response = llm_gpt4.invoke(prompt)
1525
+ return {"answer": response.content if hasattr(response, 'content') else str(response)}
1526
+ except Exception as e:
1527
+ print(f"ERROR in docqa_run: {str(e)}")
1528
+ return general_run(state)
1529
+
1530
+
1531
+ def summariser_run(state):
1532
+ """Document summarisation"""
1533
+ try:
1534
+ context = "\n".join(state["docs"])
1535
+ prompt = f"""Please provide a comprehensive summary of the following document:
1536
+ {context[:3000]}
1537
+
1538
+ Focus on:
1539
+ 1. Main topics and key points
1540
+ 2. Important findings or conclusions
1541
+ 3. Significant details"""
1542
+
1543
+ response = llm_gpt4.invoke(prompt)
1544
+ return {"summary": response.content if hasattr(response, 'content') else str(response)}
1545
+ except Exception as e:
1546
+ print(f"ERROR in summariser_run: {str(e)}")
1547
+ return {"summary": "Error generating summary."}
1548
+
1549
+ # === LangGraph definition ===
1550
+ def build_langgraph_pipeline():
1551
+ graph = StateGraph(dict)
1552
+ graph.add_node("Router", lambda state: state)  # the Router node just passes the state through
1553
+ graph.add_node("DocQA", docqa_run)
1554
+ graph.add_node("General", general_run)
1555
+ graph.add_node("Summarise", summariser_run)
1556
+ graph.set_entry_point("Router")
1557
+ graph.add_conditional_edges("Router", decide_next, {
1558
+ "DocQA": "DocQA",
1559
+ "General": "General",
1560
+ "Summarise": "Summarise",
1561
+ })
1562
+ graph.set_finish_point("DocQA")
1563
+ graph.set_finish_point("General")
1564
+ graph.set_finish_point("Summarise")
1565
+ return graph.compile()
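# Usage sketch (state keys follow langgraph_tab6_main below; doc_text is a placeholder for
# loaded file content):
#   pipeline = build_langgraph_pipeline()
#   final_state = pipeline.invoke({"query": "Summarise the file",
#                                  "file_names": ["report.pdf"],
#                                  "docs": [doc_text]})
#   print(final_state.get("summary") or final_state.get("answer"))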
1566
+
1567
+ from tempfile import mkdtemp
1568
+
1569
+ def get_file_path_tab6(file):
1570
+ if isinstance(file, str):
1571
+ print("DEBUG: File is a string:", file)
1572
+ if os.path.exists(file):
1573
+ print("DEBUG: File exists:", file)
1574
+ return file
1575
+ else:
1576
+ print("DEBUG: File does not exist:", file)
1577
+ return None
1578
+ elif isinstance(file, dict):
1579
+ print("DEBUG: File is a dict:", file)
1580
+ data = file.get("data")
1581
+ name = file.get("name")
1582
+ print("DEBUG: Data:", data, "Name:", name)
1583
+ if data:
1584
+ if isinstance(data, str) and os.path.exists(data):
1585
+ print("DEBUG: Data is a valid file path:", data)
1586
+ return data
1587
+ else:
1588
+ temp_dir = mkdtemp()
1589
+ file_path = os.path.join(temp_dir, name if name else "uploaded_file")
1590
+ print("DEBUG: Writing data to temporary file:", file_path)
1591
+ with open(file_path, "wb") as f:
1592
+ if isinstance(data, str):
1593
+ f.write(data.encode("utf-8"))
1594
+ else:
1595
+ f.write(data)
1596
+ if os.path.exists(file_path):
1597
+ print("DEBUG: Temporary file created:", file_path)
1598
+ return file_path
1599
+ else:
1600
+ print("ERROR: Temporary file not created:", file_path)
1601
+ return None
1602
+ else:
1603
+ print("DEBUG: No data in dict, returning None")
1604
+ return None
1605
+ elif hasattr(file, "save"):
1606
+ print("DEBUG: File has save attribute")
1607
+ temp_dir = mkdtemp()
1608
+ file_path = os.path.join(temp_dir, file.name)
1609
+ file.save(file_path)
1610
+ if os.path.exists(file_path):
1611
+ print("DEBUG: File saved to:", file_path)
1612
+ return file_path
1613
+ else:
1614
+ print("ERROR: File not saved properly:", file_path)
1615
+ return None
1616
+ else:
1617
+ print("DEBUG: File type unrecognized")
1618
+ if hasattr(file, "name"):
1619
+ if os.path.exists(file.name):
1620
+ return file.name
1621
+ return None
1622
+
1623
+ def langgraph_tab6_main(query: str, file=None):
1624
+ try:
1625
+ print(f"DEBUG: Starting processing with query: {query}")
1626
+
1627
+ # If no file was provided, fall back to general_run
1628
+ if not file:
1629
+ return general_run({"query": query})["answer"]
1630
+
1631
+ # Handle the list of uploaded files
1632
+ files = file if isinstance(file, list) else [file]
1633
+ all_docs = []
1634
+ file_names = []
1635
+ docs_by_file = []
1636
+
1637
+ # Process the uploaded files
1638
+ for f in files:
1639
+ try:
1640
+ path = get_file_path_tab6(f)
1641
+ if not path:
1642
+ continue
1643
+
1644
+ file_names.append(os.path.basename(path))
1645
+
1646
+ # Choose a loader based on the file type
1647
+ if path.lower().endswith('.pdf'):
1648
+ loader = PyPDFLoader(path)
1649
+ elif path.lower().endswith('.docx'):
1650
+ loader = UnstructuredWordDocumentLoader(path)
1651
+ else:
1652
+ loader = TextLoader(path)
1653
+
1654
+ docs = loader.load()
1655
+ if docs:
1656
+ text = "\n".join(doc.page_content for doc in docs if hasattr(doc, 'page_content'))
1657
+ docs_by_file.append(text)
1658
+ all_docs.extend(docs)
1659
+ except Exception as e:
1660
+ print(f"ERROR processing file: {str(e)}")
1661
+ continue
1662
+
1663
+ if not docs_by_file:
1664
+ return general_run({"query": query})["answer"]
1665
+
1666
+ # Build the retriever
1667
+ try:
1668
+ chunks = RecursiveCharacterTextSplitter(
1669
+ chunk_size=500,
1670
+ chunk_overlap=50
1671
+ ).split_documents(all_docs)
1672
+
1673
+ db = FAISS.from_documents(chunks, embeddings)
1674
+ retriever = db.as_retriever(search_kwargs={"k": 5})
1675
+
1676
+ global session_retriever, session_qa_chain
1677
+ session_retriever = retriever
1678
+ session_qa_chain = ConversationalRetrievalChain.from_llm(
1679
+ llm=llm_gpt4,
1680
+ retriever=retriever,
1681
+ memory=ConversationBufferMemory(
1682
+ memory_key="chat_history",
1683
+ return_messages=True
1684
+ ),
1685
+ )
1686
+ except Exception as e:
1687
+ print(f"ERROR setting up retriever: {str(e)}")
1688
+ retriever = None
1689
+
1690
+ # Detect whether this is a multi-document or complex query
1692
+ if len(docs_by_file) > 1 or "compare" in query.lower() or "relation" in query.lower():
1693
+ return autogen_multi_document_analysis(query, docs_by_file, file_names)
1694
+
1695
+ # Handle single-document queries with LangGraph
1696
+ state = {
1697
+ "query": query,
1698
+ "file_names": file_names,
1699
+ "docs": docs_by_file,
1700
+ "retriever": retriever
1701
+ }
1702
+
1703
+ # Choose a handler based on the detected query intent
1704
+ intent = detect_intent_embedding(query, file_names)
1705
+ if intent == "Summarise":
1706
+ return summariser_run(state)["summary"]
1707
+ elif intent == "DocQA":
1708
+ return docqa_run(state)["answer"]
1709
+ else:
1710
+ return general_run(state)["answer"]
1711
+
1712
+ except Exception as e:
1713
+ print(f"ERROR in main function: {str(e)}")
1714
+ return f"I apologize, but I encountered an error: {str(e)}"
1715
+
1716
+
1717
+
1718
+
1719
+
1720
  # Gradio Interface Settings
1721
  demo_description = """
1722
  **Context**:
 
1800
 
1801
  Feel free to upload a document and ask related questions, or just type a question directly—no file upload required. *Note: CSV file analysis and auto visualisation is coming soon.*
1802
  """
1803
+ demo_description6 = """
1804
+ **Context**:
1805
+ This is a **smart multi-document reasoning assistant**, powered by **LangGraph**, **CrewAI**, and **AutoGen**.
1806
+ Upload zero or more files and ask anything; the system uses **embedding-based intent detection** to decide whether to summarise, extract, compare, or analyse relationships.
1807
+
1808
+ For complex multi-file tasks, it triggers a **collaborative AutoGen team** to deeply reason across documents and generate contextual, evidence-based answers.
1809
+
1810
+ **Supported Features**:
1811
+ - 📄 Multi-document support (PDF, DOCX, TXT)
1812
+ - 🔍 Embedding-based intent detection and semantic routing
1813
+ - 🤖 Agents: Summariser, QA Agent, General Agent, Search Agent
1814
+ - 🔀 Orchestrated by LangGraph + AutoGen (fallbacks + task handoff)
1815
+ - 🧠 AutoGen multi-agent collaboration for cross-file reasoning
1816
+ - 🌐 Online search fallback when no other agent can handle the task
1817
+
1818
+ **Sample Questions**:
1819
+ 1. Who are you? | What is GPT4? *(→ General Chat Agent)*
1820
+ 2. Summarise the document/file/your_doc_name. *(→ Summarisation Agent)*
1821
+ 3. What is LangChain used for? | What are the latest trends in AI startups in 2025? | Tell me the most recent breakthrough in quantum computing. *(→ Online RAG Agent)*
1822
+ 4. What's the title in the document? | What are the key ideas mentioned in this file? *(→ RAG QA Agent)*
1823
+ 5. Compare the proposals in DocA and DocB. | Summarise all files. | Is DocA one of the projects in DocB or DocC? | Which argument is stronger across these files? | Do these documents mention similar policies? | What's the difference between the files? *(→ AutoGen)*
1825
+
1826
+ > Built for users who need clear, explainable, and context-aware answers — whether you’re working on documents in law, finance, research, or tech.
1827
+ """
1828
+
1829
+
1830
 
1831
  demo = gr.TabbedInterface(
1832
  interface_list=[
1833
+ gr.Interface(
1834
+ fn=langgraph_tab6_main,
1835
+ inputs=[
1836
+ gr.Textbox(label="Ask anything"),
1837
+ gr.File(label="Upload one or more files", file_types=[".pdf", ".txt", ".docx"], file_count="multiple")
1838
+ ],
1839
+ outputs="text",
1840
+ title="Smart Multi-Doc QA (LangGraph + AutoGen)",
1841
+ allow_flagging="never",
1842
+ description=demo_description6
1843
+ ),
1844
  gr.Interface(
1845
  fn=multi_agent_chat_advanced,
1846
  inputs=[
1847
  gr.Textbox(label="Enter your query"),
1848
+ gr.File(label="Upload file (CSV, PDF, TXT, DOCX)", file_types=[".pdf", ".txt", ".docx"], file_count="single")
1849
  ],
1850
  outputs="text",
1851
  title="Multi-Agent AI Assistant",
 
1854
  ),
1855
  gr.Interface(
1856
  fn=document_summarize,
1857
+ inputs=[gr.File(label="Upload PDF, TXT, or DOCX", file_types=[".pdf", ".txt", ".docx"])],
1858
  outputs="text",
1859
  title="Document Summarisation",
1860
  allow_flagging="never",
 
1862
  ),
1863
  gr.Interface(
1864
  fn=upload_and_chat,
1865
+ inputs=[gr.File(label="Upload PDF, TXT, or DOCX", file_types=[".pdf", ".txt", ".docx"]), gr.Textbox(label="Ask a question")],
1866
  outputs="text",
1867
  title="Your Docs Q&A (Upload + GPT-4 RAG)",
1868
  allow_flagging="never",
 
1884
  allow_flagging="never",
1885
  description=demo_description
1886
  ),
1887
+
1888
  ],
1889
  tab_names=[
1890
+ "Multi-Doc QA",
1891
  "Multi-Agent AI Assistant",
1892
  "Document Summarisation",
1893
  "Your Docs Q&A (Upload + GPT-4 RAG)",
1894
  "Biden Q&A (GPT-4 RAG)",
1895
+ "Biden Q&A (LLaMA RAG)",
1896
+
1897
  ],
1898
  title="Smart RAG + Multi-Agent Assistant (with Web + Document AI)"
1899
  )