MonicaChen0330 committed on
Commit
522baae
·
verified ·
1 Parent(s): fc85120

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -24
app.py CHANGED
@@ -1,9 +1,11 @@
1
  import os
2
  import sys
3
  import math
 
4
  import requests
5
  import gradio as gr
6
  import pandas as pd
 
7
  from datasets import Dataset
8
  from tqdm import tqdm
9
  from ragas import evaluate, SingleTurnSample
@@ -18,7 +20,7 @@ from ragas.metrics import (
18
  # 設定輸出編碼為 UTF-8(解決中文顯示問題)
19
  sys.stdout.reconfigure(encoding="utf-8")
20
 
21
- # 支援從Google Drive下載 Ground Truth
22
  gt_url = os.environ.get("GT_URL")
23
  gt_path = "tender_groundtruth.csv"
24
 
@@ -55,8 +57,58 @@ def log_to_google_sheet(question, answer, contexts, scores):
55
  except Exception as e:
56
  print("寫入 Google Sheet 失敗:", str(e))
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def RAG_evaluation(uploaded_file, user_api_key):
59
  try:
 
 
 
 
 
60
  os.environ["OPENAI_API_KEY"] = user_api_key
61
  print("評估開始")
62
 
@@ -76,10 +128,10 @@ def RAG_evaluation(uploaded_file, user_api_key):
76
  print("未合併題目:", missing["Question"].tolist())
77
  if merged_df.empty:
78
  return pd.DataFrame([{"錯誤訊息": "合併後無資料,請確認題目與 GT 是否對應"}]), None
79
-
80
  llm_wrapper = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini-2024-07-18"))
81
  embedding_wrapper = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-large"))
82
-
83
  batch_size = 10
84
  records = []
85
  for batch_start in tqdm(range(0, len(merged_df), batch_size), desc="RAGAS Batch Evaluating"):
@@ -104,13 +156,18 @@ def RAG_evaluation(uploaded_file, user_api_key):
104
  result = evaluate(
105
  dataset=dataset,
106
  metrics=[
107
- LLMContextPrecisionWithReference(), LLMContextRecall(), ContextEntityRecall(),
108
- NoiseSensitivity(), Faithfulness(), ResponseRelevancy(),
109
- SemanticSimilarity(), FactualCorrectness()
 
 
 
 
 
110
  ],
111
  llm=llm_wrapper,
112
  embeddings=embedding_wrapper,
113
- show_progress=False
114
  )
115
 
116
  result_df = result.to_pandas()
@@ -128,7 +185,7 @@ def RAG_evaluation(uploaded_file, user_api_key):
128
  "Context Entity Recall": getattr(row, "context_entity_recall", None),
129
  # "Noise Sensitivity": getattr(row, "noise_sensitivity_relevant", None)
130
  }
131
-
132
  for key in list(record.keys()):
133
  val = record[key]
134
  if isinstance(val, float) and not math.isfinite(val):
@@ -156,9 +213,11 @@ def RAG_evaluation(uploaded_file, user_api_key):
156
  avg_row["Question"] = "Average"
157
  score_df = pd.concat([score_df, pd.DataFrame([avg_row])], ignore_index=True)
158
 
159
- output_path = "result_output.csv"
 
 
160
  score_df.to_csv(output_path, index=False, encoding="utf-8-sig")
161
- print("評估結果已儲存為 CSV:", output_path)
162
 
163
  return score_df, output_path
164
 
@@ -192,30 +251,45 @@ def check_csv_and_run(file, key):
192
  missing_questions = "\n".join(f"- {q}" for q in invalid_rows["Question"].tolist())
193
  return pd.DataFrame([{"錯誤訊息": f"發現 {len(invalid_rows)} 筆資料中 Answer 或 Context 為空:\n{missing_questions}"}]), None
194
 
 
195
  try:
196
  for i, val in df["Context"].dropna().items():
197
  if not isinstance(eval(val), list):
198
  return pd.DataFrame([{"錯誤訊息": f"第 {i + 1} 筆 Context 欄格式錯誤,請確認其內容應為 list"}]), None
199
  except Exception as e:
200
  return pd.DataFrame([{"錯誤訊息": f"Context 欄格式解析錯誤,請確認其為有效的 list 格式,例如 ['A', 'B']:{str(e)}"}]), None
201
-
202
- except Exception as e:
203
- return pd.DataFrame([{"錯誤訊息": f"發生錯誤:{str(e)}"}]), None
204
-
205
- # 若上傳之待評估檔案無錯誤,執行評估
206
- try:
207
- return RAG_evaluation(file, key)
 
208
  except Exception as e:
209
- return pd.DataFrame([{"錯誤訊息": f"RAG 評估失敗:{str(e)}"}]), None
210
 
211
  # Gradio 介面
212
  with gr.Blocks() as demo:
213
- gr.Markdown("## 📐 RAG系統評估工具")
214
  gr.Markdown("""
215
- ### 📄 使用說明
216
- 請上傳您RAG系統產出的結果檔案(包含 Question, Context, Answer 欄位),並填入您的OpenAI API Key,以評估您的RAG系統。
217
- #### 完整評估需要數小時,無即時回應並不是當機,請耐心等候。
218
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
  file_input = gr.File(label="上傳 Evaluation_Dataset.csv")
221
  api_key_input = gr.Textbox(label="OpenAI API Key", type="password")
@@ -224,13 +298,25 @@ with gr.Blocks() as demo:
224
  result_output = gr.Dataframe(label="評估結果")
225
  download_link = gr.File(label="下載評估結果(CSV)")
226
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  def wrapped_fn(file, key):
228
  return RAG_evaluation(file, key)
229
 
230
  submit_btn.click(
231
  fn=check_csv_and_run,
232
  inputs=[file_input, api_key_input],
233
- outputs=[result_output, download_link]
234
  )
235
 
236
  demo.launch()
 
1
  import os
2
  import sys
3
  import math
4
+ from openai import OpenAI
5
  import requests
6
  import gradio as gr
7
  import pandas as pd
8
+ import concurrent.futures
9
  from datasets import Dataset
10
  from tqdm import tqdm
11
  from ragas import evaluate, SingleTurnSample
 
20
  # 設定輸出編碼為 UTF-8(解決中文顯示問題)
21
  sys.stdout.reconfigure(encoding="utf-8")
22
 
23
+ # Google Drive下載 Ground Truth
24
  gt_url = os.environ.get("GT_URL")
25
  gt_path = "tender_groundtruth.csv"
26
 
 
57
  except Exception as e:
58
  print("寫入 Google Sheet 失敗:", str(e))
59
 
60
def fetch_sheet_content():
    """Load the announcement and FAQ text displayed on the page.

    Reads the CSV found at the location stored in the ``ANNOUNCEMENT_URL``
    environment variable and takes the first row of its ``Announcement`` and
    ``FAQ`` columns. Each column is resolved independently, so an empty or
    missing FAQ cell no longer discards a valid announcement (and vice versa).

    Literal ``\\n`` sequences and real newlines are both converted to
    ``<br>`` so the text renders with line breaks.

    Returns:
        tuple[str, str]: ``(announcement, faq)``. A value falls back to its
        default placeholder when the environment variable is unset, the CSV
        cannot be loaded, the column is absent, the sheet has no rows, or the
        first cell is empty/NaN/blank.
    """
    DEFAULT_ANNOUNCEMENT = "尚無公告"
    DEFAULT_FAQ = "尚無常見問題"

    def first_cell(df, column, default):
        """Return the first row of ``column`` as display text, or ``default``."""
        if column not in df.columns or df.empty:
            return default
        value = df[column].iloc[0]
        # An empty CSV cell is read back as NaN (a float), which has no
        # .strip(); treat it as "nothing to show" instead of raising.
        if pd.isna(value):
            return default
        text = str(value).strip()
        text = text.replace("\\n", "<br>").replace("\n", "<br>")
        return text or default

    # Best-effort by design: whatever goes wrong, fall back to the
    # placeholders rather than raising to the caller.
    try:
        url = os.environ.get("ANNOUNCEMENT_URL")
        if not url:
            print("Warning: 未設定 ANNOUNCEMENT_URL")
            return DEFAULT_ANNOUNCEMENT, DEFAULT_FAQ

        df = pd.read_csv(url)

        return (
            first_cell(df, "Announcement", DEFAULT_ANNOUNCEMENT),
            first_cell(df, "FAQ", DEFAULT_FAQ),
        )

    except Exception as e:
        print("載入 Sheet 錯誤:", e)
        return DEFAULT_ANNOUNCEMENT, DEFAULT_FAQ
83
+
84
+
85
def validate_openai_key(api_key):
    """Check that an OpenAI API key can make a chat-completion request.

    Sends a one-token ``gpt-3.5-turbo`` chat completion with the given key
    and, on failure, classifies the error by the text of the raised
    exception.

    Args:
        api_key: The key entered by the user. ``None``, empty, or
            whitespace-only values are rejected without contacting OpenAI.

    Returns:
        ``None`` when the probe request succeeds. Otherwise a
        ``(pandas.DataFrame, None)`` tuple whose single-row frame holds the
        user-facing message under the ``"錯誤訊息"`` column.
    """

    def failure(message):
        """Build the (error DataFrame, None) pair returned on failure."""
        return pd.DataFrame([{"錯誤訊息": message}]), None

    # Reject blank keys up front. Besides saving a network round trip, this
    # avoids handing ``None`` to the client, which (per the openai library
    # documentation) would fall back to the OPENAI_API_KEY environment
    # variable -- a process-wide value that may hold an earlier key.
    if api_key is None or not str(api_key).strip():
        return failure("請輸入 OpenAI API Key。")

    try:
        # The key is passed through unmodified so this probe exercises the
        # same value the caller goes on to use.
        client = OpenAI(api_key=api_key)
        client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
            max_tokens=1,
        )
        return None
    except Exception as e:
        err_msg = str(e)
        if "Incorrect API key provided" in err_msg:
            return failure(" 您輸入的 OpenAI API Key 有誤,請確認是否貼錯、字數不符或格式異常。")
        if "exceeded your current quota" in err_msg:
            return failure("您的 OpenAI 帳戶額度已用盡,請前往帳戶頁面檢查餘額。")
        if "Rate limit" in err_msg:
            return failure("OpenAI 請求頻率過高,請稍後再試")
        return failure(f"API Key 錯誤:{err_msg}")
104
+
105
  def RAG_evaluation(uploaded_file, user_api_key):
106
  try:
107
+ # 檢查 OpenAI API Key 是否有效
108
+ validation_result = validate_openai_key(user_api_key)
109
+ if validation_result:
110
+ return validation_result
111
+
112
  os.environ["OPENAI_API_KEY"] = user_api_key
113
  print("評估開始")
114
 
 
128
  print("未合併題目:", missing["Question"].tolist())
129
  if merged_df.empty:
130
  return pd.DataFrame([{"錯誤訊息": "合併後無資料,請確認題目與 GT 是否對應"}]), None
131
+
132
  llm_wrapper = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini-2024-07-18"))
133
  embedding_wrapper = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model="text-embedding-3-large"))
134
+
135
  batch_size = 10
136
  records = []
137
  for batch_start in tqdm(range(0, len(merged_df), batch_size), desc="RAGAS Batch Evaluating"):
 
156
  result = evaluate(
157
  dataset=dataset,
158
  metrics=[
159
+ LLMContextPrecisionWithReference(), # context precision
160
+ LLMContextRecall(), # context recall
161
+ ContextEntityRecall(),
162
+ # NoiseSensitivity(),
163
+ Faithfulness(), # faithfulness
164
+ ResponseRelevancy(), # answer relevancy
165
+ SemanticSimilarity(), # semantic similarity
166
+ # FactualCorrectness()
167
  ],
168
  llm=llm_wrapper,
169
  embeddings=embedding_wrapper,
170
+ show_progress=True
171
  )
172
 
173
  result_df = result.to_pandas()
 
185
  "Context Entity Recall": getattr(row, "context_entity_recall", None),
186
  # "Noise Sensitivity": getattr(row, "noise_sensitivity_relevant", None)
187
  }
188
+
189
  for key in list(record.keys()):
190
  val = record[key]
191
  if isinstance(val, float) and not math.isfinite(val):
 
213
  avg_row["Question"] = "Average"
214
  score_df = pd.concat([score_df, pd.DataFrame([avg_row])], ignore_index=True)
215
 
216
+ original_name = os.path.basename(uploaded_file.name)
217
+ filename = os.path.splitext(original_name)[0]
218
+ output_path = f"{filename}_result.csv"
219
  score_df.to_csv(output_path, index=False, encoding="utf-8-sig")
220
+ print("評估結果已儲存:", output_path)
221
 
222
  return score_df, output_path
223
 
 
251
  missing_questions = "\n".join(f"- {q}" for q in invalid_rows["Question"].tolist())
252
  return pd.DataFrame([{"錯誤訊息": f"發現 {len(invalid_rows)} 筆資料中 Answer 或 Context 為空:\n{missing_questions}"}]), None
253
 
254
+ # check eval context
255
  try:
256
  for i, val in df["Context"].dropna().items():
257
  if not isinstance(eval(val), list):
258
  return pd.DataFrame([{"錯誤訊息": f"第 {i + 1} 筆 Context 欄格式錯誤,請確認其內容應為 list"}]), None
259
  except Exception as e:
260
  return pd.DataFrame([{"錯誤訊息": f"Context 欄格式解析錯誤,請確認其為有效的 list 格式,例如 ['A', 'B']:{str(e)}"}]), None
261
+
262
+ # 若上傳之待評估檔案無錯誤,執行評估
263
+ try:
264
+ return RAG_evaluation(file, key)
265
+ # 檢查 OpenAI API Key 是否有效
266
+ except Exception as e:
267
+ error_message = str(e)
268
+ return pd.DataFrame([{"錯誤訊息": f"系統錯誤:{error_message}"}]), None
269
  except Exception as e:
270
+ return pd.DataFrame([{"錯誤訊息": f"評估失敗:{str(e)}"}]), None
271
 
272
  # Gradio 介面
273
  with gr.Blocks() as demo:
 
274
  gr.Markdown("""
275
+ # 📐 RAG系統評估工具 (分流B)
276
+
277
+ ### 📄 使用說明
278
+ - 請上傳您 RAG 系統產出的結果檔案(需包含欄位:Question、Context、Answer),並填入您的 OpenAI API Key,以進行評估。
279
+ - ⏳ 完整評估**通常需耗時 1 小時以上**,若無即時回應,請**耐心等候**,系統並未當機,謝謝您的理解。
280
+
281
+ ### 🚦 分流措施
282
+ 本工具部署於 Hugging Face Public Space,若同時有多位使用者使用,系統會將您的評估請求**排入佇列**。
283
+ 為避免長時間等待,建議您**先僅送出 1 筆資料進行測試**,若進度條顯示之預估**等待時間超過 2 小時(7000 秒以上),可能是其他使用者正在使用**。
284
+
285
+ 本頁為**分流 B**,您可以考慮改用其他分流或稍後再試,感謝您的耐心與配合!
286
+ - 🔁 [分流 A](https://huggingface.co/spaces/KSLab/RAG_Evaluator_A)
287
+ - 🔁 [分流 B](https://huggingface.co/spaces/KSLab/RAG_Evaluator_B)
288
+ - 🔁 [分流 C](https://huggingface.co/spaces/KSLab/RAG_Evaluator_C)
289
+
290
+ ### 📢 系統公告
291
+ """)
292
+ announcement_display = gr.Markdown()
293
 
294
  file_input = gr.File(label="上傳 Evaluation_Dataset.csv")
295
  api_key_input = gr.Textbox(label="OpenAI API Key", type="password")
 
298
  result_output = gr.Dataframe(label="評估結果")
299
  download_link = gr.File(label="下載評估結果(CSV)")
300
 
301
+ # 常見QA
302
+ gr.Markdown("""
303
+ ---
304
+ ### ❓ 常見問題 & 解答
305
+ """)
306
+ faq_display = gr.Markdown()
307
+
308
+ # 載入公告與 FAQ
309
+ def load_sheet():
310
+ return fetch_sheet_content()
311
+ demo.load(fn=load_sheet, inputs=[], outputs=[announcement_display, faq_display])
312
+
313
  def wrapped_fn(file, key):
314
  return RAG_evaluation(file, key)
315
 
316
  submit_btn.click(
317
  fn=check_csv_and_run,
318
  inputs=[file_input, api_key_input],
319
+ outputs=[result_output, download_link],
320
  )
321
 
322
  demo.launch()