yoshizen committed on
Commit
af37df4
·
verified ·
1 Parent(s): 19870ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -153
app.py CHANGED
@@ -1,158 +1,56 @@
1
- import json
2
- import time
3
- import requests
4
  import gradio as gr
5
- import pandas as pd
6
- from tqdm import tqdm
7
- from typing import Tuple, List, Dict, Any
8
- from agent import GAIAExpertAgent
9
-
10
- # Константы
11
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
-
13
- class EvaluationRunner:
14
- """Оптимизированный обработчик оценки"""
15
-
16
- def __init__(self, api_url=DEFAULT_API_URL):
17
- self.api_url = api_url
18
- self.questions_url = f"{api_url}/questions"
19
- self.submit_url = f"{api_url}/submit"
20
- self.results_url = f"{api_url}/results"
21
- self.correct_answers = 0
22
- self.total_questions = 0
23
-
24
- def run_evaluation(self, agent, username: str, agent_code: str) -> Tuple[str, pd.DataFrame]:
25
- questions_data = self._fetch_questions()
26
- if not isinstance(questions_data, list):
27
- return questions_data, pd.DataFrame()
28
-
29
- results_log, answers_payload = self._run_agent_on_questions(agent, questions_data)
30
- if not answers_payload:
31
- return "No answers generated", pd.DataFrame()
32
-
33
- submission_result = self._submit_answers(username, agent_code, answers_payload)
34
- return submission_result, pd.DataFrame(results_log)
35
-
36
- def _fetch_questions(self):
37
- try:
38
- response = requests.get(self.questions_url, timeout=30)
39
- response.raise_for_status()
40
- questions_data = response.json()
41
- self.total_questions = len(questions_data)
42
- print(f"Fetched {self.total_questions} questions")
43
- return questions_data
44
- except Exception as e:
45
- return f"Error: {str(e)}"
46
-
47
- def _run_agent_on_questions(self, agent, questions_data):
48
- results_log = []
49
- answers_payload = []
50
-
51
- print(f"Processing {len(questions_data)} questions...")
52
- for item in tqdm(questions_data, desc="Questions"):
53
- task_id = item.get("task_id")
54
- question_text = item.get("question")
55
-
56
- if not task_id or not question_text:
57
- continue
58
-
59
- try:
60
- json_response = agent(question_text, task_id)
61
- response_obj = json.loads(json_response)
62
- answer = response_obj.get("final_answer", "")
63
-
64
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
65
- results_log.append({
66
- "Task ID": task_id,
67
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
68
- "Answer": answer[:50] + "..." if len(answer) > 50 else answer
69
- })
70
- except Exception as e:
71
- answers_payload.append({"task_id": task_id, "submitted_answer": f"ERROR: {str(e)}"})
72
- results_log.append({
73
- "Task ID": task_id,
74
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
75
- "Answer": f"ERROR: {str(e)}"
76
- })
77
-
78
- return results_log, answers_payload
79
-
80
- def _submit_answers(self, username: str, agent_code: str, answers_payload):
81
- submission_data = {
82
- "username": username.strip(),
83
- "agent_code": agent_code.strip(),
84
- "answers": answers_payload
85
- }
86
-
87
- print("Submitting answers...")
88
- try:
89
- response = requests.post(
90
- self.submit_url,
91
- json=submission_data,
92
- headers={"Content-Type": "application/json"},
93
- timeout=60
94
  )
95
- response.raise_for_status()
96
- return response.json().get("message", "Answers submitted successfully")
97
- except Exception as e:
98
- return f"Submission failed: {str(e)}"
99
-
100
-
101
- def run_evaluation(username: str, agent_code: str, model_name: str):
102
- print("Initializing GAIA Expert Agent...")
103
- agent = GAIAExpertAgent(model_name=model_name)
104
-
105
- print("Starting evaluation...")
106
- runner = EvaluationRunner()
107
- result, results_df = runner.run_evaluation(agent, username, agent_code)
108
-
109
- # Добавляем счетчики вопросов
110
- total_questions = runner.total_questions
111
- # Для простоты будем считать, что правильные ответы мы не знаем (GAIA API не возвращает сразу)
112
- correct_answers = 0
113
-
114
- return result, correct_answers, total_questions, results_df
115
-
116
-
117
- def create_gradio_interface():
118
- with gr.Blocks(title="GAIA Expert Agent") as demo:
119
- gr.Markdown("# 🧠 GAIA Expert Agent Evaluation")
120
-
121
- with gr.Row():
122
- with gr.Column():
123
- gr.Markdown("### Configuration")
124
- username = gr.Textbox(label="Hugging Face Username", value="yoshizen")
125
- agent_code = gr.Textbox(
126
- label="Agent Code",
127
- value="https://huggingface.co/spaces/yoshizen/FinalTest"
128
- )
129
- model_name = gr.Dropdown(
130
- label="Model",
131
- choices=[
132
- "google/flan-t5-small",
133
- "google/flan-t5-base",
134
- "google/flan-t5-large"
135
- ],
136
- value="google/flan-t5-large"
137
- )
138
- run_button = gr.Button("🚀 Run Evaluation", variant="primary")
139
-
140
- with gr.Column():
141
- gr.Markdown("### Results")
142
- result_text = gr.Textbox(label="Submission Status")
143
- correct_answers = gr.Number(label="Correct Answers")
144
- total_questions = gr.Number(label="Total Questions")
145
- results_table = gr.Dataframe(label="Processed Questions", interactive=False)
146
 
147
- run_button.click(
148
- fn=run_evaluation,
149
- inputs=[username, agent_code, model_name],
150
- outputs=[result_text, correct_answers, total_questions, results_table]
151
- )
152
-
153
- return demo
154
-
 
 
 
 
155
 
156
  if __name__ == "__main__":
157
- demo = create_gradio_interface()
158
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from gaia_agent import GAIAExpertAgent
3
+ from evaluation_runner import EvaluationRunner
4
+
5
+ # Инициализация компонентов
6
+ agent = GAIAExpertAgent(model_name="google/flan-t5-large")
7
+ runner = EvaluationRunner()
8
+
9
+ def run_evaluation(username: str, agent_code: str):
10
+ """Основная функция для запуска оценки"""
11
+ try:
12
+ result, correct, total, df = runner.run_evaluation(
13
+ agent=agent,
14
+ username=username,
15
+ agent_code=agent_code
16
+ )
17
+ return result, correct, total, df
18
+ except Exception as e:
19
+ return f"Error: {str(e)}", 0, 0, None
20
+
21
+ # Интерфейс Gradio
22
+ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
23
+ gr.Markdown("# 🏆 GAIA Agent Certification")
24
+
25
+ with gr.Row():
26
+ with gr.Column():
27
+ gr.Markdown("### Configuration")
28
+ username = gr.Textbox(
29
+ label="Hugging Face Username",
30
+ value="yoshizen"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  )
32
+ agent_code = gr.Textbox(
33
+ label="Agent Code",
34
+ value="https://huggingface.co/spaces/yoshizen/FinalTest"
35
+ )
36
+ run_btn = gr.Button("Run Evaluation", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ with gr.Column():
39
+ gr.Markdown("### Results")
40
+ result_output = gr.Textbox(label="Status")
41
+ correct_output = gr.Number(label="Correct Answers")
42
+ total_output = gr.Number(label="Total Questions")
43
+ results_table = gr.Dataframe(label="Details")
44
+
45
+ run_btn.click(
46
+ fn=run_evaluation,
47
+ inputs=[username, agent_code],
48
+ outputs=[result_output, correct_output, total_output, results_table]
49
+ )
50
 
51
  if __name__ == "__main__":
52
+ demo.launch(
53
+ server_name="0.0.0.0",
54
+ server_port=7860,
55
+ share=False # Для Spaces оставить False
56
+ )