yoshizen committed on
Commit
5a25e2d
·
verified ·
1 Parent(s): a3faa74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -218
app.py CHANGED
@@ -2,16 +2,8 @@ import re
2
  import requests
3
  import pandas as pd
4
  import torch
5
- import gradio as gr
6
- from tqdm import tqdm
7
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
8
- from typing import List, Dict, Any, Tuple, Optional
9
  import json
10
- import ast
11
- import numpy as np
12
- from PIL import Image, UnidentifiedImageError
13
- import io
14
- import base64
15
  import logging
16
  import time
17
  import sys
@@ -22,108 +14,106 @@ logger = logging.getLogger("GAIA-Mastermind")
22
 
23
  # Конфигурация
24
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
25
- MODEL_NAME = "google/flan-t5-large" # Оптимизировано для CPU
26
  API_RETRIES = 3
27
- API_TIMEOUT = 45
28
 
29
- # === ЯДРО СИСТЕМЫ ===
30
- class GAIAThoughtProcessor:
31
  def __init__(self):
32
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
33
- logger.info(f"Инициализация GAIAThoughtProcessor на {self.device.upper()}")
34
 
35
  try:
36
- # Оптимизированная загрузка модели для CPU
37
  self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
38
  self.model = AutoModelForSeq2SeqLM.from_pretrained(
39
  MODEL_NAME,
40
- device_map="auto" if torch.cuda.is_available() else None,
41
  torch_dtype=torch.float32,
42
  low_cpu_mem_usage=True
43
  ).eval()
44
-
45
- # Создаем пайплайн для генерации текста
46
- self.text_generator = pipeline(
47
- "text2text-generation",
48
- model=self.model,
49
- tokenizer=self.tokenizer,
50
- device=-1 if self.device == "cpu" else 0,
51
- max_new_tokens=128
52
- )
53
-
54
- logger.info("✅ GAIAThoughtProcessor готов")
55
  except Exception as e:
56
- logger.exception("Ошибка инициализации модели")
57
  raise RuntimeError(f"Ошибка инициализации: {str(e)}")
58
 
59
- def process_question(self, question: str, task_id: str) -> str:
60
- """Упрощенная обработка вопроса"""
61
  try:
62
- prompt = f"Реши задачу шаг за шагом: {question}\n\nФинальный ответ:"
63
-
64
- result = self.text_generator(
65
- prompt,
66
- max_new_tokens=128,
67
- num_beams=2,
68
- early_stopping=True,
69
- temperature=0.1
70
  )
71
 
72
- response = result[0]['generated_text'].strip()
 
 
 
 
 
73
 
74
- # Создаем JSON ответ
75
- return json.dumps({"final_answer": response})
76
-
77
  except Exception as e:
78
- logger.error(f"Ошибка обработки вопроса: {str(e)}")
79
- return json.dumps({
80
- "task_id": task_id,
81
- "error": str(e),
82
- "final_answer": f"ERROR: {str(e)}"
83
- })
84
 
85
- # === СИСТЕМА ОЦЕНКИ ===
86
- class GAIAEvaluationRunner:
87
  def __init__(self, api_url: str = DEFAULT_API_URL):
88
  self.api_url = api_url
89
  self.questions_url = f"{api_url}/questions"
90
  self.submit_url = f"{api_url}/submit"
91
  self.session = requests.Session()
92
- self.session.headers.update({
93
- "Accept": "application/json",
94
- "User-Agent": "GAIA-Mastermind/1.0",
95
- "Content-Type": "application/json"
96
- })
97
- logger.info(f"🌐 Инициализирован GAIAEvaluationRunner для {api_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
- def _fetch_questions(self) -> Tuple[list, str]:
100
  """Получение вопросов с API"""
101
- logger.info(f"🔍 Запрос вопросов с {self.questions_url}")
102
  try:
103
- response = self.session.get(
104
- self.questions_url,
105
- timeout=API_TIMEOUT
106
- )
107
-
108
- logger.info(f"Статус ответа: {response.status_code}")
109
-
110
  if response.status_code == 200:
111
- questions = response.json()
112
- logger.info(f"Получено {len(questions)} вопросов")
113
- return questions, "success"
114
- else:
115
- error_msg = f"Ошибка API: HTTP {response.status_code}"
116
- logger.error(error_msg)
117
- return [], error_msg
118
-
119
  except Exception as e:
120
- error_msg = f"Ошибка соединения: {str(e)}"
121
- logger.exception(error_msg)
122
- return [], error_msg
123
 
124
- def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[str, int]:
125
  """Отправка ответов на сервер"""
126
- logger.info(f"📤 Отправка ответов для пользователя {username}")
127
  try:
128
  payload = {
129
  "username": username.strip(),
@@ -137,152 +127,27 @@ class GAIAEvaluationRunner:
137
  timeout=API_TIMEOUT * 2
138
  )
139
 
140
- logger.info(f"Статус отправки: {response.status_code}")
141
-
142
  if response.status_code == 200:
143
  result = response.json()
144
  score = result.get("score", 0)
145
- return result.get("message", "Ответы успешно отправлены"), score
146
- else:
147
- error = f"HTTP Ошибка {response.status_code}"
148
- if response.text:
149
- error += f": {response.text[:200]}"
150
- logger.error(error)
151
- return error, 0
152
-
153
  except Exception as e:
154
- error = f"Ошибка отправки: {str(e)}"
155
- logger.exception(error)
156
- return error, 0
157
-
158
- def run_evaluation(self, agent, username: str, agent_code: str, progress=gr.Progress()):
159
- """Основной процесс оценки"""
160
- # Получение вопросов
161
- progress(0.1, desc="Получение вопросов")
162
- questions, status = self._fetch_questions()
163
- if status != "success":
164
- return status, 0, 0, pd.DataFrame()
165
-
166
- total_questions = len(questions)
167
- if total_questions == 0:
168
- return "Получено 0 вопросов", 0, 0, pd.DataFrame()
169
-
170
- # Обработка вопросов
171
- results = []
172
- answers = []
173
-
174
- for i, q in enumerate(questions):
175
- progress(i / total_questions, desc=f"Обработка задачи {i+1}/{total_questions}")
176
- try:
177
- task_id = q.get("task_id", f"task_{i}")
178
- logger.info(f"🔧 Обработка задачи {task_id}")
179
-
180
- json_response = agent.process_question(q["question"], task_id)
181
-
182
- # Парсинг ответа
183
- try:
184
- response_obj = json.loads(json_response)
185
- final_answer = response_obj.get("final_answer", "")
186
- except:
187
- final_answer = json_response
188
-
189
- answers.append({
190
- "task_id": task_id,
191
- "answer": str(final_answer)[:500]
192
- })
193
-
194
- results.append({
195
- "Task ID": task_id,
196
- "Question": q["question"][:50] + "..." if len(q["question"]) > 50 else q["question"],
197
- "Answer": str(final_answer)[:50] + "..." if len(str(final_answer)) > 50 else str(final_answer),
198
- "Status": "Processed"
199
- })
200
- except Exception as e:
201
- logger.error(f"Ошибка обработки задачи: {str(e)}")
202
- answers.append({
203
- "task_id": task_id,
204
- "answer": f"ERROR: {str(e)}"
205
- })
206
- results.append({
207
- "Task ID": task_id,
208
- "Question": "Error",
209
- "Answer": f"ERROR: {str(e)}",
210
- "Status": "Failed"
211
- })
212
-
213
- # Отправка ответов
214
- progress(0.9, desc="Отправка результатов")
215
- submission_result, score = self._submit_answers(username, agent_code, answers)
216
- return submission_result, score, total_questions, pd.DataFrame(results)
217
-
218
- # === ИНТЕРФЕЙС GRADIO ===
219
- def run_evaluation(username: str, agent_code: str, progress=gr.Progress()):
220
- try:
221
- progress(0, desc="Инициализация агента")
222
- agent = GAIAThoughtProcessor()
223
-
224
- progress(0.1, desc="Подключение к API")
225
- runner = GAIAEvaluationRunner()
226
-
227
- # Запуск оценки
228
- return runner.run_evaluation(agent, username, agent_code, progress)
229
-
230
- except Exception as e:
231
- logger.exception("Критическая ошибка в run_evaluation")
232
- error_df = pd.DataFrame([{
233
- "Task ID": "ERROR",
234
- "Question": f"Критическая ошибка: {str(e)}",
235
- "Answer": "См. логи",
236
- "Status": "Failed"
237
- }])
238
- return f"Ошибка: {str(e)}", 0, 0, error_df
239
 
240
- # Создание интерфейса
241
- with gr.Blocks(title="GAIA Mastermind") as demo:
242
- gr.Markdown("# GAIA Mastermind")
243
- gr.Markdown("Многошаговое решение задач с декомпозицией")
244
-
245
- with gr.Row():
246
- with gr.Column():
247
- gr.Markdown("## 🔐 Авторизация")
248
- username = gr.Textbox(label="HF Username", value="yoshizen")
249
- agent_code = gr.Textbox(label="Agent Code", value="https://huggingface.co/spaces/yoshizen/FinalTest")
250
- run_btn = gr.Button("Запустить оценку")
251
-
252
- gr.Markdown("## ⚙️ Статус системы")
253
- sys_info = gr.Textbox(label="Системная информация", interactive=False)
254
-
255
- with gr.Column():
256
- gr.Markdown("## 📊 Результаты GAIA")
257
- with gr.Row():
258
- result_output = gr.Textbox(label="Статус отправки", interactive=False)
259
- correct_output = gr.Number(label="Правильные ответы", interactive=False)
260
- total_output = gr.Number(label="Всего вопросов", interactive=False)
261
-
262
- results_table = gr.Dataframe(
263
- label="Детализация ответов",
264
- headers=["Task ID", "Question", "Answer", "Status"],
265
- interactive=False
266
- )
267
 
268
- # Системная информация
269
- def get_system_info():
270
- device = "GPU" if torch.cuda.is_available() else "CPU"
271
- return f"Device: {device} | Model: {MODEL_NAME} | API: {DEFAULT_API_URL}"
272
 
273
- demo.load(get_system_info, inputs=None, outputs=sys_info)
 
 
274
 
275
- run_btn.click(
276
- fn=run_evaluation,
277
- inputs=[username, agent_code],
278
- outputs=[result_output, correct_output, total_output, results_table],
279
- concurrency_limit=1
280
- )
281
-
282
- if __name__ == "__main__":
283
- demo.queue(max_size=1).launch(
284
- server_name="0.0.0.0",
285
- server_port=7860,
286
- share=False,
287
- show_error=True
288
- )
 
2
import json
import logging
import sys
import time
from typing import Tuple

import pandas as pd
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
14
 
15
  # Конфигурация
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
+ MODEL_NAME = "google/flan-t5-base" # Упрощенная модель для быстрой работы
18
  API_RETRIES = 3
19
+ API_TIMEOUT = 30
20
 
21
class GAIAExpert:
    """Seq2seq question-answering agent built on the model named by MODEL_NAME.

    The tokenizer and model are loaded once in the constructor. Each call to
    ``process_question`` runs a single greedy generation and returns the
    result wrapped in a JSON string.
    """

    def __init__(self):
        """Load the tokenizer and model and place the model on ``self.device``.

        Raises:
            RuntimeError: if loading the tokenizer or the model fails; the
                original exception is chained as the cause.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Инициализация модели на {self.device.upper()}")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(
                MODEL_NAME,
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True
            ).eval()
            # Move the weights to the device announced in the log line above;
            # without this the model stays on CPU even when CUDA is available.
            self.model.to(self.device)
            logger.info("Модель готова")
        except Exception as e:
            logger.exception("Ошибка загрузки модели")
            raise RuntimeError(f"Ошибка инициализации: {str(e)}") from e

    def process_question(self, question: str) -> str:
        """Answer one question with minimal latency.

        Args:
            question: the question text; the tokenized prompt is truncated to
                256 tokens.

        Returns:
            A JSON string of the form ``{"final_answer": ...}``. On any
            failure the value is ``"ERROR: <message>"`` instead of the answer,
            so this method never raises.
        """
        try:
            inputs = self.tokenizer(
                f"Вопрос: {question}\nОтвет:",
                return_tensors="pt",
                max_length=256,
                truncation=True
            ).to(self.device)  # inputs must live on the same device as the model

            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=50,
                    num_beams=1  # greedy decoding keeps generation fast
                )

            answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return json.dumps({"final_answer": answer.strip()})
        except Exception as e:
            # Keep the best-effort contract, but leave a trace in the log.
            logger.exception("Ошибка обработки вопроса")
            return json.dumps({"final_answer": f"ERROR: {str(e)}"})
 
 
 
 
 
59
 
60
+ class GAIAEvaluator:
 
61
  def __init__(self, api_url: str = DEFAULT_API_URL):
62
  self.api_url = api_url
63
  self.questions_url = f"{api_url}/questions"
64
  self.submit_url = f"{api_url}/submit"
65
  self.session = requests.Session()
66
+ self.session.headers.update({"Content-Type": "application/json"})
67
+
68
+ def run_evaluation(self, username: str, agent_code: str):
69
+ """Консольный процесс оценки без интерфейса"""
70
+ agent = GAIAExpert()
71
+
72
+ # Получение вопросов
73
+ questions = self._fetch_questions()
74
+ if not isinstance(questions, list):
75
+ logger.error(f"Ошибка получения вопросов: {questions}")
76
+ return 0, 0
77
+
78
+ # Обработка вопросов
79
+ answers = []
80
+ correct = 0
81
+
82
+ for i, q in enumerate(questions):
83
+ task_id = q.get("task_id", f"task_{i}")
84
+ logger.info(f"Обработка задачи {i+1}/{len(questions)}: {q['question'][:50]}...")
85
+
86
+ try:
87
+ json_response = agent.process_question(q["question"])
88
+ response_obj = json.loads(json_response)
89
+ answer = response_obj.get("final_answer", "")
90
+
91
+ answers.append({
92
+ "task_id": task_id,
93
+ "answer": str(answer)[:300]
94
+ })
95
+ except Exception as e:
96
+ logger.error(f"Ошибка обработки: {str(e)}")
97
+ answers.append({
98
+ "task_id": task_id,
99
+ "answer": f"ERROR: {str(e)}"
100
+ })
101
+
102
+ # Отправка ответов
103
+ return self._submit_answers(username, agent_code, answers)
104
 
105
+ def _fetch_questions(self):
106
  """Получение вопросов с API"""
 
107
  try:
108
+ response = self.session.get(self.questions_url, timeout=API_TIMEOUT)
 
 
 
 
 
 
109
  if response.status_code == 200:
110
+ return response.json()
111
+ return f"HTTP error {response.status_code}"
 
 
 
 
 
 
112
  except Exception as e:
113
+ return f"Connection error: {str(e)}"
 
 
114
 
115
+ def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[int, int]:
116
  """Отправка ответов на сервер"""
 
117
  try:
118
  payload = {
119
  "username": username.strip(),
 
127
  timeout=API_TIMEOUT * 2
128
  )
129
 
 
 
130
  if response.status_code == 200:
131
  result = response.json()
132
  score = result.get("score", 0)
133
+ return score, len(answers)
134
+ return 0, len(answers)
 
 
 
 
 
 
135
  except Exception as e:
136
+ logger.error(f"Ошибка отправки: {str(e)}")
137
+ return 0, len(answers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
if __name__ == "__main__":
    # Launch parameters
    USERNAME = "yoshizen"
    AGENT_CODE = "https://huggingface.co/spaces/yoshizen/FinalTest"

    logger.info(f"Запуск оценки для {USERNAME}")

    # Time the whole run, model loading included.
    started_at = time.time()
    score, total = GAIAEvaluator().run_evaluation(USERNAME, AGENT_CODE)
    elapsed = time.time() - started_at

    # NOTE(review): `score` is whatever run_evaluation returns first —
    # confirm it is a count of correct answers and not already a percentage.
    accuracy = score / total * 100 if total > 0 else 0

    logger.info(f"Оценка завершена за {elapsed:.1f} сек")
    logger.info(f"Результат: {score}/{total} правильных ответов")
    logger.info(f"Точность: {accuracy:.1f}%")