yoshizen committed on
Commit
865c342
·
verified ·
1 Parent(s): 6a2aeb0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -41
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import json
2
  import re
3
  import requests
4
  import pandas as pd
@@ -6,6 +5,7 @@ import torch
6
  import gradio as gr
7
  from tqdm import tqdm
8
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
9
 
10
  # Конфигурация
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -15,40 +15,42 @@ class GAIAExpertAgent:
15
  def __init__(self, model_name: str = MODEL_NAME):
16
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
17
  print(f"⚡ Инициализация агента на {self.device.upper()}")
 
 
18
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
19
  self.model = AutoModelForSeq2SeqLM.from_pretrained(
20
  model_name,
21
  device_map="auto",
22
- torch_dtype=torch.float16 if "cuda" in self.device else torch.float32
 
23
  ).eval()
24
  print("✅ Агент готов")
25
 
26
  def __call__(self, question: str, task_id: str = None) -> str:
27
  try:
28
- # Специальные обработчики для GAIA
29
- if "reverse" in question.lower() or "rewsna" in question:
30
- return json.dumps({"final_answer": question[::-1][:100]})
31
- if "how many" in question.lower() or "сколько" in question.lower():
32
- numbers = re.findall(r'\d+', question)
33
- result = str(sum(map(int, numbers))) if numbers else "42"
34
- return json.dumps({"final_answer": result})
35
-
36
- # Стандартная обработка
37
  inputs = self.tokenizer(
38
- f"GAIA Question: {question}\nAnswer:",
39
  return_tensors="pt",
40
- max_length=256,
41
  truncation=True
42
  ).to(self.device)
43
 
 
44
  outputs = self.model.generate(
45
  **inputs,
46
- max_new_tokens=50,
47
- num_beams=3,
48
- early_stopping=True
 
49
  )
50
 
51
  answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
52
  return json.dumps({"final_answer": answer.strip()})
53
 
54
  except Exception as e:
@@ -60,8 +62,9 @@ class EvaluationRunner:
60
  self.api_url = api_url
61
  self.questions_url = f"{api_url}/questions"
62
  self.submit_url = f"{api_url}/submit"
 
63
 
64
- def run_evaluation(self, agent, username: str, agent_code: str):
65
  # Получение вопросов
66
  questions = self._fetch_questions()
67
  if not isinstance(questions, list):
@@ -70,7 +73,7 @@ class EvaluationRunner:
70
  # Обработка вопросов
71
  results = []
72
  answers = []
73
- for q in tqdm(questions, desc="Processing"):
74
  try:
75
  json_response = agent(q["question"], q["task_id"])
76
  response_obj = json.loads(json_response)
@@ -78,13 +81,13 @@ class EvaluationRunner:
78
 
79
  answers.append({
80
  "task_id": q["task_id"],
81
- "submitted_answer": str(answer)[:300]
82
  })
83
 
84
  results.append({
85
  "Task ID": q["task_id"],
86
- "Question": q["question"][:70] + "..." if len(q["question"]) > 70 else q["question"],
87
- "Answer": str(answer)[:50] + "..." if len(str(answer)) > 50 else str(answer)
88
  })
89
  except Exception as e:
90
  results.append({
@@ -99,56 +102,88 @@ class EvaluationRunner:
99
 
100
  def _fetch_questions(self):
101
  try:
102
- response = requests.get(self.questions_url, timeout=30)
 
 
 
 
103
  response.raise_for_status()
104
  return response.json()
105
  except Exception as e:
106
- return f"Fetch error: {str(e)}"
107
 
108
  def _submit_answers(self, username: str, agent_code: str, answers: list):
109
  try:
110
- response = requests.post(
111
  self.submit_url,
112
  json={
113
  "username": username.strip(),
114
  "agent_code": agent_code.strip(),
115
  "answers": answers
116
  },
117
- timeout=60
118
  )
119
  response.raise_for_status()
120
- return response.json().get("message", "Answers submitted")
121
  except Exception as e:
122
- return f"Submission error: {str(e)}"
123
 
124
 
125
def run_evaluation(username: str, agent_code: str):
    """Create a fresh agent and runner and execute one evaluation pass.

    Args:
        username: Forwarded unchanged to the runner.
        agent_code: Forwarded unchanged to the runner.

    Returns:
        Whatever ``EvaluationRunner.run_evaluation`` returns.
    """
    # The agent is constructed first, then the runner, as separate steps.
    expert = GAIAExpertAgent()
    evaluator = EvaluationRunner()
    outcome = evaluator.run_evaluation(expert, username, agent_code)
    return outcome
 
 
 
 
 
 
 
 
 
129
 
130
 
131
# Gradio interface
# NOTE(review): nesting below was reconstructed from a listing that had lost
# its indentation; confirm the column layout against the real file.
with gr.Blocks(title="GAIA Agent") as demo:
    gr.Markdown("# 🧠 GAIA Agent Evaluation")

    with gr.Row():
        # Left column: submission inputs and the trigger button.
        with gr.Column():
            username = gr.Textbox(value="yoshizen", label="HF Username")
            agent_code = gr.Textbox(
                value="https://huggingface.co/spaces/yoshizen/FinalTest",
                label="Agent Code",
            )
            run_btn = gr.Button("Run Evaluation", variant="primary")

        # Right column: status text, counters and the per-question table.
        with gr.Column():
            result_output = gr.Textbox(label="Status")
            correct_output = gr.Number(label="Correct Answers")
            total_output = gr.Number(label="Total Questions")
            results_table = gr.Dataframe(label="Details")

    # Wire the button to the evaluation entry point.
    handler_inputs = [username, agent_code]
    handler_outputs = [result_output, correct_output, total_output, results_table]
    run_btn.click(
        fn=run_evaluation,
        inputs=handler_inputs,
        outputs=handler_outputs,
    )
152
 
153
  if __name__ == "__main__":
154
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
1
  import re
2
  import requests
3
  import pandas as pd
 
5
  import gradio as gr
6
  from tqdm import tqdm
7
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
8
+ import json # Добавлен отсутствующий импорт
9
 
10
  # Конфигурация
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
15
  def __init__(self, model_name: str = MODEL_NAME):
16
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
17
  print(f"⚡ Инициализация агента на {self.device.upper()}")
18
+
19
+ # Оптимизация загрузки модели
20
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
21
  self.model = AutoModelForSeq2SeqLM.from_pretrained(
22
  model_name,
23
  device_map="auto",
24
+ torch_dtype=torch.float16 if "cuda" in self.device else torch.float32,
25
+ low_cpu_mem_usage=True # Снижение потребления CPU памяти
26
  ).eval()
27
  print("✅ Агент готов")
28
 
29
  def __call__(self, question: str, task_id: str = None) -> str:
30
  try:
31
+ # Убраны жесткие эвристики - они мешают реальным задачам GAIA
 
 
 
 
 
 
 
 
32
  inputs = self.tokenizer(
33
+ f"Solve step-by-step: {question}\nFinal Answer:",
34
  return_tensors="pt",
35
+ max_length=512, # Увеличен контекст
36
  truncation=True
37
  ).to(self.device)
38
 
39
+ # Улучшена генерация
40
  outputs = self.model.generate(
41
  **inputs,
42
+ max_new_tokens=256, # Увеличен лимит для сложных ответов
43
+ num_beams=5, # Улучшено качество поиска
44
+ early_stopping=True,
45
+ repetition_penalty=2.0 # Предотвращение циклов
46
  )
47
 
48
  answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
49
+
50
+ # Очистка памяти CUDA
51
+ if "cuda" in self.device:
52
+ torch.cuda.empty_cache()
53
+
54
  return json.dumps({"final_answer": answer.strip()})
55
 
56
  except Exception as e:
 
62
  self.api_url = api_url
63
  self.questions_url = f"{api_url}/questions"
64
  self.submit_url = f"{api_url}/submit"
65
+ self.session = requests.Session() # Сессия для повторных запросов
66
 
67
+ def run_evaluation(self, agent, username: str, agent_code: str, progress=tqdm):
68
  # Получение вопросов
69
  questions = self._fetch_questions()
70
  if not isinstance(questions, list):
 
73
  # Обработка вопросов
74
  results = []
75
  answers = []
76
+ for q in progress(questions, desc="Processing GAIA tasks"):
77
  try:
78
  json_response = agent(q["question"], q["task_id"])
79
  response_obj = json.loads(json_response)
 
81
 
82
  answers.append({
83
  "task_id": q["task_id"],
84
+ "submitted_answer": str(answer)[:500] # Увеличен лимит
85
  })
86
 
87
  results.append({
88
  "Task ID": q["task_id"],
89
+ "Question": q["question"],
90
+ "Answer": str(answer)
91
  })
92
  except Exception as e:
93
  results.append({
 
102
 
103
  def _fetch_questions(self):
104
  try:
105
+ response = self.session.get(
106
+ self.questions_url,
107
+ timeout=60, # Увеличен таймаут
108
+ headers={"Accept": "application/json"}
109
+ )
110
  response.raise_for_status()
111
  return response.json()
112
  except Exception as e:
113
+ return f"Ошибка получения вопросов: {str(e)}"
114
 
115
  def _submit_answers(self, username: str, agent_code: str, answers: list):
116
  try:
117
+ response = self.session.post(
118
  self.submit_url,
119
  json={
120
  "username": username.strip(),
121
  "agent_code": agent_code.strip(),
122
  "answers": answers
123
  },
124
+ timeout=120 # Увеличен таймаут
125
  )
126
  response.raise_for_status()
127
+ return response.json().get("message", "Ответы успешно отправлены")
128
  except Exception as e:
129
+ return f"Ошибка отправки: {str(e)}"
130
 
131
 
132
# Important: the agent is created when an evaluation is requested, not at import time
def run_evaluation(username: str, agent_code: str, progress=gr.Progress()):
    """Build the agent and runner, then run one evaluation with UI progress.

    Args:
        username: Forwarded unchanged to the runner.
        agent_code: Forwarded unchanged to the runner.
        progress: Gradio progress tracker. It is called here with a fraction
            and a ``desc`` label.

    Returns:
        Whatever ``EvaluationRunner.run_evaluation`` returns.
    """
    progress(0, desc="Инициализация модели...")
    agent = GAIAExpertAgent()

    progress(0, desc="Запуск оценки...")
    runner = EvaluationRunner()

    def track(iterable, desc=""):
        """Yield items from *iterable* while reporting progress, tqdm-style."""
        # Materialize once so iterators without __len__ work and the total is
        # computed a single time instead of on every step.
        items = list(iterable)
        total = len(items)
        progress(0, desc=desc)
        for done, item in enumerate(items):
            # Repeat the label on every update; an update without desc would
            # otherwise replace the labelled one.
            progress(done / total, desc=desc)
            yield item
        # Report completion once the consumer has processed the last item.
        progress(1.0, desc=desc)

    return runner.run_evaluation(agent, username, agent_code, progress=track)
149
 
150
 
151
# Gradio interface
# NOTE(review): nesting below was reconstructed from a listing that had lost
# its indentation; confirm the row/column layout against the real file.
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""# 🧠 GAIA Mastermind Agent
    ## *Многошаговое решение сложных задач*""")

    with gr.Row():
        # Narrow column: submission inputs and the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("### 🔐 Авторизация")
            username = gr.Textbox(label="HF Username", value="yoshizen")
            agent_code = gr.Textbox(
                label="Agent Code",
                value="https://huggingface.co/spaces/yoshizen/FinalTest",
            )
            run_btn = gr.Button("Запустить оценку", variant="primary")

        # Wide column: status, counters and the per-question table.
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Результаты")
            with gr.Row():
                result_output = gr.Textbox(label="Статус")
                correct_output = gr.Number(label="Правильные ответы")
                total_output = gr.Number(label="Всего вопросов")
            # `overflow_row_behaviour` is deliberately not passed: it belongs
            # to the older Dataframe signature, while `concurrency_limit` on
            # the click listener below belongs to the newer event API, so
            # passing both cannot work on a single Gradio release.
            results_table = gr.Dataframe(
                label="Детализация ответов",
                interactive=True,
                wrap=True,
                height=500,
            )

    run_btn.click(
        fn=run_evaluation,
        inputs=[username, agent_code],
        outputs=[result_output, correct_output, total_output, results_table],
        concurrency_limit=1,  # one evaluation at a time to avoid overload
    )
183
 
184
  if __name__ == "__main__":
185
+ demo.queue(max_size=10).launch(
186
+ server_name="0.0.0.0",
187
+ server_port=7860,
188
+ show_error=True
189
+ )