"""Gradio front-end that runs a FLAN-T5 agent over GAIA tasks and submits the answers.

The module fetches questions from the scoring service, asks the model for an
answer to each one, posts the answers back and shows the outcome in a small
Gradio UI.
"""

import json
import re
from typing import Optional

import gradio as gr
import pandas as pd
import requests
import torch
from tqdm import tqdm
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Configuration
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MODEL_NAME = "google/flan-t5-large"


class GAIAExpertAgent:
    """Callable wrapper around a seq2seq model that answers a single question.

    Calling the instance returns a JSON string of the form
    ``{"final_answer": "<text>"}``; failures are reported in the same shape
    with the text prefixed by ``ERROR:``.
    """

    def __init__(self, model_name: str = MODEL_NAME):
        """Load the tokenizer and model identified by *model_name*."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"⚡ Инициализация агента на {self.device.upper()}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(
            model_name,
            device_map="auto",
            # Half precision only when a GPU is available.
            torch_dtype=torch.float16 if "cuda" in self.device else torch.float32,
            low_cpu_mem_usage=True,
        ).eval()
        print("✅ Агент готов")

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Generate an answer for *question* and return it as a JSON string.

        Args:
            question: The task text to answer.
            task_id: Accepted for interface compatibility; not used here.

        Returns:
            ``json.dumps({"final_answer": ...})``. Any exception raised while
            tokenizing, generating or decoding is caught and reported as
            ``"ERROR: <message>"`` in the same field.
        """
        try:
            inputs = self.tokenizer(
                f"Solve step-by-step: {question}\nFinal Answer:",
                return_tensors="pt",
                max_length=512,
                truncation=True,
            ).to(self.device)

            outputs = self.model.generate(
                **inputs,
                max_new_tokens=256,
                num_beams=5,
                early_stopping=True,
                repetition_penalty=2.0,
            )

            answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return json.dumps({"final_answer": answer.strip()})
        except Exception as e:
            return json.dumps({"final_answer": f"ERROR: {str(e)}"})
        finally:
            # Release cached CUDA memory after every call, including failed
            # ones (previously this was skipped whenever generation raised).
            if "cuda" in self.device:
                torch.cuda.empty_cache()


class EvaluationRunner:
    """Fetch questions, run an agent over them and submit the answers."""

    def __init__(self, api_url: str = DEFAULT_API_URL):
        """Derive the endpoint URLs from *api_url* and open an HTTP session."""
        self.api_url = api_url
        self.questions_url = f"{api_url}/questions"
        self.submit_url = f"{api_url}/submit"
        # One session is reused for the fetch and the submit request.
        self.session = requests.Session()

    def run_evaluation(self, agent, username: str, agent_code: str, progress=tqdm):
        """Run *agent* on every fetched question and submit the answers.

        Args:
            agent: Callable ``agent(question, task_id)`` returning a JSON
                string with a ``final_answer`` field.
            username: User name sent with the submission.
            agent_code: Agent code reference sent with the submission.
            progress: tqdm-compatible callable ``progress(iterable, desc=...)``
                that yields the items of the iterable.

        Returns:
            A tuple ``(status_message, correct_count, total_questions,
            results_frame)``. When fetching the questions fails, the status is
            the error text, both counts are 0 and the frame is empty.
        """
        questions = self._fetch_questions()
        if not isinstance(questions, list):
            # _fetch_questions returns an error string on failure.
            return questions, 0, 0, pd.DataFrame()

        results = []
        answers = []
        for q in progress(questions, desc="Processing GAIA tasks"):
            try:
                json_response = agent(q["question"], q["task_id"])
                response_obj = json.loads(json_response)
                answer = response_obj.get("final_answer", "")

                answers.append({
                    "task_id": q["task_id"],
                    # Submitted answers are capped at 500 characters.
                    "submitted_answer": str(answer)[:500],
                })
                results.append({
                    "Task ID": q["task_id"],
                    "Question": q["question"],
                    "Answer": str(answer),
                })
            except Exception as e:
                # A failed task is shown in the table but not submitted.
                results.append({
                    "Task ID": q.get("task_id", "N/A"),
                    "Question": "Error",
                    "Answer": f"ERROR: {str(e)}",
                })

        frame = pd.DataFrame(results)
        try:
            payload = self._post_answers(username, agent_code, answers)
            message = payload.get("message", "Ответы успешно отправлены")
            # NOTE(review): assumes the scoring API reports the number of
            # correct answers under "correct_count" - confirm against the API.
            # Falls back to 0, the value that used to be hard-coded here.
            correct = payload.get("correct_count", 0)
        except Exception as e:
            return f"Ошибка отправки: {str(e)}", 0, len(questions), frame
        return message, correct, len(questions), frame

    def _fetch_questions(self):
        """Return the parsed JSON question payload, or an error string on failure."""
        try:
            response = self.session.get(
                self.questions_url,
                timeout=60,
                headers={"Accept": "application/json"},
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return f"Ошибка получения вопросов: {str(e)}"

    def _post_answers(self, username: str, agent_code: str, answers: list):
        """POST the answers and return the decoded JSON reply.

        Raises:
            Exception: whatever ``requests`` raises for transport errors,
                non-2xx statuses or an undecodable body.
        """
        response = self.session.post(
            self.submit_url,
            json={
                "username": username.strip(),
                "agent_code": agent_code.strip(),
                "answers": answers,
            },
            timeout=120,
        )
        response.raise_for_status()
        return response.json()

    def _submit_answers(self, username: str, agent_code: str, answers: list):
        """Submit the answers and return the service message or an error string."""
        try:
            payload = self._post_answers(username, agent_code, answers)
            return payload.get("message", "Ответы успешно отправлены")
        except Exception as e:
            return f"Ошибка отправки: {str(e)}"


# The agent is created lazily on the first evaluation run, not at import time,
# and then reused so the model is not reloaded on every button click.
_agent: Optional[GAIAExpertAgent] = None


def _get_agent() -> GAIAExpertAgent:
    """Return the shared agent, loading the model on first use."""
    global _agent
    if _agent is None:
        _agent = GAIAExpertAgent()
    return _agent


def run_evaluation(username: str, agent_code: str, progress=gr.Progress()):
    """Gradio click handler: run the full evaluation and return the UI outputs.

    Args:
        username: Value of the user name textbox.
        agent_code: Value of the agent code textbox.
        progress: Gradio progress tracker supplied by the framework.

    Returns:
        The tuple produced by ``EvaluationRunner.run_evaluation``: status
        message, correct count, total question count and results DataFrame.
    """
    progress(0, desc="Инициализация модели...")
    agent = _get_agent()

    progress(0, desc="Запуск оценки...")
    runner = EvaluationRunner()

    class ProgressWrapper:
        """Adapter exposing the Gradio progress tracker through a tqdm-like call."""

        def __call__(self, iterable, desc=""):
            progress(0, desc=desc)
            # Materialise once so the total is known even for plain iterators
            # and is not recomputed on every step.
            items = list(iterable)
            total = len(items)
            for i, x in enumerate(items):
                progress(i / total)
                yield x

    return runner.run_evaluation(agent, username, agent_code, progress=ProgressWrapper())


# Gradio interface
with gr.Blocks(title="GAIA Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# 🧠 GAIA Mastermind Agent\n"
        "## *Многошаговое решение сложных задач*"
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 🔐 Авторизация")
            username = gr.Textbox(label="HF Username", value="yoshizen")
            agent_code = gr.Textbox(
                label="Agent Code",
                value="https://huggingface.co/spaces/yoshizen/FinalTest",
            )
            run_btn = gr.Button("Запустить оценку", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("### 📊 Результаты")
            with gr.Row():
                result_output = gr.Textbox(label="Статус")
                correct_output = gr.Number(label="Правильные ответы")
                total_output = gr.Number(label="Всего вопросов")

            # NOTE(review): the former overflow_row_behaviour="paginate"
            # argument was dropped. It is a Gradio 3 keyword, while the
            # concurrency_limit argument used on the click handler below
            # requires Gradio 4, so no single version accepted both.
            results_table = gr.Dataframe(
                label="Детализация ответов",
                interactive=True,
                wrap=True,
                height=500,
            )

    run_btn.click(
        fn=run_evaluation,
        inputs=[username, agent_code],
        outputs=[result_output, correct_output, total_output, results_table],
        concurrency_limit=1,  # only one evaluation runs at a time
    )

if __name__ == "__main__":
    demo.queue(max_size=10).launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )