import ast
import base64
import functools
import io
import json
import logging
import re
import sys
import time
from typing import List, Dict, Any, Tuple, Optional

import gradio as gr
import numpy as np
import pandas as pd
import requests
import torch
from PIL import Image, UnidentifiedImageError
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline


# Process-wide logging setup; every record carries timestamp, logger name and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("GAIA-Mastermind")

# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Hugging Face model identifier loaded by GAIAThoughtProcessor.
MODEL_NAME = "google/flan-t5-large"
# NOTE(review): API_RETRIES is not referenced by any code visible in this file.
API_RETRIES = 3
# Request timeout handed to `requests`; the submit call uses twice this value.
API_TIMEOUT = 45


class GAIAThoughtProcessor:
    """Answer questions with a seq2seq model wrapped in a generation pipeline.

    The constructor loads ``MODEL_NAME`` together with its tokenizer;
    :meth:`process_question` turns one question into a JSON string.
    """

    def __init__(self):
        """Load tokenizer, model and the text2text-generation pipeline.

        Raises:
            RuntimeError: if any step of the model setup fails. The original
                exception is attached as ``__cause__``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"⚡ Инициализация GAIAThoughtProcessor на {self.device.upper()}")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(
                MODEL_NAME,
                device_map="auto" if torch.cuda.is_available() else None,
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True,
            ).eval()

            # A model loaded with `device_map` has already been placed by
            # accelerate; the pipeline rejects an explicit `device` for such
            # models, so `device` is only passed when no device map exists.
            pipeline_kwargs = {}
            if getattr(self.model, "hf_device_map", None) is None:
                pipeline_kwargs["device"] = -1 if self.device == "cpu" else 0

            self.text_generator = pipeline(
                "text2text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                max_new_tokens=128,
                **pipeline_kwargs,
            )

            logger.info("✅ GAIAThoughtProcessor готов")
        except Exception as e:
            logger.exception("Ошибка инициализации модели")
            raise RuntimeError(f"Ошибка инициализации: {str(e)}") from e

    def process_question(self, question: str, task_id: str) -> str:
        """Generate an answer for one question (simplified processing).

        Args:
            question: Text of the task; embedded into the prompt.
            task_id: Identifier of the task, echoed back in the payload.

        Returns:
            A JSON string. On success it holds ``task_id`` and
            ``final_answer``; on failure it additionally holds ``error`` and
            ``final_answer`` is prefixed with ``"ERROR: "``. This method does
            not raise for generation failures.
        """
        try:
            prompt = f"Реши задачу шаг за шагом: {question}\n\nФинальный ответ:"

            # Beam search without sampling: `temperature` has no effect in
            # this mode, so it is not passed.
            result = self.text_generator(
                prompt,
                max_new_tokens=128,
                num_beams=2,
                early_stopping=True,
            )

            response = result[0]['generated_text'].strip()

            # `task_id` is included so success and error payloads share a shape.
            return json.dumps({"task_id": task_id, "final_answer": response})

        except Exception as e:
            logger.error(f"Ошибка обработки вопроса: {str(e)}")
            return json.dumps({
                "task_id": task_id,
                "error": str(e),
                "final_answer": f"ERROR: {str(e)}"
            })


class GAIAEvaluationRunner:
    """Drive one evaluation: fetch questions, run the agent, submit answers.

    All HTTP traffic goes through a single ``requests.Session`` with JSON
    headers set in the constructor.
    """

    def __init__(self, api_url: str = DEFAULT_API_URL):
        """Prepare endpoint URLs and the shared HTTP session.

        Args:
            api_url: Base URL of the scoring service; ``/questions`` and
                ``/submit`` are appended to it.
        """
        self.api_url = api_url
        self.questions_url = f"{api_url}/questions"
        self.submit_url = f"{api_url}/submit"
        self.session = requests.Session()
        self.session.headers.update({
            "Accept": "application/json",
            "User-Agent": "GAIA-Mastermind/1.0",
            "Content-Type": "application/json"
        })
        logger.info(f"🌐 Инициализирован GAIAEvaluationRunner для {api_url}")

    def _fetch_questions(self) -> Tuple[list, str]:
        """Fetch the questions from the API.

        Returns:
            ``(questions, "success")`` on HTTP 200, otherwise
            ``([], error_message)``. Exceptions are logged and reported
            through the message instead of being raised.
        """
        logger.info(f"🔍 Запрос вопросов с {self.questions_url}")
        try:
            response = self.session.get(
                self.questions_url,
                timeout=API_TIMEOUT
            )

            logger.info(f"Статус ответа: {response.status_code}")

            if response.status_code == 200:
                questions = response.json()
                logger.info(f"Получено {len(questions)} вопросов")
                return questions, "success"

            error_msg = f"Ошибка API: HTTP {response.status_code}"
            logger.error(error_msg)
            return [], error_msg

        except Exception as e:
            error_msg = f"Ошибка соединения: {str(e)}"
            logger.exception(error_msg)
            return [], error_msg

    def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[str, int]:
        """Send the collected answers to the server.

        Args:
            username: User name; surrounding whitespace is stripped.
            agent_code: Agent code reference; surrounding whitespace is stripped.
            answers: List of ``{"task_id": ..., "answer": ...}`` entries.

        Returns:
            ``(message, score)``. On HTTP 200 both come from the response
            JSON (``message`` and ``score`` keys, with defaults); on any
            failure the message describes the error and the score is ``0``.
        """
        logger.info(f"📤 Отправка ответов для пользователя {username}")
        try:
            payload = {
                "username": username.strip(),
                "agent_code": agent_code.strip(),
                "answers": answers
            }

            # Submission gets twice the timeout used for fetching questions.
            response = self.session.post(
                self.submit_url,
                json=payload,
                timeout=API_TIMEOUT * 2
            )

            logger.info(f"Статус отправки: {response.status_code}")

            if response.status_code == 200:
                result = response.json()
                score = result.get("score", 0)
                return result.get("message", "Ответы успешно отправлены"), score

            error = f"HTTP Ошибка {response.status_code}"
            if response.text:
                # Only the first 200 characters of the body are reported.
                error += f": {response.text[:200]}"
            logger.error(error)
            return error, 0

        except Exception as e:
            error = f"Ошибка отправки: {str(e)}"
            logger.exception(error)
            return error, 0

    @staticmethod
    def _extract_final_answer(raw_response):
        """Return the ``final_answer`` field of a JSON reply, else the raw reply."""
        try:
            parsed = json.loads(raw_response)
        except (TypeError, ValueError):
            # Not JSON at all (json.JSONDecodeError is a ValueError).
            return raw_response
        if isinstance(parsed, dict):
            return parsed.get("final_answer", "")
        # Valid JSON that is not an object has no "final_answer" field.
        return raw_response

    @staticmethod
    def _shorten(text, limit: int = 50):
        """Cut *text* to *limit* characters and mark the cut with "..."."""
        return text[:limit] + "..." if len(text) > limit else text

    def run_evaluation(self, agent, username: str, agent_code: str, progress=gr.Progress()):
        """Run the main evaluation process.

        Args:
            agent: Object exposing ``process_question(question, task_id)``
                that returns a JSON string (or plain text).
            username: Passed to the submit endpoint.
            agent_code: Passed to the submit endpoint.
            progress: Gradio progress callback.

        Returns:
            ``(submission_message, score, total_questions, results_frame)``.
            When fetching fails or yields no questions, the score and total
            are ``0`` and the frame is empty.
        """
        progress(0.1, desc="Получение вопросов")
        questions, status = self._fetch_questions()
        if status != "success":
            return status, 0, 0, pd.DataFrame()

        total_questions = len(questions)
        if total_questions == 0:
            return "Получено 0 вопросов", 0, 0, pd.DataFrame()

        results = []
        answers = []

        for i, q in enumerate(questions):
            progress(i / total_questions, desc=f"Обработка задачи {i+1}/{total_questions}")

            # Resolved before the `try` so the error branch always has an id
            # for the current task, even when the entry is malformed.
            task_id = q.get("task_id", f"task_{i}") if isinstance(q, dict) else f"task_{i}"

            try:
                logger.info(f"🔧 Обработка задачи {task_id}")

                question_text = q["question"]
                json_response = agent.process_question(question_text, task_id)
                answer_text = str(self._extract_final_answer(json_response))

                # Both records are built before either is appended, so a
                # failure here cannot leave a duplicate answer for the task.
                answer_entry = {
                    "task_id": task_id,
                    "answer": answer_text[:500]
                }
                result_row = {
                    "Task ID": task_id,
                    "Question": self._shorten(question_text),
                    "Answer": self._shorten(answer_text),
                    "Status": "Processed"
                }
            except Exception as e:
                logger.error(f"Ошибка обработки задачи: {str(e)}")
                answer_entry = {
                    "task_id": task_id,
                    "answer": f"ERROR: {str(e)}"
                }
                result_row = {
                    "Task ID": task_id,
                    "Question": "Error",
                    "Answer": f"ERROR: {str(e)}",
                    "Status": "Failed"
                }

            answers.append(answer_entry)
            results.append(result_row)

        progress(0.9, desc="Отправка результатов")
        submission_result, score = self._submit_answers(username, agent_code, answers)
        return submission_result, score, total_questions, pd.DataFrame(results)


@functools.lru_cache(maxsize=1)
def _get_agent():
    """Build the model-backed agent once and reuse it for later runs.

    A failed construction raises and is not cached, so the next call retries.
    """
    return GAIAThoughtProcessor()


def run_evaluation(username: str, agent_code: str, progress=gr.Progress()):
    """Gradio click handler: run a full evaluation and report the outcome.

    Args:
        username: User name forwarded to the submission.
        agent_code: Agent code reference forwarded to the submission.
        progress: Gradio progress callback.

    Returns:
        ``(status_message, score, total_questions, results_frame)``. Any
        exception is logged and converted into an error message, zeros and a
        one-row frame describing the failure, so the handler never raises.
    """
    try:
        progress(0, desc="Инициализация агента")
        # Reuse the loaded model instead of reloading it on every click.
        agent = _get_agent()

        progress(0.1, desc="Подключение к API")
        runner = GAIAEvaluationRunner()

        return runner.run_evaluation(agent, username, agent_code, progress)

    except Exception as e:
        logger.exception("Критическая ошибка в run_evaluation")
        error_df = pd.DataFrame([{
            "Task ID": "ERROR",
            "Question": f"Критическая ошибка: {str(e)}",
            "Answer": "См. логи",
            "Status": "Failed"
        }])
        return f"Ошибка: {str(e)}", 0, 0, error_df


# Gradio interface definition.
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from the blank-line grouping of the components — confirm the
# intended layout.
with gr.Blocks(title="GAIA Mastermind") as demo:
    gr.Markdown("# GAIA Mastermind")
    gr.Markdown("Многошаговое решение задач с декомпозицией")

    with gr.Row():
        # Left column: credentials, the run button and a system status line.
        with gr.Column():
            gr.Markdown("## 🔐 Авторизация")
            username = gr.Textbox(label="HF Username", value="yoshizen")
            agent_code = gr.Textbox(label="Agent Code", value="https://huggingface.co/spaces/yoshizen/FinalTest")
            run_btn = gr.Button("Запустить оценку")

            gr.Markdown("## ⚙️ Статус системы")
            sys_info = gr.Textbox(label="Системная информация", interactive=False)

        # Right column: submission summary and the per-task table.
        with gr.Column():
            gr.Markdown("## 📊 Результаты GAIA")
            with gr.Row():
                result_output = gr.Textbox(label="Статус отправки", interactive=False)
                # NOTE(review): this field receives the `score` value returned
                # by the submit call — confirm it matches the label's meaning.
                correct_output = gr.Number(label="Правильные ответы", interactive=False)
                total_output = gr.Number(label="Всего вопросов", interactive=False)

            results_table = gr.Dataframe(
                label="Детализация ответов",
                headers=["Task ID", "Question", "Answer", "Status"],
                interactive=False
            )

    def get_system_info():
        """Return a one-line summary of device, model name and API URL."""
        device = "GPU" if torch.cuda.is_available() else "CPU"
        return f"Device: {device} | Model: {MODEL_NAME} | API: {DEFAULT_API_URL}"

    # Fill the status box when the page loads.
    demo.load(get_system_info, inputs=None, outputs=sys_info)

    # One evaluation at a time; outputs follow run_evaluation's return order.
    run_btn.click(
        fn=run_evaluation,
        inputs=[username, agent_code],
        outputs=[result_output, correct_output, total_output, results_table],
        concurrency_limit=1
    )


if __name__ == "__main__":
    # Script entry point: enable the request queue (capped at one entry) and
    # serve the app on all interfaces, port 7860, without a public share link.
    demo.queue(max_size=1).launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )