FinalTest / app.py
yoshizen's picture
Update app.py
a3faa74 verified
raw
history blame
11.7 kB
import re
import requests
import pandas as pd
import torch
import gradio as gr
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from typing import List, Dict, Any, Tuple, Optional
import json
import ast
import numpy as np
from PIL import Image, UnidentifiedImageError
import io
import base64
import logging
import time
import sys
# Logging setup: one root configuration plus a named logger for this app.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger("GAIA-Mastermind")

# Configuration
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"  # scoring service base URL
MODEL_NAME = "google/flan-t5-large"  # Optimized for CPU
API_RETRIES = 3  # NOTE(review): not referenced anywhere in the visible code
API_TIMEOUT = 45  # passed as `timeout=` to the HTTP calls (doubled for submission)
# === SYSTEM CORE ===
class GAIAThoughtProcessor:
    """Answer questions with a seq2seq model wrapped in a text2text pipeline.

    ``__init__`` sets ``device``, ``tokenizer``, ``model`` and
    ``text_generator``; ``process_question`` only reads ``text_generator``.
    """

    def __init__(self):
        """Load the tokenizer/model named by ``MODEL_NAME`` and build the pipeline.

        Raises:
            RuntimeError: if any loading step fails; the original exception
                is attached as ``__cause__``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"⚡ Инициализация GAIAThoughtProcessor на {self.device.upper()}")
        try:
            # CPU-friendly model loading
            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(
                MODEL_NAME,
                device_map="auto" if torch.cuda.is_available() else None,
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True,
            ).eval()

            pipeline_kwargs = {
                "model": self.model,
                "tokenizer": self.tokenizer,
                "max_new_tokens": 128,
            }
            # A model placed via `device_map` is already dispatched by
            # accelerate; transformers pipelines reject an explicit `device`
            # in that case, so only pass it for a plainly loaded model.
            if getattr(self.model, "hf_device_map", None) is None:
                pipeline_kwargs["device"] = -1 if self.device == "cpu" else 0

            # Text generation pipeline
            self.text_generator = pipeline("text2text-generation", **pipeline_kwargs)
            logger.info("✅ GAIAThoughtProcessor готов")
        except Exception as e:
            logger.exception("Ошибка инициализации модели")
            raise RuntimeError(f"Ошибка инициализации: {str(e)}") from e

    def process_question(self, question: str, task_id: str) -> str:
        """Generate an answer for ``question`` and return it as a JSON string.

        Args:
            question: Question text inserted into the prompt.
            task_id: Identifier echoed back in the JSON payload.

        Returns:
            A JSON object string. On success it has ``task_id`` and
            ``final_answer``; on failure it additionally has ``error`` and
            ``final_answer`` is prefixed with ``ERROR:``. Never raises for
            generation failures.
        """
        try:
            prompt = f"Реши задачу шаг за шагом: {question}\n\nФинальный ответ:"
            # Beam search without sampling: `temperature` is only consulted
            # when sampling is enabled, so it is not passed here.
            result = self.text_generator(
                prompt,
                max_new_tokens=128,
                num_beams=2,
                early_stopping=True,
            )
            response = result[0]["generated_text"].strip()
            # Same shape as the error payload (minus `error`) so callers can
            # always rely on `task_id` being present.
            return json.dumps({"task_id": task_id, "final_answer": response})
        except Exception as e:
            logger.error(f"Ошибка обработки вопроса: {str(e)}")
            return json.dumps({
                "task_id": task_id,
                "error": str(e),
                "final_answer": f"ERROR: {str(e)}"
            })
# === EVALUATION SYSTEM ===
class GAIAEvaluationRunner:
    """Fetch questions from the scoring API, run an agent on them, submit answers.

    All HTTP traffic goes through one ``requests.Session`` that carries JSON
    headers; endpoints are ``{api_url}/questions`` and ``{api_url}/submit``.
    """

    def __init__(self, api_url: str = DEFAULT_API_URL):
        """Store the endpoint URLs and create the shared HTTP session.

        Args:
            api_url: Base URL of the scoring service.
        """
        self.api_url = api_url
        self.questions_url = f"{api_url}/questions"
        self.submit_url = f"{api_url}/submit"
        self.session = requests.Session()
        self.session.headers.update({
            "Accept": "application/json",
            "User-Agent": "GAIA-Mastermind/1.0",
            "Content-Type": "application/json"
        })
        logger.info(f"🌐 Инициализирован GAIAEvaluationRunner для {api_url}")

    def _fetch_questions(self) -> Tuple[list, str]:
        """Download the question list.

        Returns:
            ``(questions, "success")`` on HTTP 200, otherwise
            ``([], error_message)``. Connection and decoding errors are
            logged and reported through the message instead of being raised.
        """
        logger.info(f"🔍 Запрос вопросов с {self.questions_url}")
        try:
            response = self.session.get(
                self.questions_url,
                timeout=API_TIMEOUT
            )
            logger.info(f"Статус ответа: {response.status_code}")
            if response.status_code == 200:
                questions = response.json()
                logger.info(f"Получено {len(questions)} вопросов")
                return questions, "success"
            error_msg = f"Ошибка API: HTTP {response.status_code}"
            logger.error(error_msg)
            return [], error_msg
        except Exception as e:
            error_msg = f"Ошибка соединения: {str(e)}"
            logger.exception(error_msg)
            return [], error_msg

    def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[str, int]:
        """POST the collected answers to the submit endpoint.

        Args:
            username: Sent stripped as ``username``.
            agent_code: Sent stripped as ``agent_code``.
            answers: List of ``{"task_id", "answer"}`` dicts.

        Returns:
            ``(message, score)``. On HTTP 200 both come from the response
            JSON (with defaults); on any failure the message describes the
            error and the score is ``0``.
        """
        logger.info(f"📤 Отправка ответов для пользователя {username}")
        try:
            payload = {
                "username": username.strip(),
                "agent_code": agent_code.strip(),
                "answers": answers
            }
            # Submission gets twice the normal timeout.
            response = self.session.post(
                self.submit_url,
                json=payload,
                timeout=API_TIMEOUT * 2
            )
            logger.info(f"Статус отправки: {response.status_code}")
            if response.status_code == 200:
                result = response.json()
                score = result.get("score", 0)
                return result.get("message", "Ответы успешно отправлены"), score
            error = f"HTTP Ошибка {response.status_code}"
            if response.text:
                # Keep only the head of the body so the message stays short.
                error += f": {response.text[:200]}"
            logger.error(error)
            return error, 0
        except Exception as e:
            error = f"Ошибка отправки: {str(e)}"
            logger.exception(error)
            return error, 0

    def run_evaluation(self, agent, username: str, agent_code: str, progress=gr.Progress()):
        """Run the full cycle: fetch questions, answer each one, submit.

        Args:
            agent: Object exposing ``process_question(question, task_id)``
                that returns a string (JSON with ``final_answer`` or raw text).
            username: Forwarded to the submit call.
            agent_code: Forwarded to the submit call.
            progress: Callable invoked as ``progress(fraction, desc=...)``.

        Returns:
            ``(status_message, score, total_questions, results_dataframe)``.
            When fetching fails or yields no questions the score and total
            are ``0`` and the DataFrame is empty.
        """
        # Fetch questions
        progress(0.1, desc="Получение вопросов")
        questions, status = self._fetch_questions()
        if status != "success":
            return status, 0, 0, pd.DataFrame()
        total_questions = len(questions)
        if total_questions == 0:
            return "Получено 0 вопросов", 0, 0, pd.DataFrame()

        # Process questions
        results = []
        answers = []
        for i, q in enumerate(questions):
            # Map the loop onto the 0.1..0.9 band so the bar never moves
            # backwards between the fetch (0.1) and submit (0.9) steps.
            progress(
                0.1 + 0.8 * i / total_questions,
                desc=f"Обработка задачи {i+1}/{total_questions}"
            )
            # Resolve the id before entering `try`: the error handler below
            # needs it and must never see an unbound or stale value.
            task_id = q.get("task_id", f"task_{i}") if isinstance(q, dict) else f"task_{i}"
            try:
                logger.info(f"🔧 Обработка задачи {task_id}")
                json_response = agent.process_question(q["question"], task_id)
                # Parse the agent reply; anything that is not a JSON object
                # is used verbatim as the answer.
                try:
                    response_obj = json.loads(json_response)
                    final_answer = response_obj.get("final_answer", "")
                except (ValueError, TypeError, AttributeError):
                    final_answer = json_response
                answer_text = str(final_answer)
                question_text = q["question"]
                answers.append({
                    "task_id": task_id,
                    "answer": answer_text[:500]
                })
                results.append({
                    "Task ID": task_id,
                    "Question": question_text[:50] + "..." if len(question_text) > 50 else question_text,
                    "Answer": answer_text[:50] + "..." if len(answer_text) > 50 else answer_text,
                    "Status": "Processed"
                })
            except Exception as e:
                logger.error(f"Ошибка обработки задачи: {str(e)}")
                answers.append({
                    "task_id": task_id,
                    "answer": f"ERROR: {str(e)}"
                })
                results.append({
                    "Task ID": task_id,
                    "Question": "Error",
                    "Answer": f"ERROR: {str(e)}",
                    "Status": "Failed"
                })

        # Submit answers
        progress(0.9, desc="Отправка результатов")
        submission_result, score = self._submit_answers(username, agent_code, answers)
        return submission_result, score, total_questions, pd.DataFrame(results)
# === GRADIO INTERFACE ===
# Agent reused across button clicks so the model is loaded only once per
# process; stays None until the first successful initialization.
_cached_agent = None


def run_evaluation(username: str, agent_code: str, progress=gr.Progress()):
    """Gradio click handler: build the agent and runner, then run the evaluation.

    Args:
        username: Value of the username textbox.
        agent_code: Value of the agent-code textbox.
        progress: Callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        ``(status_message, score, total_questions, results_dataframe)``.
        Any exception is logged and converted into an error message, zero
        counters and a one-row DataFrame describing the failure.
    """
    global _cached_agent
    try:
        progress(0, desc="Инициализация агента")
        # Loading the model is the expensive step; do it once and reuse it.
        # A failed load leaves the cache empty so the next click retries.
        if _cached_agent is None:
            _cached_agent = GAIAThoughtProcessor()
        agent = _cached_agent
        progress(0.1, desc="Подключение к API")
        runner = GAIAEvaluationRunner()
        # Run the evaluation
        return runner.run_evaluation(agent, username, agent_code, progress)
    except Exception as e:
        logger.exception("Критическая ошибка в run_evaluation")
        error_df = pd.DataFrame([{
            "Task ID": "ERROR",
            "Question": f"Критическая ошибка: {str(e)}",
            "Answer": "См. логи",
            "Status": "Failed"
        }])
        return f"Ошибка: {str(e)}", 0, 0, error_df
# Build the interface.
# NOTE(review): the source lost its indentation; the Row/Column nesting below
# is reconstructed from the order of the statements — confirm the layout.
with gr.Blocks(title="GAIA Mastermind") as demo:
    gr.Markdown("# GAIA Mastermind")
    gr.Markdown("Многошаговое решение задач с декомпозицией")
    with gr.Row():
        # Left column: credentials, run button and system status.
        with gr.Column():
            gr.Markdown("## 🔐 Авторизация")
            username = gr.Textbox(label="HF Username", value="yoshizen")
            agent_code = gr.Textbox(label="Agent Code", value="https://huggingface.co/spaces/yoshizen/FinalTest")
            run_btn = gr.Button("Запустить оценку")
            gr.Markdown("## ⚙️ Статус системы")
            sys_info = gr.Textbox(label="Системная информация", interactive=False)
        # Right column: submission status, counters and per-task table.
        with gr.Column():
            gr.Markdown("## 📊 Результаты GAIA")
            with gr.Row():
                result_output = gr.Textbox(label="Статус отправки", interactive=False)
                correct_output = gr.Number(label="Правильные ответы", interactive=False)
                total_output = gr.Number(label="Всего вопросов", interactive=False)
            results_table = gr.Dataframe(
                label="Детализация ответов",
                headers=["Task ID", "Question", "Answer", "Status"],
                interactive=False
            )
    # System information
    def get_system_info():
        """Return a one-line summary of the device, model name and API URL."""
        device = "GPU" if torch.cuda.is_available() else "CPU"
        return f"Device: {device} | Model: {MODEL_NAME} | API: {DEFAULT_API_URL}"
    # Fill the status box when the page loads.
    demo.load(get_system_info, inputs=None, outputs=sys_info)
    # The handler's four return values map onto the four outputs in order.
    run_btn.click(
        fn=run_evaluation,
        inputs=[username, agent_code],
        outputs=[result_output, correct_output, total_output, results_table],
        concurrency_limit=1
    )
if __name__ == "__main__":
    # Enable the request queue (at most one waiting request), then serve on
    # all interfaces at port 7860 without a public share link.
    queued_app = demo.queue(max_size=1)
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    queued_app.launch(**launch_options)