# FinalTest / app.py
# yoshizen's picture
# Update app.py
# 5a25e2d verified
# raw
# history blame
# 5.96 kB
import json
import logging
import re
import sys
import time
from typing import Tuple

import pandas as pd
import requests
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
# Logging setup: INFO level, records formatted as "time - logger name - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("GAIA-Mastermind")
# Configuration
# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MODEL_NAME = "google/flan-t5-base" # Simplified model for fast operation
# NOTE(review): API_RETRIES is not referenced anywhere in the visible code — confirm whether retries were intended.
API_RETRIES = 3
# Passed as the `timeout=` argument of the HTTP calls (the submit call uses twice this value).
API_TIMEOUT = 30
class GAIAExpert:
    """Question-answering agent backed by the seq2seq model named by ``MODEL_NAME``.

    The tokenizer and model are loaded once, at construction time, and the
    model is placed on the GPU when CUDA is available, otherwise on the CPU.
    """

    def __init__(self):
        """Load the tokenizer and the model and move the model to ``self.device``.

        Raises:
            RuntimeError: If the tokenizer or the model cannot be loaded. The
                underlying exception is chained as ``__cause__``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Инициализация модели на {self.device.upper()}")
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            model = AutoModelForSeq2SeqLM.from_pretrained(
                MODEL_NAME,
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True,
            )
            # Actually place the model on the selected device; previously the
            # device was only computed and logged, so inference always ran on CPU.
            self.model = model.to(self.device).eval()
            logger.info("Модель готова")
        except Exception as e:
            logger.exception("Ошибка загрузки модели")
            # Chain the cause so the original traceback is not lost.
            raise RuntimeError(f"Ошибка инициализации: {str(e)}") from e

    def process_question(self, question: str) -> str:
        """Generate an answer for ``question`` and return it as a JSON string.

        Args:
            question: The question text; it is embedded into the prompt and
                the tokenized prompt is truncated to 256 tokens.

        Returns:
            A JSON object serialized to ``str`` with a single key,
            ``"final_answer"``. On any failure the value is
            ``"ERROR: <exception message>"`` instead of the model output;
            this method does not raise.
        """
        try:
            inputs = self.tokenizer(
                f"Вопрос: {question}\nОтвет:",
                return_tensors="pt",
                max_length=256,
                truncation=True,
            ).to(self.device)  # inputs must live on the same device as the model
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=50,
                # Greedy decoding for speed. `early_stopping` is a beam-search
                # option, so it is not passed together with num_beams=1.
                num_beams=1,
            )
            answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return json.dumps({"final_answer": answer.strip()})
        except Exception as e:
            # Best-effort contract: report the failure inside the JSON payload.
            return json.dumps({"final_answer": f"ERROR: {str(e)}"})
class GAIAEvaluator:
    """Console client for the scoring API.

    Fetches the question list, runs a ``GAIAExpert`` over every question and
    submits the collected answers.
    """

    def __init__(self, api_url: str = DEFAULT_API_URL):
        """Derive the endpoint URLs and open a shared HTTP session.

        Args:
            api_url: Base URL of the scoring service; ``/questions`` and
                ``/submit`` are appended to it.
        """
        self.api_url = api_url
        self.questions_url = f"{api_url}/questions"
        self.submit_url = f"{api_url}/submit"
        self.session = requests.Session()
        self.session.headers.update({"Content-Type": "application/json"})

    def run_evaluation(self, username: str, agent_code: str) -> Tuple[int, int]:
        """Run the whole evaluation from the console, without any UI.

        Args:
            username: Account name sent with the submission.
            agent_code: Link to the agent's code sent with the submission.

        Returns:
            ``(score, number_of_answers)`` as returned by ``_submit_answers``,
            or ``(0, 0)`` when the questions could not be fetched or the list
            is empty.

        Raises:
            RuntimeError: Propagated from ``GAIAExpert()`` if the model fails
                to load.
        """
        # Fetch first: there is no point loading the model if the service
        # cannot give us any questions.
        questions = self._fetch_questions()
        if not isinstance(questions, list):
            logger.error(f"Ошибка получения вопросов: {questions}")
            return 0, 0
        if not questions:
            logger.warning("Список вопросов пуст")
            return 0, 0

        agent = GAIAExpert()

        answers = []
        total = len(questions)
        for i, q in enumerate(questions):
            fallback_id = f"task_{i}"
            # A malformed (non-dict) item must not abort the whole run.
            task_id = q.get("task_id", fallback_id) if isinstance(q, dict) else fallback_id
            try:
                question = q["question"]
                logger.info(f"Обработка задачи {i+1}/{total}: {question[:50]}...")
                response_obj = json.loads(agent.process_question(question))
                answer = response_obj.get("final_answer", "")
            except Exception as e:
                logger.error(f"Ошибка обработки: {str(e)}")
                answer = f"ERROR: {str(e)}"
            # The same 300-character cap applies to real answers and error texts.
            answers.append({"task_id": task_id, "answer": str(answer)[:300]})

        return self._submit_answers(username, agent_code, answers)

    def _fetch_questions(self):
        """Fetch the question list from the API.

        Returns:
            The decoded JSON body on HTTP 200; otherwise a ``str`` describing
            the failure. Callers distinguish the two with ``isinstance``.
        """
        try:
            response = self.session.get(self.questions_url, timeout=API_TIMEOUT)
            if response.status_code == 200:
                return response.json()
            return f"HTTP error {response.status_code}"
        except Exception as e:
            return f"Connection error: {str(e)}"

    def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[int, int]:
        """Submit the answers to the server.

        Args:
            username: Account name; surrounding whitespace is stripped.
            agent_code: Link to the agent's code; surrounding whitespace is stripped.
            answers: List of ``{"task_id": ..., "answer": ...}`` dicts.

        Returns:
            ``(score, len(answers))``. ``score`` is the ``"score"`` field of
            the JSON response, or ``0`` on a non-200 status, a missing field
            or any error.
            NOTE(review): whether ``"score"`` is a count of correct answers or
            a percentage is not visible here — confirm against the scoring API.
        """
        try:
            payload = {
                "username": username.strip(),
                "agent_code": agent_code.strip(),
                "answers": answers,
            }
            response = self.session.post(
                self.submit_url,
                json=payload,
                timeout=API_TIMEOUT * 2,
            )
            if response.status_code != 200:
                # Previously this case was dropped without any trace in the log.
                logger.error(f"Ошибка отправки: HTTP {response.status_code}")
                return 0, len(answers)
            result = response.json()
            score = result.get("score", 0)
            return score, len(answers)
        except Exception as e:
            logger.error(f"Ошибка отправки: {str(e)}")
            return 0, len(answers)
if __name__ == "__main__":
    # Launch parameters
    USERNAME = "yoshizen"
    AGENT_CODE = "https://huggingface.co/spaces/yoshizen/FinalTest"

    logger.info(f"Запуск оценки для {USERNAME}")

    # Time the whole run, including evaluator construction.
    started_at = time.time()
    score, total = GAIAEvaluator().run_evaluation(USERNAME, AGENT_CODE)
    elapsed = time.time() - started_at

    # Report the duration, the raw result and the derived accuracy.
    # NOTE(review): the last two lines treat `score` as a count of correct
    # answers out of `total` — confirm against what the scoring API returns.
    logger.info(f"Оценка завершена за {elapsed:.1f} сек")
    logger.info(f"Результат: {score}/{total} правильных ответов")
    logger.info(f"Точность: {score/total*100 if total > 0 else 0:.1f}%")