# GAIA evaluation script: loads a seq2seq model, fetches questions from the
# scoring service, generates answers and submits them.
import json
import logging
import re
import sys
import time
from typing import Tuple

import pandas as pd
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Logging setup: INFO level and above, with timestamp, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("GAIA-Mastermind")

# Configuration
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"  # base URL of the scoring service
MODEL_NAME = "google/flan-t5-base"  # simplified model for fast operation
API_TIMEOUT = 30  # passed as the `timeout` of HTTP requests
API_RETRIES = 3
class GAIAExpert:
    """Question-answering wrapper around the seq2seq model named by ``MODEL_NAME``.

    The tokenizer and model are loaded once at construction; each call to
    :meth:`process_question` runs a single generation and returns a JSON string.
    """

    def __init__(self):
        """Load the tokenizer and model and place the model on the chosen device.

        Raises:
            RuntimeError: If the tokenizer or model cannot be loaded. The
                underlying exception is chained as ``__cause__``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Инициализация модели на {self.device.upper()}")
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(
                MODEL_NAME,
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True,
            )
            # Move the weights to the device announced in the log line above;
            # previously `self.device` was computed but never applied.
            self.model.to(self.device)
            self.model.eval()
            logger.info("Модель готова")
        except Exception as e:
            logger.exception("Ошибка загрузки модели")
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Ошибка инициализации: {str(e)}") from e

    def process_question(self, question: str) -> str:
        """Generate an answer for one question.

        Args:
            question: The question text; the tokenized prompt is truncated to
                256 tokens.

        Returns:
            A JSON string of the form ``{"final_answer": "<text>"}``. If
            anything fails, the value is ``"ERROR: <message>"`` instead of an
            exception being raised.
        """
        try:
            inputs = self.tokenizer(
                f"Вопрос: {question}\nОтвет:",
                return_tensors="pt",
                max_length=256,
                truncation=True,
            ).to(self.device)  # inputs must live on the same device as the model
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=50,
                # Greedy decoding for speed. `early_stopping` was dropped: it
                # only applies to beam search and has no effect with one beam.
                num_beams=1,
            )
            answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return json.dumps({"final_answer": answer.strip()})
        except Exception as e:
            # Best-effort contract: callers always receive parseable JSON.
            return json.dumps({"final_answer": f"ERROR: {str(e)}"})
class GAIAEvaluator:
    """Fetch questions from the scoring service, answer them and submit the results."""

    def __init__(self, api_url: str = DEFAULT_API_URL):
        """Prepare endpoint URLs and a reusable HTTP session.

        Args:
            api_url: Base URL of the scoring service; ``/questions`` and
                ``/submit`` are appended to it.
        """
        self.api_url = api_url
        self.questions_url = f"{api_url}/questions"
        self.submit_url = f"{api_url}/submit"
        self.session = requests.Session()
        self.session.headers.update({"Content-Type": "application/json"})

    def run_evaluation(self, username: str, agent_code: str):
        """Run the console evaluation flow (no UI).

        Args:
            username: Account name sent with the submission.
            agent_code: Link to the agent's code sent with the submission.

        Returns:
            ``(correct, total)`` as returned by :meth:`_submit_answers`, or
            ``(0, 0)`` when the question list could not be fetched.
        """
        # Fetch first so a network failure does not cost a model load.
        questions = self._fetch_questions()
        if not isinstance(questions, list):
            logger.error(f"Ошибка получения вопросов: {questions}")
            return 0, 0

        agent = GAIAExpert()

        answers = []
        total = len(questions)
        for i, q in enumerate(questions):
            task_id = q.get("task_id", f"task_{i}")
            try:
                # Read the question inside the try so an item without a
                # "question" key becomes an ERROR answer, not an aborted run.
                question_text = q["question"]
                logger.info(f"Обработка задачи {i+1}/{total}: {question_text[:50]}...")
                response_obj = json.loads(agent.process_question(question_text))
                answer = str(response_obj.get("final_answer", ""))[:300]
            except Exception as e:
                logger.error(f"Ошибка обработки: {str(e)}")
                answer = f"ERROR: {str(e)}"
            # NOTE(review): the course scoring API names this field
            # `submitted_answer` (previously sent as `answer`) — confirm
            # against the service's /docs schema.
            answers.append({"task_id": task_id, "submitted_answer": answer})

        return self._submit_answers(username, agent_code, answers)

    def _fetch_questions(self):
        """Fetch the question list from the API.

        The GET is retried up to ``API_RETRIES`` times on connection errors,
        HTTP 429 and 5xx responses; other non-200 statuses are not retried.

        Returns:
            The decoded JSON body on success, otherwise a string describing
            the last error (callers distinguish the two by type).
        """
        attempts = max(1, API_RETRIES)
        last_error = ""
        for attempt in range(1, attempts + 1):
            try:
                response = self.session.get(self.questions_url, timeout=API_TIMEOUT)
                if response.status_code == 200:
                    return response.json()
                last_error = f"HTTP error {response.status_code}"
                if response.status_code < 500 and response.status_code != 429:
                    break  # a client error will not fix itself on retry
            except Exception as e:
                last_error = f"Connection error: {str(e)}"
            if attempt < attempts:
                logger.warning(f"Повтор запроса вопросов ({attempt}/{attempts}): {last_error}")
                time.sleep(attempt)  # simple linear backoff
        return last_error

    def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[int, int]:
        """Submit the answers to the server.

        Args:
            username: Account name; surrounding whitespace is stripped.
            agent_code: Link to the agent's code; surrounding whitespace is stripped.
            answers: List of answer dicts to send.

        Returns:
            ``(correct, total)`` where ``total`` is ``len(answers)``.
            ``correct`` is the response's ``correct_count`` when present,
            otherwise its ``score``; it is 0 on any failure. The POST is not
            retried because a submission may not be idempotent.
        """
        total = len(answers)
        try:
            payload = {
                "username": username.strip(),
                "agent_code": agent_code.strip(),
                "answers": answers,
            }
            response = self.session.post(
                self.submit_url,
                json=payload,
                timeout=API_TIMEOUT * 2,
            )
            if response.status_code != 200:
                # Surface the server's reason instead of silently returning 0.
                logger.error(
                    f"Ошибка отправки: HTTP {response.status_code}: {response.text[:200]}"
                )
                return 0, total
            result = response.json()
            # NOTE(review): `score` appears to be a percentage in this API,
            # while callers treat the first element as a count of correct
            # answers — prefer `correct_count`, fall back to `score`.
            correct = result.get("correct_count", result.get("score", 0))
            return correct, total
        except Exception as e:
            logger.error(f"Ошибка отправки: {str(e)}")
            return 0, total
if __name__ == "__main__":
    # Run parameters
    USERNAME = "yoshizen"
    AGENT_CODE = "https://huggingface.co/spaces/yoshizen/FinalTest"

    logger.info(f"Запуск оценки для {USERNAME}")
    # perf_counter is monotonic, so the elapsed time cannot go negative if
    # the wall clock is adjusted during the run.
    start_time = time.perf_counter()
    evaluator = GAIAEvaluator()
    score, total = evaluator.run_evaluation(USERNAME, AGENT_CODE)
    elapsed = time.perf_counter() - start_time

    # Guard against division by zero when no questions were processed.
    accuracy = score / total * 100 if total > 0 else 0
    logger.info(f"Оценка завершена за {elapsed:.1f} сек")
    logger.info(f"Результат: {score}/{total} правильных ответов")
    logger.info(f"Точность: {accuracy:.1f}%")