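"""GAIA benchmark evaluation runner.

Loads google/flan-t5-base locally, fetches the GAIA questions from the
Agents Course scoring API, generates a short answer for each task and
submits the results, logging the final score to the console.
"""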
import json
import logging
import time
from typing import Tuple

import requests
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("GAIA-Mastermind")

# Configuration
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MODEL_NAME = "google/flan-t5-base"  # lightweight model chosen for fast inference
API_RETRIES = 3
API_TIMEOUT = 30

class GAIAExpert:
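    """Lightweight answering agent backed by a local seq2seq model."""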
    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Initializing model on {self.device.upper()}")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(
                MODEL_NAME,
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True
            ).to(self.device).eval()  # move the model to the detected device
            logger.info("Model ready")
        except Exception as e:
            logger.exception("Failed to load model")
            raise RuntimeError(f"Model initialization failed: {str(e)}")

    def process_question(self, question: str) -> str:
        """Answer a single question with minimal latency."""
        try:
            inputs = self.tokenizer(
                f"Question: {question}\nAnswer:",
                return_tensors="pt",
                max_length=256,
                truncation=True
            ).to(self.device)  # keep inputs on the same device as the model

            outputs = self.model.generate(
                **inputs,
                max_new_tokens=50,
                num_beams=1  # greedy decoding keeps generation fast
            )
            
            answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return json.dumps({"final_answer": answer.strip()})
        except Exception as e:
            return json.dumps({"final_answer": f"ERROR: {str(e)}"})

class GAIAEvaluator:
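    """Fetches GAIA questions, runs the agent over them and submits the answers."""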
    def __init__(self, api_url: str = DEFAULT_API_URL):
        self.api_url = api_url
        self.questions_url = f"{api_url}/questions"
        self.submit_url = f"{api_url}/submit"
        self.session = requests.Session()
        self.session.headers.update({"Content-Type": "application/json"})

    def run_evaluation(self, username: str, agent_code: str):
        """Run the full evaluation from the console, without a UI."""
        agent = GAIAExpert()
        
        # Fetch the questions
        questions = self._fetch_questions()
        if not isinstance(questions, list):
            logger.error(f"Failed to fetch questions: {questions}")
            return 0, 0
        
        # Answer each question
        answers = []
        
        for i, q in enumerate(questions):
            task_id = q.get("task_id", f"task_{i}")
            logger.info(f"Processing task {i+1}/{len(questions)}: {q['question'][:50]}...")
            
            try:
                json_response = agent.process_question(q["question"])
                response_obj = json.loads(json_response)
                answer = response_obj.get("final_answer", "")
                
                answers.append({
                    "task_id": task_id,
                    "answer": str(answer)[:300]
                })
            except Exception as e:
                logger.error(f"Task processing failed: {str(e)}")
                answers.append({
                    "task_id": task_id,
                    "answer": f"ERROR: {str(e)}"
                })
        
        # Submit the answers
        return self._submit_answers(username, agent_code, answers)

    def _fetch_questions(self):
        """Fetch the question list from the API, retrying on transient failures."""
        last_error = None
        for attempt in range(API_RETRIES):
            try:
                response = self.session.get(self.questions_url, timeout=API_TIMEOUT)
                if response.status_code == 200:
                    return response.json()
                last_error = f"HTTP error {response.status_code}"
            except Exception as e:
                last_error = f"Connection error: {str(e)}"
            if attempt < API_RETRIES - 1:
                time.sleep(2 ** attempt)  # brief backoff before the next attempt
        return last_error

    def _submit_answers(self, username: str, agent_code: str, answers: list) -> Tuple[int, int]:
        """Submit the answers to the scoring server."""
        try:
            payload = {
                "username": username.strip(),
                "agent_code": agent_code.strip(),
                "answers": answers
            }
            
            response = self.session.post(
                self.submit_url,
                json=payload,
                timeout=API_TIMEOUT * 2
            )
            
            if response.status_code == 200:
                result = response.json()
                score = result.get("score", 0)
                return score, len(answers)
            return 0, len(answers)
        except Exception as e:
            logger.error(f"Submission failed: {str(e)}")
            return 0, len(answers)

if __name__ == "__main__":
    # Run parameters
    USERNAME = "yoshizen"
    AGENT_CODE = "https://huggingface.co/spaces/yoshizen/FinalTest"
    
    logger.info(f"Starting evaluation for {USERNAME}")
    
    start_time = time.time()
    evaluator = GAIAEvaluator()
    score, total = evaluator.run_evaluation(USERNAME, AGENT_CODE)
    
    elapsed = time.time() - start_time
    logger.info(f"Evaluation finished in {elapsed:.1f} s")
    logger.info(f"Result: {score}/{total} correct answers")
    logger.info(f"Accuracy: {score/total*100 if total > 0 else 0:.1f}%")