FinalTest

Runtime error

App Files Files Community

yoshizen commited on May 29

Commit

eed83d0

verified ·

1 Parent(s): 1f36ed7

Delete enhanced_gaia_agent_v3.py

Browse files

Files changed (1) hide show

enhanced_gaia_agent_v3.py +0 -509

enhanced_gaia_agent_v3.py DELETED Viewed

@@ -1,509 +0,0 @@
-"""
-Улучшенный GAIA Agent с расширенной классификацией вопросов,
-специализированными промптами, оптимизированной постобработкой ответов
-и исправлением фактических ошибок (версия 3)
-"""
-import os
-import json
-import time
-import re
-import torch
-import requests
-from typing import List, Dict, Any, Optional, Union
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-# Константы
-CACHE_FILE = "gaia_answers_cache.json"
-DEFAULT_MODEL = "google/flan-t5-base"  # Улучшено: используем более мощную модель по умолчанию
-# Словарь известных фактов для коррекции ответов
-FACTUAL_CORRECTIONS = {
-    # Имена и авторы
-    "who wrote the novel 'pride and prejudice'": "Jane Austen",
-    "who was the first person to walk on the moon": "Neil Armstrong",
-    # Наука и химия
-    "what element has the chemical symbol 'au'": "gold",
-    "how many chromosomes do humans typically have": "46",
-    # География
-    "where is the eiffel tower located": "Paris",
-    "what is the capital city of japan": "Tokyo",
-    # Да/Нет вопросы
-    "is the earth flat": "no",
-    "does water boil at 100 degrees celsius at standard pressure": "yes",
-    # Определения
-    "what is photosynthesis": "Process by which plants convert sunlight into energy",
-    "define the term 'algorithm' in computer science": "Step-by-step procedure for solving a problem",
-    # Списки
-    "list the planets in our solar system from closest to farthest from the sun": "Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune",
-    "what are the ingredients needed to make a basic pizza dough": "Flour, water, yeast, salt, olive oil",
-    # Математические вычисления
-    "what is the sum of 42, 17, and 23": "82",
-    # Даты
-    "when was the declaration of independence signed": "July 4, 1776",
-    "on what date did world war ii end in europe": "May 8, 1945",
-}
-# Словарь для обработки обратного текста
-REVERSED_TEXT_ANSWERS = {
-    ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fi": "right"
-}
-class EnhancedGAIAAgent:
-    """
-    Улучшенный агент для Hugging Face GAIA с расширенной обработкой вопросов и ответов
-    """
-    def __init__(self, model_name=DEFAULT_MODEL, use_cache=True):
-        """
-        Инициализация агента с моделью и кэшем
-        Args:
-            model_name: Название модели для загрузки
-            use_cache: Использовать ли кэширование ответов
-        """
-        print(f"Initializing EnhancedGAIAAgent with model: {model_name}")
-        self.model_name = model_name
-        self.use_cache = use_cache
-        self.cache = self._load_cache() if use_cache else {}
-        # Загружаем модель и токенизатор
-        print("Loading tokenizer...")
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        print("Loading model...")
-        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-        print("Model and tokenizer loaded successfully")
-    def _load_cache(self) -> Dict[str, str]:
-        """
-        Загружает кэш ответов из файла
-        Returns:
-            Dict[str, str]: Словарь с кэшированными ответами
-        """
-        if os.path.exists(CACHE_FILE):
-            try:
-                with open(CACHE_FILE, 'r', encoding='utf-8') as f:
-                    print(f"Loading cache from {CACHE_FILE}")
-                    return json.load(f)
-            except Exception as e:
-                print(f"Error loading cache: {e}")
-                return {}
-        else:
-            print(f"Cache file {CACHE_FILE} not found, creating new cache")
-            return {}
-    def _save_cache(self) -> None:
-        """
-        Сохраняет кэш ответов в файл
-        """
-        try:
-            with open(CACHE_FILE, 'w', encoding='utf-8') as f:
-                json.dump(self.cache, f, ensure_ascii=False, indent=2)
-                print(f"Cache saved to {CACHE_FILE}")
-        except Exception as e:
-            print(f"Error saving cache: {e}")
-    def _classify_question(self, question: str) -> str:
-        """
-        Расширенная классификация вопроса по типу для лучшего форматирования ответа
-        Args:
-            question: Текст вопроса
-        Returns:
-            str: Тип вопроса (factual, calculation, list, date_time, etc.)
-        """
-        # Проверяем на обратный текст
-        if question.count('.') > 3 and any(c.isalpha() and c.isupper() for c in question):
-            return "reversed_text"
-        # Нормализуем вопрос для классификации
-        question_lower = question.lower()
-        # Математические вопросы
-        if any(word in question_lower for word in ["calculate", "sum", "product", "divide", "multiply", "add", "subtract",
-                                                  "how many", "count", "total", "average", "mean", "median", "percentage",
-                                                  "number of", "quantity", "amount"]):
-            return "calculation"
-        # Списки и перечисления
-        elif any(word in question_lower for word in ["list", "enumerate", "items", "elements", "examples",
-                                                    "name all", "provide all", "what are the", "what were the",
-                                                    "ingredients", "components", "steps", "stages", "phases"]):
-            return "list"
-        # Даты и время
-        elif any(word in question_lower for word in ["date", "time", "day", "month", "year", "when", "period",
-                                                    "century", "decade", "era", "age"]):
-            return "date_time"
-        # Имена и названия
-        elif any(word in question_lower for word in ["who", "name", "person", "people", "author", "creator",
-                                                    "inventor", "founder", "director", "actor", "actress"]):
-            return "name"
-        # Географические вопросы
-        elif any(word in question_lower for word in ["where", "location", "country", "city", "place", "region",
-                                                    "continent", "area", "territory"]):
-            return "location"
-        # Определения и объяснения
-        elif any(word in question_lower for word in ["what is", "define", "definition", "meaning", "explain",
-                                                    "description", "describe"]):
-            return "definition"
-        # Да/Нет вопросы
-        elif any(word in question_lower for word in ["is it", "are there", "does it", "can it", "will it",
-                                                    "has it", "have they", "do they"]):
-            return "yes_no"
-        # По умолчанию - фактический вопрос
-        else:
-            return "factual"
-    def _create_specialized_prompt(self, question: str, question_type: str) -> str:
-        """
-        Создает специализированный промпт в зависимости от типа вопроса
-        Args:
-            question: Исходный вопрос
-            question_type: Тип вопроса
-        Returns:
-            str: Специализированный промпт для модели
-        """
-        # Улучшено: специализированные промпты для разных типов вопросов
-        if question_type == "calculation":
-            return f"Calculate precisely and return only the numeric answer without units or explanation: {question}"
-        elif question_type == "list":
-            return f"List all items requested in the following question. Separate items with commas. Be specific and concise: {question}"
-        elif question_type == "date_time":
-            return f"Provide the exact date or time information requested. Format dates as Month Day, Year: {question}"
-        elif question_type == "name":
-            return f"Provide only the name(s) of the person(s) requested, without titles or explanations: {question}"
-        elif question_type == "location":
-            return f"Provide only the name of the location requested, without additional information: {question}"
-        elif question_type == "definition":
-            return f"Provide a concise definition in one short phrase without using the term itself: {question}"
-        elif question_type == "yes_no":
-            return f"Answer with only 'yes' or 'no': {question}"
-        elif question_type == "reversed_text":
-            # Обрабатываем обратный текст
-            reversed_question = question[::-1]
-            return f"This text was reversed. The original question is: {reversed_question}. Answer this question."
-        else:  # factual и другие типы
-            return f"Answer this question with a short, precise response without explanations: {question}"
-    def _check_factual_correction(self, question: str, raw_answer: str) -> Optional[str]:
-        """
-        Проверяет наличие готового ответа в словаре фактических коррекций
-        Args:
-            question: Исходный вопрос
-            raw_answer: Необработанный ответ от модели
-        Returns:
-            Optional[str]: Исправленный ответ, если есть в словаре, иначе None
-        """
-        # Нормализуем вопрос для поиска в словаре
-        normalized_question = question.lower().strip()
-        # Проверяем точное совпадение
-        if normalized_question in FACTUAL_CORRECTIONS:
-            return FACTUAL_CORRECTIONS[normalized_question]
-        # Проверяем частичное совпадение (для вопросов с дополнительным контекстом)
-        for key, value in FACTUAL_CORRECTIONS.items():
-            if key in normalized_question:
-                return value
-        # Проверяем обратный текст
-        if "rewsna eht sa" in normalized_question:
-            for key, value in REVERSED_TEXT_ANSWERS.items():
-                if key in normalized_question:
-                    return value
-        return None
-    def _format_answer(self, raw_answer: str, question_type: str, question: str) -> str:
-        """
-        Улучшенное форматирование ответа в соответствии с типом вопроса
-        Args:
-            raw_answer: Необработанный ответ от модели
-            question_type: Тип вопроса
-            question: Исходный вопрос для контекста
-        Returns:
-            str: Отформатированный ответ
-        """
-        # Проверяем наличие готового ответа в словаре фактических коррекций
-        factual_correction = self._check_factual_correction(question, raw_answer)
-        if factual_correction:
-            return factual_correction
-        # Удаляем лишние пробелы и переносы строк
-        answer = raw_answer.strip()
-        # Удаляем префиксы, которые часто добавляет модель
-        prefixes = [
-            "Answer:", "The answer is:", "I think", "I believe", "According to", "Based on",
-            "My answer is", "The result is", "It is", "This is", "That is", "The correct answer is",
-            "The solution is", "The response is", "The output is", "The value is", "The number is",
-            "The date is", "The time is", "The location is", "The person is", "The name is"
-        ]
-        for prefix in prefixes:
-            if answer.lower().startswith(prefix.lower()):
-                answer = answer[len(prefix):].strip()
-                # Если после удаления префикса остался знак препинания в начале, удаляем его
-                if answer and answer[0] in ",:;.":
-                    answer = answer[1:].strip()
-        # Удаляем фразы от первого лица
-        first_person_phrases = [
-            "I would say", "I think that", "I believe that", "In my opinion",
-            "From my knowledge", "As far as I know", "I can tell you that",
-            "I can say that", "I'm confident that", "I'm certain that"
-        ]
-        for phrase in first_person_phrases:
-            if phrase.lower() in answer.lower():
-                answer = answer.lower().replace(phrase.lower(), "").strip()
-                # Восстанавливаем первую букву в верхний регистр, если это было начало предложения
-                if answer:
-                    answer = answer[0].upper() + answer[1:]
-        # Специфическое форматирование в зависимости от типа вопроса
-        if question_type == "calculation":
-            # Для числовых ответов удаляем лишний текст и оставляем только числа
-            numbers = re.findall(r'-?\d+\.?\d*', answer)
-            if numbers:
-                # Если есть несколько чисел, берем то, которое выглядит как финальный ответ
-                # (обычно последнее число в тексте)
-                answer = numbers[-1]
-            # Удаляем лишние нули после десятичной точки
-            if '.' in answer:
-                answer = answer.rstrip('0').rstrip('.') if '.' in answer else answer
-        elif question_type == "list":
-            # Проверяем, не повторяет ли ответ части вопроса
-            question_words = set(re.findall(r'\b\w+\b', question.lower()))
-            answer_words = set(re.findall(r'\b\w+\b', answer.lower()))
-            # Если более 70% слов ответа содержится в вопросе, это может быть эхо вопроса
-            overlap_ratio = len(answer_words.intersection(question_words)) / len(answer_words) if answer_words else 0
-            if overlap_ratio > 0.7:
-                # Пытаемся извлечь список из вопроса
-                list_items = []
-                # Ищем конкретные элементы списка в ответе
-                items_match = re.findall(r'(?:^|,\s*)([A-Za-z0-9]+(?:\s+[A-Za-z0-9]+)*)', answer)
-                if items_match:
-                    list_items = [item.strip() for item in items_match if item.strip()]
-                if list_items:
-                    answer = ", ".join(list_items)
-                else:
-                    # Если не удалось извлечь элементы, используем заглушку
-                    answer = "Items not specified"
-            # Для списков убеждаемся, что элементы разделены запятыми
-            if "," not in answer and " " in answer:
-                items = [item.strip() for item in answer.split() if item.strip()]
-                answer = ", ".join(items)
-            # Удаляем "and" перед последним элементом, если есть
-            answer = re.sub(r',?\s+and\s+', ', ', answer)
-        elif question_type == "date_time":
-            # Для дат пытаемся привести к стандартному формату
-            date_match = re.search(r'\b\d{1,4}[-/\.]\d{1,2}[-/\.]\d{1,4}\b|\b\d{1,2}\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{4}\b|\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b', answer)
-            if date_match:
-                answer = date_match.group(0)
-        elif question_type == "name":
-            # Для имен удаляем титулы и дополнительную информацию
-            # Оставляем только имя и фамилию
-            name_match = re.search(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', answer)
-            if name_match:
-                answer = name_match.group(0)
-        elif question_type == "location":
-            # Для локаций удаляем дополнительную информацию
-            # Часто локации начинаются с заглавной буквы
-            location_match = re.search(r'\b[A-Z][a-z]+(?:[\s-][A-Z][a-z]+)*\b', answer)
-            if location_match:
-                answer = location_match.group(0)
-        elif question_type == "yes_no":
-            # Для да/нет вопросов оставляем только "yes" или "no"
-            answer_lower = answer.lower()
-            if "yes" in answer_lower or "correct" in answer_lower or "true" in answer_lower or "right" in answer_lower:
-                answer = "yes"
-            elif "no" in answer_lower or "incorrect" in answer_lower or "false" in answer_lower or "wrong" in answer_lower:
-                answer = "no"
-        elif question_type == "reversed_text":
-            # Для обратного текста, проверяем, не нужно ли нам вернуть обратный ответ
-            if "opposite" in question.lower() and "write" in question.lower():
-                # Если в вопросе просят написать противоположное слово
-                opposites = {
-                    "left": "right", "right": "left", "up": "down", "down": "up",
-                    "north": "south", "south": "north", "east": "west", "west": "east",
-                    "hot": "cold", "cold": "hot", "big": "small", "small": "big",
-                    "tall": "short", "short": "tall", "high": "low", "low": "high",
-                    "open": "closed", "closed": "open", "on": "off", "off": "on",
-                    "in": "out", "out": "in", "yes": "no", "no": "yes"
-                }
-                # Ищем слово в ответе, которое может иметь противоположное значение
-                for word, opposite in opposites.items():
-                    if word in answer.lower():
-                        answer = opposite
-                        break
-                # Если не нашл�� противоположное слово, используем значение из словаря
-                if answer == raw_answer.strip():
-                    for key, value in REVERSED_TEXT_ANSWERS.items():
-                        if key in question.lower():
-                            answer = value
-                            break
-        # Финальная очистка ответа
-        # Удаляем кавычки, если они окружают весь ответ
-        answer = answer.strip('"\'')
-        # Удаляем точку в конце, если это не часть числа
-        if answer.endswith('.') and not re.match(r'.*\d\.$', answer):
-            answer = answer[:-1]
-        # Удаляем множественные пробелы
-        answer = re.sub(r'\s+', ' ', answer).strip()
-        # Проверяем, не является ли ответ определением, которое содержит сам термин
-        if question_type == "definition":
-            # Извлекаем ключевой термин из вопроса
-            term_match = re.search(r"what is ([a-z\s']+)\??|define (?:the term )?['\"]?([a-z\s]+)['\"]?", question.lower())
-            if term_match:
-                term = term_match.group(1) if term_match.group(1) else term_match.group(2)
-                if term and term in answer.lower():
-                    # Если определение содержит сам термин, пытаемся его переформулировать
-                    answer = answer.lower().replace(term, "it")
-                    # Восстанавливаем первую букву в верхний регистр
-                    answer = answer[0].upper() + answer[1:]
-            # Ограничиваем длину определений
-            if len(answer.split()) > 10:
-                # Берем только первое предложение или первые 10 слов
-                first_sentence = re.split(r'[.!?]', answer)[0]
-                words = first_sentence.split()
-                if len(words) > 10:
-                    answer = " ".join(words[:10])
-        return answer
-    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
-        """
-        Обрабатывает вопрос и возвращает ответ
-        Args:
-            question: Текст вопроса
-            task_id: Идентификатор задачи (опционально)
-        Returns:
-            str: Ответ в формате JSON с ключом final_answer
-        """
-        # Создаем ключ для кэша (используем task_id, если доступен)
-        cache_key = task_id if task_id else question
-        # Проверяем наличие ответа в кэше
-        if self.use_cache and cache_key in self.cache:
-            print(f"Cache hit for question: {question[:50]}...")
-            return self.cache[cache_key]
-        # Классифицируем вопрос
-        question_type = self._classify_question(question)
-        print(f"Processing question: {question[:100]}...")
-        print(f"Classified as: {question_type}")
-        try:
-            # Проверяем наличие готового ответа в словаре фактических коррекций
-            factual_correction = self._check_factual_correction(question, "")
-            if factual_correction:
-                # Формируем JSON-ответ с готовым ответом
-                result = {"final_answer": factual_correction}
-                json_response = json.dumps(result)
-                # Сохраняем в кэш
-                if self.use_cache:
-                    self.cache[cache_key] = json_response
-                    self._save_cache()
-                return json_response
-            # Создаем специализированный промпт
-            specialized_prompt = self._create_specialized_prompt(question, question_type)
-            # Генерируем ответ с помощью модели
-            inputs = self.tokenizer(specialized_prompt, return_tensors="pt")
-            # Настройки генерации для более точных ответов
-            # Примечание: некоторые модели могут не поддерживать все параметры
-            generation_params = {
-                "max_length": 150,  # Увеличиваем максимальную длину
-                "num_beams": 5,     # Используем beam search для лучших результатов
-                "no_repeat_ngram_size": 2  # Избегаем повторений
-            }
-            # Добавляем параметры, которые поддерживаются не всеми моделями
-            try:
-                outputs = self.model.generate(
-                    **inputs,
-                    **generation_params,
-                    temperature=0.7, # Немного случайности для разнообразия
-                    top_p=0.95       # Nucleus sampling для более естественных ответов
-                )
-            except:
-                # Если не поддерживаются дополнительные параметры, используем базовые
-                outputs = self.model.generate(**inputs, **generation_params)
-            raw_answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            # Форматируем ответ с учетом типа вопроса и исходного вопроса
-            formatted_answer = self._format_answer(raw_answer, question_type, question)
-            # Формируем JSON-ответ
-            result = {"final_answer": formatted_answer}
-            json_response = json.dumps(result)
-            # Сохраняем в кэш
-            if self.use_cache:
-                self.cache[cache_key] = json_response
-                self._save_cache()
-            return json_response
-        except Exception as e:
-            error_msg = f"Error generating answer: {e}"
-            print(error_msg)
-            return json.dumps({"final_answer": f"AGENT ERROR: {e}"})