# Configuration and constants for GAIA Agent Evaluator
"""Configuration and constants for the GAIA Agent Evaluator.

Groups together the scoring/inference API endpoints, HTTP retry settings,
the static knowledge-base text injected into prompts, keyword patterns used
to route questions to tools, and markers used to clean LLM responses.
"""

import os

# --- API Configuration ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
LLAMA_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
# Only send an Authorization header when a token is actually configured;
# an empty dict lets unauthenticated requests go through without a bogus
# "Bearer None" header.
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

# --- Request Configuration ---
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds

# --- Knowledge Base Content ---
# Static reference text (markdown) presumably injected into agent prompts.
GAIA_KNOWLEDGE = """
### AI and Agent Concepts
- An agent is an autonomous entity that observes and acts upon an environment using sensors and actuators, usually to achieve specific goals.
- GAIA (General AI Assistant) is a framework for creating and evaluating AI assistants that can perform a wide range of tasks.
- The agent loop consists of perception, reasoning, and action.
- RAG (Retrieval-Augmented Generation) combines retrieval of relevant information with generation capabilities of language models.
- An LLM (Large Language Model) is a neural network trained on vast amounts of text data to understand and generate human language.

### Agent Capabilities
- Tool use refers to an agent's ability to employ external tools like search engines, APIs, or specialized algorithms.
- An effective agent should be able to decompose complex problems into manageable parts.
- Chain-of-thought reasoning allows agents to break down problem-solving steps to improve accuracy.
- Agents should apply appropriate reasoning strategies based on the type of question (factual, analytical, etc.)
- Self-reflection helps agents identify and correct errors in their reasoning.

### Evaluation Criteria
- Agent responses should be accurate, relevant, and factually correct.
- Effective agents provide concise yet comprehensive answers.
- Agents should acknowledge limitations and uncertainties when appropriate.
- Good agents can follow multi-step instructions and fulfill all requirements.
- Reasoning transparency helps users understand how the agent arrived at its conclusions.
"""

# --- Tool Pattern Matching ---
# Substring patterns used to route a question to the appropriate tool.
YOUTUBE_PATTERNS = ["youtube.com", "youtu.be", "video", "watch?v=", "channel"]
# Reversed-text puzzles ("answer", "sentence", "words" spelled backwards).
REVERSE_TEXT_PATTERNS = ["rewsna", "ecnetnes", "sdrow"]
WIKIPEDIA_PATTERNS = [
    "wikipedia", "article", "published", "featured article", "promoted",
    "nominated", "discography", "studio albums", "encyclopedia", "wiki",
    "featured content",
]
WEB_SEARCH_PATTERNS = [
    # Time indicators
    "current", "latest", "recent", "2023", "2022", "2021", "2020", "today",
    # Question words
    "how many", "where", "when", "who", "which", "what", "whose",
    # Sports and competitions
    "yankee", "walks", "athletes", "olympics", "competition", "pitcher",
    "baseball",
    # Specific entities that need web lookup
    "malko", "taishō tamai", "universe today", "nedoshivina", "specimens",
    "polish-language", "actor", "played",
    # Geographic and demographic
    "country", "nationality", "first name", "award number", "city",
    # Publications and research
    "published", "paper", "study", "research", "journal", "author",
    # Statistics and data
    "statistics", "data", "facts", "information about", "number of",
]
AI_PATTERNS = ["agent", "gaia", "llm", "ai", "evaluation", "tool", "artificial intelligence"]
FILE_PATTERNS = ["excel", "xlsx", "csv", "python", ".py", "mp3", "wav", "audio", "voice memo"]

# --- Answer Cleaning Patterns ---
# Leading phrases stripped from model output before returning the answer.
ANSWER_PREFIXES_TO_REMOVE = [
    "final answer:", "answer:", "the answer is:", "result:", "solution:",
    "conclusion:", "final answer is:", "direct answer:",
    "based on the context:", "according to:", "the result is:",
]
# NOTE(review): the empty-string entries below match at position 0 of ANY
# text (e.g. text.find("") == 0), which makes them no-ops or bugs depending
# on how the consumer scans for markers. They look like delimiter tokens
# that were lost (e.g. special chat-template tags) — confirm against the
# response-parsing code before removing or restoring them.
LLM_RESPONSE_MARKERS = ["", "", "Answer:", "Response:", "Assistant:"]
LLM_END_MARKERS = ["", "", "Human:", "User:"]