# Configuration and constants for GAIA Agent Evaluator
import os

# --- API Configuration ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# NOTE: despite the constant's name, this endpoint serves Mixtral-8x7B-Instruct.
LLAMA_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

# --- Request Configuration ---
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds
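
# Illustrative sketch (not part of the original config): one way the request and
# API constants above might be combined into a retrying client. The helper name
# and the exact payload shape are assumptions, not the evaluator's actual code.
def query_llm_with_retries(prompt: str):
    """POST a prompt to LLAMA_API_URL, retrying up to MAX_RETRIES times."""
    import time      # local imports keep this sketch self-contained
    import requests

    last_error = None
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.post(
                LLAMA_API_URL,
                headers=HEADERS,
                json={"inputs": prompt},  # payload shape assumed for the HF Inference API
                timeout=30,
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as exc:
            last_error = exc
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)  # fixed delay between attempts
    raise RuntimeError(f"Request failed after {MAX_RETRIES} attempts") from last_error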
# --- Knowledge Base Content ---
GAIA_KNOWLEDGE = """
### AI and Agent Concepts
- An agent is an autonomous entity that observes and acts upon an environment using sensors and actuators, usually to achieve specific goals.
- GAIA (General AI Assistants) is a benchmark for evaluating AI assistants on real-world questions that require reasoning, tool use, and multi-step problem solving.
- The agent loop consists of perception, reasoning, and action.
- RAG (Retrieval-Augmented Generation) combines retrieval of relevant information with the generation capabilities of language models.
- An LLM (Large Language Model) is a neural network trained on vast amounts of text data to understand and generate human language.

### Agent Capabilities
- Tool use refers to an agent's ability to employ external tools such as search engines, APIs, or specialized algorithms.
- An effective agent should be able to decompose complex problems into manageable parts.
- Chain-of-thought reasoning allows agents to break down problem-solving steps to improve accuracy.
- Agents should apply appropriate reasoning strategies based on the type of question (factual, analytical, etc.).
- Self-reflection helps agents identify and correct errors in their reasoning.

### Evaluation Criteria
- Agent responses should be accurate, relevant, and factually correct.
- Effective agents provide concise yet comprehensive answers.
- Agents should acknowledge limitations and uncertainties when appropriate.
- Good agents can follow multi-step instructions and fulfill all requirements.
- Reasoning transparency helps users understand how the agent arrived at its conclusions.
"""
# --- Tool Pattern Matching ---
YOUTUBE_PATTERNS = ["youtube.com", "youtu.be", "video", "watch?v=", "channel"]
REVERSE_TEXT_PATTERNS = ["rewsna", "ecnetnes", "sdrow"]  # "answer", "sentence", "words" reversed
WIKIPEDIA_PATTERNS = [
    "wikipedia", "article", "published", "featured article",
    "promoted", "nominated", "discography", "studio albums",
    "encyclopedia", "wiki", "featured content"
]
WEB_SEARCH_PATTERNS = [
    # Time indicators
    "current", "latest", "recent", "2023", "2022", "2021", "2020", "today",
    # Question words
    "how many", "where", "when", "who", "which", "what", "whose",
    # Sports and competitions
    "yankee", "walks", "athletes", "olympics", "competition", "pitcher", "baseball",
    # Specific entities that need web lookup
    "malko", "taishō tamai", "universe today", "nedoshivina",
    "specimens", "polish-language", "actor", "played",
    # Geographic and demographic
    "country", "nationality", "first name", "award number", "city",
    # Publications and research
    "published", "paper", "study", "research", "journal", "author",
    # Statistics and data
    "statistics", "data", "facts", "information about", "number of"
]
AI_PATTERNS = ["agent", "gaia", "llm", "ai", "evaluation", "tool", "artificial intelligence"]
FILE_PATTERNS = ["excel", "xlsx", "csv", "python", ".py", "mp3", "wav", "audio", "voice memo"]
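
# Illustrative sketch (not part of the original config): one way the pattern lists
# above might route a question to a tool. The routing function name, tool labels,
# and the first-match precedence are assumptions about the evaluator's design.
def match_tool(question: str) -> str:
    """Return a tool label based on which pattern list matches the question first."""
    q = question.lower()
    ordered_checks = [
        ("reverse_text", REVERSE_TEXT_PATTERNS),
        ("youtube", YOUTUBE_PATTERNS),
        ("file", FILE_PATTERNS),
        ("wikipedia", WIKIPEDIA_PATTERNS),
        ("ai_knowledge", AI_PATTERNS),
        ("web_search", WEB_SEARCH_PATTERNS),
    ]
    for tool_name, patterns in ordered_checks:
        if any(pattern in q for pattern in patterns):
            return tool_name
    return "web_search"  # assumed default when nothing matches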
# --- Answer Cleaning Patterns ---
ANSWER_PREFIXES_TO_REMOVE = [
    "final answer:", "answer:", "the answer is:", "result:",
    "solution:", "conclusion:", "final answer is:", "direct answer:",
    "based on the context:", "according to:", "the result is:"
]
LLM_RESPONSE_MARKERS = ["<answer>", "<response>", "Answer:", "Response:", "Assistant:"]
LLM_END_MARKERS = ["</answer>", "</response>", "Human:", "User:"]
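
# Illustrative sketch (not part of the original config): how the cleaning constants
# above might be applied to a raw LLM completion. The function name and the order
# of operations (markers first, then prefixes) are assumptions.
def clean_answer(raw_response: str) -> str:
    """Trim LLM framing markers and common answer prefixes from a raw response."""
    text = raw_response
    # Keep only the text after the last start marker, if any is present.
    for marker in LLM_RESPONSE_MARKERS:
        if marker in text:
            text = text.split(marker)[-1]
    # Drop anything after the first end marker.
    for marker in LLM_END_MARKERS:
        if marker in text:
            text = text.split(marker)[0]
    text = text.strip()
    # Strip a leading "final answer:"-style prefix (case-insensitive).
    lowered = text.lower()
    for prefix in ANSWER_PREFIXES_TO_REMOVE:
        if lowered.startswith(prefix):
            text = text[len(prefix):].strip()
            break
    return text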