# Configuration and constants for GAIA Agent Evaluator
import os

# --- API Configuration ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# NOTE: despite the constant's name, this endpoint serves Mixtral-8x7B-Instruct.
LLAMA_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

# --- Request Configuration ---
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds
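
# Illustrative sketch (not part of the original config): one way the request and
# API constants above might be combined into a retrying client. The helper name
# and the exact payload shape are assumptions, not the evaluator's actual code.
def query_llm_with_retries(prompt: str):
    """POST a prompt to LLAMA_API_URL, retrying up to MAX_RETRIES times."""
    import time      # local imports keep this sketch self-contained
    import requests

    last_error = None
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.post(
                LLAMA_API_URL,
                headers=HEADERS,
                json={"inputs": prompt},  # payload shape assumed for the HF Inference API
                timeout=30,
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as exc:
            last_error = exc
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)  # fixed delay between attempts
    raise RuntimeError(f"Request failed after {MAX_RETRIES} attempts") from last_error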
# --- Knowledge Base Content ---
GAIA_KNOWLEDGE = """
### AI and Agent Concepts
- An agent is an autonomous entity that observes and acts upon an environment using sensors and actuators, usually to achieve specific goals.
- GAIA (General AI Assistants) is a benchmark for evaluating AI assistants on real-world questions that require reasoning, tool use, and multi-step problem solving.
- The agent loop consists of perception, reasoning, and action.
- RAG (Retrieval-Augmented Generation) combines retrieval of relevant information with the generation capabilities of language models.
- An LLM (Large Language Model) is a neural network trained on vast amounts of text data to understand and generate human language.

### Agent Capabilities
- Tool use refers to an agent's ability to employ external tools such as search engines, APIs, or specialized algorithms.
- An effective agent should be able to decompose complex problems into manageable parts.
- Chain-of-thought reasoning allows agents to break down problem-solving steps to improve accuracy.
- Agents should apply appropriate reasoning strategies based on the type of question (factual, analytical, etc.).
- Self-reflection helps agents identify and correct errors in their reasoning.

### Evaluation Criteria
- Agent responses should be accurate, relevant, and factually correct.
- Effective agents provide concise yet comprehensive answers.
- Agents should acknowledge limitations and uncertainties when appropriate.
- Good agents can follow multi-step instructions and fulfill all requirements.
- Reasoning transparency helps users understand how the agent arrived at its conclusions.
"""
# --- Tool Pattern Matching ---
YOUTUBE_PATTERNS = ["youtube.com", "youtu.be", "video", "watch?v=", "channel"]
REVERSE_TEXT_PATTERNS = ["rewsna", "ecnetnes", "sdrow"]  # "answer", "sentence", "words" reversed
WIKIPEDIA_PATTERNS = [
    "wikipedia", "article", "published", "featured article",
    "promoted", "nominated", "discography", "studio albums",
    "encyclopedia", "wiki", "featured content"
]
WEB_SEARCH_PATTERNS = [
    # Time indicators
    "current", "latest", "recent", "2023", "2022", "2021", "2020", "today",
    # Question words
    "how many", "where", "when", "who", "which", "what", "whose",
    # Sports and competitions
    "yankee", "walks", "athletes", "olympics", "competition", "pitcher", "baseball",
    # Specific entities that need web lookup
    "malko", "taishō tamai", "universe today", "nedoshivina",
    "specimens", "polish-language", "actor", "played",
    # Geographic and demographic
    "country", "nationality", "first name", "award number", "city",
    # Publications and research
    "published", "paper", "study", "research", "journal", "author",
    # Statistics and data
    "statistics", "data", "facts", "information about", "number of"
]
AI_PATTERNS = ["agent", "gaia", "llm", "ai", "evaluation", "tool", "artificial intelligence"]
FILE_PATTERNS = ["excel", "xlsx", "csv", "python", ".py", "mp3", "wav", "audio", "voice memo"]
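
# Illustrative sketch (not part of the original config): one way the pattern lists
# above might route a question to a tool. The routing function name, tool labels,
# and the first-match precedence are assumptions about the evaluator's design.
def match_tool(question: str) -> str:
    """Return a tool label based on which pattern list matches the question first."""
    q = question.lower()
    ordered_checks = [
        ("reverse_text", REVERSE_TEXT_PATTERNS),
        ("youtube", YOUTUBE_PATTERNS),
        ("file", FILE_PATTERNS),
        ("wikipedia", WIKIPEDIA_PATTERNS),
        ("ai_knowledge", AI_PATTERNS),
        ("web_search", WEB_SEARCH_PATTERNS),
    ]
    for tool_name, patterns in ordered_checks:
        if any(pattern in q for pattern in patterns):
            return tool_name
    return "web_search"  # assumed default when nothing matches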
# --- Answer Cleaning Patterns ---
ANSWER_PREFIXES_TO_REMOVE = [
    "final answer:", "answer:", "the answer is:", "result:",
    "solution:", "conclusion:", "final answer is:", "direct answer:",
    "based on the context:", "according to:", "the result is:"
]
LLM_RESPONSE_MARKERS = ["<answer>", "<response>", "Answer:", "Response:", "Assistant:"]
LLM_END_MARKERS = ["</answer>", "</response>", "Human:", "User:"]
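
# Illustrative sketch (not part of the original config): how the cleaning constants
# above might be applied to a raw LLM completion. The function name and the order
# of operations (markers first, then prefixes) are assumptions.
def clean_answer(raw_response: str) -> str:
    """Trim LLM framing markers and common answer prefixes from a raw response."""
    text = raw_response
    # Keep only the text after the last start marker, if any is present.
    for marker in LLM_RESPONSE_MARKERS:
        if marker in text:
            text = text.split(marker)[-1]
    # Drop anything after the first end marker.
    for marker in LLM_END_MARKERS:
        if marker in text:
            text = text.split(marker)[0]
    text = text.strip()
    # Strip a leading "final answer:"-style prefix (case-insensitive).
    lowered = text.lower()
    for prefix in ANSWER_PREFIXES_TO_REMOVE:
        if lowered.startswith(prefix):
            text = text[len(prefix):].strip()
            break
    return text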