# NOTE(review): removed Hugging Face Spaces page-scrape residue ("Spaces: / Sleeping")
# that preceded the module source and was never part of the code.
# Configuration and constants for GAIA Agent Evaluator
import os

# --- API Configuration ---
# Scoring endpoint for the HF Agents course (unit 4) and the remote fallback LLM endpoint.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
LLAMA_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"

# Token is optional: without it HEADERS stays empty and requests go out unauthenticated.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

# --- Model Configuration ---
USE_LOCAL_MODEL = True  # Set to False to use remote API model instead
USE_LLAMACPP = True  # Set to True to use llama-cpp-python instead of transformers

# Configuration for llama-cpp-python model
LLAMACPP_CONFIG = {
    "model_path": None,  # Use a default or provide a local path manually
    "model_url": "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf",
    "n_ctx": 2048,
    "n_gpu_layers": 0,  # CPU-only, adjust as needed for GPU
    "max_tokens": 1024,
    "temperature": 0.7,
}

# Backup configuration for transformers model
LOCAL_MODEL_CONFIG = {
    "model_name": "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
    "device": "cpu",
    "max_tokens": 1024,
    "temperature": 0.5,
}

# --- Request Configuration ---
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds
# --- Knowledge Base Configuration ---
# Plain-text knowledge base stored alongside this module under data/.
KNOWLEDGE_BASE_PATH = os.path.join(os.path.dirname(__file__), 'data', 'knowledge_base.txt')

def load_knowledge_base():
    """Load the knowledge base text from KNOWLEDGE_BASE_PATH.

    Returns:
        str: The full file contents, or an empty string when the file does
        not exist yet. In the missing-file case the data/ directory is
        created (idempotently) so a later write can succeed.
    """
    try:
        with open(KNOWLEDGE_BASE_PATH, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        # First run: ensure the parent directory exists, then report "no knowledge".
        os.makedirs(os.path.dirname(KNOWLEDGE_BASE_PATH), exist_ok=True)
        return ""
# --- Tool Pattern Matching ---
# Substring patterns used to route a question to the matching tool/handler.
YOUTUBE_PATTERNS = ["youtube.com", "youtu.be", "video", "watch?v=", "channel"]
# Reversed words ("answer", "sentence", "words") used to detect reversed-text puzzles.
REVERSE_TEXT_PATTERNS = ["rewsna", "ecnetnes", "sdrow"]
WIKIPEDIA_PATTERNS = [
    "wikipedia", "article", "published", "featured article",
    "promoted", "nominated", "discography", "studio albums",
    "encyclopedia", "wiki", "featured content",
]
WEB_SEARCH_PATTERNS = [
    # Time indicators
    "current", "latest", "recent", "2023", "2022", "2021", "2020", "today",
    # Question words
    "how many", "where", "when", "who", "which", "what", "whose",
    # Sports and competitions
    "yankee", "walks", "athletes", "olympics", "competition", "pitcher", "baseball",
    # Specific entities that need web lookup
    "malko", "taishō tamai", "universe today", "nedoshivina",
    "specimens", "polish-language", "actor", "played",
    # Geographic and demographic
    "country", "nationality", "first name", "award number", "city",
    # Publications and research
    "published", "paper", "study", "research", "journal", "author",
    # Statistics and data
    "statistics", "data", "facts", "information about", "number of",
]
AI_PATTERNS = ["agent", "gaia", "llm", "ai", "evaluation", "tool", "artificial intelligence"]
FILE_PATTERNS = ["excel", "xlsx", "csv", "python", ".py", "mp3", "wav", "audio", "voice memo"]

# --- Answer Cleaning Patterns ---
# Leading boilerplate stripped from model output before scoring.
ANSWER_PREFIXES_TO_REMOVE = [
    "final answer:", "answer:", "the answer is:", "result:",
    "solution:", "conclusion:", "final answer is:", "direct answer:",
    "based on the context:", "according to:", "the result is:",
]
# Markers delimiting the assistant's answer span inside a raw LLM response.
LLM_RESPONSE_MARKERS = ["<answer>", "<response>", "Answer:", "Response:", "Assistant:"]
LLM_END_MARKERS = ["</answer>", "</response>", "Human:", "User:"]
# Load the knowledge base once at import time so consumers can read
# GAIA_KNOWLEDGE directly instead of re-reading the file per request.
GAIA_KNOWLEDGE = load_knowledge_base()