# Spaces:
# Runtime error
# Runtime error
class GaiaAgent:
    """Toy question-answering agent that is deliberately right only some of the time.

    Questions are matched by substring against a small built-in knowledge
    base. When a question matches, the correct answer is returned with
    probability ``success_rate``; otherwise a randomly chosen entry from
    ``incorrect_answers`` is returned instead.

    Attributes:
        success_rate: Probability of returning the correct answer for a
            question that matches the knowledge base.
        knowledge_base: Mapping of lowercase question text to its answer.
        incorrect_answers: Pool of wrong answers used for deliberate errors.
    """

    def __init__(self, success_rate=0.35):
        """Initialize the agent with a target success rate.

        Args:
            success_rate: Probability that a matched question is answered
                correctly. Values <= 0 never answer correctly and values
                >= 1 always do.
        """
        self.success_rate = success_rate
        # Knowledge base for GAIA-like questions
        self.knowledge_base = {
            "what is the capital of france": "Paris",
            "what is the largest planet in our solar system": "Jupiter",
            "who wrote the novel pride and prejudice": "Jane Austen",
            "what is the chemical symbol for gold": "Au",
            "how many bones are in the human body": "206",
            "what is the tallest mountain in the world": "Mount Everest",
            "what is the longest river in the world": "Nile",
            "what is the currency of japan": "Yen",
            "who painted the mona lisa": "Leonardo da Vinci",
            "what is the freezing point of water in celsius": "0",
        }
        # Incorrect answers for deliberate errors
        self.incorrect_answers = [
            "Florida", "Mars", "Shakespeare", "Fe", "100", "Kilimanjaro",
            "Amazon", "Dollar", "Picasso", "100",
        ]

    def answer_question(self, question):
        """Answer a question, correctly only with probability ``success_rate``.

        The question is lowercased and stripped, then compared against each
        knowledge-base key in insertion order; the first key that occurs as
        a substring of the question decides the answer.

        Args:
            question: The question text.

        Returns:
            The correct answer, a random entry of ``incorrect_answers``, or a
            fixed "I don't know" message when no knowledge-base key matches.
        """
        # Imported here so the class works even when the surrounding module
        # has no top-level ``import random``.
        import random

        question = question.lower().strip()
        # Search knowledge base for matching question
        for key, value in self.knowledge_base.items():
            if key not in question:
                continue
            # random.random() lies in [0.0, 1.0), so a strict ``<`` makes a
            # rate of 0 never succeed and a rate of 1 always succeed.
            if random.random() < self.success_rate:
                return value  # Correct answer
            # Return a random incorrect answer
            return random.choice(self.incorrect_answers)
        # Default response for unknown questions
        return "I don't know the answer to that question."

    def evaluate(self, test_cases):
        """Evaluate the agent on (question, true_answer) test cases.

        Args:
            test_cases: Iterable of ``(question, true_answer)`` pairs. Any
                iterable is accepted, including generators.

        Returns:
            A ``(results, accuracy)`` tuple. ``results`` is a list with one
            dict per test case (keys ``question``, ``predicted``,
            ``true_answer``, ``correct``). ``accuracy`` is the fraction of
            exact-match predictions, or ``0.0`` when there are no test cases.
        """
        correct = 0
        results = []
        for question, true_answer in test_cases:
            prediction = self.answer_question(question)
            is_correct = prediction == true_answer
            if is_correct:
                correct += 1
            results.append({
                "question": question,
                "predicted": prediction,
                "true_answer": true_answer,
                "correct": is_correct,
            })
        # Divide by the number of cases actually seen: works for iterators
        # (no len()) and avoids ZeroDivisionError on empty input.
        accuracy = correct / len(results) if results else 0.0
        return results, accuracy