class GaiaAgent:
    """Toy question-answering agent that is deliberately right only part of the time.

    Questions are matched against a small built-in knowledge base. For a
    matched question the correct answer is returned with probability
    ``success_rate``; otherwise a random entry of ``incorrect_answers`` is
    returned instead.
    """

    def __init__(self, success_rate: float = 0.35):
        """Initialize the agent with a target success rate.

        Args:
            success_rate: Probability of returning the correct answer for a
                question found in the knowledge base. Compared against a
                draw from ``random.random()``, so 0.0 means "never correct"
                and 1.0 means "always correct".
        """
        self.success_rate = success_rate

        # Knowledge base for GAIA-like questions. Keys are lower-case
        # question fragments; values are the correct answers.
        self.knowledge_base = {
            "what is the capital of france": "Paris",
            "what is the largest planet in our solar system": "Jupiter",
            "who wrote the novel pride and prejudice": "Jane Austen",
            "what is the chemical symbol for gold": "Au",
            "how many bones are in the human body": "206",
            "what is the tallest mountain in the world": "Mount Everest",
            "what is the longest river in the world": "Nile",
            "what is the currency of japan": "Yen",
            "who painted the mona lisa": "Leonardo da Vinci",
            "what is the freezing point of water in celsius": "0",
        }

        # Pool of wrong answers used for deliberate errors.
        self.incorrect_answers = [
            "Florida",
            "Mars",
            "Shakespeare",
            "Fe",
            "100",
            "Kilimanjaro",
            "Amazon",
            "Dollar",
            "Picasso",
            "100",
        ]

    def answer_question(self, question: str) -> str:
        """Answer a question, succeeding with probability ``self.success_rate``.

        The question is lower-cased and stripped, then the first knowledge
        base key (in insertion order) that occurs as a substring of it
        decides which answer applies.

        Args:
            question: The question text.

        Returns:
            The correct answer, a randomly chosen incorrect answer, or a
            fixed "don't know" message when no knowledge base key matches.
        """
        # Imported locally so the method works even if the enclosing module
        # does not import ``random`` at the top level.
        import random

        normalized = question.lower().strip()

        for key, answer in self.knowledge_base.items():
            if key not in normalized:
                continue
            # random.random() is in [0.0, 1.0); a strict "<" makes
            # success_rate=0.0 never succeed and 1.0 always succeed.
            if random.random() < self.success_rate:
                return answer
            return random.choice(self.incorrect_answers)

        # Default response for unknown questions.
        return "I don't know the answer to that question."

    def evaluate(self, test_cases):
        """Evaluate the agent on ``(question, true_answer)`` test cases.

        Args:
            test_cases: Iterable of ``(question, true_answer)`` pairs. Any
                iterable is accepted, including generators.

        Returns:
            A ``(results, accuracy)`` tuple. ``results`` is a list with one
            dict per test case (keys ``"question"``, ``"predicted"``,
            ``"true_answer"``, ``"correct"``); ``accuracy`` is the fraction
            of exact-match predictions, or ``0.0`` when there are no test
            cases.
        """
        results = []
        correct = 0

        for question, true_answer in test_cases:
            prediction = self.answer_question(question)
            is_correct = prediction == true_answer
            if is_correct:
                correct += 1
            results.append(
                {
                    "question": question,
                    "predicted": prediction,
                    "true_answer": true_answer,
                    "correct": is_correct,
                }
            )

        # Count from the results so iterators work, and avoid dividing by
        # zero when no test cases were supplied.
        accuracy = correct / len(results) if results else 0.0
        return results, accuracy