Final_Assignment_Template

Sleeping

File size: 3,481 Bytes

a5c9e62

#!/usr/bin/env python3
"""
Test script to verify the agent's answer formatting works correctly.
"""

import os
from agent import smart_agent
from utils import format_gaia_answer

def test_answer_formatting():
    """Run ``format_gaia_answer`` over sample raw answers and report each outcome.

    For every (raw answer, expected string) pair, the formatter's output is
    compared with the expected string and a one-line pass/fail report is
    printed. A mismatch is only reported on stdout; nothing is raised or
    returned.
    """
    # Pairs of (raw model output, string the formatter is expected to return).
    samples = [
        ("I think the answer is 42. FINAL ANSWER: 42", "42"),
        ("Let me calculate... FINAL ANSWER: 3.14159", "3.14159"),
        ("After research, FINAL ANSWER: New York", "New York"),
        ("The result is FINAL ANSWER: apple, banana, cherry", "apple, banana, cherry"),
        # Expectation: thousands separators are dropped from numbers.
        ("FINAL ANSWER: 1,234", "1234"),
        # Expectation: surrounding quotes are dropped.
        ('FINAL ANSWER: "Hello World"', "Hello World"),
        # Expectation: qualifiers such as "approximately" are dropped.
        ("FINAL ANSWER: approximately 100", "100"),
        # Expectation: text without the marker is passed through unchanged.
        ("No clear final answer format here", "No clear final answer format here"),
    ]

    print("🧪 Testing answer formatting...")
    case_number = 0
    for raw_text, wanted in samples:
        case_number += 1
        formatted = format_gaia_answer(raw_text)
        matched = formatted == wanted
        if matched:
            marker = "✅"
        else:
            marker = "❌"
        print(f"{marker} Test {case_number}: '{raw_text}' -> '{formatted}' (expected: '{wanted}')")
        if not matched:
            print("   ⚠️  Mismatch detected!")

    # Visual separator between this check and whatever is printed next.
    print("\n" + "=" * 50)

def test_simple_question():
    """Send one arithmetic question to ``smart_agent`` and print what comes back.

    The agent call is expected to yield an (answer, reasoning) pair. The
    answer, the reasoning length and a 200-character reasoning preview are
    printed, followed by a line saying whether the answer is non-empty. Any
    exception raised along the way is caught and reported on stdout, so this
    function never propagates an error.
    """
    print("🤖 Testing agent with a simple question...")

    prompt = "What is 2 + 2?"
    try:
        reply, trace = smart_agent(prompt)
        print(f"Question: {prompt}")
        print(f"Answer: {reply}")
        print(f"Reasoning length: {len(trace)} characters")
        print(f"Raw reasoning preview: {trace[:200]}...")

        # A usable answer is one that is truthy and not whitespace-only.
        has_content = bool(reply and reply.strip())
        verdict = (
            "✅ Agent returned a non-empty answer"
            if has_content
            else "❌ Agent returned empty answer"
        )
        print(verdict)
    except Exception as exc:
        # Deliberately broad: this is a smoke check that reports and moves on.
        print(f"❌ Error testing agent: {exc}")

    # Visual separator between this check and whatever is printed next.
    print("\n" + "=" * 50)

def test_api_format():
    """Print a field-by-field check of a hard-coded sample submission.

    A sample payload is built in place and checked against the field names
    this script treats as required (``task_id``, ``model_answer``) and
    optional (``reasoning_trace``). Results are only printed; nothing is sent
    anywhere and nothing is returned.
    """
    print("📡 Testing API submission format...")

    # Stand-in for the payload that would be submitted.
    payload = {
        "task_id": "test_task_1",
        "model_answer": "42",
        "reasoning_trace": "I calculated 2+2 and got 4, but the question asks for something else...",
    }

    print("Required fields check:")
    for key in ("task_id", "model_answer"):
        if key not in payload:
            print(f"❌ Missing required field: {key}")
            continue
        print(f"✅ {key}: {payload[key]}")

    print("Optional fields check:")
    for key in ("reasoning_trace",):
        if key not in payload:
            print(f"ℹ️  Optional field not present: {key}")
            continue
        # Only the length is shown; the value itself may be long.
        print(f"✅ {key}: Present ({len(str(payload[key]))} chars)")

if __name__ == "__main__":
    print("🔧 GAIA Agent Format Testing")
    print("=" * 50)

    # Run the checks in a fixed order: answer formatting first, then the
    # agent question, then the submission payload fields.
    for run_check in (test_answer_formatting, test_simple_question, test_api_format):
        run_check()

    print("🏁 Testing complete!")