Spaces:

schoolkithub
/

multi-agent-gaia-system

Running

Omachoko committed about 7 hours ago

Commit

a1492aa

1 Parent(s): 5c26e67

🔧 Fix agent answer extraction issues

✅ Fixed InferenceClient import error when huggingface_hub unavailable
✅ Fixed overly aggressive answer extraction rejecting valid short answers
✅ Numbers and single letters now properly preserved (e.g., '8', 'A', 'B')
✅ Added diagnostic test script for troubleshooting
🎯 Agent now properly answers mathematical and factual questions

Files changed (2) hide show

gaia_system.py +22 -8
test_agent.py +140 -0

gaia_system.py CHANGED Viewed

@@ -177,7 +177,7 @@ class UniversalMultimodalToolkit:
         """Initialize all multimodal AI clients"""
         self.clients = {}
-        if self.hf_token:
             # Vision models
             self.clients['vision'] = InferenceClient(model="Salesforce/blip-image-captioning-large", token=self.hf_token)
             self.clients['image_gen'] = InferenceClient(model="stabilityai/stable-diffusion-xl-base-1.0", token=self.hf_token)
@@ -1625,18 +1625,26 @@ Think step by step about what tools you need, use them, then provide ONLY the fi
         # Quality validation - reject broken/incomplete responses
         answer = answer.strip()
-        # Reject clearly broken responses
         broken_patterns = [
             r'^s,?\s*$',      # Just "s," or "s"
             r'^s\s+\w+$',     # "s something"
             r'^(think|right|Unable to)$',  # Single incomplete words
             r'^Jagged$',      # Random single words
-            r'^\w{1,2}$'      # Single/double characters
         ]
-        for pattern in broken_patterns:
-            if re.match(pattern, answer, re.IGNORECASE):
-                return "Unable to provide complete answer"
         # Remove common prefixes but preserve content
         prefixes = ['answer:', 'result:', 'final:', 'conclusion:', 'the answer is', 'it is', 'this is']
@@ -1655,9 +1663,15 @@ Think step by step about what tools you need, use them, then provide ONLY the fi
         if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")):
             answer = answer[1:-1]
-        # Final validation
-        if len(answer) < 2:
             return "Unable to provide complete answer"
         return answer.strip()

         """Initialize all multimodal AI clients"""
         self.clients = {}
+        if self.hf_token and HF_AVAILABLE:
             # Vision models
             self.clients['vision'] = InferenceClient(model="Salesforce/blip-image-captioning-large", token=self.hf_token)
             self.clients['image_gen'] = InferenceClient(model="stabilityai/stable-diffusion-xl-base-1.0", token=self.hf_token)
         # Quality validation - reject broken/incomplete responses
         answer = answer.strip()
+        # Reject clearly broken responses but allow valid short answers
         broken_patterns = [
             r'^s,?\s*$',      # Just "s," or "s"
             r'^s\s+\w+$',     # "s something"
             r'^(think|right|Unable to)$',  # Single incomplete words
             r'^Jagged$',      # Random single words
         ]
+        # Don't reject numbers or valid single words
+        if answer.isdigit() or answer.replace('.', '').replace('-', '').isdigit():
+            # Valid number - keep it
+            pass
+        elif len(answer) == 1 and answer.isalpha():
+            # Single letter might be valid (like "A", "B" for multiple choice)
+            pass
+        else:
+            # Apply broken pattern checks for other cases
+            for pattern in broken_patterns:
+                if re.match(pattern, answer, re.IGNORECASE):
+                    return "Unable to provide complete answer"
         # Remove common prefixes but preserve content
         prefixes = ['answer:', 'result:', 'final:', 'conclusion:', 'the answer is', 'it is', 'this is']
         if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")):
             answer = answer[1:-1]
+        # Final validation - but allow valid single character answers
+        if len(answer) < 1:
             return "Unable to provide complete answer"
+        elif len(answer) == 1:
+            # Single character is OK if it's a digit or capital letter
+            if answer.isdigit() or answer.isupper():
+                return answer.strip()
+            else:
+                return "Unable to provide complete answer"
         return answer.strip()

test_agent.py ADDED Viewed

	@@ -0,0 +1,140 @@

+#!/usr/bin/env python3
+"""
+🔍 GAIA Agent Diagnostic Test
+Quick test to diagnose why the agent isn't answering questions
+"""
+import os
+import sys
+from gaia_system import BasicAgent, EnhancedMultiModelGAIASystem
+def test_basic_agent():
+    """Run BasicAgent against a few simple questions and print each result.
+
+    Returns:
+        False if constructing the agent raised, True otherwise. An exception
+        raised while answering a single question is printed and does not
+        change the return value.
+    """
+    print("🧪 Testing BasicAgent...")
+    # Only construction is guarded here, so a later failure is never
+    # mislabelled as an initialization problem.
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        print(f"❌ Failed to initialize agent: {e}")
+        return False
+    print("✅ Agent initialized successfully")
+    # Test simple questions
+    test_questions = [
+        "What is 2 + 2?",
+        "What is the capital of France?",
+        "How many days are in a week?",
+        "What color is the sky?",
+    ]
+    for i, question in enumerate(test_questions, 1):
+        print(f"\n📝 Test {i}: {question}")
+        try:
+            response = agent(question)
+        except Exception as e:
+            print(f"❌ Error: {e}")
+            continue
+        print(f"🤖 Response: '{response}'")
+        # Normalise before measuring: calling len() on a None response would
+        # raise TypeError and hide the "empty response" diagnosis below.
+        text = "" if response is None else str(response)
+        print(f"📏 Length: {len(text)} characters")
+        if not text.strip():
+            print("❌ Empty response!")
+        elif "Unable to" in text or "Error" in text:
+            print("⚠️ Error response detected")
+        else:
+            print("✅ Got non-empty response")
+    return True
+def test_enhanced_system():
+    """Exercise EnhancedMultiModelGAIASystem directly and print what it returns.
+
+    Reads the HF_TOKEN environment variable, builds the system with it, then
+    prints the raw tool-query response, the fallback response and, when the
+    raw response is truthy, the extracted final answer.
+
+    Returns:
+        True if every step ran without raising, False otherwise (the
+        traceback is printed in that case).
+    """
+    print("\n🧪 Testing EnhancedMultiModelGAIASystem...")
+    try:
+        # Test with HF token if available
+        hf_token = os.getenv('HF_TOKEN')
+        if hf_token:
+            # Report presence only: echoing any part of the token would
+            # leak secret material into console output and saved logs.
+            print("✅ Found HF_TOKEN (value hidden)")
+        else:
+            print("⚠️ No HF_TOKEN found - using fallback mode")
+        system = EnhancedMultiModelGAIASystem(hf_token=hf_token)
+        print("✅ Enhanced system initialized")
+        # Test simple query
+        question = "What is 5 + 3?"
+        print(f"\n📝 Testing: {question}")
+        response = system.query_with_tools(question)
+        print(f"🤖 Raw response: '{response}'")
+        # Test fallback
+        fallback = system._fallback_response(question)
+        print(f"🛡️ Fallback response: '{fallback}'")
+        # Test answer extraction (skipped when the raw response is empty)
+        if response:
+            extracted = system._extract_final_answer(response)
+            print(f"✨ Extracted answer: '{extracted}'")
+        return True
+    except Exception as e:
+        print(f"❌ Enhanced system error: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+def test_model_availability():
+    """List the clients registered on a default EnhancedMultiModelGAIASystem.
+
+    Prints the number of clients, one line per client with its priority and
+    provider, and the first entry of the model priority list (or a warning
+    when that list is empty).
+
+    Returns:
+        True when the listing completed, False if anything raised.
+    """
+    print("\n🔍 Testing model availability...")
+    try:
+        gaia = EnhancedMultiModelGAIASystem()
+        print(f"📊 Available models: {len(gaia.clients)}")
+        for model_name, info in gaia.clients.items():
+            # Entries without these keys fall back to placeholder values.
+            source = info.get('provider', 'Unknown')
+            rank = info.get('priority', 999)
+            print(f"  - {model_name} (Priority: {rank}, Provider: {source})")
+        if not gaia.model_priority:
+            print("❌ No models in priority list!")
+        else:
+            print(f"🎯 Top priority model: {gaia.model_priority[0]}")
+    except Exception as e:
+        print(f"❌ Model availability error: {e}")
+        return False
+    else:
+        return True
+def main():
+    """Run the three diagnostics in order and print a pass/fail summary.
+
+    Returns:
+        True only when the BasicAgent diagnostic passed; False when it
+        failed, including the case where all three diagnostics failed.
+    """
+    print("🚀 GAIA Agent Diagnostic Tests\n")
+    # Test basic functionality
+    basic_ok = test_basic_agent()
+    enhanced_ok = test_enhanced_system()
+    models_ok = test_model_availability()
+    print("\n📊 Test Summary:")
+    summary = (
+        ("BasicAgent", basic_ok),
+        ("Enhanced System", enhanced_ok),
+        ("Model Availability", models_ok),
+    )
+    for label, passed in summary:
+        verdict = '✅ PASS' if passed else '❌ FAIL'
+        print(f"  {label}: {verdict}")
+    # Guard clauses, most severe outcome first.
+    if not (basic_ok or enhanced_ok or models_ok):
+        print("\n❌ All tests failed! Check dependencies and configuration.")
+        return False
+    if not basic_ok:
+        print("\n⚠️ BasicAgent failed - this is the issue for GAIA submissions!")
+        return False
+    print("\n✅ Core functionality working - issue might be elsewhere")
+    return True
+if __name__ == "__main__":
+    # Exit status 0 when main() reports success, 1 otherwise.
+    sys.exit(0 if main() else 1)