Omachoko committed on
Commit
a1492aa
·
1 Parent(s): 5c26e67

🔧 Fix agent answer extraction issues

Browse files

✅ Fixed InferenceClient import error when huggingface_hub is unavailable
✅ Fixed overly aggressive answer extraction rejecting valid short answers
✅ Numbers and single letters now properly preserved (e.g., '8', 'A', 'B')
✅ Added diagnostic test script for troubleshooting
🎯 Agent now properly answers mathematical and factual questions

Files changed (2) hide show
  1. gaia_system.py +22 -8
  2. test_agent.py +140 -0
gaia_system.py CHANGED
@@ -177,7 +177,7 @@ class UniversalMultimodalToolkit:
177
  """Initialize all multimodal AI clients"""
178
  self.clients = {}
179
 
180
- if self.hf_token:
181
  # Vision models
182
  self.clients['vision'] = InferenceClient(model="Salesforce/blip-image-captioning-large", token=self.hf_token)
183
  self.clients['image_gen'] = InferenceClient(model="stabilityai/stable-diffusion-xl-base-1.0", token=self.hf_token)
@@ -1625,18 +1625,26 @@ Think step by step about what tools you need, use them, then provide ONLY the fi
1625
  # Quality validation - reject broken/incomplete responses
1626
  answer = answer.strip()
1627
 
1628
- # Reject clearly broken responses
1629
  broken_patterns = [
1630
  r'^s,?\s*$', # Just "s," or "s"
1631
  r'^s\s+\w+$', # "s something"
1632
  r'^(think|right|Unable to)$', # Single incomplete words
1633
  r'^Jagged$', # Random single words
1634
- r'^\w{1,2}$' # Single/double characters
1635
  ]
1636
 
1637
- for pattern in broken_patterns:
1638
- if re.match(pattern, answer, re.IGNORECASE):
1639
- return "Unable to provide complete answer"
 
 
 
 
 
 
 
 
 
1640
 
1641
  # Remove common prefixes but preserve content
1642
  prefixes = ['answer:', 'result:', 'final:', 'conclusion:', 'the answer is', 'it is', 'this is']
@@ -1655,9 +1663,15 @@ Think step by step about what tools you need, use them, then provide ONLY the fi
1655
  if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")):
1656
  answer = answer[1:-1]
1657
 
1658
- # Final validation
1659
- if len(answer) < 2:
1660
  return "Unable to provide complete answer"
 
 
 
 
 
 
1661
 
1662
  return answer.strip()
1663
 
 
177
  """Initialize all multimodal AI clients"""
178
  self.clients = {}
179
 
180
+ if self.hf_token and HF_AVAILABLE:
181
  # Vision models
182
  self.clients['vision'] = InferenceClient(model="Salesforce/blip-image-captioning-large", token=self.hf_token)
183
  self.clients['image_gen'] = InferenceClient(model="stabilityai/stable-diffusion-xl-base-1.0", token=self.hf_token)
 
1625
  # Quality validation - reject broken/incomplete responses
1626
  answer = answer.strip()
1627
 
1628
+ # Reject clearly broken responses but allow valid short answers
1629
  broken_patterns = [
1630
  r'^s,?\s*$', # Just "s," or "s"
1631
  r'^s\s+\w+$', # "s something"
1632
  r'^(think|right|Unable to)$', # Single incomplete words
1633
  r'^Jagged$', # Random single words
 
1634
  ]
1635
 
1636
+ # Don't reject numbers or valid single words
1637
+ if answer.isdigit() or answer.replace('.', '').replace('-', '').isdigit():
1638
+ # Valid number - keep it
1639
+ pass
1640
+ elif len(answer) == 1 and answer.isalpha():
1641
+ # Single letter might be valid (like "A", "B" for multiple choice)
1642
+ pass
1643
+ else:
1644
+ # Apply broken pattern checks for other cases
1645
+ for pattern in broken_patterns:
1646
+ if re.match(pattern, answer, re.IGNORECASE):
1647
+ return "Unable to provide complete answer"
1648
 
1649
  # Remove common prefixes but preserve content
1650
  prefixes = ['answer:', 'result:', 'final:', 'conclusion:', 'the answer is', 'it is', 'this is']
 
1663
  if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")):
1664
  answer = answer[1:-1]
1665
 
1666
+ # Final validation - but allow valid single character answers
1667
+ if len(answer) < 1:
1668
  return "Unable to provide complete answer"
1669
+ elif len(answer) == 1:
1670
+ # Single character is OK if it's a digit or capital letter
1671
+ if answer.isdigit() or answer.isupper():
1672
+ return answer.strip()
1673
+ else:
1674
+ return "Unable to provide complete answer"
1675
 
1676
  return answer.strip()
1677
 
test_agent.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ πŸ” GAIA Agent Diagnostic Test
4
+ Quick test to diagnose why the agent isn't answering questions
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ from gaia_system import BasicAgent, EnhancedMultiModelGAIASystem
10
+
11
+ def test_basic_agent():
12
+ """Test the BasicAgent with simple questions"""
13
+ print("πŸ§ͺ Testing BasicAgent...")
14
+
15
+ try:
16
+ # Initialize agent
17
+ agent = BasicAgent()
18
+ print("βœ… Agent initialized successfully")
19
+
20
+ # Test simple questions
21
+ test_questions = [
22
+ "What is 2 + 2?",
23
+ "What is the capital of France?",
24
+ "How many days are in a week?",
25
+ "What color is the sky?"
26
+ ]
27
+
28
+ for i, question in enumerate(test_questions, 1):
29
+ print(f"\nπŸ“ Test {i}: {question}")
30
+ try:
31
+ response = agent(question)
32
+ print(f"πŸ€– Response: '{response}'")
33
+ print(f"πŸ“ Length: {len(response)} characters")
34
+
35
+ if not response or response.strip() == "":
36
+ print("❌ Empty response!")
37
+ elif "Unable to" in response or "Error" in response:
38
+ print("⚠️ Error response detected")
39
+ else:
40
+ print("βœ… Got non-empty response")
41
+
42
+ except Exception as e:
43
+ print(f"❌ Error: {e}")
44
+
45
+ except Exception as e:
46
+ print(f"❌ Failed to initialize agent: {e}")
47
+ return False
48
+
49
+ return True
50
+
51
+ def test_enhanced_system():
52
+ """Test the EnhancedMultiModelGAIASystem directly"""
53
+ print("\nπŸ§ͺ Testing EnhancedMultiModelGAIASystem...")
54
+
55
+ try:
56
+ # Test with HF token if available
57
+ hf_token = os.getenv('HF_TOKEN')
58
+ if hf_token:
59
+ print(f"βœ… Found HF_TOKEN: {hf_token[:10]}...")
60
+ else:
61
+ print("⚠️ No HF_TOKEN found - using fallback mode")
62
+
63
+ system = EnhancedMultiModelGAIASystem(hf_token=hf_token)
64
+ print("βœ… Enhanced system initialized")
65
+
66
+ # Test simple query
67
+ question = "What is 5 + 3?"
68
+ print(f"\nπŸ“ Testing: {question}")
69
+
70
+ response = system.query_with_tools(question)
71
+ print(f"πŸ€– Raw response: '{response}'")
72
+
73
+ # Test fallback
74
+ fallback = system._fallback_response(question)
75
+ print(f"πŸ›‘οΈ Fallback response: '{fallback}'")
76
+
77
+ # Test answer extraction
78
+ if response:
79
+ extracted = system._extract_final_answer(response)
80
+ print(f"✨ Extracted answer: '{extracted}'")
81
+
82
+ return True
83
+
84
+ except Exception as e:
85
+ print(f"❌ Enhanced system error: {e}")
86
+ import traceback
87
+ traceback.print_exc()
88
+ return False
89
+
90
+ def test_model_availability():
91
+ """Test which AI models are available"""
92
+ print("\nπŸ” Testing model availability...")
93
+
94
+ try:
95
+ system = EnhancedMultiModelGAIASystem()
96
+
97
+ print(f"πŸ“Š Available models: {len(system.clients)}")
98
+ for name, client_info in system.clients.items():
99
+ provider = client_info.get('provider', 'Unknown')
100
+ priority = client_info.get('priority', 999)
101
+ print(f" - {name} (Priority: {priority}, Provider: {provider})")
102
+
103
+ if system.model_priority:
104
+ print(f"🎯 Top priority model: {system.model_priority[0]}")
105
+ else:
106
+ print("❌ No models in priority list!")
107
+
108
+ return True
109
+
110
+ except Exception as e:
111
+ print(f"❌ Model availability error: {e}")
112
+ return False
113
+
114
+ def main():
115
+ """Run all diagnostic tests"""
116
+ print("πŸš€ GAIA Agent Diagnostic Tests\n")
117
+
118
+ # Test basic functionality
119
+ test1 = test_basic_agent()
120
+ test2 = test_enhanced_system()
121
+ test3 = test_model_availability()
122
+
123
+ print("\nπŸ“Š Test Summary:")
124
+ print(f" BasicAgent: {'βœ… PASS' if test1 else '❌ FAIL'}")
125
+ print(f" Enhanced System: {'βœ… PASS' if test2 else '❌ FAIL'}")
126
+ print(f" Model Availability: {'βœ… PASS' if test3 else '❌ FAIL'}")
127
+
128
+ if not any([test1, test2, test3]):
129
+ print("\n❌ All tests failed! Check dependencies and configuration.")
130
+ return False
131
+ elif not test1:
132
+ print("\n⚠️ BasicAgent failed - this is the issue for GAIA submissions!")
133
+ return False
134
+ else:
135
+ print("\nβœ… Core functionality working - issue might be elsewhere")
136
+ return True
137
+
138
+ if __name__ == "__main__":
139
+ success = main()
140
+ sys.exit(0 if success else 1)