LamiaYT committed
Commit 843728a · 1 Parent(s): 07c53f3

Optimization

Files changed (5)
  1. .config +0 -60
  2. app.py +237 -389
  3. data/knowledge.txt +0 -0
  4. requirements.txt +32 -14
  5. test.py +146 -0
.config DELETED
@@ -1,60 +0,0 @@
- # Configuration file for GAIA Agent
-
- # Model Configuration
- MODEL_CONFIG = {
-     "model_id": "microsoft/DialoGPT-medium",  # Lightweight model for resource constraints
-     "max_tokens": 512,                        # Reduced for memory efficiency
-     "temperature": 0.1,                       # Low temperature for factual responses
-     "fallback_model": "gpt-3.5-turbo",        # Fallback if primary model fails
- }
-
- # Agent Configuration
- AGENT_CONFIG = {
-     "max_iterations": 5,     # Limit iterations to prevent infinite loops
-     "verbosity_level": 1,    # Moderate verbosity for debugging
-     "timeout_seconds": 30,   # Timeout for individual operations
-     "max_retries": 2,        # Number of retries for failed operations
- }
-
- # Tool Configuration
- TOOL_CONFIG = {
-     "web_search": {
-         "enabled": True,
-         "max_results": 5,  # Limit search results for efficiency
-         "timeout": 10,
-     },
-     "calculator": {
-         "enabled": True,
-         "safe_mode": True,  # Only allow safe mathematical expressions
-     },
-     "image_analyzer": {
-         "enabled": True,
-         "max_image_size": 5 * 1024 * 1024,  # 5MB limit
-         "supported_formats": [".jpg", ".jpeg", ".png", ".gif", ".bmp"],
-     },
-     "file_reader": {
-         "enabled": True,
-         "max_file_size": 10 * 1024 * 1024,  # 10MB limit
-         "supported_formats": [".txt", ".csv", ".json", ".md", ".py", ".js", ".html", ".css"],
-     },
-     "data_processor": {
-         "enabled": True,
-         "max_data_points": 10000,  # Limit for large datasets
-     }
- }
-
- # Performance Configuration
- PERFORMANCE_CONFIG = {
-     "memory_limit_mb": 2048,               # 2GB memory limit per process
-     "cpu_limit_percent": 80,               # Maximum CPU usage
-     "garbage_collection_frequency": 10,    # Run GC every N operations
-     "cache_size": 100,                     # Number of cached responses
- }
-
- # API Configuration
- API_CONFIG = {
-     "default_api_url": "https://agents-course-unit4-scoring.hf.space",
-     "request_timeout": 60,
-     "max_concurrent_requests": 2,  # Limit concurrent requests
- }
app.py CHANGED
@@ -3,361 +3,241 @@ import gradio as gr
  import requests
  import inspect
  import pandas as pd
  import json
- import re
- import time
- from typing import List, Dict, Any, Optional
- from datetime import datetime
- import threading
- import queue
- from ctransformers import AutoModelForCausalLM
- import logging
-
- # Setup logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)

  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

- class WebSearchTool:
-     """Web search tool using Serper API for real-time information retrieval"""
-
-     def __init__(self, api_key: str):
-         self.api_key = api_key
-         self.base_url = "https://google.serper.dev/search"
-
-     def search(self, query: str, num_results: int = 5) -> Dict[str, Any]:
-         """Perform web search and return structured results"""
          try:
              headers = {
                  'X-API-KEY': self.api_key,
                  'Content-Type': 'application/json'
              }

-             payload = {
-                 'q': query,
-                 'num': num_results,
-                 'gl': 'us',
-                 'hl': 'en'
-             }
-
-             response = requests.post(self.base_url, json=payload, headers=headers, timeout=10)
              response.raise_for_status()

              data = response.json()
-
-             # Extract and format results
              results = []
              if 'organic' in data:
-                 for item in data['organic'][:num_results]:
-                     results.append({
-                         'title': item.get('title', ''),
-                         'snippet': item.get('snippet', ''),
-                         'link': item.get('link', ''),
-                         'position': item.get('position', 0)
-                     })

-             return {
-                 'success': True,
-                 'results': results,
-                 'query': query,
-                 'total_results': len(results)
-             }

          except Exception as e:
-             logger.error(f"Web search error: {e}")
-             return {
-                 'success': False,
-                 'error': str(e),
-                 'results': [],
-                 'query': query,
-                 'total_results': 0
-             }

- class CalculatorTool:
-     """Enhanced calculator tool for mathematical operations"""
-
-     def calculate(self, expression: str) -> Dict[str, Any]:
          """Safely evaluate mathematical expressions"""
          try:
-             # Clean the expression
-             expression = expression.strip()

-             # Replace common mathematical functions
-             expression = expression.replace('^', '**')  # Power operator
-             expression = re.sub(r'\b(\d+)x(\d+)\b', r'\1*\2', expression)  # Handle multiplication like 5x3

-             # Allow only safe mathematical operations
-             allowed_chars = set('0123456789+-*/().,eE pi')
-             allowed_funcs = ['abs', 'round', 'min', 'max', 'sum', 'pow', 'sqrt']

-             # Basic safety check
-             if any(char.isalpha() and char not in 'pie' for char in expression):
-                 # Check if it contains allowed function names
-                 import math
-                 safe_dict = {
-                     "__builtins__": {},
-                     "abs": abs, "round": round, "min": min, "max": max,
-                     "sum": sum, "pow": pow, "sqrt": math.sqrt,
-                     "pi": math.pi, "e": math.e,
-                     "sin": math.sin, "cos": math.cos, "tan": math.tan,
-                     "log": math.log, "log10": math.log10,
-                     "exp": math.exp, "floor": math.floor, "ceil": math.ceil
-                 }
-                 result = eval(expression, safe_dict)
-             else:
-                 result = eval(expression)
-
-             return {
-                 'success': True,
-                 'result': result,
-                 'expression': expression
-             }

          except Exception as e:
-             logger.error(f"Calculator error: {e}")
-             return {
-                 'success': False,
-                 'error': str(e),
-                 'expression': expression,
-                 'result': None
-             }

- class LocalLLMManager:
-     """Manages local quantized LLM for reasoning"""
-
-     def __init__(self):
-         self.model = None
-         self.model_loaded = False
-         self.load_lock = threading.Lock()
-
-     def load_model(self):
-         """Load quantized model optimized for CPU inference"""
-         with self.load_lock:
-             if self.model_loaded:
-                 return
-
-             try:
-                 logger.info("Loading quantized model...")
-
-                 # Use Phi-3-mini for better performance on CPU with limited resources
-                 self.model = AutoModelForCausalLM.from_pretrained(
-                     "microsoft/Phi-3-mini-4k-instruct-gguf",
-                     model_file="Phi-3-mini-4k-instruct-q4.gguf",
-                     model_type="phi3",
-                     gpu_layers=0,  # CPU only
-                     context_length=3072,  # Reduced context to save memory
-                     max_new_tokens=512,
-                     temperature=0.1,
-                     top_p=0.9,
-                     repetition_penalty=1.1
-                 )
-
-                 self.model_loaded = True
-                 logger.info("Model loaded successfully")
-
-             except Exception as e:
-                 logger.error(f"Error loading model: {e}")
-                 # Fallback to a smaller model if Phi-3 fails
-                 try:
-                     logger.info("Trying fallback model...")
-                     self.model = AutoModelForCausalLM.from_pretrained(
-                         "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-                         model_file="tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
-                         model_type="llama",
-                         gpu_layers=0,
-                         context_length=2048,
-                         max_new_tokens=256
-                     )
-                     self.model_loaded = True
-                     logger.info("Fallback model loaded successfully")
-                 except Exception as e2:
-                     logger.error(f"Fallback model also failed: {e2}")
-                     raise

-     def generate(self, prompt: str, max_tokens: int = 256) -> str:
-         """Generate response from local model"""
-         if not self.model_loaded:
-             self.load_model()

-         if not self.model:
-             return "Error: Model not available"

-         try:
-             # Format prompt for Phi-3
-             formatted_prompt = f"<|user|>\n{prompt}<|end|>\n<|assistant|>\n"

-             response = self.model(
-                 formatted_prompt,
-                 max_new_tokens=min(max_tokens, 256),  # Limit tokens for speed
-                 temperature=0.1,
-                 stop=["<|end|>", "<|user|>"]
-             )

-             # Clean response
-             response = response.replace(formatted_prompt, "").strip()
-             if "<|end|>" in response:
-                 response = response.split("<|end|>")[0].strip()
-
-             return response

          except Exception as e:
-             logger.error(f"Generation error: {e}")
-             return f"Error generating response: {e}"

  class GAIAAgent:
-     """Advanced GAIA agent with reasoning, tools, and multi-step problem solving"""
-
      def __init__(self):
-         # Initialize tools
-         self.serper_api_key = os.getenv("SERPER_API_KEY")
-         if not self.serper_api_key:
-             logger.warning("SERPER_API_KEY not found. Web search will be disabled.")
-             self.web_search = None
-         else:
-             self.web_search = WebSearchTool(self.serper_api_key)
-
-         self.calculator = CalculatorTool()
-         self.llm = LocalLLMManager()
-
-         # Agent configuration
-         self.max_iterations = 5
-         self.max_reasoning_length = 1000
-
-         logger.info("GAIA Agent initialized")
-
-     def _identify_question_type(self, question: str) -> str:
-         """Identify the type of question to determine approach"""
-         question_lower = question.lower()
-
-         if any(word in question_lower for word in ['calculate', 'compute', 'math', '+', '-', '*', '/', '=', 'sum', 'multiply', 'divide']):
-             return 'mathematical'
-         elif any(word in question_lower for word in ['current', 'latest', 'recent', 'today', 'now', '2024', '2025']):
-             return 'current_info'
-         elif any(word in question_lower for word in ['who', 'what', 'where', 'when', 'why', 'how']):
-             return 'factual'
-         elif any(word in question_lower for word in ['analyze', 'compare', 'explain', 'reason']):
-             return 'analytical'
-         else:
-             return 'general'
-
-     def _use_web_search(self, query: str) -> str:
-         """Use web search tool and format results"""
-         if not self.web_search:
-             return "Web search not available (API key missing)"
-
-         results = self.web_search.search(query, num_results=3)
-
-         if not results['success']:
-             return f"Search failed: {results.get('error', 'Unknown error')}"
-
-         if not results['results']:
-             return "No search results found"
-
-         formatted_results = f"Search results for '{query}':\n"
-         for i, result in enumerate(results['results'], 1):
-             formatted_results += f"{i}. {result['title']}\n   {result['snippet']}\n\n"
-
-         return formatted_results
-
-     def _use_calculator(self, expression: str) -> str:
-         """Use calculator tool and format result"""
-         result = self.calculator.calculate(expression)
-
-         if result['success']:
-             return f"Calculation: {result['expression']} = {result['result']}"
-         else:
-             return f"Calculation error: {result['error']}"
-
-     def _generate_reasoning(self, question: str, context: str = "") -> str:
-         """Generate reasoning step using local LLM"""
-         reasoning_prompt = f"""Question: {question}
-
- Context: {context}
-
- Think step by step about this question. Consider:
- 1. What information do I need?
- 2. What tools might help?
- 3. How should I approach this problem?
-
- Provide a clear reasoning step:"""
-
-         try:
-             reasoning = self.llm.generate(reasoning_prompt, max_tokens=200)
-             return reasoning
-         except Exception as e:
-             logger.error(f"Reasoning generation error: {e}")
-             return "Unable to generate reasoning step"
-
-     def _generate_final_answer(self, question: str, context: str, reasoning_steps: List[str]) -> str:
-         """Generate final answer using all available information"""
-
-         all_reasoning = "\n".join([f"Step {i+1}: {step}" for i, step in enumerate(reasoning_steps)])

-         answer_prompt = f"""Question: {question}
-
- Context and Information:
- {context}
-
- Reasoning Steps:
- {all_reasoning}
-
- Based on all the information and reasoning above, provide a clear, concise, and accurate final answer to the question:"""
-
          try:
-             answer = self.llm.generate(answer_prompt, max_tokens=200)
-             return answer.strip()
          except Exception as e:
-             logger.error(f"Answer generation error: {e}")
-             return "Unable to generate final answer"

      def __call__(self, question: str) -> str:
-         """Main agent execution method"""
-         logger.info(f"Processing question: {question[:100]}...")

          try:
-             # Initialize
-             context = ""
-             reasoning_steps = []
-             question_type = self._identify_question_type(question)
-
-             logger.info(f"Question type identified: {question_type}")
-
-             # Step 1: Initial reasoning
-             initial_reasoning = self._generate_reasoning(question)
-             reasoning_steps.append(initial_reasoning)
-             context += f"Initial reasoning: {initial_reasoning}\n\n"

-             # Step 2: Apply tools based on question type
-             if question_type == 'mathematical':
-                 # Try to extract mathematical expressions
-                 math_matches = re.findall(r'[\d\+\-\*/\(\)\.\s\^]+', question)
-                 for match in math_matches:
-                     if len(match.strip()) > 3:  # Avoid single digits
-                         calc_result = self._use_calculator(match.strip())
-                         context += f"Calculation: {calc_result}\n"
-
-             elif question_type in ['current_info', 'factual']:
-                 # Use web search for factual or current information
-                 search_result = self._use_web_search(question)
-                 context += f"Web search results: {search_result}\n"
-
-             # Step 3: Additional reasoning with context
-             if context:
-                 additional_reasoning = self._generate_reasoning(question, context)
-                 reasoning_steps.append(additional_reasoning)
-                 context += f"Additional reasoning: {additional_reasoning}\n\n"

-             # Step 4: Generate final answer
-             final_answer = self._generate_final_answer(question, context, reasoning_steps)

-             logger.info(f"Generated answer: {final_answer[:100]}...")
-             return final_answer

          except Exception as e:
-             logger.error(f"Agent execution error: {e}")
-             return f"Error processing question: {str(e)}"

  def run_and_submit_all(profile: gr.OAuthProfile | None):
      """
@@ -365,7 +245,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
      and displays the results.
      """
      # --- Determine HF Space Runtime URL and Repo URL ---
-     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

      if profile:
          username = f"{profile.username}"
@@ -380,15 +260,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):

      # 1. Instantiate Agent
      try:
-         print("Initializing GAIA Agent...")
          agent = GAIAAgent()
-         print("GAIA Agent initialized successfully")
      except Exception as e:
          print(f"Error instantiating agent: {e}")
          return f"Error initializing agent: {e}", None
-
-     # Agent code link
-     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
      print(f"Agent code: {agent_code}")

      # 2. Fetch Questions
@@ -406,7 +286,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
          return f"Error fetching questions: {e}", None
      except requests.exceptions.JSONDecodeError as e:
          print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
          return f"Error decoding server response for questions: {e}", None
      except Exception as e:
          print(f"An unexpected error occurred fetching questions: {e}")
@@ -424,36 +303,30 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
              print(f"Skipping item with missing task_id or question: {item}")
              continue

-         print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
-
          try:
-             start_time = time.time()
              submitted_answer = agent(question_text)
-             processing_time = time.time() - start_time
-
-             print(f"Question {task_id} processed in {processing_time:.2f}s")
-
              answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
              results_log.append({
                  "Task ID": task_id,
                  "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                 "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer,
-                 "Processing Time (s)": f"{processing_time:.2f}"
              })
          except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
              results_log.append({
                  "Task ID": task_id,
                  "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
-                 "Submitted Answer": f"AGENT ERROR: {e}",
-                 "Processing Time (s)": "Error"
              })

      if not answers_payload:
          print("Agent did not produce any answers to submit.")
          return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

-     # 4. Prepare Submission
      submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
      status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
      print(status_update)
@@ -461,7 +334,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
      # 5. Submit
      print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
      try:
-         response = requests.post(submit_url, json=submission_data, timeout=120)
          response.raise_for_status()
          result_data = response.json()
          final_status = (
@@ -485,61 +358,49 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
          print(status_message)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df
-     except requests.exceptions.Timeout:
-         status_message = "Submission Failed: The request timed out."
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.RequestException as e:
-         status_message = f"Submission Failed: Network error - {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
      except Exception as e:
          status_message = f"An unexpected error occurred during submission: {e}"
          print(status_message)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df

-
- # --- Build Gradio Interface using Blocks ---
  with gr.Blocks(title="GAIA Agent Evaluation") as demo:
-     gr.Markdown("# GAIA Agent Evaluation Runner")
      gr.Markdown(
          """
-         **Advanced GAIA Agent Features:**
-         - 🧠 Local quantized LLM for reasoning (Phi-3-mini optimized for CPU)
-         - 🔍 Web search capabilities via Serper API
-         - 🧮 Mathematical calculation tools
-         - 🎯 Multi-step problem solving approach
-         - 🚀 Optimized for 16GB RAM / 2 vCPU constraints

          **Instructions:**
-         1. Ensure your SERPER_API_KEY environment variable is set for web search
-         2. Log in to your Hugging Face account using the button below
-         3. Click 'Run GAIA Evaluation' to start the comprehensive evaluation

-         **Note:** Initial model loading may take 1-2 minutes. Subsequent questions will be processed faster.
          """
      )

      gr.LoginButton()

-     run_button = gr.Button("🚀 Run GAIA Evaluation & Submit All Answers", variant="primary")

-     status_output = gr.Textbox(label="📊 Evaluation Status & Results", lines=8, interactive=False)
-     results_table = gr.DataFrame(label="📋 Detailed Question Results", wrap=True)
-
-     # Add system info
-     with gr.Accordion("🔧 System Information", open=False):
-         gr.Markdown(f"""
-         - **Environment**: Hugging Face Space
-         - **Resources**: 16GB RAM, 2 vCPU
-         - **Model**: Phi-3-mini-4k-instruct (quantized)
-         - **Web Search**: {'✅ Enabled' if os.getenv('SERPER_API_KEY') else '❌ Disabled (no API key)'}
-         - **Calculator**: ✅ Enabled
-         - **Timestamp**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}
-         """)

      run_button.click(
          fn=run_and_submit_all,
@@ -547,39 +408,26 @@ with gr.Blocks(title="GAIA Agent Evaluation") as demo:
      )

  if __name__ == "__main__":
-     print("\n" + "="*70)
-     print("🚀 GAIA AGENT EVALUATION SYSTEM STARTING")
-     print("="*70)

-     # Environment check
-     space_host = os.getenv("SPACE_HOST")
-     space_id = os.getenv("SPACE_ID")
      serper_key = os.getenv("SERPER_API_KEY")
-
-     if space_host:
-         print(f"✅ SPACE_HOST: {space_host}")
-         print(f"   🌐 Runtime URL: https://{space_host}.hf.space")
-     else:
-         print("ℹ️ Running locally (SPACE_HOST not found)")
-
      if space_id:
-         print(f"   SPACE_ID: {space_id}")
-         print(f"   📁 Repo URL: https://huggingface.co/spaces/{space_id}")
-     else:
-         print("ℹ️ SPACE_ID not found")
-
-     if serper_key:
-         print("   SERPER_API_KEY: Configured")
-     else:
-         print("⚠️ SERPER_API_KEY: Not found - Web search will be disabled")
-
-     print("="*70)
-     print("📚 GAIA Agent Features:")
-     print("   🧠 Local LLM reasoning")
-     print("   🔍 Web search integration")
-     print("   🧮 Mathematical calculations")
-     print("   🎯 Multi-step problem solving")
-     print("="*70 + "\n")

-     print("🎯 Launching GAIA Agent Evaluation Interface...")
      demo.launch(debug=True, share=False)
 
  import requests
  import inspect
  import pandas as pd
+ from smolagents import CodeAgent, HfApiModel
+ from smolagents.tools import DuckDuckGoSearchTool, PythonInterpreterTool
  import json
+ import tempfile
+ import urllib.parse
+ from pathlib import Path

  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

+ # --- Custom Tools ---
+ class SerperSearchTool:
+     """Enhanced search tool using Serper API for more reliable results"""
+
+     name = "serper_search"
+     description = "Search the web using Serper API. Use this for finding current information, facts, and data."
+
+     def __init__(self):
+         self.api_key = os.getenv("SERPER_API_KEY")
+         if not self.api_key:
+             print("Warning: SERPER_API_KEY not found, falling back to DuckDuckGo")

+     def __call__(self, query: str) -> str:
+         """Search the web and return formatted results"""
+         if not self.api_key:
+             # Fallback to basic search if no Serper API key
+             return f"Search query: {query} - API key not available"

          try:
+             url = "https://google.serper.dev/search"
+             payload = json.dumps({
+                 "q": query,
+                 "num": 5
+             })
              headers = {
                  'X-API-KEY': self.api_key,
                  'Content-Type': 'application/json'
              }

+             response = requests.post(url, headers=headers, data=payload, timeout=10)
              response.raise_for_status()

              data = response.json()
              results = []
+
+             # Process organic results
              if 'organic' in data:
+                 for item in data['organic'][:3]:  # Top 3 results
+                     results.append(f"Title: {item.get('title', 'N/A')}")
+                     results.append(f"Content: {item.get('snippet', 'N/A')}")
+                     results.append(f"URL: {item.get('link', 'N/A')}")
+                     results.append("---")

+             # Add answer box if available
+             if 'answerBox' in data:
+                 answer = data['answerBox']
+                 results.insert(0, f"Answer: {answer.get('answer', answer.get('snippet', 'N/A'))}")
+                 results.insert(1, "---")
+
+             return "\n".join(results) if results else f"No results found for: {query}"

          except Exception as e:
+             print(f"Serper search error: {e}")
+             return f"Search error for '{query}': {str(e)}"

+ class MathCalculatorTool:
+     """Tool for mathematical calculations and computations"""
+
+     name = "math_calculator"
+     description = "Perform mathematical calculations, solve equations, and handle numerical computations."

+     def __call__(self, expression: str) -> str:
          """Safely evaluate mathematical expressions"""
          try:
+             # Import math functions for calculations
+             import math
+             import operator

+             # Safe evaluation context
+             safe_dict = {
+                 "abs": abs, "round": round, "min": min, "max": max,
+                 "sum": sum, "pow": pow, "sqrt": math.sqrt,
+                 "sin": math.sin, "cos": math.cos, "tan": math.tan,
+                 "log": math.log, "log10": math.log10, "exp": math.exp,
+                 "pi": math.pi, "e": math.e
+             }

+             # Clean the expression
+             expression = expression.replace("^", "**")  # Handle exponents

+             result = eval(expression, {"__builtins__": {}}, safe_dict)
+             return f"Result: {result}"

          except Exception as e:
+             return f"Math calculation error: {str(e)}"

+ class FileProcessorTool:
+     """Tool for processing various file formats"""

+     name = "file_processor"
+     description = "Process and extract information from files (text, CSV, JSON, etc.)"

+     def __call__(self, file_path: str, action: str = "read") -> str:
+         """Process files based on action type"""
+         try:
+             if not os.path.exists(file_path):
+                 return f"File not found: {file_path}"

+             file_ext = Path(file_path).suffix.lower()

+             if file_ext in ['.txt', '.md']:
+                 with open(file_path, 'r', encoding='utf-8') as f:
+                     content = f.read()
+                 return f"File content ({len(content)} chars):\n{content[:1000]}..."

+             elif file_ext == '.csv':
+                 import pandas as pd
+                 df = pd.read_csv(file_path)
+                 return f"CSV file with {len(df)} rows and {len(df.columns)} columns:\n{df.head().to_string()}"

+             elif file_ext == '.json':
+                 with open(file_path, 'r', encoding='utf-8') as f:
+                     data = json.load(f)
+                 return f"JSON data:\n{json.dumps(data, indent=2)[:1000]}..."

+             else:
+                 return f"Unsupported file type: {file_ext}"
+
          except Exception as e:
+             return f"File processing error: {str(e)}"

+ # --- Enhanced Agent Definition ---
  class GAIAAgent:
      def __init__(self):
+         """Initialize the GAIA agent with tools and model"""
+         print("Initializing GAIA Agent...")

+         # Initialize model
          try:
+             hf_token = os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
+             if not hf_token:
+                 print("Warning: HUGGINGFACE_INFERENCE_TOKEN not found")
+
+             # Use a good model for reasoning
+             model = HfApiModel(
+                 model_id="meta-llama/Llama-3.1-70B-Instruct",
+                 token=hf_token
+             )
+
+             # Initialize tools
+             self.tools = [
+                 SerperSearchTool(),
+                 PythonInterpreterTool(),
+                 MathCalculatorTool(),
+                 FileProcessorTool(),
+                 DuckDuckGoSearchTool()  # Backup search
+             ]
+
+             # Initialize the agent
+             self.agent = CodeAgent(
+                 tools=self.tools,
+                 model=model,
+                 max_steps=10,
+                 verbosity_level=1
+             )
+
+             print("GAIA Agent initialized successfully with tools:", [tool.name for tool in self.tools])
+
          except Exception as e:
+             print(f"Error initializing GAIA Agent: {e}")
+             # Fallback to basic setup
+             try:
+                 model = HfApiModel(model_id="microsoft/DialoGPT-medium")
+                 self.agent = CodeAgent(tools=[PythonInterpreterTool()], model=model)
+                 print("Fallback agent initialized")
+             except Exception as fallback_error:
+                 print(f"Fallback initialization failed: {fallback_error}")
+                 self.agent = None

      def __call__(self, question: str) -> str:
+         """Process a question using the GAIA agent"""
+         print(f"Processing question: {question[:100]}...")
+
+         if not self.agent:
+             return "Agent initialization failed. Please check your configuration."

          try:
+             # Enhanced prompt for better reasoning
+             enhanced_prompt = f"""
+ You are an AI assistant designed to answer questions accurately and thoroughly.
+ You have access to web search, Python interpreter, math calculator, and file processing tools.
+
+ Question: {question}
+
+ Please think step by step:
+ 1. Analyze what type of question this is
+ 2. Determine what tools or information you need
+ 3. Use appropriate tools to gather information
+ 4. Reason through the problem
+ 5. Provide a clear, accurate answer
+
+ If the question requires:
+ - Current information or facts: Use search tools
+ - Calculations: Use the math calculator or Python interpreter
+ - File analysis: Use the file processor tool
+ - Multi-step reasoning: Break it down systematically
+
+ Answer:"""
+
+             # Run the agent
+             result = self.agent.run(enhanced_prompt)

+             # Extract the final answer if it's structured
+             if isinstance(result, dict) and 'output' in result:
+                 answer = result['output']
+             else:
+                 answer = str(result)

+             # Clean up the answer
+             if "Answer:" in answer:
+                 answer = answer.split("Answer:")[-1].strip()

+             print(f"Agent response: {answer[:100]}...")
+             return answer

          except Exception as e:
+             error_msg = f"Error processing question: {str(e)}"
+             print(error_msg)
+
+             # Fallback to basic response
+             try:
+                 basic_response = f"I encountered an error while processing this question: {question}. Error: {str(e)}"
+                 return basic_response
+             except:
+                 return "Unable to process this question due to technical difficulties."

  def run_and_submit_all(profile: gr.OAuthProfile | None):
      """
      and displays the results.
      """
      # --- Determine HF Space Runtime URL and Repo URL ---
+     space_id = os.getenv("SPACE_ID")

      if profile:
          username = f"{profile.username}"

      # 1. Instantiate Agent
      try:
          agent = GAIAAgent()
+         if not agent.agent:
+             return "Failed to initialize GAIA Agent. Please check your tokens and try again.", None
      except Exception as e:
          print(f"Error instantiating agent: {e}")
          return f"Error initializing agent: {e}", None
+
+     # Agent code URL
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local"
      print(f"Agent code: {agent_code}")

      # 2. Fetch Questions
          return f"Error fetching questions: {e}", None
      except requests.exceptions.JSONDecodeError as e:
          print(f"Error decoding JSON response from questions endpoint: {e}")
          return f"Error decoding server response for questions: {e}", None
      except Exception as e:
          print(f"An unexpected error occurred fetching questions: {e}")
              print(f"Skipping item with missing task_id or question: {item}")
              continue

          try:
+             print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
              submitted_answer = agent(question_text)
              answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
              results_log.append({
                  "Task ID": task_id,
                  "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                 "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
              })
          except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
+             error_answer = f"AGENT ERROR: {e}"
+             answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
              results_log.append({
                  "Task ID": task_id,
                  "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
+                 "Submitted Answer": error_answer
              })

      if not answers_payload:
          print("Agent did not produce any answers to submit.")
          return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

+     # 4. Prepare Submission
      submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
      status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
      print(status_update)

      # 5. Submit
      print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
      try:
+         response = requests.post(submit_url, json=submission_data, timeout=120)  # Increased timeout
          response.raise_for_status()
          result_data = response.json()
          final_status = (
          print(status_message)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df
      except Exception as e:
          status_message = f"An unexpected error occurred during submission: {e}"
          print(status_message)
          results_df = pd.DataFrame(results_log)
          return status_message, results_df

+ # --- Build Gradio Interface ---
  with gr.Blocks(title="GAIA Agent Evaluation") as demo:
+     gr.Markdown("# GAIA Benchmark Agent Evaluation")
      gr.Markdown(
          """
+         **Enhanced GAIA Agent with Multiple Tools:**
+         - 🔍 Web Search (Serper API + DuckDuckGo fallback)
+         - 🐍 Python Interpreter for calculations
+         - 🧮 Mathematical calculator
+         - 📁 File processor for various formats
+         - 🧠 Advanced reasoning with Llama-3.1-70B

          **Instructions:**
+         1. Make sure you have SERPER_API_KEY and HUGGINGFACE_INFERENCE_TOKEN set
+         2. Log in to your Hugging Face account
+         3. Click 'Run GAIA Evaluation' to start the benchmark

+         **Target:** >40% accuracy on GAIA benchmark questions
          """
      )

      gr.LoginButton()

+     run_button = gr.Button("🚀 Run GAIA Evaluation & Submit", variant="primary")

+     status_output = gr.Textbox(
+         label="Evaluation Status & Results",
+         lines=6,
+         interactive=False,
+         placeholder="Click the button above to start evaluation..."
+     )
+
+     results_table = gr.DataFrame(
+         label="Questions and Agent Responses",
+         wrap=True,
+         interactive=False
+     )

      run_button.click(
          fn=run_and_submit_all,
      )

  if __name__ == "__main__":
+     print("\n" + "="*50)
+     print("🤖 GAIA Agent Evaluation System Starting")
+     print("="*50)

+     # Check environment variables
      serper_key = os.getenv("SERPER_API_KEY")
+     hf_token = os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
+     space_id = os.getenv("SPACE_ID")
+
+     print(f"SERPER_API_KEY: {'Found' if serper_key else 'Missing (will use fallback search)'}")
+     print(f"✅ HF_TOKEN: {'Found' if hf_token else 'Missing (required for model access)'}")
+     print(f"SPACE_ID: {space_id if space_id else 'Not found (running locally)'}")
+
      if space_id:
+         print(f"🔗 Space URL: https://huggingface.co/spaces/{space_id}")
+
+     print("="*50)
+     print("🎯 Target: >40% accuracy on GAIA benchmark")
+     print("🛠️ Tools: Search, Python, Math, File Processing")
+     print("🧠 Model: Llama-3.1-70B-Instruct")
+     print("="*50 + "\n")

      demo.launch(debug=True, share=False)
data/knowledge.txt DELETED
File without changes
requirements.txt CHANGED
@@ -1,14 +1,32 @@
- gradio>=4.0.0
- transformers>=4.35.0
- torch>=2.0.0
- pandas>=1.5.0
- requests>=2.28.0
- beautifulsoup4>=4.11.0
- wikipedia>=1.4.0
- smolagents>=0.1.0
- accelerate>=0.20.0
- sentencepiece>=0.1.99
- openpyxl
- PyPDF2
- pillow
- ctransformers
+ # Core dependencies
+ gradio==4.44.0
+ requests==2.31.0
+ pandas==2.1.4
+
+ # smolagents and AI dependencies
+ smolagents==0.2.0
+ transformers==4.45.2
+ torch==2.1.2
+ tokenizers==0.19.1
+
+ # Tool dependencies
+ duckduckgo-search==3.9.6
+ python-dotenv==1.0.0
+
+ # Utility libraries
+ numpy==1.24.4
+ urllib3==2.0.7
+ certifi==2023.11.17
+ charset-normalizer==3.3.2
+ idna==3.6
+
+ # Optional: for better JSON handling
+ orjson==3.9.10
+
+ # For file processing
+ openpyxl==3.1.2
+ python-docx==1.1.0
+
+ # Security and compatibility
+ cryptography==41.0.8
+ PyYAML==6.0.1
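
Because every dependency is now pinned to an exact version, an environment can be checked against the new requirements.txt using only the standard library. A minimal sketch (not part of this commit; the pin list below is abbreviated to a few packages from the file above):

import importlib.metadata as md

# Compare a handful of the pins above against what is actually installed.
pins = {
    "gradio": "4.44.0",
    "smolagents": "0.2.0",
    "transformers": "4.45.2",
    "torch": "2.1.2",
    "pandas": "2.1.4",
}

for package, expected in pins.items():
    try:
        installed = md.version(package)
        status = "OK" if installed == expected else f"mismatch (installed {installed})"
    except md.PackageNotFoundError:
        status = "missing"
    print(f"{package}=={expected}: {status}")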
test.py ADDED
@@ -0,0 +1,146 @@
+ #!/usr/bin/env python3
+ """
+ Test script for GAIA Agent
+ Run this to verify your agent works before deploying
+ """
+
+ import os
+ import sys
+ from pathlib import Path
+
+ # Add current directory to path
+ sys.path.append(str(Path(__file__).parent))
+
+ def test_environment():
+     """Test environment variables and dependencies"""
+     print("🧪 Testing Environment Setup")
+     print("-" * 40)
+
+     # Check environment variables
+     serper_key = os.getenv("SERPER_API_KEY")
+     hf_token = os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
+
+     print(f"SERPER_API_KEY: {'✅ Found' if serper_key else '❌ Missing'}")
+     print(f"HF_TOKEN: {'✅ Found' if hf_token else '❌ Missing'}")
+
+     # Test imports
+     try:
+         import gradio as gr
+         print("Gradio: ✅ Imported")
+     except ImportError as e:
+         print(f"Gradio: ❌ Import failed - {e}")
+
+     try:
+         import smolagents
+         print("smolagents: ✅ Imported")
+     except ImportError as e:
+         print(f"smolagents: ❌ Import failed - {e}")
+
+     try:
+         import pandas as pd
+         print("Pandas: ✅ Imported")
+     except ImportError as e:
+         print(f"Pandas: ❌ Import failed - {e}")
+
+     try:
+         import requests
+         print("Requests: ✅ Imported")
+     except ImportError as e:
+         print(f"Requests: ❌ Import failed - {e}")
+
+ def test_agent_basic():
+     """Test basic agent functionality"""
+     print("\n🤖 Testing Agent Initialization")
+     print("-" * 40)
+
+     try:
+         # Import the agent
+         from app import GAIAAgent
+
+         # Initialize agent
+         agent = GAIAAgent()
+
+         if agent.agent is None:
+             print("❌ Agent initialization failed")
+             return False
+
+         print("✅ Agent initialized successfully")
+
+         # Test with simple questions
+         test_questions = [
+             "What is 2 + 2?",
+             "What is the capital of France?",
+             "Calculate the square root of 16"
+         ]
+
+         for i, question in enumerate(test_questions, 1):
+             print(f"\n📝 Test Question {i}: {question}")
+             try:
+                 answer = agent(question)
+                 print(f"✅ Answer: {answer[:100]}...")
+             except Exception as e:
+                 print(f"❌ Error: {e}")
+
+         return True
+
+     except Exception as e:
+         print(f"❌ Agent test failed: {e}")
+         return False
+
+ def test_tools():
+     """Test individual tools"""
+     print("\n🛠️ Testing Individual Tools")
+     print("-" * 40)
+
+     try:
+         from app import SerperSearchTool, MathCalculatorTool
+
+         # Test search tool
+         search_tool = SerperSearchTool()
+         try:
+             result = search_tool("Python programming")
+             print(f"✅ Search Tool: {result[:100]}...")
+         except Exception as e:
+             print(f"❌ Search Tool Error: {e}")
+
+         # Test math tool
+         math_tool = MathCalculatorTool()
+         try:
+             result = math_tool("2 + 2")
+             print(f"✅ Math Tool: {result}")
+         except Exception as e:
+             print(f"❌ Math Tool Error: {e}")
+
+         # Test math tool with complex expression
+         try:
+             result = math_tool("sqrt(16) + 3 * 2")
+             print(f"✅ Math Complex: {result}")
+         except Exception as e:
+             print(f"❌ Math Complex Error: {e}")
+
+     except Exception as e:
+         print(f"❌ Tools test failed: {e}")
+
+ def main():
+     """Run all tests"""
+     print("🚀 GAIA Agent Test Suite")
+     print("=" * 50)
+
+     # Test environment
+     test_environment()
+
+     # Test tools
+     test_tools()
+
+     # Test agent
+     success = test_agent_basic()
+
+     print("\n" + "=" * 50)
+     if success:
+         print("✅ All tests passed! Your agent is ready for deployment.")
+     else:
+         print("❌ Some tests failed. Please check the errors above.")
+     print("=" * 50)
+
+ if __name__ == "__main__":
+     main()
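
test.py is written to be run directly (python test.py) before pushing to the Space. If the suite ever needs to be driven from another script or a CI step, the standard-library runpy module can execute it without importing it under the module name test, which shares a name with Python's own test package. A minimal sketch (not part of this commit):

import runpy

# Execute test.py as if it were run as a script, so the
# `if __name__ == "__main__":` guard at the bottom calls main().
runpy.run_path("test.py", run_name="__main__")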