LamiaYT committed on
Commit b75e20d · 1 Parent(s): e9c8890
Files changed (1)
  1. app.py +30 -188
app.py CHANGED
@@ -22,14 +22,7 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 @tool
 def serper_search(query: str) -> str:
-    """Enhanced web search using Serper API with better result processing
-
-    Args:
-        query: The search query
-
-    Returns:
-        Formatted search results with relevance scoring
-    """
+    """Enhanced web search using Serper API with better result processing"""
     try:
         api_key = os.getenv("SERPER_API_KEY")
         if not api_key:
@@ -47,7 +40,6 @@ def serper_search(query: str) -> str:
         data = response.json()
         results = []
 
-        # Process knowledge graph first (highest priority)
         if 'knowledgeGraph' in data:
             kg = data['knowledgeGraph']
             kg_info = f"KNOWLEDGE GRAPH: {kg.get('title', '')} - {kg.get('description', '')}"
@@ -56,30 +48,25 @@ def serper_search(query: str) -> str:
                     kg_info += f"\n{key}: {value}"
             results.append(kg_info + "\n")
 
-        # Process organic results with enhanced filtering
         if 'organic' in data:
             for i, item in enumerate(data['organic'][:7]):
                 title = item.get('title', '')
                 snippet = item.get('snippet', '')
                 link = item.get('link', '')
-
-                # Enhanced result formatting
                 result_text = f"RESULT {i+1}:\nTitle: {title}\nSnippet: {snippet}\nURL: {link}\n"
 
-                # Extract specific data patterns
-                if re.search(r'\d{4}', snippet):  # Years
+                if re.search(r'\d{4}', snippet):
                     years = re.findall(r'\b(19|20)\d{2}\b', snippet)
                     if years:
                         result_text += f"Years mentioned: {', '.join(years)}\n"
 
-                if re.search(r'\$[\d,]+', snippet):  # Money amounts
+                if re.search(r'\$[\d,]+', snippet):
                     amounts = re.findall(r'\$[\d,]+(?:\.\d{2})?', snippet)
                     if amounts:
                         result_text += f"Amounts: {', '.join(amounts)}\n"
 
                 results.append(result_text)
 
-        # Add people also ask if available
         if 'peopleAlsoAsk' in data:
             paa = "\nPEOPLE ALSO ASK:\n"
             for item in data['peopleAlsoAsk'][:3]:
@@ -92,19 +79,10 @@ def serper_search(query: str) -> str:
         return f"Search error: {str(e)}"
 
 @tool
-def wikipedia_enhanced_search(query: str) -> str:
-    """Enhanced Wikipedia search with multiple strategies
-
-    Args:
-        query: Wikipedia search query
-
-    Returns:
-        Comprehensive Wikipedia information
-    """
+def wikipedia_search(query: str) -> str:
+    """Enhanced Wikipedia search with multiple strategies"""
     try:
         results = []
-
-        # Strategy 1: Direct page lookup
         clean_query = query.replace(" ", "_")
         direct_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{clean_query}"
 
@@ -116,12 +94,10 @@ def wikipedia_enhanced_search(query: str) -> str:
             summary = f"WIKIPEDIA DIRECT MATCH:\nTitle: {data.get('title', '')}\n"
             summary += f"Extract: {data.get('extract', '')}\n"
 
-            # Add coordinates if available
             if 'coordinates' in data:
                 coords = data['coordinates']
                 summary += f"Coordinates: {coords.get('lat', '')}, {coords.get('lon', '')}\n"
 
-            # Add birth/death dates if available
             extract = data.get('extract', '')
             birth_match = re.search(r'born[^)]*(\d{1,2}\s+\w+\s+\d{4})', extract, re.IGNORECASE)
             if birth_match:
@@ -135,7 +111,6 @@ def wikipedia_enhanced_search(query: str) -> str:
         except:
             pass
 
-        # Strategy 2: Search API for multiple results
         search_url = "https://en.wikipedia.org/w/api.php"
         search_params = {
             "action": "query",
@@ -152,14 +127,12 @@ def wikipedia_enhanced_search(query: str) -> str:
             if 'query' in data and 'search' in data['query']:
                 search_results = "WIKIPEDIA SEARCH RESULTS:\n"
                 for item in data['query']['search']:
-                    # Clean HTML tags from snippet
                     snippet = re.sub(r'<[^>]+>', '', item.get('snippet', ''))
                     search_results += f"• {item['title']}: {snippet}\n"
                 results.append(search_results)
         except:
             pass
 
-        # Strategy 3: Try opensearch for suggestions
         opensearch_url = "https://en.wikipedia.org/w/api.php"
         opensearch_params = {
             "action": "opensearch",
@@ -171,7 +144,7 @@ def wikipedia_enhanced_search(query: str) -> str:
         try:
             response = requests.get(opensearch_url, params=opensearch_params, timeout=10)
             data = response.json()
-            if len(data) >= 4 and data[1]:  # Has suggestions
+            if len(data) >= 4 and data[1]:
                 suggestions = "WIKIPEDIA SUGGESTIONS:\n"
                 for i, (title, desc, url) in enumerate(zip(data[1], data[2], data[3])):
                     suggestions += f"{i+1}. {title}: {desc}\n"
@@ -185,17 +158,9 @@ def wikipedia_enhanced_search(query: str) -> str:
         return f"Wikipedia search error: {str(e)}"
 
 @tool
-def youtube_enhanced_analyzer(url: str) -> str:
-    """Enhanced YouTube video analyzer with transcript extraction
-
-    Args:
-        url: YouTube video URL
-
-    Returns:
-        Comprehensive video analysis
-    """
+def youtube_analyzer(url: str) -> str:
+    """Enhanced YouTube video analyzer with transcript extraction"""
     try:
-        # Extract video ID
         video_id_match = re.search(r'(?:v=|/|youtu\.be/)([A-Za-z0-9_-]{11})', url)
         if not video_id_match:
             return "Invalid YouTube URL format"
@@ -203,7 +168,6 @@ def youtube_enhanced_analyzer(url: str) -> str:
         video_id = video_id_match.group(1)
         results = []
 
-        # Get basic video info via oEmbed
        try:
            oembed_url = f"https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={video_id}&format=json"
            response = requests.get(oembed_url, timeout=15)
@@ -212,7 +176,6 @@ def youtube_enhanced_analyzer(url: str) -> str:
                data = response.json()
                basic_info = f"VIDEO INFO:\nTitle: {data.get('title', '')}\nAuthor: {data.get('author_name', '')}\n"
 
-                # Extract duration if available in title/description patterns
                title = data.get('title', '').lower()
                if 'minute' in title or 'min' in title:
                    duration_match = re.search(r'(\d+)\s*(?:minute|min)', title)
@@ -223,7 +186,6 @@ def youtube_enhanced_analyzer(url: str) -> str:
        except:
            pass
 
-        # Enhanced content analysis through page scraping
        try:
            video_url = f"https://www.youtube.com/watch?v={video_id}"
            headers = {
@@ -234,34 +196,28 @@ def youtube_enhanced_analyzer(url: str) -> str:
            if response.status_code == 200:
                content = response.text
 
-                # Extract view count
                view_match = re.search(r'"viewCount":"(\d+)"', content)
                if view_match:
                    views = int(view_match.group(1))
                    results.append(f"View count: {views:,}")
 
-                # Extract upload date
                upload_match = re.search(r'"uploadDate":"([^"]+)"', content)
                if upload_match:
                    results.append(f"Upload date: {upload_match.group(1)}")
 
-                # Look for specific content patterns
                content_lower = content.lower()
 
-                # Bird counting for ornithology videos
                if "bird" in content_lower:
                    bird_numbers = re.findall(r'\b(\d+)\s+(?:bird|species|individual)', content_lower)
                    if bird_numbers:
                        results.append(f"Bird counts found: {', '.join(bird_numbers)}")
 
-                # Duration extraction from JSON-LD
                duration_match = re.search(r'"duration":"PT(\d+)M(\d+)S"', content)
                if duration_match:
                    minutes = int(duration_match.group(1))
                    seconds = int(duration_match.group(2))
                    results.append(f"Exact duration: {minutes}:{seconds:02d}")
 
-                # Extract description
                desc_patterns = [
                    r'"description":{"simpleText":"([^"]+)"}',
                    r'"shortDescription":"([^"]+)"'
@@ -270,7 +226,7 @@ def youtube_enhanced_analyzer(url: str) -> str:
                for pattern in desc_patterns:
                    desc_match = re.search(pattern, content)
                    if desc_match:
-                        description = desc_match.group(1)[:500]  # Limit length
+                        description = desc_match.group(1)[:500]
                        results.append(f"Description excerpt: {description}")
                        break
 
@@ -283,22 +239,13 @@ def youtube_enhanced_analyzer(url: str) -> str:
         return f"YouTube analysis error: {str(e)}"
 
 @tool
-def text_processor_advanced(text: str, operation: str = "analyze") -> str:
-    """Advanced text processing for various linguistic operations
-
-    Args:
-        text: Text to process
-        operation: Operation type (reverse, parse, analyze, extract_numbers, decode)
-
-    Returns:
-        Processed text results
-    """
+def text_processor(text: str, operation: str = "analyze") -> str:
+    """Advanced text processing for various linguistic operations"""
     try:
         if operation == "reverse":
             return text[::-1]
 
         elif operation == "decode":
-            # Handle various encoding schemes
             if text.startswith("base64:"):
                 try:
                     decoded = base64.b64decode(text[7:]).decode('utf-8')
@@ -306,7 +253,6 @@ def text_processor_advanced(text: str, operation: str = "analyze") -> str:
                 except:
                     return "Failed to decode base64"
 
-            # Handle URL encoding
             if '%' in text:
                 try:
                     decoded = urllib.parse.unquote(text)
@@ -317,7 +263,6 @@ def text_processor_advanced(text: str, operation: str = "analyze") -> str:
             return f"No encoding detected in: {text[:100]}"
 
         elif operation == "extract_numbers":
-            # Extract all number patterns
             patterns = {
                 'integers': re.findall(r'\b\d+\b', text),
                 'decimals': re.findall(r'\b\d+\.\d+\b', text),
@@ -334,7 +279,6 @@ def text_processor_advanced(text: str, operation: str = "analyze") -> str:
             return result
 
         elif operation == "parse":
-            # Enhanced parsing with linguistic analysis
             words = text.split()
             sentences = re.split(r'[.!?]+', text)
 
@@ -348,7 +292,6 @@ def text_processor_advanced(text: str, operation: str = "analyze") -> str:
             analysis += f"Last word: {words[-1]}\n"
             analysis += f"Longest word: {max(words, key=len)}\n"
 
-            # Language pattern detection
             if re.search(r'[А-Яа-я]', text):
                 analysis += "Cyrillic characters detected (Russian/Slavic)\n"
             if re.search(r'[À-ÿ]', text):
@@ -356,26 +299,18 @@ def text_processor_advanced(text: str, operation: str = "analyze") -> str:
 
             return analysis
 
-        else:  # Default analyze
+        else:
             return f"Text length: {len(text)} characters\nPreview: {text[:200]}{'...' if len(text) > 200 else ''}"
 
     except Exception as e:
         return f"Text processing error: {str(e)}"
 
 @tool
-def math_solver_advanced(problem: str) -> str:
-    """Advanced mathematical problem solver with multiple strategies
-
-    Args:
-        problem: Mathematical problem or structure to analyze
-
-    Returns:
-        Mathematical analysis and solution approach
-    """
+def math_solver(problem: str) -> str:
+    """Advanced mathematical problem solver with multiple strategies"""
     try:
         problem_lower = problem.lower()
 
-        # Group theory problems
         if "commutative" in problem_lower:
             return """COMMUTATIVITY ANALYSIS:
 To check if operation * is commutative:
@@ -385,7 +320,6 @@ To check if operation * is commutative:
 4. If ANY pair fails commutativity, the operation is not commutative
 5. Pay attention to non-symmetric entries in the operation table"""
 
-        # Chess problems
         elif "chess" in problem_lower:
             return """CHESS ANALYSIS FRAMEWORK:
 1. IMMEDIATE THREATS: Check for checks, captures, piece attacks
@@ -396,7 +330,6 @@ To check if operation * is commutative:
 6. ENDGAME PRINCIPLES: If few pieces, apply endgame theory
 7. CANDIDATE MOVES: Generate and evaluate best move options"""
 
-        # Number theory
         elif "prime" in problem_lower or "factor" in problem_lower:
             return """NUMBER THEORY APPROACH:
 1. For primality: Check divisibility by primes up to √n
@@ -405,7 +338,6 @@ To check if operation * is commutative:
 4. Apply modular arithmetic when appropriate
 5. Use greatest common divisor (GCD) for fraction problems"""
 
-        # Geometry
         elif any(word in problem_lower for word in ["triangle", "circle", "area", "volume", "angle"]):
             return """GEOMETRY SOLUTION STRATEGY:
 1. Draw/visualize the problem if possible
@@ -415,7 +347,6 @@ To check if operation * is commutative:
 5. Consider similar triangles or congruent figures
 6. Apply trigonometry for angle problems"""
 
-        # Statistics/Probability
         elif any(word in problem_lower for word in ["probability", "statistics", "mean", "median"]):
             return """STATISTICS/PROBABILITY APPROACH:
 1. Identify the type of probability (conditional, independent, etc.)
@@ -425,7 +356,6 @@ To check if operation * is commutative:
 5. Check if normal distribution applies
 6. Use Bayes' theorem for conditional probability"""
 
-        # Calculus
         elif any(word in problem_lower for word in ["derivative", "integral", "limit", "calculus"]):
             return """CALCULUS SOLUTION METHOD:
 1. Identify the type of calculus problem
@@ -435,7 +365,6 @@ To check if operation * is commutative:
 5. Check for discontinuities or special points
 6. Verify answers by differentiation/integration"""
 
-        # Algorithm/Logic problems
         elif any(word in problem_lower for word in ["algorithm", "sequence", "pattern", "logic"]):
             return """ALGORITHMIC THINKING:
 1. Identify the pattern or rule governing the sequence
@@ -446,7 +375,6 @@ To check if operation * is commutative:
 6. Optimize for efficiency if needed"""
 
         else:
-            # Try to extract numbers and analyze
             numbers = re.findall(r'-?\d+(?:\.\d+)?', problem)
             if numbers:
                 return f"""GENERAL MATHEMATICAL ANALYSIS:
@@ -461,57 +389,33 @@ pattern recognition, or formula application"""
         return f"Math solver error: {str(e)}"
 
 @tool
-def data_extractor_enhanced(source: str, target: str, context: str = "") -> str:
-    """Enhanced data extraction with context awareness
-
-    Args:
-        source: Source text/data to extract from
-        target: What to extract
-        context: Additional context for extraction
-
-    Returns:
-        Extracted and processed data
-    """
+def data_extractor(source: str, target: str, context: str = "") -> str:
+    """Enhanced data extraction with context awareness"""
     try:
         target_lower = target.lower()
         source_lower = source.lower()
 
-        # Botanical classification (enhanced)
         if "botanical" in target_lower or "vegetable" in target_lower:
-            # Define comprehensive botanical categories
             true_vegetables = {
-                # Roots and tubers
                 "sweet potato", "sweet potatoes", "potato", "potatoes", "carrot", "carrots",
                 "beet", "beets", "radish", "radishes", "turnip", "turnips",
-
-                # Leafy greens
                 "lettuce", "spinach", "kale", "arugula", "chard", "collard greens",
                 "cabbage", "bok choy",
-
-                # Stems and stalks
                 "celery", "asparagus", "rhubarb", "bamboo shoots",
-
-                # Flowers and buds
                 "broccoli", "cauliflower", "artichoke", "artichokes",
-
-                # Herbs (leafy)
                 "basil", "fresh basil", "parsley", "cilantro", "oregano", "thyme"
             }
 
-            # Fruits commonly used as vegetables (exclude these)
             fruit_vegetables = {
                 "tomato", "tomatoes", "pepper", "peppers", "cucumber", "cucumbers",
                 "eggplant", "zucchini", "squash", "pumpkin", "corn", "peas", "beans"
             }
 
-            # Extract items from source
             items = []
 
-            # Handle comma-separated lists
             if "," in source:
                 items = [item.strip() for item in source.split(",")]
             else:
-                # Try to extract from longer text
                 words = source.split()
                 items = words
 
@@ -519,24 +423,20 @@ def data_extractor_enhanced(source: str, target: str, context: str = "") -> str:
             for item in items:
                 item_clean = item.lower().strip()
 
-                # Check if it's a true vegetable
                 if any(veg in item_clean for veg in true_vegetables):
-                    # Double-check it's not a fruit
                     if not any(fruit in item_clean for fruit in fruit_vegetables):
                         vegetables.append(item.strip())
 
-            # Remove duplicates and sort
             vegetables = sorted(list(set(vegetables)))
 
             return ", ".join(vegetables) if vegetables else "No botanical vegetables found"
 
-        # Date extraction
         elif "date" in target_lower:
             date_patterns = [
-                r'\b\d{1,2}[-/]\d{1,2}[-/]\d{4}\b',  # MM/DD/YYYY or MM-DD-YYYY
-                r'\b\d{4}[-/]\d{1,2}[-/]\d{1,2}\b',  # YYYY/MM/DD or YYYY-MM-DD
-                r'\b\d{1,2}\s+\w+\s+\d{4}\b',  # DD Month YYYY
-                r'\b\w+\s+\d{1,2},?\s+\d{4}\b'  # Month DD, YYYY
+                r'\b\d{1,2}[-/]\d{1,2}[-/]\d{4}\b',
+                r'\b\d{4}[-/]\d{1,2}[-/]\d{1,2}\b',
+                r'\b\d{1,2}\s+\w+\s+\d{4}\b',
+                r'\b\w+\s+\d{1,2},?\s+\d{4}\b'
             ]
 
             dates = []
@@ -546,11 +446,9 @@ def data_extractor_enhanced(source: str, target: str, context: str = "") -> str:
 
             return f"Dates found: {', '.join(dates)}" if dates else "No dates found"
 
-        # Number extraction with context
         elif "number" in target_lower:
             numbers = re.findall(r'\b\d+(?:\.\d+)?\b', source)
 
-            # Context-aware number interpretation
             if "year" in context.lower():
                 years = [n for n in numbers if len(n) == 4 and n.startswith(('19', '20'))]
                 return f"Years: {', '.join(years)}" if years else "No years found"
@@ -560,19 +458,15 @@ def data_extractor_enhanced(source: str, target: str, context: str = "") -> str:
             else:
                 return f"Numbers: {', '.join(numbers)}" if numbers else "No numbers found"
 
-        # Email extraction
         elif "email" in target_lower:
             emails = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', source)
             return f"Emails: {', '.join(emails)}" if emails else "No emails found"
 
-        # URL extraction
         elif "url" in target_lower or "link" in target_lower:
             urls = re.findall(r'https?://[^\s<>"]+', source)
             return f"URLs: {', '.join(urls)}" if urls else "No URLs found"
 
-        # Name extraction (basic)
         elif "name" in target_lower:
-            # Look for capitalized words that might be names
             potential_names = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', source)
             return f"Potential names: {', '.join(potential_names)}" if potential_names else "No names found"
 
@@ -584,14 +478,7 @@ def data_extractor_enhanced(source: str, target: str, context: str = "") -> str:
 
 @tool
 def web_page_fetcher(url: str) -> str:
-    """Fetch and extract text content from web pages
-
-    Args:
-        url: URL to fetch
-
-    Returns:
-        Extracted text content
-    """
+    """Fetch and extract text content from web pages"""
     try:
         headers = {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
@@ -602,13 +489,11 @@ def web_page_fetcher(url: str) -> str:
 
         content = response.text
 
-        # Basic text extraction (remove HTML tags)
         text = re.sub(r'<script[^>]*>.*?</script>', '', content, flags=re.DOTALL | re.IGNORECASE)
         text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
         text = re.sub(r'<[^>]+>', '', text)
         text = re.sub(r'\s+', ' ', text)
 
-        # Extract key information
         lines = [line.strip() for line in text.split('\n') if line.strip()]
         meaningful_content = []
 
@@ -616,7 +501,6 @@ def web_page_fetcher(url: str) -> str:
             if len(line) > 20 and not line.startswith(('©', 'Copyright', 'Privacy')):
                 meaningful_content.append(line)
 
-        # Limit content length
         result = ' '.join(meaningful_content[:50])
 
         return result[:2000] if result else "Could not extract meaningful content"
@@ -626,24 +510,14 @@ def web_page_fetcher(url: str) -> str:
 
 @tool
 def calculator_tool(expression: str) -> str:
-    """Safe calculator for mathematical expressions
-
-    Args:
-        expression: Mathematical expression to evaluate
-
-    Returns:
-        Calculation result
-    """
+    """Safe calculator for mathematical expressions"""
     try:
-        # Clean the expression
         expression = expression.strip()
 
-        # Allow only safe characters
         allowed_chars = set('0123456789+-*/.() ')
         if not all(c in allowed_chars for c in expression):
             return "Invalid characters in expression"
 
-        # Evaluate safely
        result = eval(expression)
 
        return f"{expression} = {result}"
@@ -658,7 +532,6 @@ class GAIAAgent:
    def __init__(self):
        print("Initializing Enhanced GAIA Agent...")
 
-        # Initialize model
        try:
            self.model = InferenceClientModel(
                model_id="microsoft/DialoGPT-medium",
@@ -668,23 +541,20 @@ class GAIAAgent:
            print(f"Model initialization warning: {e}")
            self.model = InferenceClientModel(model_id="microsoft/DialoGPT-medium")
 
-        # Enhanced tools list
        custom_tools = [
            serper_search,
-            wikipedia_enhanced_search,
-            youtube_enhanced_analyzer,
-            text_processor_advanced,
-            math_solver_advanced,
-            data_extractor_enhanced,
+            wikipedia_search,
+            youtube_analyzer,
+            text_processor,
+            math_solver,
+            data_extractor,
            web_page_fetcher,
            calculator_tool
        ]
 
-        # Add DuckDuckGo as backup search
        ddg_tool = DuckDuckGoSearchTool()
        all_tools = custom_tools + [ddg_tool]
 
-        # Create agent
        self.agent = CodeAgent(
            tools=all_tools,
            model=self.model
@@ -705,7 +575,6 @@ class GAIAAgent:
            'strategy': 'search_first'
        }
 
-        # Text reversal questions
        if any(reversed_phrase in question for reversed_phrase in ['ecnetnes', 'siht dnatsrednu']):
            analysis.update({
                'type': 'text_reversal',
@@ -715,7 +584,6 @@ class GAIAAgent:
                'strategy': 'reverse_text'
            })
 
-        # YouTube video questions
        elif 'youtube.com' in q_lower or 'youtu.be' in q_lower:
            analysis.update({
                'type': 'youtube_analysis',
@@ -724,7 +592,6 @@ class GAIAAgent:
                'strategy': 'analyze_video'
            })
 
-        # Mathematical questions
        elif any(term in q_lower for term in ['commutative', 'chess', 'mathematical', 'calculate', 'solve']):
            analysis.update({
                'type': 'mathematical',
@@ -733,7 +600,6 @@ class GAIAAgent:
                'strategy': 'math_focused'
            })
 
-        # Botanical/classification questions
        elif 'botanical' in q_lower and 'vegetable' in q_lower:
            analysis.update({
                'type': 'classification',
@@ -742,7 +608,6 @@ class GAIAAgent:
                'strategy': 'classify_data'
            })
 
-        # Factual lookup questions
        elif any(term in q_lower for term in ['who is', 'what is', 'when did', 'where is']):
            analysis.update({
                'type': 'factual_lookup',
@@ -752,60 +617,48 @@ class GAIAAgent:
            })
 
        return analysis
+
    def __call__(self, question: str) -> str:
        print(f"Agent processing question: {question[:100]}...")
 
        try:
-            # Analyze question type and route accordingly
            question_lower = question.lower()
 
-            # Handle reversed text question
            if "ecnetnes siht dnatsrednu uoy fi" in question.lower():
-                # This is the reversed sentence question
-                reversed_part = question.split("?,")[0]  # Get the reversed part
+                reversed_part = question.split("?,")[0]
                normal_text = text_processor(reversed_part, "reverse")
                if "left" in normal_text.lower():
                    return "right"
 
-            # Handle YouTube video questions
            elif "youtube.com" in question:
-                # Extract URL
                url_match = re.search(r'https://www\.youtube\.com/watch\?v=[^\s,?.]+', question)
                if url_match:
                    url = url_match.group(0)
                    video_info = youtube_analyzer(url)
 
-                    # Use search to get more specific info about the video content
                    search_query = f"site:youtube.com {url} transcript content"
                    search_results = serper_search(search_query)
 
                    return f"Video Analysis: {video_info}\n\nAdditional Info: {search_results}"
 
-            # Handle botanical/grocery list questions
            elif "botanical" in question_lower and "vegetable" in question_lower:
-                # Extract the list from the question
                list_match = re.search(r'milk.*?peanuts', question)
                if list_match:
                    food_list = list_match.group(0)
                    return data_extractor(food_list, "botanical vegetables")
 
-            # Handle mathematical problems
            elif "commutative" in question_lower or "chess" in question_lower:
                math_result = math_solver(question)
 
-                # For commutative question, also search for more specific help
                if "commutative" in question_lower:
                    search_result = serper_search("group theory commutative operation counter examples")
                    return f"{math_result}\n\nAdditional context: {search_result}"
 
                return math_result
 
-            # Handle specific factual questions
            else:
-                # Use search tools for factual questions
                search_results = serper_search(question)
 
-                # For some questions, also try Wikipedia
                if any(term in question_lower for term in ["mercedes sosa", "dinosaur", "wikipedia", "olympics"]):
                    wiki_results = wikipedia_search(question)
                    return f"Search Results: {search_results}\n\nWikipedia: {wiki_results}"
@@ -814,17 +667,13 @@ class GAIAAgent:
 
        except Exception as e:
            print(f"Error in agent processing: {e}")
-            # Fallback to basic search
            try:
                return serper_search(question)
            except:
                return f"I encountered an error processing this question: {question}. Please try rephrasing or breaking it into smaller parts."
 
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the GAIA Agent on them, submits all answers,
-    and displays the results.
-    """
+    """Fetches all questions, runs the GAIA Agent on them, submits all answers"""
    space_id = os.getenv("SPACE_ID")
 
    if profile:
@@ -838,7 +687,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
 
-    # 1. Instantiate Agent
    try:
        agent = GAIAAgent()
    except Exception as e:
@@ -848,7 +696,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)
 
-    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
@@ -869,7 +716,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None
 
-    # 3. Run Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
@@ -887,7 +733,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text[:100] + "...", "Submitted Answer": submitted_answer[:200] + "..."})
 
-            # Add small delay to avoid rate limiting
            time.sleep(1)
 
        except Exception as e:
@@ -898,12 +743,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)
 
-    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -985,7 +828,6 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
    print("\n" + "-"*30 + " GAIA Agent Starting " + "-"*30)
 
-    # Check environment variables
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")
    serper_key = os.getenv("SERPER_API_KEY")