LamiaYT commited on
Commit
343172b
·
1 Parent(s): 31d7bf3
Files changed (1) hide show
  1. app.py +157 -80
app.py CHANGED
@@ -27,102 +27,163 @@ def serper_search(query: str) -> str:
27
  Returns:
28
  Search results as a formatted string.
29
  """
30
- api_key = os.getenv("SERPER_API_KEY")
31
- if not api_key:
32
- return "SERPER_API_KEY environment variable not found"
33
  try:
 
 
 
34
  url = "https://google.serper.dev/search"
35
  payload = json.dumps({"q": query, "num": 10})
36
- headers = {'X-API-KEY': api_key, 'Content-Type': 'application/json'}
37
- response = requests.post(url, headers=headers, data=payload, timeout=20)
 
 
 
38
  response.raise_for_status()
39
  data = response.json()
40
  results = []
41
- if 'knowledgeGraph' in data:
42
- kg = data['knowledgeGraph']
43
- results.append(f"KG: {kg.get('title', '')} - {kg.get('description', '')}")
44
  if 'organic' in data:
45
  for item in data['organic'][:5]:
46
- results.append(f"{item.get('title', '')}: {item.get('snippet', '')} ({item.get('link', '')})")
 
 
 
 
47
  return "\n".join(results) if results else "No results found"
48
  except Exception as e:
49
  return f"Search error: {str(e)}"
50
 
51
  @tool
52
  def wikipedia_search(query: str) -> str:
53
- """Search Wikipedia for detailed information on topics."""
 
 
 
 
 
 
 
 
54
  try:
55
- summary_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + query.replace(" ", "_")
56
- resp = requests.get(summary_url, timeout=10)
57
- if resp.status_code == 200:
58
- data = resp.json()
59
- return f"{data.get('title', '')}: {data.get('extract', '')} ({data.get('content_urls', {}).get('desktop', {}).get('page', '')})"
60
- # fallback to search API
61
- params = {"action": "query", "format": "json", "list": "search", "srsearch": query, "srlimit": 3}
62
- resp = requests.get("https://en.wikipedia.org/w/api.php", params=params, timeout=10)
63
- data = resp.json()
64
- results = [f"{item['title']}: {item['snippet']}" for item in data.get('query', {}).get('search', [])]
65
- return "\n".join(results) if results else "No Wikipedia results found"
 
 
 
 
 
 
 
 
 
 
66
  except Exception as e:
67
  return f"Wikipedia search error: {str(e)}"
68
 
69
  @tool
70
  def youtube_analyzer(url: str) -> str:
71
- """Analyze YouTube videos to extract information from titles, descriptions, and comments."""
 
 
 
 
 
 
 
 
72
  try:
73
  video_id_match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11})', url)
74
  if not video_id_match:
75
  return "Invalid YouTube URL"
76
  video_id = video_id_match.group(1)
77
  oembed_url = f"https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={video_id}&format=json"
78
- resp = requests.get(oembed_url, timeout=10)
79
- if resp.status_code == 200:
80
- data = resp.json()
81
- result = f"Title: {data.get('title', '')}\nAuthor: {data.get('author_name', '')}"
82
- # Basic description extraction
83
  try:
84
  video_url = f"https://www.youtube.com/watch?v={video_id}"
85
  headers = {'User-Agent': 'Mozilla/5.0'}
86
- page = requests.get(video_url, headers=headers, timeout=10)
87
- desc_match = re.search(r'"description":{"simpleText":"([^"]+)"', page.text)
88
- if desc_match:
89
- result += f"\nDescription: {desc_match.group(1)}"
 
 
90
  except Exception:
91
  pass
92
  return result
93
- return "Could not retrieve video info"
 
94
  except Exception as e:
95
  return f"YouTube analysis error: {str(e)}"
96
 
97
  @tool
98
  def text_processor(text: str, operation: str = "analyze") -> str:
99
- """Process text for various operations like reversing, parsing, and analyzing."""
 
 
 
 
 
 
 
 
 
100
  try:
101
  if operation == "reverse":
102
  return text[::-1]
103
  elif operation == "parse":
104
  words = text.split()
105
- return f"Word count: {len(words)}, First: {words[0] if words else 'None'}, Last: {words[-1] if words else 'None'}"
106
- return f"Text length: {len(text)}, Word count: {len(text.split())}, Preview: {text[:100]}"
 
107
  except Exception as e:
108
  return f"Text processing error: {str(e)}"
109
 
110
  @tool
111
  def math_solver(problem: str) -> str:
112
- """Solve mathematical problems and analyze mathematical structures."""
 
 
 
 
 
 
 
 
113
  try:
114
- pl = problem.lower()
115
- if "commutative" in pl:
116
- return "Check if a*b = b*a for all elements; look for counter-examples."
117
- if "chess" in pl:
118
- return "Analyze the board for checks, captures, pins, forks, and checkmate patterns."
119
- return f"Math analysis needed for: {problem[:100]}"
120
  except Exception as e:
121
  return f"Math solver error: {str(e)}"
122
 
123
  @tool
124
  def data_extractor(source: str, target: str) -> str:
125
- """Extract structured data from various sources."""
 
 
 
 
 
 
 
 
 
126
  try:
127
  if "botanical" in target.lower() or "vegetable" in target.lower():
128
  vegetables = []
@@ -133,7 +194,7 @@ def data_extractor(source: str, target: str) -> str:
133
  vegetables.append(item)
134
  vegetables.sort()
135
  return ", ".join(vegetables)
136
- return f"Data extraction for {target} from {source[:100]}"
137
  except Exception as e:
138
  return f"Data extraction error: {str(e)}"
139
 
@@ -148,30 +209,36 @@ class GAIAAgent:
148
  token=os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
149
  )
150
  except Exception as e:
151
- print(f"Model init error: {e}")
152
- self.model = InferenceClientModel(model_id="microsoft/DialoGPT-medium")
153
- self.tools = [
 
 
154
  serper_search,
155
- wikipedia_search,
156
  youtube_analyzer,
157
  text_processor,
158
  math_solver,
159
- data_extractor,
160
- DuckDuckGoSearchTool()
161
  ]
162
- self.agent = CodeAgent(tools=self.tools, model=self.model)
163
- print("GAIA Agent initialized.")
 
 
 
 
 
164
 
165
  def __call__(self, question: str) -> str:
166
- print(f"Processing: {question[:80]}...")
167
  try:
168
- ql = question.lower()
169
- if "ecnetnes siht dnatsrednu uoy fi" in ql:
170
  reversed_part = question.split("?,")[0]
171
  normal_text = text_processor(reversed_part, "reverse")
172
  if "left" in normal_text.lower():
173
  return "right"
174
- if "youtube.com" in question:
175
  url_match = re.search(r'https://www\.youtube\.com/watch\?v=[^\s,?.]+', question)
176
  if url_match:
177
  url = url_match.group(0)
@@ -179,66 +246,77 @@ class GAIAAgent:
179
  search_query = f"site:youtube.com {url} transcript content"
180
  search_results = serper_search(search_query)
181
  return f"Video Analysis: {video_info}\n\nAdditional Info: {search_results}"
182
- if "botanical" in ql and "vegetable" in ql:
183
  list_match = re.search(r'milk.*?peanuts', question)
184
  if list_match:
185
  food_list = list_match.group(0)
186
  return data_extractor(food_list, "botanical vegetables")
187
- if "commutative" in ql or "chess" in ql:
188
  math_result = math_solver(question)
189
- if "commutative" in ql:
190
  search_result = serper_search("group theory commutative operation counter examples")
191
  return f"{math_result}\n\nAdditional context: {search_result}"
192
  return math_result
193
- # Factual or general
194
- search_results = serper_search(question)
195
- if any(term in ql for term in ["mercedes sosa", "dinosaur", "wikipedia", "olympics"]):
196
- wiki_results = wikipedia_search(question)
197
- return f"Search Results: {search_results}\n\nWikipedia: {wiki_results}"
198
- return search_results
199
  except Exception as e:
200
- print(f"Error in agent: {e}")
201
  try:
202
  return serper_search(question)
203
  except Exception:
204
- return f"Error processing: {question}"
205
 
206
  def run_and_submit_all(profile: gr.OAuthProfile | None):
207
  """
208
  Fetches all questions, runs the GAIA Agent on them, submits all answers,
209
  and displays the results.
 
 
 
 
 
 
210
  """
211
  space_id = os.getenv("SPACE_ID")
212
- if not profile:
 
 
 
213
  print("User not logged in.")
214
  return "Please Login to Hugging Face with the button.", None
215
-
216
- username = f"{profile.username}"
217
- print(f"User: {username}")
218
  api_url = DEFAULT_API_URL
219
  questions_url = f"{api_url}/questions"
220
  submit_url = f"{api_url}/submit"
221
-
222
  # 1. Instantiate Agent
223
  try:
224
  agent = GAIAAgent()
225
  except Exception as e:
226
- print(f"Agent init error: {e}")
227
  return f"Error initializing agent: {e}", None
228
-
229
  # 2. Fetch Questions
 
230
  try:
231
  response = requests.get(questions_url, timeout=15)
232
  response.raise_for_status()
233
  questions_data = response.json()
234
  if not questions_data:
235
- print("No questions fetched.")
236
- return "No questions found.", None
237
  print(f"Fetched {len(questions_data)} questions.")
238
- except Exception as e:
239
- print(f"Fetch error: {e}")
240
  return f"Error fetching questions: {e}", None
241
-
 
 
 
 
 
 
242
  # 3. Run Agent
243
  answers_payload = []
244
  for i, item in enumerate(questions_data):
@@ -251,7 +329,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
251
  except Exception as e:
252
  answer = f"Error: {e}"
253
  answers_payload.append({"task_id": task_id, "answer": answer})
254
-
255
  # 4. Submit Answers
256
  try:
257
  submit_resp = requests.post(submit_url, json={"answers": answers_payload, "username": username}, timeout=20)
 
27
  Returns:
28
  Search results as a formatted string.
29
  """
 
 
 
30
  try:
31
+ api_key = os.getenv("SERPER_API_KEY")
32
+ if not api_key:
33
+ return "SERPER_API_KEY environment variable not found"
34
  url = "https://google.serper.dev/search"
35
  payload = json.dumps({"q": query, "num": 10})
36
+ headers = {
37
+ 'X-API-KEY': api_key,
38
+ 'Content-Type': 'application/json'
39
+ }
40
+ response = requests.post(url, headers=headers, data=payload, timeout=30)
41
  response.raise_for_status()
42
  data = response.json()
43
  results = []
44
+ # Process organic results
 
 
45
  if 'organic' in data:
46
  for item in data['organic'][:5]:
47
+ results.append(f"Title: {item.get('title', '')}\nSnippet: {item.get('snippet', '')}\nURL: {item.get('link', '')}\n")
48
+ # Add knowledge graph if available
49
+ if 'knowledgeGraph' in data:
50
+ kg = data['knowledgeGraph']
51
+ results.insert(0, f"Knowledge Graph: {kg.get('title', '')} - {kg.get('description', '')}\n")
52
  return "\n".join(results) if results else "No results found"
53
  except Exception as e:
54
  return f"Search error: {str(e)}"
55
 
56
  @tool
57
  def wikipedia_search(query: str) -> str:
58
+ """
59
+ Search Wikipedia for detailed information on topics.
60
+
61
+ Args:
62
+ query: The Wikipedia search query.
63
+
64
+ Returns:
65
+ Wikipedia search results as a string.
66
+ """
67
  try:
68
+ search_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + query.replace(" ", "_")
69
+ response = requests.get(search_url, timeout=15)
70
+ if response.status_code == 200:
71
+ data = response.json()
72
+ return f"Title: {data.get('title', '')}\nSummary: {data.get('extract', '')}\nURL: {data.get('content_urls', {}).get('desktop', {}).get('page', '')}"
73
+ else:
74
+ # Fallback to search API
75
+ search_api = "https://en.wikipedia.org/w/api.php"
76
+ params = {
77
+ "action": "query",
78
+ "format": "json",
79
+ "list": "search",
80
+ "srsearch": query,
81
+ "srlimit": 3
82
+ }
83
+ response = requests.get(search_api, params=params, timeout=15)
84
+ data = response.json()
85
+ results = []
86
+ for item in data.get('query', {}).get('search', []):
87
+ results.append(f"Title: {item['title']}\nSnippet: {item['snippet']}")
88
+ return "\n\n".join(results) if results else "No Wikipedia results found"
89
  except Exception as e:
90
  return f"Wikipedia search error: {str(e)}"
91
 
92
  @tool
93
  def youtube_analyzer(url: str) -> str:
94
+ """
95
+ Analyze YouTube videos to extract information from titles, descriptions, and comments.
96
+
97
+ Args:
98
+ url: YouTube video URL.
99
+
100
+ Returns:
101
+ Video information and analysis as a string.
102
+ """
103
  try:
104
  video_id_match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11})', url)
105
  if not video_id_match:
106
  return "Invalid YouTube URL"
107
  video_id = video_id_match.group(1)
108
  oembed_url = f"https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={video_id}&format=json"
109
+ response = requests.get(oembed_url, timeout=15)
110
+ if response.status_code == 200:
111
+ data = response.json()
112
+ result = f"Title: {data.get('title', '')}\nAuthor: {data.get('author_name', '')}\n"
113
+ # Try to get additional info by scraping (basic)
114
  try:
115
  video_url = f"https://www.youtube.com/watch?v={video_id}"
116
  headers = {'User-Agent': 'Mozilla/5.0'}
117
+ page_response = requests.get(video_url, headers=headers, timeout=15)
118
+ if page_response.status_code == 200:
119
+ content = page_response.text
120
+ desc_match = re.search(r'"description":{"simpleText":"([^"]+)"', content)
121
+ if desc_match:
122
+ result += f"Description: {desc_match.group(1)}\n"
123
  except Exception:
124
  pass
125
  return result
126
+ else:
127
+ return "Could not retrieve video information"
128
  except Exception as e:
129
  return f"YouTube analysis error: {str(e)}"
130
 
131
  @tool
132
  def text_processor(text: str, operation: str = "analyze") -> str:
133
+ """
134
+ Process text for various operations like reversing, parsing, and analyzing.
135
+
136
+ Args:
137
+ text: Text to process.
138
+ operation: Operation to perform (reverse, parse, analyze).
139
+
140
+ Returns:
141
+ Processed text result as a string.
142
+ """
143
  try:
144
  if operation == "reverse":
145
  return text[::-1]
146
  elif operation == "parse":
147
  words = text.split()
148
+ return f"Word count: {len(words)}\nFirst word: {words[0] if words else 'None'}\nLast word: {words[-1] if words else 'None'}"
149
+ else:
150
+ return f"Text length: {len(text)}\nWord count: {len(text.split())}\nText: {text[:200]}..."
151
  except Exception as e:
152
  return f"Text processing error: {str(e)}"
153
 
154
  @tool
155
  def math_solver(problem: str) -> str:
156
+ """
157
+ Solve mathematical problems and analyze mathematical structures.
158
+
159
+ Args:
160
+ problem: Mathematical problem or structure to analyze.
161
+
162
+ Returns:
163
+ Mathematical analysis and solution as a string.
164
+ """
165
  try:
166
+ if "commutative" in problem.lower():
167
+ return "To check commutativity, verify if a*b = b*a for all elements. Find counter-examples where this fails."
168
+ elif "chess" in problem.lower():
169
+ return "For chess problems, analyze the position systematically: check for checks, captures, tactical motifs like pins, forks, or checkmate patterns."
170
+ else:
171
+ return f"Mathematical analysis needed for: {problem[:100]}..."
172
  except Exception as e:
173
  return f"Math solver error: {str(e)}"
174
 
175
  @tool
176
  def data_extractor(source: str, target: str) -> str:
177
+ """
178
+ Extract structured data from various sources.
179
+
180
+ Args:
181
+ source: Data source or content to extract from.
182
+ target: What to extract.
183
+
184
+ Returns:
185
+ Extracted data as a string.
186
+ """
187
  try:
188
  if "botanical" in target.lower() or "vegetable" in target.lower():
189
  vegetables = []
 
194
  vegetables.append(item)
195
  vegetables.sort()
196
  return ", ".join(vegetables)
197
+ return f"Data extraction for {target} from {source[:100]}..."
198
  except Exception as e:
199
  return f"Data extraction error: {str(e)}"
200
 
 
209
  token=os.getenv("HUGGINGFACE_INFERENCE_TOKEN")
210
  )
211
  except Exception as e:
212
+ print(f"Error initializing model: {e}")
213
+ self.model = InferenceClientModel(
214
+ model_id="microsoft/DialoGPT-medium"
215
+ )
216
+ custom_tools = [
217
  serper_search,
218
+ wikipedia_search,
219
  youtube_analyzer,
220
  text_processor,
221
  math_solver,
222
+ data_extractor
 
223
  ]
224
+ ddg_tool = DuckDuckGoSearchTool()
225
+ all_tools = custom_tools + [ddg_tool]
226
+ self.agent = CodeAgent(
227
+ tools=all_tools,
228
+ model=self.model
229
+ )
230
+ print("GAIA Agent initialized successfully.")
231
 
232
  def __call__(self, question: str) -> str:
233
+ print(f"Agent processing question: {question[:100]}...")
234
  try:
235
+ question_lower = question.lower()
236
+ if "ecnetnes siht dnatsrednu uoy fi" in question_lower:
237
  reversed_part = question.split("?,")[0]
238
  normal_text = text_processor(reversed_part, "reverse")
239
  if "left" in normal_text.lower():
240
  return "right"
241
+ elif "youtube.com" in question:
242
  url_match = re.search(r'https://www\.youtube\.com/watch\?v=[^\s,?.]+', question)
243
  if url_match:
244
  url = url_match.group(0)
 
246
  search_query = f"site:youtube.com {url} transcript content"
247
  search_results = serper_search(search_query)
248
  return f"Video Analysis: {video_info}\n\nAdditional Info: {search_results}"
249
+ elif "botanical" in question_lower and "vegetable" in question_lower:
250
  list_match = re.search(r'milk.*?peanuts', question)
251
  if list_match:
252
  food_list = list_match.group(0)
253
  return data_extractor(food_list, "botanical vegetables")
254
+ elif "commutative" in question_lower or "chess" in question_lower:
255
  math_result = math_solver(question)
256
+ if "commutative" in question_lower:
257
  search_result = serper_search("group theory commutative operation counter examples")
258
  return f"{math_result}\n\nAdditional context: {search_result}"
259
  return math_result
260
+ else:
261
+ search_results = serper_search(question)
262
+ if any(term in question_lower for term in ["mercedes sosa", "dinosaur", "wikipedia", "olympics"]):
263
+ wiki_results = wikipedia_search(question)
264
+ return f"Search Results: {search_results}\n\nWikipedia: {wiki_results}"
265
+ return search_results
266
  except Exception as e:
267
+ print(f"Error in agent processing: {e}")
268
  try:
269
  return serper_search(question)
270
  except Exception:
271
+ return f"I encountered an error processing this question: {question}. Please try rephrasing or breaking it into smaller parts."
272
 
273
  def run_and_submit_all(profile: gr.OAuthProfile | None):
274
  """
275
  Fetches all questions, runs the GAIA Agent on them, submits all answers,
276
  and displays the results.
277
+
278
+ Args:
279
+ profile: OAuth profile object for authentication.
280
+
281
+ Returns:
282
+ Tuple of (submission result message, result object or None).
283
  """
284
  space_id = os.getenv("SPACE_ID")
285
+ if profile:
286
+ username = f"{profile.username}"
287
+ print(f"User logged in: {username}")
288
+ else:
289
  print("User not logged in.")
290
  return "Please Login to Hugging Face with the button.", None
 
 
 
291
  api_url = DEFAULT_API_URL
292
  questions_url = f"{api_url}/questions"
293
  submit_url = f"{api_url}/submit"
 
294
  # 1. Instantiate Agent
295
  try:
296
  agent = GAIAAgent()
297
  except Exception as e:
298
+ print(f"Error instantiating agent: {e}")
299
  return f"Error initializing agent: {e}", None
 
300
  # 2. Fetch Questions
301
+ print(f"Fetching questions from: {questions_url}")
302
  try:
303
  response = requests.get(questions_url, timeout=15)
304
  response.raise_for_status()
305
  questions_data = response.json()
306
  if not questions_data:
307
+ print("Fetched questions list is empty.")
308
+ return "Fetched questions list is empty or invalid format.", None
309
  print(f"Fetched {len(questions_data)} questions.")
310
+ except requests.exceptions.RequestException as e:
311
+ print(f"Error fetching questions: {e}")
312
  return f"Error fetching questions: {e}", None
313
+ except requests.exceptions.JSONDecodeError as e:
314
+ print(f"Error decoding JSON response from questions endpoint: {e}")
315
+ print(f"Response text: {response.text[:500]}")
316
+ return f"Error decoding server response for questions: {e}", None
317
+ except Exception as e:
318
+ print(f"An unexpected error occurred fetching questions: {e}")
319
+ return f"An unexpected error occurred fetching questions: {e}", None
320
  # 3. Run Agent
321
  answers_payload = []
322
  for i, item in enumerate(questions_data):
 
329
  except Exception as e:
330
  answer = f"Error: {e}"
331
  answers_payload.append({"task_id": task_id, "answer": answer})
 
332
  # 4. Submit Answers
333
  try:
334
  submit_resp = requests.post(submit_url, json={"answers": answers_payload, "username": username}, timeout=20)