dawid-lorek committed on
Commit
6e92f6f
·
verified ·
1 Parent(s): dc1160b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -24
app.py CHANGED
@@ -1,41 +1,57 @@
1
  import os
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
 
6
- from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel
 
7
 
8
  # Constants
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
- MAX_QUESTION_LENGTH = 4000 # Character-based limit for questions
11
- MAX_WEBPAGE_CONTENT = 3000 # Character limit for visited pages (GPT-4 context safe)
12
-
13
- # --- Agent Definition ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  class SmartGAIAAgent:
15
  def __init__(self):
16
  self.api_key = os.getenv("OPENAI_API_KEY")
17
  if not self.api_key:
18
  raise ValueError("Missing OPENAI_API_KEY")
19
  self.model = OpenAIServerModel(model_id="gpt-4", api_key=self.api_key)
20
-
21
  self.agent = CodeAgent(
22
- tools=[DuckDuckGoSearchTool()],
23
  model=self.model,
24
  add_base_tools=True
25
  )
26
 
27
- def truncate_if_needed(self, question: str) -> str:
28
  return question[:MAX_QUESTION_LENGTH]
29
 
30
  def __call__(self, question: str) -> str:
31
  try:
32
- clean_question = self.truncate_if_needed(question)
33
  result = self.agent.run(clean_question)
34
  return result.strip()
35
  except Exception as e:
36
  print(f"Agent error: {e}")
37
  return "error"
38
 
 
39
  def run_and_submit_all(profile: gr.OAuthProfile | None):
40
  space_id = os.getenv("SPACE_ID")
41
  if profile:
@@ -44,9 +60,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
44
  else:
45
  return "Please Login to Hugging Face with the button.", None
46
 
47
- api_url = DEFAULT_API_URL
48
- questions_url = f"{api_url}/questions"
49
- submit_url = f"{api_url}/submit"
50
 
51
  try:
52
  agent = SmartGAIAAgent()
@@ -54,7 +69,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
54
  return f"Error initializing agent: {e}", None
55
 
56
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
57
- print(f"Code link: {agent_code}")
58
 
59
  try:
60
  response = requests.get(questions_url, timeout=15)
@@ -70,18 +84,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
70
  task_id = item.get("task_id")
71
  question_text = item.get("question", "")
72
 
73
- # Skip problematic questions
74
- skip_keywords = [
75
- 'attached', '.mp3', '.wav', '.png', '.jpg', '.jpeg',
76
- 'youtube', '.mp4', 'video', 'listen', 'watch'
77
- ]
78
  if not task_id or not question_text:
79
  continue
80
  if len(question_text) > MAX_QUESTION_LENGTH:
81
  print(f"Skipping long question: {task_id}")
82
  continue
83
- if any(keyword in question_text.lower() for keyword in skip_keywords):
84
- print(f"Skipping unsupported question ({task_id}): {question_text[:60]}...")
 
 
 
85
  continue
86
 
87
  try:
@@ -113,17 +125,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
113
  response.raise_for_status()
114
  result_data = response.json()
115
  final_status = (
116
- f"Submission Successful!\n"
117
- f"User: {result_data.get('username')}\n"
118
  f"Score: {result_data.get('score')}% "
119
- f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
120
  f"Message: {result_data.get('message')}"
121
  )
122
  return final_status, pd.DataFrame(results_log)
123
  except Exception as e:
124
  return f"Submission failed: {e}", pd.DataFrame(results_log)
125
 
126
- # --- Gradio UI ---
127
  with gr.Blocks() as demo:
128
  gr.Markdown("# 🧠 GAIA Agent Evaluation")
129
  gr.Markdown("""
 
1
  import os
2
+ import time
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
 
7
+ from smolagents import CodeAgent, OpenAIServerModel
8
+ from langchain_community.tools import DuckDuckGoSearchRun
9
 
10
  # Constants
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
+ MAX_QUESTION_LENGTH = 4000
13
+ MAX_WEBPAGE_CONTENT = 3000
14
+
15
+ # --- Reliable DuckDuckGo Tool with Retry ---
16
class ReliableDuckDuckGoTool(DuckDuckGoSearchRun):
    """DuckDuckGo search tool that retries a query when it is rate limited."""

    def _run(self, query: str) -> str:
        """Run the search, making up to three attempts on rate-limit errors.

        An error counts as a rate limit when its message contains "ratelimit"
        (case-insensitive) or "202". The wait between attempts grows linearly
        (3 s, then 6 s). Any other error is re-raised immediately.

        Raises:
            RuntimeError: if every attempt was rate limited; the last
                underlying error is attached as the cause.
        """
        # Kept as locals rather than class attributes: the base class is a
        # pydantic model, which rejects un-annotated class-level fields.
        max_attempts = 3
        base_delay_seconds = 3
        last_error = None

        for attempt in range(1, max_attempts + 1):
            try:
                return super()._run(query)
            except Exception as e:
                message = str(e).lower()
                if "ratelimit" not in message and "202" not in message:
                    # Not a rate limit: propagate with the original traceback.
                    raise
                last_error = e
                print(f"Rate limited. Retry {attempt}/{max_attempts}...")
                # No point sleeping after the final attempt; fail right away.
                if attempt < max_attempts:
                    time.sleep(base_delay_seconds * attempt)

        raise RuntimeError("DuckDuckGo search failed after retries") from last_error
28
+
29
+ # --- Smart GAIA Agent ---
30
  class SmartGAIAAgent:
31
  def __init__(self):
32
  self.api_key = os.getenv("OPENAI_API_KEY")
33
  if not self.api_key:
34
  raise ValueError("Missing OPENAI_API_KEY")
35
  self.model = OpenAIServerModel(model_id="gpt-4", api_key=self.api_key)
 
36
  self.agent = CodeAgent(
37
+ tools=[ReliableDuckDuckGoTool()],
38
  model=self.model,
39
  add_base_tools=True
40
  )
41
 
42
+ def truncate_question(self, question: str) -> str:
43
  return question[:MAX_QUESTION_LENGTH]
44
 
45
  def __call__(self, question: str) -> str:
46
  try:
47
+ clean_question = self.truncate_question(question)
48
  result = self.agent.run(clean_question)
49
  return result.strip()
50
  except Exception as e:
51
  print(f"Agent error: {e}")
52
  return "error"
53
 
54
+ # --- Evaluation + Submission ---
55
  def run_and_submit_all(profile: gr.OAuthProfile | None):
56
  space_id = os.getenv("SPACE_ID")
57
  if profile:
 
60
  else:
61
  return "Please Login to Hugging Face with the button.", None
62
 
63
+ questions_url = f"{DEFAULT_API_URL}/questions"
64
+ submit_url = f"{DEFAULT_API_URL}/submit"
 
65
 
66
  try:
67
  agent = SmartGAIAAgent()
 
69
  return f"Error initializing agent: {e}", None
70
 
71
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
72
 
73
  try:
74
  response = requests.get(questions_url, timeout=15)
 
84
  task_id = item.get("task_id")
85
  question_text = item.get("question", "")
86
 
 
 
 
 
 
87
  if not task_id or not question_text:
88
  continue
89
  if len(question_text) > MAX_QUESTION_LENGTH:
90
  print(f"Skipping long question: {task_id}")
91
  continue
92
+ if any(keyword in question_text.lower() for keyword in [
93
+ 'attached', '.mp3', '.wav', '.png', '.jpg', '.jpeg',
94
+ 'youtube', '.mp4', 'video', 'listen', 'watch'
95
+ ]):
96
+ print(f"Skipping unsupported media question: {task_id}")
97
  continue
98
 
99
  try:
 
125
  response.raise_for_status()
126
  result_data = response.json()
127
  final_status = (
128
+ f"Submission Successful!\\n"
129
+ f"User: {result_data.get('username')}\\n"
130
  f"Score: {result_data.get('score')}% "
131
+ f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\\n"
132
  f"Message: {result_data.get('message')}"
133
  )
134
  return final_status, pd.DataFrame(results_log)
135
  except Exception as e:
136
  return f"Submission failed: {e}", pd.DataFrame(results_log)
137
 
138
+ # --- Gradio Interface ---
139
  with gr.Blocks() as demo:
140
  gr.Markdown("# 🧠 GAIA Agent Evaluation")
141
  gr.Markdown("""