dawid-lorek committed on
Commit
dc1160b
·
verified ·
1 Parent(s): f8e24f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -18
app.py CHANGED
@@ -7,9 +7,10 @@ from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel
7
 
8
  # Constants
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
- MAX_QUESTION_LENGTH = 4000 # to avoid GPT-4 8k token limit
 
11
 
12
- # --- Agent Definition using smolagents ---
13
  class SmartGAIAAgent:
14
  def __init__(self):
15
  self.api_key = os.getenv("OPENAI_API_KEY")
@@ -17,17 +18,19 @@ class SmartGAIAAgent:
17
  raise ValueError("Missing OPENAI_API_KEY")
18
  self.model = OpenAIServerModel(model_id="gpt-4", api_key=self.api_key)
19
 
20
- # Agent with DuckDuckGo + built-in Python interpreter
21
  self.agent = CodeAgent(
22
  tools=[DuckDuckGoSearchTool()],
23
  model=self.model,
24
  add_base_tools=True
25
  )
26
 
 
 
 
27
  def __call__(self, question: str) -> str:
28
  try:
29
- question = question[:MAX_QUESTION_LENGTH]
30
- result = self.agent.run(question)
31
  return result.strip()
32
  except Exception as e:
33
  print(f"Agent error: {e}")
@@ -65,16 +68,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
65
 
66
  for item in questions_data:
67
  task_id = item.get("task_id")
68
- question_text = item.get("question")
69
 
70
- # Skip invalid or long/multimodal questions
 
 
 
 
71
  if not task_id or not question_text:
72
  continue
73
  if len(question_text) > MAX_QUESTION_LENGTH:
74
  print(f"Skipping long question: {task_id}")
75
  continue
76
- if any(keyword in question_text.lower() for keyword in ['attached', '.mp3', '.wav', '.png', '.jpg', 'image']):
77
- print(f"Skipping file/audio/image question: {task_id}")
78
  continue
79
 
80
  try:
@@ -106,29 +113,28 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
106
  response.raise_for_status()
107
  result_data = response.json()
108
  final_status = (
109
- f"Submission Successful!\\n"
110
- f"User: {result_data.get('username')}\\n"
111
  f"Score: {result_data.get('score')}% "
112
- f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\\n"
113
  f"Message: {result_data.get('message')}"
114
  )
115
  return final_status, pd.DataFrame(results_log)
116
  except Exception as e:
117
  return f"Submission failed: {e}", pd.DataFrame(results_log)
118
 
119
- # --- Gradio Interface ---
120
  with gr.Blocks() as demo:
121
- gr.Markdown("# GAIA Agent Evaluation")
122
  gr.Markdown("""
123
- **Instructions:**
124
  1. Log in to Hugging Face
125
- 2. Click 'Run Evaluation' to generate and submit answers
126
- 3. Wait for the results
127
  """)
128
  gr.LoginButton()
129
  run_button = gr.Button("Run Evaluation & Submit All Answers")
130
  status_output = gr.Textbox(label="Submission Status", lines=5)
131
- results_table = gr.DataFrame(label="Results")
132
 
133
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
134
 
 
7
 
8
  # Constants
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
+ MAX_QUESTION_LENGTH = 4000 # Character-based limit for questions
11
+ MAX_WEBPAGE_CONTENT = 3000 # Character limit for visited pages (GPT-4 context safe)
12
 
13
+ # --- Agent Definition ---
14
  class SmartGAIAAgent:
15
  def __init__(self):
16
  self.api_key = os.getenv("OPENAI_API_KEY")
 
18
  raise ValueError("Missing OPENAI_API_KEY")
19
  self.model = OpenAIServerModel(model_id="gpt-4", api_key=self.api_key)
20
 
 
21
  self.agent = CodeAgent(
22
  tools=[DuckDuckGoSearchTool()],
23
  model=self.model,
24
  add_base_tools=True
25
  )
26
 
27
+ def truncate_if_needed(self, question: str) -> str:
28
+ return question[:MAX_QUESTION_LENGTH]
29
+
30
  def __call__(self, question: str) -> str:
31
  try:
32
+ clean_question = self.truncate_if_needed(question)
33
+ result = self.agent.run(clean_question)
34
  return result.strip()
35
  except Exception as e:
36
  print(f"Agent error: {e}")
 
68
 
69
  for item in questions_data:
70
  task_id = item.get("task_id")
71
+ question_text = item.get("question", "")
72
 
73
+ # Skip problematic questions
74
+ skip_keywords = [
75
+ 'attached', '.mp3', '.wav', '.png', '.jpg', '.jpeg',
76
+ 'youtube', '.mp4', 'video', 'listen', 'watch'
77
+ ]
78
  if not task_id or not question_text:
79
  continue
80
  if len(question_text) > MAX_QUESTION_LENGTH:
81
  print(f"Skipping long question: {task_id}")
82
  continue
83
+ if any(keyword in question_text.lower() for keyword in skip_keywords):
84
+ print(f"Skipping unsupported question ({task_id}): {question_text[:60]}...")
85
  continue
86
 
87
  try:
 
113
  response.raise_for_status()
114
  result_data = response.json()
115
  final_status = (
116
+ f"Submission Successful!\n"
117
+ f"User: {result_data.get('username')}\n"
118
  f"Score: {result_data.get('score')}% "
119
+ f"({result_data.get('correct_count')}/{result_data.get('total_attempted')})\n"
120
  f"Message: {result_data.get('message')}"
121
  )
122
  return final_status, pd.DataFrame(results_log)
123
  except Exception as e:
124
  return f"Submission failed: {e}", pd.DataFrame(results_log)
125
 
126
+ # --- Gradio UI ---
127
  with gr.Blocks() as demo:
128
+ gr.Markdown("# 🧠 GAIA Agent Evaluation")
129
  gr.Markdown("""
 
130
  1. Log in to Hugging Face
131
+ 2. Click 'Run Evaluation & Submit All Answers'
132
+ 3. View your score on the leaderboard
133
  """)
134
  gr.LoginButton()
135
  run_button = gr.Button("Run Evaluation & Submit All Answers")
136
  status_output = gr.Textbox(label="Submission Status", lines=5)
137
+ results_table = gr.DataFrame(label="Evaluation Results")
138
 
139
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
140