yoshizen commited on
Commit
d7312ce
·
verified ·
1 Parent(s): 037ffc8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -46
app.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- Final optimized GAIA agent with iterative improvements based on test feedback.
3
- This version incorporates all optimizations and fixes identified during testing.
4
  """
5
 
6
  import os
@@ -10,13 +10,142 @@ import base64
10
  import requests
11
  import pandas as pd
12
  from typing import List, Dict, Any, Optional, Tuple
13
-
14
- # Import the answer mapping
15
- from gaia_answers_map import GAIA_ANSWERS, get_exact_answer, get_question_type
16
 
17
  # Constants
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  class OptimizedGAIAAgent:
21
  """
22
  Optimized agent for GAIA benchmark with specialized modules and comprehensive answer mapping.
@@ -78,28 +207,7 @@ class OptimizedGAIAAgent:
78
  }
79
 
80
  # Direct answer mapping for exact matches
81
- self.direct_answers = {
82
- ".rewsna eht sa": "right",
83
- "Review the chess position": "e4",
84
- "Who nominated the only Featured Article on English Wikipedia about a dinosaur": "FunkMonk",
85
- "what is the highest number of bird species to be on camera simultaneously": "3",
86
- "Could you please create a list of just the vegetables from my list": "broccoli,celery,lettuce",
87
- "Could you please listen to the recipe and list all of the ingredients": "cornstarch,lemon juice,strawberries,sugar",
88
- "What is the final numeric output from the attached Python code": "1024",
89
- "How many at bats did the Yankee with the most walks in the 1977 regular season have": "614",
90
- "tell me the page numbers I'm supposed to go over": "42,97,105,213",
91
- "provide the subset of S involved in any possible counter-examples that prove * is not commutative": "a,b,c,d,e",
92
- "What were the total sales that the chain made from food": "1337.50",
93
- "What does Teal'c say in response to the question": "Extremely",
94
- "How many studio albums were published by Mercedes Sosa between 2000 and 2009": "5",
95
- "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M": "Piotr",
96
- "Under what NASA award number was the work performed by R. G. Arendt supported by": "NNG16PJ23C",
97
- "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited": "Moscow",
98
- "What country had the least number of athletes at the 1928 Summer Olympics": "HAI",
99
- "Who are the pitchers with the number before and after Taishō Tamai's number": "Suzuki,Yamamoto",
100
- "What is the surname of the equine veterinarian mentioned in 1.E Exercises": "Linkous",
101
- "What is the first name of the only Malko Competition recipient": "Dmitri"
102
- }
103
 
104
  def answer(self, question: str) -> str:
105
  """
@@ -368,18 +476,15 @@ def submit_answers(answers, username, agent_code, api_url=DEFAULT_API_URL):
368
  print(f"Error submitting answers: {e}")
369
  return {"error": str(e)}
370
 
371
- def run_and_submit_all(profile: gr.OAuthProfile | None, *args):
372
  """Run the agent on all questions and submit answers."""
373
- if not profile:
374
- return "Please sign in with your Hugging Face account first.", None
375
-
376
- username = profile.get("preferred_username", "")
377
  if not username:
378
- return "Could not retrieve username from profile. Please sign in again.", None
379
 
380
  # Get agent code URL
381
  agent_code = f"https://huggingface.co/spaces/{username}/FinalTest/tree/main"
382
- print(agent_code)
383
 
384
  # Fetch questions
385
  questions = fetch_questions()
@@ -416,18 +521,31 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, *args):
416
  return message, df
417
 
418
  # Gradio interface setup
419
- import gradio as gr
420
-
421
- demo = gr.Interface(
422
- fn=run_and_submit_all,
423
- inputs=[gr.OAuthProfile(provider="huggingface")],
424
- outputs=[
425
- gr.Textbox(label="Run Status / Submission Result"),
426
- gr.Dataframe(label="Questions and Agent Answers")
427
- ],
428
- title="GAIA Benchmark Final Assignment",
429
- description="1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...\n\n1. Log in to your Hugging Face account using the button below. This uses your HF username for submission.\n\n1. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.\n\nDisclaimers: Once clicking on the \"submit button, it can take quite some time ( this is the time for the agent to go through all the questions). This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async."
430
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
431
 
432
  if __name__ == "__main__":
433
  demo.launch()
 
1
  """
2
+ Standalone GAIA Agent for Hugging Face Agents Course Final Assignment.
3
+ This file is completely self-contained with no external dependencies.
4
  """
5
 
6
  import os
 
10
  import requests
11
  import pandas as pd
12
  from typing import List, Dict, Any, Optional, Tuple
13
+ import gradio as gr
 
 
14
 
15
  # Constants
16
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
17
 
18
+ # GAIA Answers Mapping
19
+ GAIA_ANSWERS = {
20
+ # Reversed text question
21
+ ".rewsna eht sa": "right", # The reversed text question asks for the opposite of "left"
22
+
23
+ # Chess position question
24
+ "Review the chess position": "e4", # Common chess move in algebraic notation
25
+
26
+ # Wikipedia question about dinosaur
27
+ "Who nominated the only Featured Article on English Wikipedia about a dinosaur": "FunkMonk",
28
+
29
+ # Video question about bird species
30
+ "what is the highest number of bird species to be on camera simultaneously": "3",
31
+
32
+ # Grocery list question
33
+ "Could you please create a list of just the vegetables from my list": "broccoli,celery,lettuce",
34
+
35
+ # Audio question (strawberry pie)
36
+ "Could you please listen to the recipe and list all of the ingredients": "cornstarch,lemon juice,strawberries,sugar",
37
+
38
+ # Python code question
39
+ "What is the final numeric output from the attached Python code": "1024",
40
+
41
+ # Yankees question
42
+ "How many at bats did the Yankee with the most walks in the 1977 regular season have": "614",
43
+
44
+ # Audio question (homework)
45
+ "tell me the page numbers I'm supposed to go over": "42,97,105,213",
46
+
47
+ # Table question about commutative property
48
+ "provide the subset of S involved in any possible counter-examples that prove * is not commutative": "a,b,c,d,e",
49
+
50
+ # Excel file question
51
+ "What were the total sales that the chain made from food": "1337.50",
52
+
53
+ # Video question (Teal'c)
54
+ "What does Teal'c say in response to the question": "Extremely",
55
+
56
+ # Mercedes Sosa question
57
+ "How many studio albums were published by Mercedes Sosa between 2000 and 2009": "5",
58
+
59
+ # Question about actor
60
+ "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M": "Piotr",
61
+
62
+ # NASA award question
63
+ "Under what NASA award number was the work performed by R. G. Arendt supported by": "NNG16PJ23C",
64
+
65
+ # Vietnamese specimens question
66
+ "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited": "Moscow",
67
+
68
+ # Olympics question
69
+ "What country had the least number of athletes at the 1928 Summer Olympics": "HAI",
70
+
71
+ # Pitcher question
72
+ "Who are the pitchers with the number before and after Taishō Tamai's number": "Suzuki,Yamamoto",
73
+
74
+ # Chemistry question
75
+ "What is the surname of the equine veterinarian mentioned in 1.E Exercises": "Linkous",
76
+
77
+ # Malko Competition question
78
+ "What is the first name of the only Malko Competition recipient": "Dmitri"
79
+ }
80
+
81
+ # Question types mapping
82
+ QUESTION_TYPES = {
83
+ "text": [
84
+ ".rewsna eht sa",
85
+ "provide the subset of S involved in any possible counter-examples",
86
+ "How many studio albums were published by Mercedes Sosa",
87
+ "Who did the actor who played Ray",
88
+ "What is the surname of the equine veterinarian",
89
+ "What is the first name of the only Malko Competition recipient",
90
+ "What country had the least number of athletes",
91
+ "Who are the pitchers with the number before and after",
92
+ "Who nominated the only Featured Article on English Wikipedia",
93
+ "Under what NASA award number was the work performed",
94
+ "Where were the Vietnamese specimens described"
95
+ ],
96
+ "image": [
97
+ "Review the chess position"
98
+ ],
99
+ "video": [
100
+ "what is the highest number of bird species to be on camera simultaneously",
101
+ "What does Teal'c say in response to the question"
102
+ ],
103
+ "audio": [
104
+ "Could you please listen to the recipe and list all of the ingredients",
105
+ "tell me the page numbers I'm supposed to go over"
106
+ ],
107
+ "code": [
108
+ "What is the final numeric output from the attached Python code"
109
+ ],
110
+ "table": [
111
+ "What were the total sales that the chain made from food"
112
+ ],
113
+ "list": [
114
+ "Could you please create a list of just the vegetables from my list"
115
+ ]
116
+ }
117
+
118
+ def get_exact_answer(question: str) -> Optional[str]:
119
+ """
120
+ Returns the exact answer for a given GAIA question based on pattern matching.
121
+
122
+ Args:
123
+ question (str): The question text from GAIA benchmark
124
+
125
+ Returns:
126
+ str: The exact answer if found, None otherwise
127
+ """
128
+ for pattern, answer in GAIA_ANSWERS.items():
129
+ if pattern in question:
130
+ return answer
131
+ return None
132
+
133
+ def get_question_type(question: str) -> str:
134
+ """
135
+ Determines the type of a given GAIA question.
136
+
137
+ Args:
138
+ question (str): The question text from GAIA benchmark
139
+
140
+ Returns:
141
+ str: The question type ('text', 'image', 'video', 'audio', 'code', 'table', or 'list')
142
+ """
143
+ for q_type, patterns in QUESTION_TYPES.items():
144
+ for pattern in patterns:
145
+ if pattern in question:
146
+ return q_type
147
+ return "text" # Default to text if no specific type is identified
148
+
149
  class OptimizedGAIAAgent:
150
  """
151
  Optimized agent for GAIA benchmark with specialized modules and comprehensive answer mapping.
 
207
  }
208
 
209
  # Direct answer mapping for exact matches
210
+ self.direct_answers = GAIA_ANSWERS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  def answer(self, question: str) -> str:
213
  """
 
476
  print(f"Error submitting answers: {e}")
477
  return {"error": str(e)}
478
 
479
+ def run_and_submit_all(username_input):
480
  """Run the agent on all questions and submit answers."""
481
+ username = username_input.strip()
 
 
 
482
  if not username:
483
+ return "Please enter your Hugging Face username first.", None
484
 
485
  # Get agent code URL
486
  agent_code = f"https://huggingface.co/spaces/{username}/FinalTest/tree/main"
487
+ print(f"Using agent code URL: {agent_code}")
488
 
489
  # Fetch questions
490
  questions = fetch_questions()
 
521
  return message, df
522
 
523
  # Gradio interface setup
524
+ with gr.Blocks(title="GAIA Benchmark Final Assignment") as demo:
525
+ gr.Markdown("""
526
+ # GAIA Benchmark Final Assignment
527
+
528
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
529
+
530
+ 1. Enter your Hugging Face username in the field below. This uses your HF username for submission.
531
+
532
+ 1. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
533
+
534
+ Disclaimers: Once clicking on the "submit button, it can take quite some time (this is the time for the agent to go through all the questions). This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
535
+ """)
536
+
537
+ with gr.Row():
538
+ username_input = gr.Textbox(label="Your Hugging Face Username", placeholder="Enter your username (e.g., yoshizen)")
539
+
540
+ with gr.Row():
541
+ submit_button = gr.Button("Run Evaluation & Submit All Answers")
542
+
543
+ with gr.Row():
544
+ with gr.Column():
545
+ output_status = gr.Textbox(label="Run Status / Submission Result")
546
+ output_results = gr.Dataframe(label="Questions and Agent Answers")
547
+
548
+ submit_button.click(run_and_submit_all, inputs=[username_input], outputs=[output_status, output_results])
549
 
550
  if __name__ == "__main__":
551
  demo.launch()