dlaima committed on
Commit
ef65c0f
·
verified ·
1 Parent(s): 8fd0023

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -40
app.py CHANGED
@@ -9,32 +9,32 @@ from audio_transcriber import AudioTranscriptionTool
9
  from image_analyzer import ImageAnalysisTool
10
  from wikipedia_searcher import WikipediaSearcher
11
 
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
  GAIA_SYSTEM_PROMPT = """You are an agent solving the GAIA benchmark and you are required to provide exact answers.
15
  Rules to follow:
16
  1. Return only the exact requested answer: no explanation and no reasoning.
17
- 2. For yes/no questions, return exactly "Yes" or "No".
18
  3. For dates, use the exact format requested.
19
  4. For numbers, use the exact number, no other format.
20
  5. For names, use the exact name as found in sources.
21
  6. If the question has an associated file, download the file first using the task ID.
22
  Examples of good responses:
23
- - "42"
24
- - "Arturo Nunez"
25
- - "Yes"
26
- - "October 5, 2001"
27
- - "Buenos Aires"
28
- Never include phrases like "the answer is..." or "Based on my research".
29
- Only return the exact answer.
30
- """
31
 
32
  class GaiaAgent:
33
  def __init__(self):
34
  print("Gaia Agent Initialized")
35
 
36
  self.model = InferenceClientModel(
37
- model_id="mistralai/Mistral-7B-Instruct-v0.2",
38
  token=os.getenv("HF_API_TOKEN", "").strip()
39
  )
40
 
@@ -49,28 +49,13 @@ class GaiaAgent:
49
  model=self.model
50
  )
51
 
52
- def __call__(self, question: str, task_id: str = "") -> str:
53
  print(f"Agent received question (first 50 chars): {question[:50]}...")
54
 
55
- file_path = None
56
- if task_id:
57
- try:
58
- file_url = f"https://agents-course-unit4-scoring.hf.space/file={task_id}"
59
- print(f"Attempting to download file from {file_url}")
60
- response = requests.get(file_url)
61
- response.raise_for_status()
62
- file_path = f"/tmp/{task_id}"
63
- with open(file_path, "wb") as f:
64
- f.write(response.content)
65
- print(f"Downloaded file for task {task_id} to {file_path}")
66
- except Exception as e:
67
- print(f"Warning: Failed to download file for {task_id}: {e}")
68
-
69
  try:
70
  result = self.agent.run(
71
- input=question,
72
- system_prompt=GAIA_SYSTEM_PROMPT,
73
- files=[file_path] if file_path else None
74
  )
75
  print(f"Raw result from agent: {result}")
76
 
@@ -127,23 +112,24 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
127
  answers_payload = []
128
  print(f"Running agent on {len(questions_data)} questions...")
129
  for item in questions_data:
130
- task_id = item.get("task_id", "")
131
- question = item.get("question", "")
132
- if not question:
133
  continue
134
  try:
135
- submitted_answer = agent(question, task_id)
 
 
136
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
137
  results_log.append({
138
  "Task ID": task_id,
139
- "Question": question,
140
  "Submitted Answer": submitted_answer
141
  })
142
  except Exception as e:
143
  error_msg = f"AGENT ERROR: {e}"
144
  results_log.append({
145
  "Task ID": task_id,
146
- "Question": question,
147
  "Submitted Answer": error_msg
148
  })
149
 
@@ -181,14 +167,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
181
  except Exception as e:
182
  return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
183
 
 
184
  with gr.Blocks() as demo:
185
  gr.Markdown("# Basic Agent Evaluation Runner")
186
- gr.Markdown("""\
187
- **Instructions:**
188
- 1. Clone this space and define your agent and tools.
189
- 2. Log in to your Hugging Face account using the button below.
190
- 3. Click 'Run Evaluation & Submit All Answers' to test your agent and submit results.
191
- """)
192
 
193
  gr.LoginButton()
194
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
9
  from image_analyzer import ImageAnalysisTool
10
  from wikipedia_searcher import WikipediaSearcher
11
 
12
+ # GAIA scoring endpoint
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
  GAIA_SYSTEM_PROMPT = """You are an agent solving the GAIA benchmark and you are required to provide exact answers.
16
  Rules to follow:
17
  1. Return only the exact requested answer: no explanation and no reasoning.
18
+ 2. For yes/no questions, return exactly \"Yes\" or \"No\".
19
  3. For dates, use the exact format requested.
20
  4. For numbers, use the exact number, no other format.
21
  5. For names, use the exact name as found in sources.
22
  6. If the question has an associated file, download the file first using the task ID.
23
  Examples of good responses:
24
+ - \"42\"
25
+ - \"Arturo Nunez\"
26
+ - \"Yes\"
27
+ - \"October 5, 2001\"
28
+ - \"Buenos Aires\"
29
+ Never include phrases like \"the answer is...\" or \"Based on my research\".
30
+ Only return the exact answer."""
 
31
 
32
  class GaiaAgent:
33
  def __init__(self):
34
  print("Gaia Agent Initialized")
35
 
36
  self.model = InferenceClientModel(
37
+ model_id="HuggingFaceH4/zephyr-7b-beta",
38
  token=os.getenv("HF_API_TOKEN", "").strip()
39
  )
40
 
 
49
  model=self.model
50
  )
51
 
52
+ def __call__(self, question: str) -> str:
53
  print(f"Agent received question (first 50 chars): {question[:50]}...")
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  try:
56
  result = self.agent.run(
57
+ question,
58
+ system_prompt=GAIA_SYSTEM_PROMPT
 
59
  )
60
  print(f"Raw result from agent: {result}")
61
 
 
112
  answers_payload = []
113
  print(f"Running agent on {len(questions_data)} questions...")
114
  for item in questions_data:
115
+ task_id = item.get("task_id")
116
+ if not task_id:
 
117
  continue
118
  try:
119
+ submitted_answer = agent(item.get("question", ""))
120
+ print(f"Q: {item.get('question', '')[:60]}...")
121
+ print(f"A: {submitted_answer}\n")
122
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
123
  results_log.append({
124
  "Task ID": task_id,
125
+ "Question": item.get("question", ""),
126
  "Submitted Answer": submitted_answer
127
  })
128
  except Exception as e:
129
  error_msg = f"AGENT ERROR: {e}"
130
  results_log.append({
131
  "Task ID": task_id,
132
+ "Question": item.get("question", ""),
133
  "Submitted Answer": error_msg
134
  })
135
 
 
167
  except Exception as e:
168
  return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
169
 
170
+ # Gradio UI
171
  with gr.Blocks() as demo:
172
  gr.Markdown("# Basic Agent Evaluation Runner")
173
+ gr.Markdown("""
174
+ **Instructions:**
175
+ 1. Clone this space and define your agent and tools.
176
+ 2. Log in to your Hugging Face account using the button below.
177
+ 3. Click 'Run Evaluation & Submit All Answers' to test your agent and submit results.
178
+ """)
179
 
180
  gr.LoginButton()
181
  run_button = gr.Button("Run Evaluation & Submit All Answers")