dlaima committed on
Commit
8fd0023
·
verified ·
1 Parent(s): 00266d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -37
app.py CHANGED
@@ -9,16 +9,32 @@ from audio_transcriber import AudioTranscriptionTool
9
  from image_analyzer import ImageAnalysisTool
10
  from wikipedia_searcher import WikipediaSearcher
11
 
12
- # GAIA scoring endpoint
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
- # Define the GaiaAgent class with embedded prompt in __call__
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  class GaiaAgent:
17
  def __init__(self):
18
  print("Gaia Agent Initialized")
19
 
20
  self.model = InferenceClientModel(
21
- model_id="cognitivecomputations/dolphin-2.6-mixtral-8x7b",
22
  token=os.getenv("HF_API_TOKEN", "").strip()
23
  )
24
 
@@ -33,31 +49,29 @@ class GaiaAgent:
33
  model=self.model
34
  )
35
 
36
- def __call__(self, question: str) -> str:
37
  print(f"Agent received question (first 50 chars): {question[:50]}...")
38
 
39
- prompt = f"""You are an agent solving the GAIA benchmark and you are required to provide exact answers.
40
- Rules to follow:
41
- 1. Return only the exact requested answer: no explanation and no reasoning.
42
- 2. For yes/no questions, return exactly \"Yes\" or \"No\".
43
- 3. For dates, use the exact format requested.
44
- 4. For numbers, use the exact number, no other format.
45
- 5. For names, use the exact name as found in sources.
46
- 6. If the question has an associated file, download the file first using the task ID.
47
- Examples of good responses:
48
- - \"42\"
49
- - \"Arturo Nunez\"
50
- - \"Yes\"
51
- - \"October 5, 2001\"
52
- - \"Buenos Aires\"
53
- Never include phrases like \"the answer is...\" or \"Based on my research\".
54
- Only return the exact answer.
55
- QUESTION:
56
- {question}
57
- """
58
 
59
  try:
60
- result = self.agent.run(prompt)
 
 
 
 
61
  print(f"Raw result from agent: {result}")
62
 
63
  if isinstance(result, dict) and "answer" in result:
@@ -75,7 +89,6 @@ QUESTION:
75
  print(f"Exception during agent run: {e}")
76
  return f"AGENT ERROR: {e}"
77
 
78
- # Evaluation + Submission function
79
  def run_and_submit_all(profile: gr.OAuthProfile | None):
80
  space_id = os.getenv("SPACE_ID")
81
 
@@ -114,22 +127,23 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
114
  answers_payload = []
115
  print(f"Running agent on {len(questions_data)} questions...")
116
  for item in questions_data:
117
- task_id = item.get("task_id")
118
- if not task_id:
 
119
  continue
120
  try:
121
- submitted_answer = agent(item.get("question", ""))
122
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
123
  results_log.append({
124
  "Task ID": task_id,
125
- "Question": item.get("question", ""),
126
  "Submitted Answer": submitted_answer
127
  })
128
  except Exception as e:
129
  error_msg = f"AGENT ERROR: {e}"
130
  results_log.append({
131
  "Task ID": task_id,
132
- "Question": item.get("question", ""),
133
  "Submitted Answer": error_msg
134
  })
135
 
@@ -167,15 +181,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
167
  except Exception as e:
168
  return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
169
 
170
- # Gradio UI
171
  with gr.Blocks() as demo:
172
  gr.Markdown("# Basic Agent Evaluation Runner")
173
- gr.Markdown("""
174
- **Instructions:**
175
- 1. Clone this space and define your agent and tools.
176
- 2. Log in to your Hugging Face account using the button below.
177
- 3. Click 'Run Evaluation & Submit All Answers' to test your agent and submit results.
178
- """)
179
 
180
  gr.LoginButton()
181
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
9
  from image_analyzer import ImageAnalysisTool
10
  from wikipedia_searcher import WikipediaSearcher
11
 
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
+ GAIA_SYSTEM_PROMPT = """You are an agent solving the GAIA benchmark and you are required to provide exact answers.
15
+ Rules to follow:
16
+ 1. Return only the exact requested answer: no explanation and no reasoning.
17
+ 2. For yes/no questions, return exactly "Yes" or "No".
18
+ 3. For dates, use the exact format requested.
19
+ 4. For numbers, use the exact number, no other format.
20
+ 5. For names, use the exact name as found in sources.
21
+ 6. If the question has an associated file, download the file first using the task ID.
22
+ Examples of good responses:
23
+ - "42"
24
+ - "Arturo Nunez"
25
+ - "Yes"
26
+ - "October 5, 2001"
27
+ - "Buenos Aires"
28
+ Never include phrases like "the answer is..." or "Based on my research".
29
+ Only return the exact answer.
30
+ """
31
+
32
  class GaiaAgent:
33
  def __init__(self):
34
  print("Gaia Agent Initialized")
35
 
36
  self.model = InferenceClientModel(
37
+ model_id="mistralai/Mistral-7B-Instruct-v0.2",
38
  token=os.getenv("HF_API_TOKEN", "").strip()
39
  )
40
 
 
49
  model=self.model
50
  )
51
 
52
+ def __call__(self, question: str, task_id: str = "") -> str:
53
  print(f"Agent received question (first 50 chars): {question[:50]}...")
54
 
55
+ file_path = None
56
+ if task_id:
57
+ try:
58
+ file_url = f"https://agents-course-unit4-scoring.hf.space/file={task_id}"
59
+ print(f"Attempting to download file from {file_url}")
60
+ response = requests.get(file_url)
61
+ response.raise_for_status()
62
+ file_path = f"/tmp/{task_id}"
63
+ with open(file_path, "wb") as f:
64
+ f.write(response.content)
65
+ print(f"Downloaded file for task {task_id} to {file_path}")
66
+ except Exception as e:
67
+ print(f"Warning: Failed to download file for {task_id}: {e}")
 
 
 
 
 
 
68
 
69
  try:
70
+ result = self.agent.run(
71
+ input=question,
72
+ system_prompt=GAIA_SYSTEM_PROMPT,
73
+ files=[file_path] if file_path else None
74
+ )
75
  print(f"Raw result from agent: {result}")
76
 
77
  if isinstance(result, dict) and "answer" in result:
 
89
  print(f"Exception during agent run: {e}")
90
  return f"AGENT ERROR: {e}"
91
 
 
92
  def run_and_submit_all(profile: gr.OAuthProfile | None):
93
  space_id = os.getenv("SPACE_ID")
94
 
 
127
  answers_payload = []
128
  print(f"Running agent on {len(questions_data)} questions...")
129
  for item in questions_data:
130
+ task_id = item.get("task_id", "")
131
+ question = item.get("question", "")
132
+ if not question:
133
  continue
134
  try:
135
+ submitted_answer = agent(question, task_id)
136
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
137
  results_log.append({
138
  "Task ID": task_id,
139
+ "Question": question,
140
  "Submitted Answer": submitted_answer
141
  })
142
  except Exception as e:
143
  error_msg = f"AGENT ERROR: {e}"
144
  results_log.append({
145
  "Task ID": task_id,
146
+ "Question": question,
147
  "Submitted Answer": error_msg
148
  })
149
 
 
181
  except Exception as e:
182
  return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
183
 
 
184
  with gr.Blocks() as demo:
185
  gr.Markdown("# Basic Agent Evaluation Runner")
186
+ gr.Markdown("""\
187
+ **Instructions:**
188
+ 1. Clone this space and define your agent and tools.
189
+ 2. Log in to your Hugging Face account using the button below.
190
+ 3. Click 'Run Evaluation & Submit All Answers' to test your agent and submit results.
191
+ """)
192
 
193
  gr.LoginButton()
194
  run_button = gr.Button("Run Evaluation & Submit All Answers")