dlaima committed on
Commit
bc758d9
·
verified ·
1 Parent(s): ef65c0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -25
app.py CHANGED
@@ -9,25 +9,26 @@ from audio_transcriber import AudioTranscriptionTool
9
  from image_analyzer import ImageAnalysisTool
10
  from wikipedia_searcher import WikipediaSearcher
11
 
12
- # GAIA scoring endpoint
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
- GAIA_SYSTEM_PROMPT = """You are an agent solving the GAIA benchmark and you are required to provide exact answers.
16
- Rules to follow:
17
- 1. Return only the exact requested answer: no explanation and no reasoning.
18
- 2. For yes/no questions, return exactly \"Yes\" or \"No\".
19
- 3. For dates, use the exact format requested.
20
- 4. For numbers, use the exact number, no other format.
21
- 5. For names, use the exact name as found in sources.
22
- 6. If the question has an associated file, download the file first using the task ID.
23
- Examples of good responses:
24
- - \"42\"
25
- - \"Arturo Nunez\"
26
- - \"Yes\"
27
- - \"October 5, 2001\"
28
- - \"Buenos Aires\"
29
- Never include phrases like \"the answer is...\" or \"Based on my research\".
30
- Only return the exact answer."""
 
 
31
 
32
  class GaiaAgent:
33
  def __init__(self):
@@ -52,11 +53,10 @@ class GaiaAgent:
52
  def __call__(self, question: str) -> str:
53
  print(f"Agent received question (first 50 chars): {question[:50]}...")
54
 
 
 
55
  try:
56
- result = self.agent.run(
57
- question,
58
- system_prompt=GAIA_SYSTEM_PROMPT
59
- )
60
  print(f"Raw result from agent: {result}")
61
 
62
  if isinstance(result, dict) and "answer" in result:
@@ -97,7 +97,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
97
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
98
  print(f"Agent code URL: {agent_code}")
99
 
100
- print(f"Fetching questions from: {questions_url}")
101
  try:
102
  response = requests.get(questions_url, timeout=15)
103
  response.raise_for_status()
@@ -110,15 +109,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
110
 
111
  results_log = []
112
  answers_payload = []
113
- print(f"Running agent on {len(questions_data)} questions...")
114
  for item in questions_data:
115
  task_id = item.get("task_id")
116
  if not task_id:
117
  continue
118
  try:
119
  submitted_answer = agent(item.get("question", ""))
120
- print(f"Q: {item.get('question', '')[:60]}...")
121
- print(f"A: {submitted_answer}\n")
122
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
123
  results_log.append({
124
  "Task ID": task_id,
 
9
  from image_analyzer import ImageAnalysisTool
10
  from wikipedia_searcher import WikipediaSearcher
11
 
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
+ # Zephyr-compatible system prompt to prepend manually
15
+ SYSTEM_PROMPT = (
16
+ "You are an agent solving the GAIA benchmark and must provide exact answers.\n"
17
+ "Rules:\n"
18
+ "1. Return only the exact requested answer: no explanation.\n"
19
+ "2. For yes/no, return 'Yes' or 'No'.\n"
20
+ "3. For dates, use the exact requested format.\n"
21
+ "4. For numbers, use only the number.\n"
22
+ "5. For names, use the exact name from sources.\n"
23
+ "6. If the question has a file, download it using the task ID.\n"
24
+ "Examples:\n"
25
+ "- '42'\n"
26
+ "- 'Arturo Nunez'\n"
27
+ "- 'Yes'\n"
28
+ "- 'October 5, 2001'\n"
29
+ "- 'Buenos Aires'\n"
30
+ "Never say 'the answer is...'. Only return the answer.\n"
31
+ )
32
 
33
  class GaiaAgent:
34
  def __init__(self):
 
53
  def __call__(self, question: str) -> str:
54
  print(f"Agent received question (first 50 chars): {question[:50]}...")
55
 
56
+ full_prompt = f"{SYSTEM_PROMPT}\nQUESTION:\n{question}"
57
+
58
  try:
59
+ result = self.agent.run(full_prompt)
 
 
 
60
  print(f"Raw result from agent: {result}")
61
 
62
  if isinstance(result, dict) and "answer" in result:
 
97
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
98
  print(f"Agent code URL: {agent_code}")
99
 
 
100
  try:
101
  response = requests.get(questions_url, timeout=15)
102
  response.raise_for_status()
 
109
 
110
  results_log = []
111
  answers_payload = []
112
+
113
  for item in questions_data:
114
  task_id = item.get("task_id")
115
  if not task_id:
116
  continue
117
  try:
118
  submitted_answer = agent(item.get("question", ""))
 
 
119
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
120
  results_log.append({
121
  "Task ID": task_id,