dlaima committed (verified)
Commit 36b55d3 · 1 Parent(s): 86758ba

Update app.py

Files changed (1)
  1. app.py +58 -99
app.py CHANGED
@@ -2,9 +2,9 @@ import os
  import gradio as gr
  import requests
  import pandas as pd
+ from typing import List, Dict

  from smolagents import CodeAgent, DuckDuckGoSearchTool, Tool
- from smolagents.models import OpenAIServerModel

  from wikipedia_searcher import WikipediaSearcher
  from audio_transcriber import AudioTranscriptionTool
@@ -30,11 +30,13 @@ class WikipediaSearchTool(Tool):
          return self.searcher.search(query)


- # Instantiate the Wikipedia search tool once
- wikipedia_search_tool = WikipediaSearchTool()
-
- # Static system prompt for GAIA exact answer format (no explanations)
- SYSTEM_PROMPT = """
+ # Hugging Face Inference API wrapper for chat completion
+ class HFChatModel:
+     def __init__(self, model_id: str):
+         self.model_id = model_id
+         self.api_url = f"https://api-inference.huggingface.co/models/{model_id}"
+         self.headers = {"Authorization": f"Bearer {os.getenv('HF_API_TOKEN')}"}
+         self.system_prompt = """
  You are an agent solving the GAIA benchmark and you are required to provide exact answers.
  Rules to follow:
  1. Return only the exact requested answer: no explanation and no reasoning.
@@ -52,47 +54,53 @@ Never include phrases like "the answer is..." or "Based on my research".
  Only return the exact answer.
  """

- # Set your actual API URL here (replace with the correct GAIA API URL)
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+     def generate(self, messages: List[Dict[str, str]]) -> str:
+         # Prepend system prompt as first message
+         all_messages = [{"role": "system", "content": self.system_prompt}] + messages
+
+         payload = {
+             "inputs": {
+                 "past_user_inputs": [],
+                 "generated_responses": [],
+                 "text": "\n".join(m["content"] for m in all_messages if m["role"] != "system")
+             }
+         }

- # Patched OpenAIServerModel to prepend system prompt
- class PatchedOpenAIServerModel(OpenAIServerModel):
-     def generate(self, messages, stop_sequences=None, **kwargs):
-         if isinstance(messages, list):
-             if not any(m["role"] == "system" for m in messages):
-                 messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages
+         # Some HF chat models expect just a string prompt; adjust accordingly per your model's requirements
+
+         response = requests.post(self.api_url, headers=self.headers, json=payload)
+         if response.status_code == 200:
+             output = response.json()
+             # Output format depends on model; adjust as needed
+             if isinstance(output, list) and len(output) > 0 and "generated_text" in output[0]:
+                 return output[0]["generated_text"].strip()
+             elif isinstance(output, dict) and "generated_text" in output:
+                 return output["generated_text"].strip()
+             else:
+                 # fallback to raw text
+                 return str(output).strip()
          else:
-             raise TypeError("Expected 'messages' to be a list of message dicts")
-         return super().generate(messages=messages, stop_sequences=stop_sequences, **kwargs)
+             raise RuntimeError(f"Hugging Face API error {response.status_code}: {response.text}")


  class MyAgent:
      def __init__(self):
-         self.model = PatchedOpenAIServerModel(model_id="gpt-4-turbo")
+         self.model = HFChatModel(model_id="gpt-4o-mini")  # Or any HF chat model you want
+
          self.agent = CodeAgent(
              tools=[
                  DuckDuckGoSearchTool(),
-                 wikipedia_search_tool,
+                 WikipediaSearchTool(),
                  AudioTranscriptionTool(),
                  ImageAnalysisTool(),
              ],
-             model=self.model,
+             model=self,  # We'll route calls via __call__ below
          )

-     def __call__(self, task: dict) -> str:
-         question_text = task.get("question", "")
-
-         # Merge any code or attachment content if available
-         if "code" in task:
-             question_text += f"\n\nAttached code:\n{task['code']}"
-         elif "attachment" in task:
-             question_text += f"\n\nAttached content:\n{task['attachment']}"
-
-         # Handle special known cases if needed (example)
-         if "L1vXCYZAYYM" in question_text or "https://www.youtube.com/watch?v=L1vXCYZAYYM" in question_text:
-             return "11"  # Example known answer without extra text
-
-         return self.agent.run(question_text)
+     def __call__(self, prompt: str) -> str:
+         # Construct chat message for HF model
+         messages = [{"role": "user", "content": prompt}]
+         return self.model.generate(messages)


  def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -100,67 +108,55 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):

      if profile:
          username = profile.username
-         print(f"User logged in: {username}")
      else:
-         print("User not logged in.")
          return "Please Login to Hugging Face with the button.", None

-     api_url = DEFAULT_API_URL
-     questions_url = f"{api_url}/questions"
-     submit_url = f"{api_url}/submit"
+     DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+     questions_url = f"{DEFAULT_API_URL}/questions"
+     submit_url = f"{DEFAULT_API_URL}/submit"

      try:
          agent = MyAgent()
      except Exception as e:
-         print(f"Error initializing agent: {e}")
          return f"Error initializing agent: {e}", None

      agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-     print(f"Agent code URL: {agent_code}")

-     print(f"Fetching questions from: {questions_url}")
      try:
          response = requests.get(questions_url, timeout=15)
          response.raise_for_status()
          questions_data = response.json()
-         if not questions_data:
-             return "Fetched questions list is empty or invalid format.", None
-         print(f"Fetched {len(questions_data)} questions.")
      except Exception as e:
          return f"Error fetching questions: {e}", None

      results_log = []
      answers_payload = []
-     print(f"Running agent on {len(questions_data)} questions...")
+
      for item in questions_data:
          task_id = item.get("task_id")
          if not task_id:
              continue
          try:
-             submitted_answer = agent(item)
-             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+             answer = agent(item.get("question", ""))
+             answers_payload.append({"task_id": task_id, "submitted_answer": answer})
              results_log.append({
                  "Task ID": task_id,
                  "Question": item.get("question", ""),
-                 "Submitted Answer": submitted_answer
+                 "Submitted Answer": answer
              })
          except Exception as e:
-             error_msg = f"AGENT ERROR: {e}"
              results_log.append({
                  "Task ID": task_id,
                  "Question": item.get("question", ""),
-                 "Submitted Answer": error_msg
+                 "Submitted Answer": f"Error: {e}"
              })

-     if not answers_payload:
-         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-
      submission_data = {
          "username": username.strip(),
          "agent_code": agent_code,
          "answers": answers_payload
      }
-     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+
      try:
          response = requests.post(submit_url, json=submission_data, timeout=60)
          response.raise_for_status()
@@ -172,58 +168,21 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
              f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
              f"Message: {result_data.get('message', 'No message received.')}"
          )
-         results_df = pd.DataFrame(results_log)
-         return final_status, results_df
-     except requests.exceptions.HTTPError as e:
-         try:
-             detail = e.response.json().get("detail", e.response.text)
-         except Exception:
-             detail = e.response.text[:500]
-         return f"Submission Failed: {detail}", pd.DataFrame(results_log)
-     except requests.exceptions.Timeout:
-         return "Submission Failed: The request timed out.", pd.DataFrame(results_log)
+         return final_status, pd.DataFrame(results_log)
      except Exception as e:
-         return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
+         return f"Submission failed: {e}", pd.DataFrame(results_log)


- # Gradio UI setup
  with gr.Blocks() as demo:
-     gr.Markdown("# Basic Agent Evaluation Runner")
-     gr.Markdown("""
-     **Instructions:**
-     1. Clone this space, modify code to define your agent's logic, tools, and packages.
-     2. Log in to your Hugging Face account using the button below.
-     3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see your score.
-     **Note:** Submitting can take some time.
-     """)
-
+     gr.Markdown("# Basic Agent Evaluation Runner (HF API)")
      gr.LoginButton()
-     run_button = gr.Button("Run Evaluation & Submit All Answers")
-
-     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+     run_btn = gr.Button("Run Evaluation & Submit All Answers")
+     status_out = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+     results_df = gr.DataFrame(label="Questions and Agent Answers")

-     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+     run_btn.click(fn=run_and_submit_all, outputs=[status_out, results_df])

  if __name__ == "__main__":
-     print("\n" + "-" * 30 + " App Starting " + "-" * 30)
-     space_host = os.getenv("SPACE_HOST")
-     space_id = os.getenv("SPACE_ID")
-
-     if space_host:
-         print(f"✅ SPACE_HOST found: {space_host}")
-         print(f"   Runtime URL should be: https://{space_host}.hf.space")
-     else:
-         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
-
-     if space_id:
-         print(f"✅ SPACE_ID found: {space_id}")
-         print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
-         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id}/tree/main")
-     else:
-         print("ℹ️ SPACE_ID environment variable not found (running locally?).")
-
-     print("-" * (60 + len(" App Starting ")) + "\n")
-     print("Launching Gradio Interface for Basic Agent Evaluation...")
      demo.launch(debug=True, share=False)

+
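
The new HFChatModel wraps the serverless Hugging Face Inference API and prepends the GAIA system prompt before each request. A minimal sketch of the same request made outside the app, useful for checking the token and payload shape before a full evaluation run; it assumes HF_API_TOKEN is exported, and since the committed default model_id "gpt-4o-mini" is an OpenAI model name that the HF Inference API is unlikely to serve, an arbitrary hosted instruct model is used here purely as a stand-in:

```python
import os
import requests

# Stand-in model id (assumption): substitute any chat/instruct model that the
# serverless Inference API actually hosts for your token.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
HEADERS = {"Authorization": f"Bearer {os.getenv('HF_API_TOKEN')}"}

# Same nested payload that HFChatModel.generate builds; as its in-code comment
# notes, many text-generation endpoints expect a plain string instead,
# e.g. {"inputs": "What is 2 + 2?"}.
payload = {
    "inputs": {
        "past_user_inputs": [],
        "generated_responses": [],
        "text": "What is 2 + 2?",
    }
}

response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
response.raise_for_status()
print(response.json())  # a list of dicts or a single dict, depending on the model
```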
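
run_and_submit_all talks to the scoring Space at two endpoints: GET /questions returns a JSON list of tasks with task_id and question fields, and POST /submit expects the username, the agent_code repo URL, and one submitted_answer per task. A small sketch of those shapes, assembled without actually submitting; the username, Space path, and "TODO" answers are placeholders only:

```python
import requests

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# GET /questions -> list of {"task_id": ..., "question": ...} dicts.
questions = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15).json()
print(f"Fetched {len(questions)} questions")

# POST /submit expects this structure (placeholders below, not a real submission).
submission = {
    "username": "some-user",
    "agent_code": "https://huggingface.co/spaces/some-user/some-space/tree/main",
    "answers": [
        {"task_id": q["task_id"], "submitted_answer": "TODO"}
        for q in questions
        if q.get("task_id")
    ],
}
print(f"Would submit {len(submission['answers'])} answers")
```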
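
The UI relies on Gradio's Hugging Face OAuth support: gr.LoginButton() renders the sign-in control, and because run_and_submit_all annotates its parameter as gr.OAuthProfile | None, Gradio injects the logged-in profile automatically, which is why run_btn.click passes no inputs. A stripped-down sketch of that pattern in isolation (the greet function and its labels are illustrative only):

```python
import gradio as gr

def greet(profile: gr.OAuthProfile | None) -> str:
    # Gradio fills this argument from the OAuth session based on the type
    # annotation; no input component is wired to it.
    if profile is None:
        return "Please log in with the button above."
    return f"Hello, {profile.username}!"

with gr.Blocks() as demo:
    gr.LoginButton()
    btn = gr.Button("Who am I?")
    out = gr.Textbox(label="Result")
    btn.click(fn=greet, outputs=out)

if __name__ == "__main__":
    demo.launch()
```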