dlaima committed on
Commit
61c2ff2
·
verified ·
1 Parent(s): 089476a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -175
app.py CHANGED
@@ -1,211 +1,136 @@
1
 
 
2
  import os
 
3
  import requests
4
  import pandas as pd
5
- import gradio as gr
 
 
6
 
7
- from smolagents import ToolCallingAgent, OpenAIServerModel
8
- from audio_transcriber import AudioTranscriptionTool
9
- from image_analyzer import ImageAnalysisTool
10
- from wikipedia_searcher import WikipediaSearcher
 
11
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
13
 
14
- class GaiaAgent:
15
- def __init__(self):
16
- tools = [
17
- AudioTranscriptionTool(),
18
- ImageAnalysisTool(),
19
- WikipediaSearcher()
20
- ]
21
-
22
- model_id = os.getenv("OPENAI_MODEL_ID", "gpt-3.5-turbo")
23
- self.agent = ToolCallingAgent(
24
- model=OpenAIServerModel(model_id=model_id),
25
- tools=tools
26
- )
27
 
28
- def __call__(self, question: str) -> str:
29
- prompt = f"""You are an agent solving the GAIA benchmark and you are required to provide exact answers.
30
- Rules to follow:
31
- 1. Return only the exact requested answer: no explanation and no reasoning.
32
- 2. For yes/no questions, return exactly "Yes" or "No".
33
- 3. For dates, use the exact format requested.
34
- 4. For numbers, use the exact number, no other format.
35
- 5. For names, use the exact name as found in sources.
36
- 6. If the question has an associated file, download the file first using the task ID.
37
- Examples of good responses:
38
- - "42"
39
- - "Pinco Palla"
40
- - "Yes"
41
- - "October 5, 2001"
42
- - "Buenos Aires"
43
- Never include phrases like "the answer is..." or "Based on my research".
44
- Only return the exact answer.
45
- QUESTION:
46
- {question}
47
- """
48
- result = self.agent.run(prompt)
49
- return result # ✅ Fixed: removed .get() since result is a string
50
-
51
-
52
- def run_and_submit_all(profile: gr.OAuthProfile | None):
53
- space_id = os.getenv("SPACE_ID")
54
-
55
- if profile:
56
- username = profile.username
57
- if isinstance(username, list):
58
- username = username[0]
59
- username = username.strip()
60
- print(f"User logged in: {username}")
61
- else:
62
- print("User not logged in.")
63
- return "Please Login to Hugging Face with the button.", None
64
-
65
- api_url = DEFAULT_API_URL
66
- questions_url = f"{api_url}/questions"
67
- submit_url = f"{api_url}/submit"
68
 
69
- try:
70
- agent = GaiaAgent()
71
- except Exception as e:
72
- print(f"Error initializing agent: {e}")
73
- return f"Error initializing agent: {e}", None
 
 
 
 
 
 
 
 
74
 
 
 
 
 
 
 
 
75
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
76
- print(f"Agent code URL: {agent_code}")
77
 
78
  try:
79
- response = requests.get(questions_url, timeout=15)
80
- response.raise_for_status()
81
- questions_data = response.json()
82
- if not questions_data:
83
- return "Fetched questions list is empty or invalid format.", None
84
- print(f"Fetched {len(questions_data)} questions.")
85
  except Exception as e:
86
- return f"Error fetching questions: {e}", None
87
-
88
- results_log = []
89
- answers_payload = []
90
 
91
- for item in questions_data:
 
92
  task_id = item.get("task_id")
93
- if not task_id:
 
94
  continue
95
-
96
- question_text = item.get("question", "")
97
-
98
- file_url = item.get("file_url")
99
- local_file_path = None
100
- if file_url:
101
- try:
102
- ext = file_url.split(".")[-1].lower()
103
- if ext in ["mp3", "wav", "jpeg", "jpg", "png"]:
104
- local_file_path = f"./temp_{task_id}.{ext}"
105
- with requests.get(file_url, stream=True) as r:
106
- r.raise_for_status()
107
- with open(local_file_path, "wb") as f:
108
- for chunk in r.iter_content(chunk_size=8192):
109
- f.write(chunk)
110
- print(f"Downloaded file for task {task_id} to {local_file_path}")
111
- question_text += f"\n\nFile path: {local_file_path}"
112
- except Exception as e:
113
- print(f"Failed to download file for task {task_id}: {e}")
114
-
115
  try:
116
- submitted_answer = agent(question_text)
117
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
118
- results_log.append({
119
- "Task ID": task_id,
120
- "Question": question_text,
121
- "Submitted Answer": submitted_answer
122
- })
123
  except Exception as e:
124
- error_msg = f"AGENT ERROR: {e}"
125
- results_log.append({
126
- "Task ID": task_id,
127
- "Question": question_text,
128
- "Submitted Answer": error_msg
129
- })
130
-
131
- if local_file_path:
132
- try:
133
- os.remove(local_file_path)
134
- except Exception:
135
- pass
136
-
137
- if not answers_payload:
138
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
139
-
140
- submission_data = {
141
- "username": username,
142
- "agent_code": agent_code,
143
- "answers": answers_payload
144
- }
145
-
146
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
147
  try:
148
- response = requests.post(submit_url, json=submission_data, timeout=60)
149
- response.raise_for_status()
150
- result_data = response.json()
151
- final_status = (
152
- f"Submission Successful!\n"
153
- f"User: {result_data.get('username')}\n"
154
- f"Overall Score: {result_data.get('score', 'N/A')}% "
155
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
156
- f"Message: {result_data.get('message', 'No message received.')}"
 
 
 
 
 
 
157
  )
158
- results_df = pd.DataFrame(results_log)
159
- return final_status, results_df
160
- except requests.exceptions.HTTPError as e:
161
- try:
162
- detail = e.response.json().get("detail", e.response.text)
163
- except Exception:
164
- detail = e.response.text[:500]
165
- return f"Submission Failed: {detail}", pd.DataFrame(results_log)
166
- except requests.exceptions.Timeout:
167
- return "Submission Failed: The request timed out.", pd.DataFrame(results_log)
168
  except Exception as e:
169
- return f"An unexpected error occurred during submission: {e}", pd.DataFrame(results_log)
170
 
171
 
172
- # Gradio UI
173
  with gr.Blocks() as demo:
174
  gr.Markdown("# Basic Agent Evaluation Runner")
175
- gr.Markdown("""\
176
- **Instructions:**
177
- 1. Clone this space and define your agent and tools.
178
- 2. Log in to your Hugging Face account using the button below.
179
- 3. Click 'Run Evaluation & Submit All Answers' to test your agent and submit results.
180
- """)
181
-
182
- gr.LoginButton()
183
- run_button = gr.Button("Run Evaluation & Submit All Answers")
184
 
 
 
 
 
185
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
186
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
187
 
188
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
 
 
189
 
190
  if __name__ == "__main__":
191
- print("\n" + "-"*30 + " App Starting " + "-"*30)
192
- space_host = os.getenv("SPACE_HOST")
193
- space_id = os.getenv("SPACE_ID")
194
-
195
- if space_host:
196
- print(f"✅ SPACE_HOST found: {space_host}")
197
- print(f" Runtime URL should be: https://{space_host}.hf.space")
198
- else:
199
- print("ℹ️ SPACE_HOST not found.")
200
-
201
- if space_id:
202
- print(f" SPACE_ID found: {space_id}")
203
- print(f" Repo URL: https://huggingface.co/spaces/{space_id}")
204
- else:
205
- print("ℹ️ SPACE_ID not found.")
206
-
207
- print("-"*(60 + len(" App Starting ")) + "\n")
208
- demo.launch(debug=True, share=False)
 
 
209
 
210
 
211
 
 
1
 
2
# app.py
3
  import os
4
+ import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ from smolagents import CodeAgent, DuckDuckGoSearchTool
8
+ from smolagents.models import OpenAIServerModel
9
+ import openai
10
 
11
# --- Setup ---
# The OpenAI key is mandatory: abort at import time with an actionable message
# rather than failing later on the first model call.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise RuntimeError("Please set OPENAI_API_KEY in your Space secrets.")
openai.api_key = OPENAI_API_KEY

# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Model name, overridable through the environment.
OPENAI_MODEL_ID = os.environ.get("OPENAI_MODEL_ID", "gpt-4o")

# One shared agent for the whole process: a code agent with a single search tool.
model = OpenAIServerModel(model_id=OPENAI_MODEL_ID, api_key=OPENAI_API_KEY)
search_tool = DuckDuckGoSearchTool()
agent = CodeAgent(tools=[search_tool], model=model)
 
 
 
 
 
 
 
 
 
 
23
 
24
# Instruction preamble that run_and_submit_all() prepends (after .strip()) to
# every question before handing it to the agent. It asks for a terse answer
# introduced by the literal marker "FINAL ANSWER:".
answer_formatting_prompt = """
You are a smart assistant with access to tools like DuckDuckGoSearchTool(query: str).
Think step-by-step, then output your response.

IMPORTANT:
FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers/strings.
Do NOT include commas, $ or % unless asked.
Write digits plainly (e.g., '10', not 'ten').

Use format:
FINAL ANSWER: <your_answer>
"""
36
+
37
def show_profile(profile):
    """Return a one-line login status message for the UI.

    Args:
        profile: The login information handed over by the UI. May be ``None``,
            an object exposing a ``username`` attribute, or a mapping with a
            ``"username"`` key. Any other value (for example a button label
            string) is treated as "not logged in" instead of raising.

    Returns:
        A status string naming the user, or a "not logged in" warning.
    """
    # Prefer the attribute: a profile object may also be a mapping that does
    # not carry a "username" key, in which case subscripting would fail.
    username = getattr(profile, "username", None)
    if username is None and isinstance(profile, dict):
        username = profile.get("username")
    if not username:
        return "⚠️ Not logged in."
    return f"✅ Logged in as: {username}"
41
 
42
_FINAL_ANSWER_MARKER = "FINAL ANSWER:"


def _username_from(login_info):
    """Return the username carried by *login_info*, or None if there is none."""
    username = getattr(login_info, "username", None)
    if username is None and isinstance(login_info, dict):
        username = login_info.get("username")
    return username or None


def _clean_answer(raw):
    """Convert an agent result into the bare answer string to submit."""
    text = str(raw).strip()
    # The prompt asks the model to end with "FINAL ANSWER: <answer>"; keep only
    # what follows the last marker so the marker itself is not submitted.
    _, marker, tail = text.rpartition(_FINAL_ANSWER_MARKER)
    return tail.strip() if marker else text


def run_and_submit_all(login_info):
    """Fetch all questions, answer each with the agent, and submit the answers.

    Args:
        login_info: Login information from the UI. May be ``None``, an object
            with a ``username`` attribute, or a mapping with a ``"username"``
            key. Anything without a username is treated as "not logged in".

    Returns:
        A ``(status_message, results_dataframe)`` tuple. The dataframe has the
        columns "Task ID", "Question" and "Submitted Answer"; it is empty when
        the run stops before any question is processed.
    """
    username = _username_from(login_info)
    if not username:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()

    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", pd.DataFrame()

    # Anything other than a list cannot be iterated as question records.
    if not isinstance(questions, list):
        return "Error fetching questions: unexpected response format.", pd.DataFrame()

    results, payload = [], []
    for item in questions:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue
        prompt = answer_formatting_prompt.strip() + f"\n\nQUESTION: {str(question).strip()}"
        try:
            answer = _clean_answer(agent.run(prompt))
        except Exception as e:
            # One failing question must not abort the run; record the error
            # in place of the answer.
            answer = f"AGENT ERROR: {e}"
        results.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        payload.append({"task_id": task_id, "submitted_answer": answer})

    if not payload:
        return "⚠️ Agent returned no answers.", pd.DataFrame(results)

    try:
        post = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json={"username": username, "agent_code": agent_code, "answers": payload},
            timeout=60,
        )
        post.raise_for_status()
        result = post.json()
        score = result.get("score", "N/A")
        correct = result.get("correct_count", "?")
        attempted = result.get("total_attempted", "?")
        message = result.get("message", "")
        return (
            f"✅ Submission Successful!\nUser: {username}\nScore: {score}% "
            f"({correct}/{attempted})\nMessage: {message}",
            pd.DataFrame(results),
        )
    except Exception as e:
        return f" Submission failed: {e}", pd.DataFrame(results)
94
 
95
 
 
96
def _profile_payload(profile):
    """Reduce an injected OAuth profile to the mapping the callbacks read."""
    if profile is None:
        return None
    return {"username": profile.username}


# Gradio supplies the OAuth profile through the type hint, not through
# `inputs=`: listing the login button as an input would only pass its label.
def _show_login_status(profile: gr.OAuthProfile | None):
    """Event handler: render the current login status."""
    return show_profile(_profile_payload(profile))


def _run_evaluation(profile: gr.OAuthProfile | None):
    """Event handler: run the evaluation for the logged-in user."""
    return run_and_submit_all(_profile_payload(profile))


with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")

    login_button = gr.LoginButton()
    login_status = gr.Textbox(label="Login Status")

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # Refresh the login status on every page load, which includes the reload
    # that follows the OAuth redirect.
    demo.load(fn=_show_login_status, inputs=None, outputs=[login_status])

    # Run the evaluation on click; the profile is injected by Gradio.
    run_button.click(fn=_run_evaluation, outputs=[status_output, results_table])


if __name__ == "__main__":
    demo.launch()
115
+
116
+
117
+ #import gradio as gr
118
+
119
+ #def show_profile(profile):
120
+ # if not profile:
121
+ # return "⚠️ Not logged in."
122
+ # return f"✅ Logged in as: {profile['username']}"
123
+
124
+ # with gr.Blocks() as demo:
125
+ # gr.Markdown("## 🔐 Hugging Face OAuth Login")
126
+
127
+ # login_button = gr.LoginButton()
128
+ # output = gr.Textbox(label="Login Status")
129
+
130
+ # login_button.click(fn=show_profile, inputs=[login_button], outputs=[output])
131
+
132
+ # demo.launch()
133
+
134
 
135
 
136