adriansanz commited on
Commit
e81ceaa
·
verified ·
1 Parent(s): c77782c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app(1).py +348 -0
  2. requirements.txt +8 -0
app(1).py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dataclasses
2
+ import os
3
+ from math import sqrt
4
+ from typing import Dict, List
5
+ from langchain_community.tools.tavily_search import TavilySearchResults
6
+ from langchain_community.document_loaders import WikipediaLoader
7
+ from langchain_community.document_loaders import ArxivLoader
8
+ import gradio as gr
9
+ import requests
10
+ import inspect
11
+ import pandas as pd
12
+ from langchain_core.documents import Document
13
+ from smolagents import CodeAgent, tool, InferenceClientModel
14
+
15
+ # (Keep Constants as is)
16
+ # --- Constants ---
17
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
+
19
+
20
@dataclasses.dataclass
class WikiSourceDocument:
    """Plain record made of three string fields describing a wiki document.

    NOTE(review): nothing in the visible part of this file constructs or reads
    this class -- confirm whether it is still needed.
    """

    # presumably where the document came from (e.g. a URL) -- TODO confirm
    source: str
    # presumably the page title or identifier -- TODO confirm
    page: str
    # presumably the text of the page -- TODO confirm
    page_content: str
25
+
26
@tool
def wiki_search(query: str, load_max_docs: int = 3) -> List[Document]:
    """Search Wikipedia for a query and return the matching pages as documents.

    At most `load_max_docs` documents are returned (3 by default).

    Args:
        query: The search query.
        load_max_docs: The maximum number of documents to load.

    Returns:
        The documents produced by WikipediaLoader for the query.
    """
    return WikipediaLoader(query=query, load_max_docs=load_max_docs).load()
34
+
35
@tool
def load_file(file_id: str) -> bytes:
    """Download a file from the scoring service and return its raw content in bytes.

    Args:
        file_id: The file ID to load.

    Returns:
        The body of the HTTP response as bytes.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out or the server answers with an error status.
    """
    # Build the URL from the shared constant so the host is defined in one place.
    response = requests.get(f"{DEFAULT_API_URL}/files/{file_id}", timeout=30)
    # Surface 4xx/5xx answers instead of handing an error page back as file content.
    response.raise_for_status()
    return response.content
41
+
42
@tool
def web_search(query: str, max_results: int = 3) -> Dict[str, str]:
    """Search the web with Tavily and return the results.

    Args:
        query: The search query.
        max_results: The maximum number of results to return (3 by default).

    Returns:
        A dictionary whose "web_results" key holds whatever the Tavily search tool returned.
    """
    search_docs = TavilySearchResults(max_results=max_results).invoke(input=query)
    return {"web_results": search_docs}
50
+
51
+
52
@tool
def arxiv_search(query: str, load_max_docs: int = 3) -> Dict[str, str]:
    """Search arXiv for a query and return the matching papers as formatted text.

    Args:
        query: The search query.
        load_max_docs: The maximum number of documents to load (3 by default).

    Returns:
        A dictionary whose "arxiv_results" key holds one Document section per paper, separated by "---" lines.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=load_max_docs).load()
    formatted_search_docs = "\n\n---\n\n".join(
        # Each attribute value gets its own pair of quotes, and the opening tag
        # is left open so that the trailing </Document> really closes it.
        f'<Document Title="{doc.metadata["Title"]}" '
        f'Published="{doc.metadata["Published"]}" '
        f'Authors="{doc.metadata["Authors"]}" '
        f'Summary="{doc.metadata["Summary"]}">\n'
        f"{doc.page_content}\n</Document>"
        for doc in search_docs
    )
    return {"arxiv_results": formatted_search_docs}
67
+
68
+
69
@tool
def multiply(a: float, b: float) -> float:
    """Return the product of two numbers.

    Args:
        a: The first factor.
        b: The second factor.

    Returns:
        The value of a times b.
    """
    product = a * b
    return product
81
+
82
+
83
@tool
def add(a: float, b: float) -> float:
    """Return the sum of two numbers.

    Args:
        a: The first addend.
        b: The second addend.

    Returns:
        The value of a plus b.
    """
    total = a + b
    return total
95
+
96
+
97
@tool
def subtract(a: float, b: float) -> float:
    """Return the difference of two numbers.

    Args:
        a: The number to subtract from.
        b: The number to subtract.

    Returns:
        The value of a minus b.
    """
    difference = a - b
    return difference
106
+
107
+
108
@tool
def divide(a: float, b: float) -> float:
    """Divide one number by another.

    Args:
        a: The dividend.
        b: The divisor, which must not be zero.

    Returns:
        The value of a divided by b.

    Raises:
        ValueError: If b is zero.
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
119
+
120
+
121
@tool
def modulus(a: int, b: int) -> int:
    """Return the remainder of dividing one integer by another.

    Args:
        a: The dividend.
        b: The divisor.

    Returns:
        The value of a modulo b.
    """
    remainder = a % b
    return remainder
130
+
131
+
132
@tool
def power(a: float, b: float) -> float:
    """Raise one number to the power of another.

    Args:
        a: The base.
        b: The exponent.

    Returns:
        The value of a raised to the power b.
    """
    # Two-argument pow() is the function form of the ** operator.
    return pow(a, b)
141
+
142
+
143
@tool
def square_root(a: float) -> float:
    """Return the square root of a non-negative number.

    Args:
        a: The number to get the square root of; it must not be negative.

    Returns:
        The non-negative square root of a.

    Raises:
        ValueError: If a is negative, because the result would not be a real number.
    """
    # Reject negatives up front with a readable message; the previous fallback
    # to math.sqrt could only fail with an opaque "math domain error".
    if a < 0:
        raise ValueError("Cannot take the square root of a negative number.")
    return sqrt(a)
153
+
154
+
155
+ # --- Basic Agent Definition ---
156
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
157
class BasicAgent:
    """Callable wrapper around a smolagents CodeAgent equipped with this file's tools."""

    def __init__(self):
        """Build the underlying CodeAgent.

        The inference token is read from the HF_TOKEN environment variable.
        """
        model_id = "Qwen/Qwen3-32B"
        self.agent = CodeAgent(
            # Every @tool defined in this file is registered, including
            # `divide` and `load_file`, which were previously left out.
            tools=[
                multiply,
                add,
                subtract,
                divide,
                power,
                square_root,
                modulus,
                wiki_search,
                web_search,
                arxiv_search,
                load_file,
            ],
            model=InferenceClientModel(model_id=model_id, token=os.getenv("HF_TOKEN")),
            max_steps=10,
        )
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its answer as text.

        Args:
            question: The question to answer.

        Returns:
            The agent's final answer, converted to a string when it is not one already.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        answer = self.agent.run(question)
        print(f"Agent returning answer: {answer}")
        # The annotated contract is `str`; converting here also keeps the answer
        # JSON-serialisable when it is later placed in the submission payload.
        return answer if isinstance(answer, str) else str(answer)
172
+
173
+
174
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the BasicAgent on them, submit the answers and
    report the outcome.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A `(status message, results)` tuple. `results` is a pandas DataFrame
        with one row per attempted question, or None when the run stops before
        any question is attempted.
    """
    # SPACE_ID is only used to build the public link to this Space's code.
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate the agent
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # When running as a Hugging Face Space this link points at the code base
    # (useful for others, so please keep it public).
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch the questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError derives from RequestException, so it has to be handled
    # first; listed after its base class this branch could never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run the agent on every question
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # A failing question is shown in the table but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare the submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit. Every branch only sets the status text; the results table is
    # built once after the try block.
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    return status_message, pd.DataFrame(results_log)
293
+
294
+
295
+ # --- Build Gradio Interface using Blocks ---
296
# Declarative page layout: components appear in the order they are created
# inside the `with` block, so the statement order below is significant.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    # Static instructions and disclaimers shown above the controls.
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only widgets that receive the two values returned by run_and_submit_all.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs` are wired here although the handler takes a `profile` argument.
    # NOTE(review): presumably Gradio supplies it from the login session based
    # on the handler's type annotation -- confirm against the Gradio docs.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
325
+
326
if __name__ == "__main__":
    # Opening banner: 30 dashes on either side of the title.
    print(f"\n{'-' * 30} App Starting {'-' * 30}")

    # Purely informational: report which Space-related variables are set.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        repo_url = f"https://huggingface.co/spaces/{space_id_startup}"
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: {repo_url}")
        print(f" Repo Tree URL: {repo_url}/tree/main")

    # Closing rule, as wide as the opening banner.
    print("-" * (2 * 30 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ requests
3
+ smolagents
4
+ pandas
5
+ langchain-community
6
+ wikipedia
7
+ arxiv
8
+ pymupdf