EtienneB commited on
Commit
9349849
·
1 Parent(s): f255c6e

start over

Browse files
Files changed (4) hide show
  1. agent-old.py +70 -0
  2. agent.py +0 -70
  3. app-old.py +523 -0
  4. app.py +126 -423
agent-old.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ from typing import Annotated, TypedDict
3
+
4
+ from langchain_community.chat_models import ChatHuggingFace
5
+ from langchain_community.llms import HuggingFaceEndpoint
6
+ from langchain_core.messages import AIMessage, AnyMessage, HumanMessage
7
+ from langgraph.graph import START, StateGraph
8
+ from langgraph.graph.message import add_messages
9
+ from langgraph.prebuilt import ToolNode, tools_condition
10
+
11
+ from retriever import guest_info_tool
12
+ from tools import (absolute, add, divide, exponential, floor_divide,
13
+ get_current_time_in_timezone, logarithm, modulus, multiply,
14
+ power, roman_calculator_converter, square_root, subtract,
15
+ web_search)
16
+
17
+ # Generate the chat interface, including the tools
18
+ llm = HuggingFaceEndpoint(
19
+ repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
20
+ huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
21
+ )
22
+
23
+ chat = ChatHuggingFace(llm=llm, verbose=True)
24
+ tools = [
25
+ multiply,
26
+ add,
27
+ subtract,
28
+ power,
29
+ divide,
30
+ modulus,
31
+ square_root,
32
+ floor_divide,
33
+ absolute,
34
+ logarithm,
35
+ exponential,
36
+ web_search,
37
+ roman_calculator_converter,
38
+ get_current_time_in_timezone,
39
+ ]
40
+
41
+ chat_with_tools = chat.bind_tools(tools)
42
+
43
+ # Generate the AgentState and Agent graph
44
+ class AgentState(TypedDict):
45
+ messages: Annotated[list[AnyMessage], add_messages]
46
+
47
+ def assistant(state: AgentState):
48
+ return {
49
+ "messages": [chat_with_tools.invoke(state["messages"])],
50
+ }
51
+
52
+ ## The graph
53
+ builder = StateGraph(AgentState)
54
+
55
+ # Define nodes: these do the work
56
+ builder.add_node("assistant", assistant)
57
+ builder.add_node("tools", ToolNode(tools))
58
+
59
+ # Define edges: these determine how the control flow moves
60
+ builder.add_edge(START, "assistant")
61
+ builder.add_conditional_edges(
62
+ "assistant",
63
+ # If the latest message requires a tool, route to tools
64
+ # Otherwise, provide a direct response
65
+ tools_condition,
66
+ )
67
+ builder.add_edge("tools", "assistant")
68
+ alfred = builder.compile()
69
+
70
+ """
agent.py CHANGED
@@ -1,70 +0,0 @@
1
- """
2
- from typing import Annotated, TypedDict
3
-
4
- from langchain_community.chat_models import ChatHuggingFace
5
- from langchain_community.llms import HuggingFaceEndpoint
6
- from langchain_core.messages import AIMessage, AnyMessage, HumanMessage
7
- from langgraph.graph import START, StateGraph
8
- from langgraph.graph.message import add_messages
9
- from langgraph.prebuilt import ToolNode, tools_condition
10
-
11
- from retriever import guest_info_tool
12
- from tools import (absolute, add, divide, exponential, floor_divide,
13
- get_current_time_in_timezone, logarithm, modulus, multiply,
14
- power, roman_calculator_converter, square_root, subtract,
15
- web_search)
16
-
17
- # Generate the chat interface, including the tools
18
- llm = HuggingFaceEndpoint(
19
- repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
20
- huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
21
- )
22
-
23
- chat = ChatHuggingFace(llm=llm, verbose=True)
24
- tools = [
25
- multiply,
26
- add,
27
- subtract,
28
- power,
29
- divide,
30
- modulus,
31
- square_root,
32
- floor_divide,
33
- absolute,
34
- logarithm,
35
- exponential,
36
- web_search,
37
- roman_calculator_converter,
38
- get_current_time_in_timezone,
39
- ]
40
-
41
- chat_with_tools = chat.bind_tools(tools)
42
-
43
- # Generate the AgentState and Agent graph
44
- class AgentState(TypedDict):
45
- messages: Annotated[list[AnyMessage], add_messages]
46
-
47
- def assistant(state: AgentState):
48
- return {
49
- "messages": [chat_with_tools.invoke(state["messages"])],
50
- }
51
-
52
- ## The graph
53
- builder = StateGraph(AgentState)
54
-
55
- # Define nodes: these do the work
56
- builder.add_node("assistant", assistant)
57
- builder.add_node("tools", ToolNode(tools))
58
-
59
- # Define edges: these determine how the control flow moves
60
- builder.add_edge(START, "assistant")
61
- builder.add_conditional_edges(
62
- "assistant",
63
- # If the latest message requires a tool, route to tools
64
- # Otherwise, provide a direct response
65
- tools_condition,
66
- )
67
- builder.add_edge("tools", "assistant")
68
- alfred = builder.compile()
69
-
70
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app-old.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import inspect
3
+ import json
4
+ import os
5
+ import time
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ import gradio as gr
9
+ import pandas as pd
10
+ import requests
11
+ from dotenv import load_dotenv
12
+ from langchain_community.chat_models import ChatHuggingFace
13
+ from langchain_community.llms import HuggingFaceEndpoint
14
+ from langchain_core.messages import AIMessage, HumanMessage
15
+ from langchain_core.tools import StructuredTool
16
+
17
+ from tools import (absolute, add, divide, exponential, floor_divide,
18
+ get_current_time_in_timezone, logarithm, modulus, multiply,
19
+ power, roman_calculator_converter, square_root, subtract,
20
+ web_search)
21
+
22
# --- Constants ---
# Base URL of the course scoring service that serves questions and accepts submissions.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MAX_AGENT_ITERATIONS = 15
# Limit concurrent requests to avoid overwhelming the API
MAX_CONCURRENT_REQUESTS = 5

# Pull variables from a local .env file (if any) into the process environment
# before the token lookup below.
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = (
    os.getenv("HUGGINGFACEHUB_API_TOKEN")
    or os.getenv("HF_TOKEN")
)

# Quick test to see if tokens are available (names only, never the values).
print(f"Available env vars: {[name for name in os.environ if 'TOKEN' in name or 'HF' in name]}")

# Global cache for answers: holds per-question answers as well as the
# per-user submission payloads stored under "user_<username>" keys.
answer_cache = {}
35
+
36
class ImprovedAgent:
    """Tool-using chat agent backed by a Hugging Face inference endpoint.

    The agent binds the math / search / time tools imported from ``tools`` to a
    ``ChatHuggingFace`` model, lets the model request tool calls, executes
    them locally and feeds the textual results back until the model answers
    without requesting further tools (or the iteration cap is reached).
    """

    # Instructions prepended to every question (sent as part of the first
    # human message rather than as a separate system message).
    _SYSTEM_PROMPT = (
        "You are a helpful AI assistant with access to various tools.\n"
        "When answering questions, use the appropriate tools when needed and provide clear, concise answers.\n"
        "If you need to perform calculations, use the math tools available.\n"
        "If you need current information, use the web search tool.\n"
        "Always provide a final answer after using tools."
    )

    def __init__(self):
        """Build the LLM client and bind the tools.

        Raises:
            ValueError: if no Hugging Face API token is configured.
        """
        if not HUGGINGFACEHUB_API_TOKEN:
            raise ValueError("Missing Hugging Face API token. Please set HUGGINGFACEHUB_API_TOKEN.")

        # Initialize LLM with better parameters
        self.llm = HuggingFaceEndpoint(
            repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
            huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
            temperature=0.1,  # Lower temperature for more consistent responses
            max_new_tokens=1024,
            timeout=30,
        )

        self.chat = ChatHuggingFace(llm=self.llm, verbose=False)

        # Initialize tools
        self.tools = [
            multiply, add, subtract, power, divide, modulus,
            square_root, floor_divide, absolute, logarithm,
            exponential, web_search, roman_calculator_converter,
            get_current_time_in_timezone,
        ]

        self.chat_with_tools = self.chat.bind_tools(self.tools)
        print(f"Total tools available: {len(self.tools)}")

        # Create tool mapping for easier access
        self.tool_map = {tool.name: tool for tool in self.tools}

        # Reported last so the message is only printed once setup succeeded.
        print("ImprovedAgent initialized.")

    def _extract_tool_calls(self, response) -> List[Dict]:
        """Return the tool calls requested by ``response`` as name/args dicts.

        A response without a ``tool_calls`` attribute, or with an empty one,
        yields an empty list.
        """
        requested = getattr(response, "tool_calls", None) or []
        return [{"name": call["name"], "args": call["args"]} for call in requested]

    def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[str]:
        """Run each requested tool and return one result string per call.

        Failures are deliberately reported as text instead of raised so that
        the model can see the error and react to it.
        """
        results = []
        for call in tool_calls:
            tool_name = call["name"]
            tool = self.tool_map.get(tool_name)
            if tool is None:
                results.append(f"Unknown tool: {tool_name}")
                continue
            try:
                results.append(f"Tool {tool_name} result: {tool.invoke(call['args'])}")
            except Exception as e:  # best-effort: surface the failure to the model
                results.append(f"Tool {tool_name} error: {e}")
        return results

    @staticmethod
    def _clean_final_answer(text: str) -> str:
        """Strip ``text`` and drop echoed tool-result artifacts.

        When the text contains "Tool ... result:" artifacts, the last
        non-empty line that is not itself an artifact is returned; if every
        line is an artifact the stripped text is returned unchanged.
        """
        final_answer = text.strip()
        if "Tool " in final_answer and "result:" in final_answer:
            for line in reversed(final_answer.splitlines()):
                # Test the stripped line so indented artifacts are skipped too.
                candidate = line.strip()
                if candidate and not candidate.startswith("Tool ") and "result:" not in candidate:
                    return candidate
        return final_answer

    async def answer(self, question: str, max_iterations: int = 3) -> str:
        """Answer ``question``, running requested tools along the way.

        Args:
            question: the question text.
            max_iterations: maximum number of tool-execution rounds before
                the latest model response is returned as-is.

        Returns:
            The cleaned final answer, or an "Error processing question: ..."
            string if anything fails (this method does not raise).
        """
        print(f"Processing question: {question[:100]}...")

        try:
            messages = [
                HumanMessage(content=f"{self._SYSTEM_PROMPT}\n\nQuestion: {question}")
            ]

            # The model call is blocking, so it runs in a worker thread.
            response = await asyncio.to_thread(self.chat_with_tools.invoke, messages)

            for _ in range(max_iterations):
                tool_calls = self._extract_tool_calls(response)
                if not tool_calls:
                    break

                tool_results = self._execute_tool_calls(tool_calls)

                # Add tool results to conversation
                messages.append(AIMessage(content=response.content))
                messages.append(HumanMessage(content=f"Tool results: {'; '.join(tool_results)}. Please provide a final answer based on these results."))

                response = await asyncio.to_thread(self.chat_with_tools.invoke, messages)

            return self._clean_final_answer(response.content)

        except Exception as e:
            print(f"Error in answer method: {e}")
            return f"Error processing question: {str(e)}"

    def answer_sync(self, question: str) -> str:
        """Synchronous wrapper around :meth:`answer`.

        Uses ``asyncio.run``; any failure is returned as an "Error: ..."
        string rather than raised.
        """
        try:
            return asyncio.run(self.answer(question))
        except Exception as e:
            print(f"Error in sync answer: {e}")
            return f"Error: {str(e)}"
163
+
164
async def process_questions_batch(agent, questions_batch, semaphore):
    """Answer one batch of questions concurrently, bounded by ``semaphore``.

    Items lacking a truthy ``task_id`` or having no ``question`` are skipped.
    Each remaining item produces a ``(task_id, question, answer, error)``
    tuple: ``error`` is ``None`` on success, ``answer`` is ``None`` on
    failure. Answers are memoised in the module-level ``answer_cache``.
    An empty list is returned when nothing is eligible.
    """

    async def process_single_question(task_id, question):
        # The caller-supplied semaphore caps how many questions run at once.
        async with semaphore:
            try:
                key = f"{task_id}_{hash(question)}"

                # Serve repeated questions from the cache.
                if key in answer_cache:
                    print(f"Using cached answer for task {task_id}")
                    return task_id, question, answer_cache[key], None

                fresh_answer = await agent.answer(question)
                answer_cache[key] = fresh_answer
                return task_id, question, fresh_answer, None
            except Exception as e:
                print(f"Error processing task {task_id}: {e}")
                return task_id, question, None, str(e)

    pending = [
        process_single_question(item.get("task_id"), item.get("question"))
        for item in questions_batch
        if item.get("task_id") and item.get("question") is not None
    ]
    if not pending:
        return []

    return await asyncio.gather(*pending, return_exceptions=True)
199
+
200
async def run_agent_async_improved(agent, questions_data, batch_size: int = 10):
    """Run ``agent`` over all questions in batches and collect the results.

    Args:
        agent: object passed through to ``process_questions_batch``.
        questions_data: list of question dicts (``task_id`` / ``question``).
        batch_size: number of questions handed to each batch (default 10).

    Returns:
        ``(results_log, answers_payload)``: display rows with truncated
        question/answer text, and the full ``task_id``/``submitted_answer``
        entries for successfully answered tasks.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    def preview(text, limit):
        # Shorten long text for the results table only; payloads stay intact.
        return text[:limit] + "..." if len(text) > limit else text

    results_log, answers_payload = [], []

    # Create semaphore for rate limiting
    semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)

    batches = [questions_data[i:i + batch_size] for i in range(0, len(questions_data), batch_size)]

    print(f"Processing {len(questions_data)} questions in {len(batches)} batches...")

    for i, batch in enumerate(batches):
        print(f"Processing batch {i+1}/{len(batches)} ({len(batch)} questions)...")

        try:
            batch_results = await process_questions_batch(agent, batch, semaphore)

            for result in batch_results:
                if isinstance(result, Exception):
                    print(f"Batch processing error: {result}")
                    continue

                task_id, question, answer, error = result

                # Compare against None: an exception whose message is empty
                # yields error == "", which must still count as a failure
                # (otherwise answer is None and the whole batch would abort).
                if error is not None:
                    print(f"Error in task {task_id}: {error}")
                    results_log.append({
                        "Task ID": task_id,
                        "Question": preview(question, 100),
                        "Submitted Answer": f"ERROR: {error or 'unknown error'}",
                    })
                else:
                    answers_payload.append({"task_id": task_id, "submitted_answer": answer})
                    results_log.append({
                        "Task ID": task_id,
                        "Question": preview(question, 100),
                        "Submitted Answer": preview(answer, 200),
                    })

            # Small delay between batches to be respectful
            if i < len(batches) - 1:
                await asyncio.sleep(1)

        except Exception as e:
            print(f"Error processing batch {i+1}: {e}")
            # Continue with next batch
            continue

    return results_log, answers_payload
251
+
252
def cache_answers(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer them, and store the answers without submitting.

    Returns a ``(status_message, results_dataframe_or_None)`` pair. The
    answers payload is kept in ``answer_cache`` under ``"user_<username>"``.
    Every failure is reported through the status message; nothing is raised.
    """
    if not profile:
        return "Please log in to Hugging Face first.", None

    username = profile.username
    print(f"Caching answers for user: {username}")

    # Fetch questions
    questions_url = f"{DEFAULT_API_URL}/questions"

    try:
        questions_response = requests.get(questions_url, timeout=15)
        questions_response.raise_for_status()
        questions_data = questions_response.json()

        if not questions_data:
            return "No questions found.", None

        print(f"Fetched {len(questions_data)} questions for caching.")

        # Agent construction gets its own handler so its failure message
        # differs from the generic caching error below.
        try:
            agent = ImprovedAgent()
        except Exception as init_error:
            print(f"Full error details: {init_error}")
            return f"Error initializing agent: {init_error}", None

        # Process questions
        results_log, answers_payload = asyncio.run(
            run_agent_async_improved(agent, questions_data)
        )

        # Store in global cache with username
        answer_cache[f"user_{username}"] = answers_payload

        return (
            f"Cached {len(answers_payload)} answers for user {username}. Ready to submit!",
            pd.DataFrame(results_log),
        )

    except Exception as caching_error:
        print(f"Error caching answers: {caching_error}")
        return f"Error caching answers: {caching_error}", None
295
+
296
def submit_cached_answers(profile: gr.OAuthProfile | None):
    """Submit the answers previously stored by the caching step.

    Returns a ``(status_message, None)`` pair. On a successful submission the
    user's cached payload is removed from ``answer_cache``; on failure it is
    kept and the error is reported in the status message.
    """
    if not profile:
        return "Please log in to Hugging Face first.", None

    username = profile.username
    cache_key = f"user_{username}"

    if cache_key not in answer_cache:
        return "No cached answers found. Please run 'Cache Answers' first.", None

    answers_payload = answer_cache[cache_key]
    if not answers_payload:
        return "No answers to submit.", None

    # Link to the Space's code, when the Space id is known.
    space_id = os.getenv("SPACE_ID")
    if space_id:
        agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    else:
        agent_code = "Unknown"

    submit_url = f"{DEFAULT_API_URL}/submit"
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        print(f"Submitting {len(answers_payload)} cached answers...")
        submit_response = requests.post(submit_url, json=submission_data, timeout=60)
        submit_response.raise_for_status()
        result_data = submit_response.json()

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )

        # Clear cache after successful submission
        answer_cache.pop(cache_key, None)

        return final_status, None

    except Exception as submit_error:
        print(f"Submission error: {submit_error}")
        return f"Submission failed: {submit_error}", None
349
+
350
def make_request_with_retry(url, method="get", json_data=None, timeout=15,
                            max_retries=3, backoff_seconds=2.0):
    """Send an HTTP request, retrying on connection errors and timeouts.

    This helper was referenced by ``run_and_submit_all`` but never defined in
    this module, so it is provided here.

    Args:
        url: target URL.
        method: HTTP method name (case-insensitive), e.g. "get" or "post".
        json_data: optional JSON-serialisable request body.
        timeout: per-attempt timeout in seconds.
        max_retries: total number of attempts (at least one is always made).
        backoff_seconds: base delay; attempt ``k`` waits ``k * backoff_seconds``.

    Returns:
        The ``requests.Response`` of the first attempt that completes.

    Raises:
        requests.exceptions.HTTPError: on a 4xx/5xx response (not retried).
        requests.exceptions.ConnectionError / Timeout: when every attempt failed.
    """
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            response = requests.request(method.upper(), url, json=json_data, timeout=timeout)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            print(f"Request to {url} failed (attempt {attempt}/{attempts}): {e}")
            if attempt == attempts:
                raise
            time.sleep(backoff_seconds * attempt)
            continue
        # HTTP error statuses are passed to the caller, which reports them.
        response.raise_for_status()
        return response


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the ImprovedAgent on them, submits all answers,
    and displays the results.

    Returns a ``(status_message, results_dataframe_or_None)`` pair; failures
    are reported in the status message rather than raised.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent. The previously referenced ``BasicAgent`` is not
    # defined in this module; ``ImprovedAgent`` is the agent it provides.
    try:
        agent = ImprovedAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # In the case of an app running as a Hugging Face space, this link points
    # toward your codebase (useful for others, so please keep it public).
    # Falls back to "Unknown" when SPACE_ID is unset, matching
    # submit_cached_answers, instead of embedding the literal "None".
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    response = None
    try:
        response = make_request_with_retry(questions_url)
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError derives from RequestException, so it must be handled
    # first or this branch can never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        if response is not None:
            print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            # ImprovedAgent instances are not callable; use the sync entry point.
            submitted_answer = agent.answer_sync(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = make_request_with_retry(submit_url, method="post", json_data=submission_data, timeout=60)
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    # Shared tail of every failure branch above.
    print(status_message)
    return status_message, pd.DataFrame(results_log)
469
+
470
+
471
+ # --- Build Gradio Interface using Blocks ---
472
with gr.Blocks() as demo:
    # Page header and usage notes shown above the controls.
    gr.Markdown(value="# Basic Agent Evaluation Runner")
    gr.Markdown(
        value="""
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # Hugging Face login button.
    gr.LoginButton()

    run_button = gr.Button(value="Run Evaluation & Submit All Answers")

    # Outputs: a read-only status box and the per-question results table.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # The handler declares no explicit inputs; it fills both outputs.
    run_button.click(
        run_and_submit_all,
        outputs=[status_output, results_table],
    )
499
+
500
if __name__ == "__main__":
    # Startup banner followed by informational checks of the Space environment.
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)

    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    # Repo URLs can only be printed when SPACE_ID is known.
    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    # Closing rule as wide as the opening banner line.
    print("-" * (60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
523
+
app.py CHANGED
@@ -1,413 +1,108 @@
1
- import asyncio
2
  import inspect
3
- import json
4
  import os
5
- import time
6
- from typing import Any, Dict, List, Optional
7
 
8
  import gradio as gr
9
  import pandas as pd
10
  import requests
11
- from dotenv import load_dotenv
12
- from langchain_community.chat_models import ChatHuggingFace
13
- from langchain_community.llms import HuggingFaceEndpoint
14
- from langchain_core.messages import AIMessage, HumanMessage
15
- from langchain_core.tools import StructuredTool
16
-
17
- from tools import (absolute, add, divide, exponential, floor_divide,
18
- get_current_time_in_timezone, logarithm, modulus, multiply,
19
- power, roman_calculator_converter, square_root, subtract,
20
- web_search)
21
 
 
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
- MAX_AGENT_ITERATIONS = 15
25
- MAX_CONCURRENT_REQUESTS = 5 # Limit concurrent requests to avoid overwhelming the API
26
-
27
- load_dotenv()
28
- HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
29
-
30
- # Quick test to see if tokens are available.
31
- print(f"Available env vars: {[k for k in os.environ.keys() if 'TOKEN' in k or 'HF' in k]}")
32
 
33
- # Global cache for answers
34
- answer_cache = {}
35
-
36
- class ImprovedAgent:
37
  def __init__(self):
38
- if not HUGGINGFACEHUB_API_TOKEN:
39
- raise ValueError("Missing Hugging Face API token. Please set HUGGINGFACEHUB_API_TOKEN.")
40
-
41
- print("ImprovedAgent initialized.")
42
-
43
- # Initialize LLM with better parameters
44
- self.llm = HuggingFaceEndpoint(
45
- repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
46
- huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
47
- temperature=0.1, # Lower temperature for more consistent responses
48
- max_new_tokens=1024,
49
- timeout=30,
50
- )
51
-
52
- self.chat = ChatHuggingFace(llm=self.llm, verbose=False)
53
-
54
- # Initialize tools
55
- self.tools = [
56
- multiply, add, subtract, power, divide, modulus,
57
- square_root, floor_divide, absolute, logarithm,
58
- exponential, web_search, roman_calculator_converter,
59
- get_current_time_in_timezone
60
- ]
61
-
62
- self.chat_with_tools = self.chat.bind_tools(self.tools)
63
- print(f"Total tools available: {len(self.tools)}")
64
-
65
- # Create tool mapping for easier access
66
- self.tool_map = {tool.name: tool for tool in self.tools}
67
-
68
- def _extract_tool_calls(self, response) -> List[Dict]:
69
- """Extract tool calls from the response"""
70
- tool_calls = []
71
- if hasattr(response, 'tool_calls') and response.tool_calls:
72
- for tool_call in response.tool_calls:
73
- tool_calls.append({
74
- 'name': tool_call['name'],
75
- 'args': tool_call['args']
76
- })
77
- return tool_calls
78
-
79
- def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[str]:
80
- """Execute tool calls and return results"""
81
- results = []
82
- for tool_call in tool_calls:
83
- tool_name = tool_call['name']
84
- tool_args = tool_call['args']
85
-
86
- if tool_name in self.tool_map:
87
- try:
88
- tool = self.tool_map[tool_name]
89
- result = tool.invoke(tool_args)
90
- results.append(f"Tool {tool_name} result: {result}")
91
- except Exception as e:
92
- results.append(f"Tool {tool_name} error: {str(e)}")
93
- else:
94
- results.append(f"Unknown tool: {tool_name}")
95
-
96
- return results
97
-
98
- async def answer(self, question: str) -> str:
99
- """Improved answer method with better error handling and tool usage"""
100
- print(f"Processing question: {question[:100]}...")
101
-
102
- try:
103
- # Create system prompt for better instruction following
104
- system_prompt = """You are a helpful AI assistant with access to various tools.
105
- When answering questions, use the appropriate tools when needed and provide clear, concise answers.
106
- If you need to perform calculations, use the math tools available.
107
- If you need current information, use the web search tool.
108
- Always provide a final answer after using tools."""
109
-
110
- messages = [
111
- HumanMessage(content=f"{system_prompt}\n\nQuestion: {question}")
112
- ]
113
-
114
- # Initial response
115
- response = await asyncio.to_thread(self.chat_with_tools.invoke, messages)
116
-
117
- # Handle tool calls if present
118
- max_iterations = 3
119
- iteration = 0
120
-
121
- while iteration < max_iterations:
122
- tool_calls = self._extract_tool_calls(response)
123
-
124
- if not tool_calls:
125
- break
126
-
127
- # Execute tool calls
128
- tool_results = self._execute_tool_calls(tool_calls)
129
-
130
- # Add tool results to conversation
131
- messages.append(AIMessage(content=response.content))
132
- messages.append(HumanMessage(content=f"Tool results: {'; '.join(tool_results)}. Please provide a final answer based on these results."))
133
-
134
- # Get next response
135
- response = await asyncio.to_thread(self.chat_with_tools.invoke, messages)
136
- iteration += 1
137
-
138
- # Extract final answer
139
- final_answer = response.content.strip()
140
-
141
- # Clean up the response - remove any tool call artifacts
142
- if "Tool " in final_answer and "result:" in final_answer:
143
- # Try to extract just the final answer part
144
- lines = final_answer.split('\n')
145
- for line in reversed(lines):
146
- if line.strip() and not line.startswith('Tool ') and not 'result:' in line:
147
- final_answer = line.strip()
148
- break
149
-
150
- return final_answer
151
-
152
- except Exception as e:
153
- print(f"Error in answer method: {e}")
154
- return f"Error processing question: {str(e)}"
155
-
156
- def answer_sync(self, question: str) -> str:
157
- """Synchronous version of answer method"""
158
- try:
159
- return asyncio.run(self.answer(question))
160
- except Exception as e:
161
- print(f"Error in sync answer: {e}")
162
- return f"Error: {str(e)}"
163
-
164
- async def process_questions_batch(agent, questions_batch, semaphore):
165
- """Process a batch of questions with rate limiting"""
166
- results = []
167
-
168
- async def process_single_question(task_id, question):
169
- async with semaphore:
170
- try:
171
- # Check cache first
172
- cache_key = f"{task_id}_{hash(question)}"
173
- if cache_key in answer_cache:
174
- print(f"Using cached answer for task {task_id}")
175
- return task_id, question, answer_cache[cache_key], None
176
-
177
- answer = await agent.answer(question)
178
-
179
- # Cache the result
180
- answer_cache[cache_key] = answer
181
-
182
- return task_id, question, answer, None
183
- except Exception as e:
184
- print(f"Error processing task {task_id}: {e}")
185
- return task_id, question, None, str(e)
186
-
187
- # Create semaphore for rate limiting
188
- tasks = []
189
- for item in questions_batch:
190
- task_id = item.get("task_id")
191
- question_text = item.get("question")
192
- if task_id and question_text is not None:
193
- tasks.append(process_single_question(task_id, question_text))
194
-
195
- if tasks:
196
- results = await asyncio.gather(*tasks, return_exceptions=True)
197
-
198
- return results
199
-
200
- async def run_agent_async_improved(agent, questions_data):
201
- """Improved async processing with batching and caching"""
202
- results_log, answers_payload = [], []
203
-
204
- # Create semaphore for rate limiting
205
- semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
206
-
207
- # Process questions in batches
208
- batch_size = 10
209
- batches = [questions_data[i:i + batch_size] for i in range(0, len(questions_data), batch_size)]
210
-
211
- print(f"Processing {len(questions_data)} questions in {len(batches)} batches...")
212
-
213
- for i, batch in enumerate(batches):
214
- print(f"Processing batch {i+1}/{len(batches)} ({len(batch)} questions)...")
215
-
216
- try:
217
- batch_results = await process_questions_batch(agent, batch, semaphore)
218
-
219
- for result in batch_results:
220
- if isinstance(result, Exception):
221
- print(f"Batch processing error: {result}")
222
- continue
223
-
224
- task_id, question, answer, error = result
225
-
226
- if error:
227
- print(f"Error in task {task_id}: {error}")
228
- results_log.append({
229
- "Task ID": task_id,
230
- "Question": question[:100] + "..." if len(question) > 100 else question,
231
- "Submitted Answer": f"ERROR: {error}"
232
- })
233
- else:
234
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
235
- results_log.append({
236
- "Task ID": task_id,
237
- "Question": question[:100] + "..." if len(question) > 100 else question,
238
- "Submitted Answer": answer[:200] + "..." if len(answer) > 200 else answer
239
- })
240
-
241
- # Small delay between batches to be respectful
242
- if i < len(batches) - 1:
243
- await asyncio.sleep(1)
244
-
245
- except Exception as e:
246
- print(f"Error processing batch {i+1}: {e}")
247
- # Continue with next batch
248
- continue
249
-
250
- return results_log, answers_payload
251
-
252
- def cache_answers(profile: gr.OAuthProfile | None):
253
- """Cache answers without submitting"""
254
- if not profile:
255
- return "Please log in to Hugging Face first.", None
256
-
257
- username = profile.username
258
- print(f"Caching answers for user: {username}")
259
-
260
- # Fetch questions
261
- api_url = DEFAULT_API_URL
262
- questions_url = f"{api_url}/questions"
263
-
264
- try:
265
- response = requests.get(questions_url, timeout=15)
266
- response.raise_for_status()
267
- questions_data = response.json()
268
-
269
- if not questions_data:
270
- return "No questions found.", None
271
-
272
- print(f"Fetched {len(questions_data)} questions for caching.")
273
-
274
- # Initialize agent
275
- try:
276
- agent = ImprovedAgent()
277
- except Exception as e:
278
- print(f"Full error details: {e}")
279
- return f"Error initializing agent: {e}", None
280
-
281
- # Process questions
282
- results_log, answers_payload = asyncio.run(run_agent_async_improved(agent, questions_data))
283
-
284
- # Store in global cache with username
285
- answer_cache[f"user_{username}"] = answers_payload
286
-
287
- status = f"Cached {len(answers_payload)} answers for user {username}. Ready to submit!"
288
- results_df = pd.DataFrame(results_log)
289
-
290
- return status, results_df
291
-
292
- except Exception as e:
293
- print(f"Error caching answers: {e}")
294
- return f"Error caching answers: {e}", None
295
-
296
- def submit_cached_answers(profile: gr.OAuthProfile | None):
297
- """Submit previously cached answers"""
298
- if not profile:
299
- return "Please log in to Hugging Face first.", None
300
-
301
- username = profile.username
302
- cache_key = f"user_{username}"
303
-
304
- if cache_key not in answer_cache:
305
- return "No cached answers found. Please run 'Cache Answers' first.", None
306
-
307
- answers_payload = answer_cache[cache_key]
308
-
309
- if not answers_payload:
310
- return "No answers to submit.", None
311
-
312
- # Get space info
313
- space_id = os.getenv("SPACE_ID")
314
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
315
-
316
- # Submit
317
- api_url = DEFAULT_API_URL
318
- submit_url = f"{api_url}/submit"
319
-
320
- submission_data = {
321
- "username": username.strip(),
322
- "agent_code": agent_code,
323
- "answers": answers_payload
324
- }
325
-
326
- try:
327
- print(f"Submitting {len(answers_payload)} cached answers...")
328
- response = requests.post(submit_url, json=submission_data, timeout=60)
329
- response.raise_for_status()
330
- result_data = response.json()
331
-
332
- final_status = (
333
- f"Submission Successful!\n"
334
- f"User: {result_data.get('username')}\n"
335
- f"Overall Score: {result_data.get('score', 'N/A')}% "
336
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
337
- f"Message: {result_data.get('message', 'No message received.')}"
338
- )
339
-
340
- # Clear cache after successful submission
341
- if cache_key in answer_cache:
342
- del answer_cache[cache_key]
343
-
344
- return final_status, None
345
-
346
- except Exception as e:
347
- print(f"Submission error: {e}")
348
- return f"Submission failed: {e}", None
349
-
350
- def run_and_submit_all(profile: gr.OAuthProfile | None):
351
- """Original function - now improved with better error handling"""
352
- if not profile:
353
- return "Please log in to Hugging Face first.", None
354
-
355
- username = profile.username
356
- print(f"User logged in: {username}")
357
 
358
  api_url = DEFAULT_API_URL
359
  questions_url = f"{api_url}/questions"
360
  submit_url = f"{api_url}/submit"
361
 
362
- # Initialize agent
363
  try:
364
- agent = ImprovedAgent()
365
  except Exception as e:
366
- print(f"Error initializing agent: {e}")
367
  return f"Error initializing agent: {e}", None
 
 
 
368
 
369
- # Get space info
370
- space_id = os.getenv("SPACE_ID")
371
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
372
-
373
- # Fetch questions
374
  try:
375
- print(f"Fetching questions from: {questions_url}")
376
  response = requests.get(questions_url, timeout=15)
377
  response.raise_for_status()
378
  questions_data = response.json()
379
-
380
  if not questions_data:
381
- return "No questions found.", None
382
-
383
  print(f"Fetched {len(questions_data)} questions.")
384
- except Exception as e:
385
  print(f"Error fetching questions: {e}")
386
  return f"Error fetching questions: {e}", None
387
-
388
- # Process questions
389
- try:
390
- results_log, answers_payload = asyncio.run(run_agent_async_improved(agent, questions_data))
391
  except Exception as e:
392
- print(f"Error processing questions: {e}")
393
- return f"Error processing questions: {e}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
  if not answers_payload:
396
- return "No answers generated.", pd.DataFrame(results_log) if results_log else None
 
397
 
398
- # Submit answers
399
- submission_data = {
400
- "username": username.strip(),
401
- "agent_code": agent_code,
402
- "answers": answers_payload
403
- }
404
 
 
 
405
  try:
406
- print(f"Submitting {len(answers_payload)} answers...")
407
  response = requests.post(submit_url, json=submission_data, timeout=60)
408
  response.raise_for_status()
409
  result_data = response.json()
410
-
411
  final_status = (
412
  f"Submission Successful!\n"
413
  f"User: {result_data.get('username')}\n"
@@ -415,81 +110,89 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
415
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
416
  f"Message: {result_data.get('message', 'No message received.')}"
417
  )
418
-
419
  results_df = pd.DataFrame(results_log)
420
  return final_status, results_df
421
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  except Exception as e:
423
- print(f"Submission error: {e}")
 
424
  results_df = pd.DataFrame(results_log)
425
- return f"Submission failed: {e}", results_df
 
426
 
427
- # --- Build Gradio Interface ---
428
- with gr.Blocks(title="Improved Agent Evaluation") as demo:
429
- gr.Markdown("# Improved Agent Evaluation Runner")
430
  gr.Markdown(
431
  """
432
  **Instructions:**
433
 
434
- 1. Log in to your Hugging Face account using the button below.
435
- 2. **Recommended**: Use "Cache Answers" to process all questions first, then "Submit Cached Answers" to submit them.
436
- 3. **Alternative**: Use "Run & Submit All" for the original one-step process.
437
-
438
- **Improvements:**
439
- - ✅ Async processing with rate limiting
440
- - ✅ Answer caching for faster resubmissions
441
- - ✅ Better error handling and recovery
442
- - ✅ Batch processing to avoid timeouts
443
- - ✅ Improved tool usage and response parsing
444
 
445
  ---
 
 
 
446
  """
447
  )
448
 
449
  gr.LoginButton()
450
 
451
- with gr.Row():
452
- cache_button = gr.Button("🔄 Cache Answers", variant="secondary")
453
- submit_button = gr.Button("📤 Submit Cached Answers", variant="primary")
454
- run_all_button = gr.Button("🚀 Run & Submit All", variant="secondary")
455
 
456
- status_output = gr.Textbox(label="Status", lines=6, interactive=False)
 
457
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
458
 
459
- # Wire up the buttons
460
- cache_button.click(
461
- fn=cache_answers,
462
- outputs=[status_output, results_table]
463
- )
464
-
465
- submit_button.click(
466
- fn=submit_cached_answers,
467
- outputs=[status_output, results_table]
468
- )
469
-
470
- run_all_button.click(
471
  fn=run_and_submit_all,
472
  outputs=[status_output, results_table]
473
  )
474
 
475
  if __name__ == "__main__":
476
- print("\n" + "-"*30 + " Improved App Starting " + "-"*30)
477
-
478
- space_host = os.getenv("SPACE_HOST")
479
- space_id = os.getenv("SPACE_ID")
480
-
481
- if space_host:
482
- print(f"✅ SPACE_HOST: {space_host}")
483
- print(f" Runtime URL: https://{space_host}.hf.space")
484
  else:
485
- print("ℹ️ Running locally - SPACE_HOST not found.")
486
 
487
- if space_id:
488
- print(f"✅ SPACE_ID: {space_id}")
489
- print(f" Repo URL: https://huggingface.co/spaces/{space_id}")
 
490
  else:
491
- print("ℹ️ SPACE_ID not found.")
 
 
492
 
493
- print("-" * 76 + "\n")
494
- print("Launching Improved Gradio Interface...")
495
- demo.launch(debug=True, share=False)
 
 
1
  import inspect
 
2
  import os
 
 
3
 
4
  import gradio as gr
5
  import pandas as pd
6
  import requests
 
 
 
 
 
 
 
 
 
 
7
 
8
+ # (Keep Constants as is)
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
11
 
12
+ # --- Basic Agent Definition ---
13
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
14
+ class BasicAgent:
 
15
  def __init__(self):
16
+ print("BasicAgent initialized.")
17
+ def __call__(self, question: str) -> str:
18
+ print(f"Agent received question (first 50 chars): {question[:50]}...")
19
+ fixed_answer = "This is a default answer."
20
+ print(f"Agent returning fixed answer: {fixed_answer}")
21
+ return fixed_answer
22
+
23
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
24
+ """
25
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
26
+ and displays the results.
27
+ """
28
+ # --- Determine HF Space Runtime URL and Repo URL ---
29
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
30
+
31
+ if profile:
32
+ username= f"{profile.username}"
33
+ print(f"User logged in: {username}")
34
+ else:
35
+ print("User not logged in.")
36
+ return "Please Login to Hugging Face with the button.", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  api_url = DEFAULT_API_URL
39
  questions_url = f"{api_url}/questions"
40
  submit_url = f"{api_url}/submit"
41
 
42
+ # 1. Instantiate the agent (modify this part to create your agent)
43
  try:
44
+ agent = BasicAgent()
45
  except Exception as e:
46
+ print(f"Error instantiating agent: {e}")
47
  return f"Error initializing agent: {e}", None
48
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
49
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
50
+ print(agent_code)
51
 
52
+ # 2. Fetch Questions
53
+ print(f"Fetching questions from: {questions_url}")
 
 
 
54
  try:
 
55
  response = requests.get(questions_url, timeout=15)
56
  response.raise_for_status()
57
  questions_data = response.json()
 
58
  if not questions_data:
59
+ print("Fetched questions list is empty.")
60
+ return "Fetched questions list is empty or invalid format.", None
61
  print(f"Fetched {len(questions_data)} questions.")
62
+ except requests.exceptions.RequestException as e:
63
  print(f"Error fetching questions: {e}")
64
  return f"Error fetching questions: {e}", None
65
+ except requests.exceptions.JSONDecodeError as e:
66
+ print(f"Error decoding JSON response from questions endpoint: {e}")
67
+ print(f"Response text: {response.text[:500]}")
68
+ return f"Error decoding server response for questions: {e}", None
69
  except Exception as e:
70
+ print(f"An unexpected error occurred fetching questions: {e}")
71
+ return f"An unexpected error occurred fetching questions: {e}", None
72
+
73
+ # 3. Run your Agent
74
+ results_log = []
75
+ answers_payload = []
76
+ print(f"Running agent on {len(questions_data)} questions...")
77
+ for item in questions_data:
78
+ task_id = item.get("task_id")
79
+ question_text = item.get("question")
80
+ if not task_id or question_text is None:
81
+ print(f"Skipping item with missing task_id or question: {item}")
82
+ continue
83
+ try:
84
+ submitted_answer = agent(question_text)
85
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
86
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
87
+ except Exception as e:
88
+ print(f"Error running agent on task {task_id}: {e}")
89
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
90
 
91
  if not answers_payload:
92
+ print("Agent did not produce any answers to submit.")
93
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
94
 
95
+ # 4. Prepare Submission
96
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
97
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
98
+ print(status_update)
 
 
99
 
100
+ # 5. Submit
101
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
102
  try:
 
103
  response = requests.post(submit_url, json=submission_data, timeout=60)
104
  response.raise_for_status()
105
  result_data = response.json()
 
106
  final_status = (
107
  f"Submission Successful!\n"
108
  f"User: {result_data.get('username')}\n"
 
110
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
111
  f"Message: {result_data.get('message', 'No message received.')}"
112
  )
113
+ print("Submission successful.")
114
  results_df = pd.DataFrame(results_log)
115
  return final_status, results_df
116
+ except requests.exceptions.HTTPError as e:
117
+ error_detail = f"Server responded with status {e.response.status_code}."
118
+ try:
119
+ error_json = e.response.json()
120
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
121
+ except requests.exceptions.JSONDecodeError:
122
+ error_detail += f" Response: {e.response.text[:500]}"
123
+ status_message = f"Submission Failed: {error_detail}"
124
+ print(status_message)
125
+ results_df = pd.DataFrame(results_log)
126
+ return status_message, results_df
127
+ except requests.exceptions.Timeout:
128
+ status_message = "Submission Failed: The request timed out."
129
+ print(status_message)
130
+ results_df = pd.DataFrame(results_log)
131
+ return status_message, results_df
132
+ except requests.exceptions.RequestException as e:
133
+ status_message = f"Submission Failed: Network error - {e}"
134
+ print(status_message)
135
+ results_df = pd.DataFrame(results_log)
136
+ return status_message, results_df
137
  except Exception as e:
138
+ status_message = f"An unexpected error occurred during submission: {e}"
139
+ print(status_message)
140
  results_df = pd.DataFrame(results_log)
141
+ return status_message, results_df
142
+
143
 
144
+ # --- Build Gradio Interface using Blocks ---
145
+ with gr.Blocks() as demo:
146
+ gr.Markdown("# Basic Agent Evaluation Runner")
147
  gr.Markdown(
148
  """
149
  **Instructions:**
150
 
151
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
152
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
153
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
 
 
 
 
154
 
155
  ---
156
+ **Disclaimers:**
157
+ Once you click the "submit" button, it can take quite some time (this is the time the agent needs to go through all the questions).
158
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the delay after pressing the submit button, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
159
  """
160
  )
161
 
162
  gr.LoginButton()
163
 
164
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
 
165
 
166
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
167
+ # Removed max_rows=10 from DataFrame constructor
168
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
169
 
170
+ run_button.click(
 
 
 
 
 
 
 
 
 
 
 
171
  fn=run_and_submit_all,
172
  outputs=[status_output, results_table]
173
  )
174
 
175
  if __name__ == "__main__":
176
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
177
+ # Check for SPACE_HOST and SPACE_ID at startup for information
178
+ space_host_startup = os.getenv("SPACE_HOST")
179
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
180
+
181
+ if space_host_startup:
182
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
183
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
184
  else:
185
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
186
 
187
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
188
+ print(f"✅ SPACE_ID found: {space_id_startup}")
189
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
190
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
191
  else:
192
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
193
+
194
+ print("-"*(60 + len(" App Starting ")) + "\n")
195
 
196
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
197
+ demo.launch(debug=True, share=False)
198
+