EtienneB committed on
Commit
c777509
·
1 Parent(s): 4e8db54
Files changed (3) hide show
  1. app.py +366 -166
  2. requirements.txt +21 -13
  3. tools.py +422 -122
app.py CHANGED
@@ -1,6 +1,9 @@
1
  import asyncio
2
  import inspect
 
3
  import os
 
 
4
 
5
  import gradio as gr
6
  import pandas as pd
@@ -8,35 +11,44 @@ import requests
8
  from dotenv import load_dotenv
9
  from langchain_community.chat_models import ChatHuggingFace
10
  from langchain_community.llms import HuggingFaceEndpoint
11
- from langchain_core.messages import HumanMessage
 
12
 
13
  from tools import (absolute, add, divide, exponential, floor_divide,
14
  get_current_time_in_timezone, logarithm, modulus, multiply,
15
  power, roman_calculator_converter, square_root, subtract,
16
  web_search)
17
 
18
- # (Keep Constants as is)
19
  # --- Constants ---
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
  MAX_AGENT_ITERATIONS = 15
22
-
23
 
24
  load_dotenv()
25
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
26
 
27
- # --- Basic Agent Definition ---
28
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
29
- class BasicAgent:
 
30
  def __init__(self):
31
  if not HUGGINGFACEHUB_API_TOKEN:
32
  raise ValueError("Missing Hugging Face API token. Please set HUGGINGFACEHUB_API_TOKEN.")
33
 
34
- print("BasicAgent initialized.")
 
 
35
  self.llm = HuggingFaceEndpoint(
36
  repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
37
  huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
 
 
 
38
  )
39
- self.chat = ChatHuggingFace(llm=self.llm, verbose=True)
 
 
 
40
  self.tools = [
41
  multiply, add, subtract, power, divide, modulus,
42
  square_root, floor_divide, absolute, logarithm,
@@ -46,153 +58,349 @@ class BasicAgent:
46
 
47
  self.chat_with_tools = self.chat.bind_tools(self.tools)
48
  print(f"Total tools available: {len(self.tools)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  async def answer(self, question: str) -> str:
51
- print(f"Agent received question (first 50 chars): {question[:50]}...")
52
- messages = [HumanMessage(content=question)]
53
- response = await asyncio.to_thread(self.chat_with_tools.invoke, {"messages": messages})
54
- return response['messages'][-1].content[14:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  def answer_sync(self, question: str) -> str:
57
  """Synchronous version of answer method"""
58
- print(f"Agent received question (first 50 chars): {question[:50]}...")
59
- messages = [HumanMessage(content=question)]
60
- response = self.chat_with_tools.invoke({"messages": messages})
61
- return response.content
62
-
63
- async def run_agent_async(agent, questions_data):
64
- """Run agent asynchronously on all questions"""
65
- results_log, answers_payload = [], []
66
-
67
- async def process_question(task_id, question):
68
  try:
69
- answer = await agent.answer(question)
70
- return task_id, question, answer, None
71
  except Exception as e:
72
- return task_id, question, None, str(e)
 
73
 
74
- # Create tasks for all questions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  tasks = []
76
- for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
79
- if not task_id or question_text is None:
80
- print(f"Skipping item with missing task_id or question: {item}")
81
- continue
82
- tasks.append(process_question(task_id, question_text))
 
 
 
83
 
84
- print(f"Processing {len(tasks)} questions asynchronously...")
 
 
85
 
86
- # Process all questions concurrently
87
- results = await asyncio.gather(*tasks, return_exceptions=True)
88
 
89
- for result in results:
90
- if isinstance(result, Exception):
91
- print(f"Unexpected error: {result}")
92
- continue
 
 
 
 
 
 
 
93
 
94
- task_id, question, answer, error = result
95
- if error:
96
- print(f"Error running agent on task {task_id}: {error}")
97
- results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"AGENT ERROR: {error}"})
98
- else:
99
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
100
- results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
101
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  return results_log, answers_payload
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  def run_and_submit_all(profile: gr.OAuthProfile | None):
106
- """
107
- Fetches all questions, runs the BasicAgent on them, submits all answers,
108
- and displays the results.
109
- """
110
- # --- Determine HF Space Runtime URL and Repo URL ---
111
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
112
-
113
- if profile:
114
- username = f"{profile.username}"
115
- print(f"User logged in: {username}")
116
- else:
117
- print("User not logged in.")
118
- return "Please Login to Hugging Face with the button.", None
119
 
120
  api_url = DEFAULT_API_URL
121
  questions_url = f"{api_url}/questions"
122
  submit_url = f"{api_url}/submit"
123
 
124
- # 1. Instantiate Agent (modify this part to create your agent)
125
  try:
126
- agent = BasicAgent()
127
  except Exception as e:
128
- print(f"Error instantiating agent: {e}")
129
  return f"Error initializing agent: {e}", None
130
-
131
- # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others so please keep it public)
132
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
133
- print(agent_code)
134
 
135
- # 2. Fetch Questions
136
- print(f"Fetching questions from: {questions_url}")
 
 
 
137
  try:
 
138
  response = requests.get(questions_url, timeout=15)
139
  response.raise_for_status()
140
  questions_data = response.json()
 
141
  if not questions_data:
142
- print("Fetched questions list is empty.")
143
- return "Fetched questions list is empty or invalid format.", None
144
  print(f"Fetched {len(questions_data)} questions.")
145
- except requests.exceptions.RequestException as e:
146
  print(f"Error fetching questions: {e}")
147
  return f"Error fetching questions: {e}", None
148
- except requests.exceptions.JSONDecodeError as e:
149
- print(f"Error decoding JSON response from questions endpoint: {e}")
150
- print(f"Response text: {response.text[:500]}")
151
- return f"Error decoding server response for questions: {e}", None
152
- except Exception as e:
153
- print(f"An unexpected error occurred fetching questions: {e}")
154
- return f"An unexpected error occurred fetching questions: {e}", None
155
 
156
- # 3. Run your Agent
157
- results_log = []
158
- answers_payload = []
159
-
160
- # Try async approach first, fall back to sync if needed
161
  try:
162
- print(f"Running agent asynchronously on {len(questions_data)} questions...")
163
- results_log, answers_payload = asyncio.run(run_agent_async(agent, questions_data))
164
  except Exception as e:
165
- print(f"Async processing failed: {e}, falling back to synchronous processing...")
166
- # Fallback to synchronous processing
167
- for item in questions_data:
168
- task_id = item.get("task_id")
169
- question_text = item.get("question")
170
- if not task_id or question_text is None:
171
- print(f"Skipping item with missing task_id or question: {item}")
172
- continue
173
- try:
174
- submitted_answer = agent.answer_sync(question_text)
175
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
176
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
177
- except Exception as e:
178
- print(f"Error running agent on task {task_id}: {e}")
179
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
180
 
181
  if not answers_payload:
182
- print("Agent did not produce any answers to submit.")
183
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
184
 
185
- # 4. Prepare Submission
186
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
187
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
188
- print(status_update)
 
 
189
 
190
- # 5. Submit
191
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
192
  try:
 
193
  response = requests.post(submit_url, json=submission_data, timeout=60)
194
  response.raise_for_status()
195
  result_data = response.json()
 
196
  final_status = (
197
  f"Submission Successful!\n"
198
  f"User: {result_data.get('username')}\n"
@@ -200,89 +408,81 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
200
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
201
  f"Message: {result_data.get('message', 'No message received.')}"
202
  )
203
- print("Submission successful.")
204
  results_df = pd.DataFrame(results_log)
205
  return final_status, results_df
206
- except requests.exceptions.HTTPError as e:
207
- error_detail = f"Server responded with status {e.response.status_code}."
208
- try:
209
- error_json = e.response.json()
210
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
211
- except requests.exceptions.JSONDecodeError:
212
- error_detail += f" Response: {e.response.text[:500]}"
213
- status_message = f"Submission Failed: {error_detail}"
214
- print(status_message)
215
- results_df = pd.DataFrame(results_log)
216
- return status_message, results_df
217
- except requests.exceptions.Timeout:
218
- status_message = "Submission Failed: The request timed out."
219
- print(status_message)
220
- results_df = pd.DataFrame(results_log)
221
- return status_message, results_df
222
- except requests.exceptions.RequestException as e:
223
- status_message = f"Submission Failed: Network error - {e}"
224
- print(status_message)
225
- results_df = pd.DataFrame(results_log)
226
- return status_message, results_df
227
  except Exception as e:
228
- status_message = f"An unexpected error occurred during submission: {e}"
229
- print(status_message)
230
  results_df = pd.DataFrame(results_log)
231
- return status_message, results_df
232
-
233
 
234
- # --- Build Gradio Interface using Blocks ---
235
- with gr.Blocks() as demo:
236
- gr.Markdown("# Basic Agent Evaluation Runner")
237
  gr.Markdown(
238
  """
239
  **Instructions:**
240
 
241
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
242
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
243
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
 
 
 
 
244
 
245
  ---
246
- **Disclaimers:**
247
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
248
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
249
  """
250
  )
251
 
252
  gr.LoginButton()
253
 
254
- run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
 
255
 
256
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
257
- # Removed max_rows=10 from DataFrame constructor
258
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
259
 
260
- run_button.click(
 
 
 
 
 
 
 
 
 
 
 
261
  fn=run_and_submit_all,
262
  outputs=[status_output, results_table]
263
  )
264
 
265
  if __name__ == "__main__":
266
- print("\n" + "-"*30 + " App Starting " + "-"*30)
267
- # Check for SPACE_HOST and SPACE_ID at startup for information
268
- space_host_startup = os.getenv("SPACE_HOST")
269
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
270
-
271
- if space_host_startup:
272
- print(f"✅ SPACE_HOST found: {space_host_startup}")
273
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
274
- else:
275
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
276
 
277
- if space_id_startup: # Print repo URLs if SPACE_ID is found
278
- print(f"✅ SPACE_ID found: {space_id_startup}")
279
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
280
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
281
  else:
282
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
283
 
284
- print("-"*(60 + len(" App Starting ")) + "\n")
 
 
 
 
285
 
286
- print("Launching Gradio Interface for Basic Agent Evaluation...")
287
- demo.launch(debug=True, share=False)
288
-
 
1
  import asyncio
2
  import inspect
3
+ import json
4
  import os
5
+ import time
6
+ from typing import Any, Dict, List, Optional
7
 
8
  import gradio as gr
9
  import pandas as pd
 
11
  from dotenv import load_dotenv
12
  from langchain_community.chat_models import ChatHuggingFace
13
  from langchain_community.llms import HuggingFaceEndpoint
14
+ from langchain_core.messages import AIMessage, HumanMessage
15
+ from langchain_core.tools import StructuredTool
16
 
17
  from tools import (absolute, add, divide, exponential, floor_divide,
18
  get_current_time_in_timezone, logarithm, modulus, multiply,
19
  power, roman_calculator_converter, square_root, subtract,
20
  web_search)
21
 
 
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
  MAX_AGENT_ITERATIONS = 15
25
+ MAX_CONCURRENT_REQUESTS = 5 # Limit concurrent requests to avoid overwhelming the API
26
 
27
  load_dotenv()
28
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
29
 
30
+ # Global cache for answers
31
+ answer_cache = {}
32
+
33
+ class ImprovedAgent:
34
  def __init__(self):
35
  if not HUGGINGFACEHUB_API_TOKEN:
36
  raise ValueError("Missing Hugging Face API token. Please set HUGGINGFACEHUB_API_TOKEN.")
37
 
38
+ print("ImprovedAgent initialized.")
39
+
40
+ # Initialize LLM with better parameters
41
  self.llm = HuggingFaceEndpoint(
42
  repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
43
  huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
44
+ temperature=0.1, # Lower temperature for more consistent responses
45
+ max_new_tokens=1024,
46
+ timeout=30,
47
  )
48
+
49
+ self.chat = ChatHuggingFace(llm=self.llm, verbose=False)
50
+
51
+ # Initialize tools
52
  self.tools = [
53
  multiply, add, subtract, power, divide, modulus,
54
  square_root, floor_divide, absolute, logarithm,
 
58
 
59
  self.chat_with_tools = self.chat.bind_tools(self.tools)
60
  print(f"Total tools available: {len(self.tools)}")
61
+
62
+ # Create tool mapping for easier access
63
+ self.tool_map = {tool.name: tool for tool in self.tools}
64
+
65
def _extract_tool_calls(self, response) -> List[Dict]:
    """Return the tool invocations requested by a model response.

    Args:
        response: The message returned by the chat model. Only its optional
            ``tool_calls`` attribute is read; every entry is expected to be
            a mapping with a ``'name'`` key and, usually, an ``'args'`` key.

    Returns:
        One ``{'name': ..., 'args': ...}`` dict per requested call, in the
        order the model listed them. The list is empty when ``response`` has
        no ``tool_calls`` attribute or the attribute is empty or ``None``.
    """
    requested = getattr(response, 'tool_calls', None) or []
    return [
        # A call emitted without arguments (missing key or None) is
        # normalised to an empty dict so zero-argument tools still dispatch.
        {'name': call['name'], 'args': call.get('args') or {}}
        for call in requested
    ]
75
+
76
def _execute_tool_calls(self, tool_calls: List[Dict]) -> List[str]:
    """Run each requested tool and describe its outcome as text.

    Args:
        tool_calls: Requests shaped like ``{'name': ..., 'args': ...}``.

    Returns:
        One string per request, in request order:
        ``"Tool <name> result: <value>"`` on success,
        ``"Tool <name> error: <message>"`` when the tool raised, or
        ``"Unknown tool: <name>"`` when ``self.tool_map`` has no such tool.
        Failures never propagate, so a single bad call cannot discard the
        results of the others.
    """
    outcomes = []
    for call in tool_calls:
        # Read the request defensively: a request lacking 'name' or 'args'
        # is reported like any other failure instead of raising KeyError
        # outside the per-tool error handling.
        name = call.get('name')
        tool = self.tool_map.get(name)
        if tool is None:
            outcomes.append(f"Unknown tool: {name}")
            continue
        try:
            outcomes.append(f"Tool {name} result: {tool.invoke(call.get('args') or {})}")
        except Exception as exc:
            outcomes.append(f"Tool {name} error: {exc}")
    return outcomes
94
 
95
  async def answer(self, question: str) -> str:
96
+ """Improved answer method with better error handling and tool usage"""
97
+ print(f"Processing question: {question[:100]}...")
98
+
99
+ try:
100
+ # Create system prompt for better instruction following
101
+ system_prompt = """You are a helpful AI assistant with access to various tools.
102
+ When answering questions, use the appropriate tools when needed and provide clear, concise answers.
103
+ If you need to perform calculations, use the math tools available.
104
+ If you need current information, use the web search tool.
105
+ Always provide a final answer after using tools."""
106
+
107
+ messages = [
108
+ HumanMessage(content=f"{system_prompt}\n\nQuestion: {question}")
109
+ ]
110
+
111
+ # Initial response
112
+ response = await asyncio.to_thread(self.chat_with_tools.invoke, messages)
113
+
114
+ # Handle tool calls if present
115
+ max_iterations = 3
116
+ iteration = 0
117
+
118
+ while iteration < max_iterations:
119
+ tool_calls = self._extract_tool_calls(response)
120
+
121
+ if not tool_calls:
122
+ break
123
+
124
+ # Execute tool calls
125
+ tool_results = self._execute_tool_calls(tool_calls)
126
+
127
+ # Add tool results to conversation
128
+ messages.append(AIMessage(content=response.content))
129
+ messages.append(HumanMessage(content=f"Tool results: {'; '.join(tool_results)}. Please provide a final answer based on these results."))
130
+
131
+ # Get next response
132
+ response = await asyncio.to_thread(self.chat_with_tools.invoke, messages)
133
+ iteration += 1
134
+
135
+ # Extract final answer
136
+ final_answer = response.content.strip()
137
+
138
+ # Clean up the response - remove any tool call artifacts
139
+ if "Tool " in final_answer and "result:" in final_answer:
140
+ # Try to extract just the final answer part
141
+ lines = final_answer.split('\n')
142
+ for line in reversed(lines):
143
+ if line.strip() and not line.startswith('Tool ') and not 'result:' in line:
144
+ final_answer = line.strip()
145
+ break
146
+
147
+ return final_answer
148
+
149
+ except Exception as e:
150
+ print(f"Error in answer method: {e}")
151
+ return f"Error processing question: {str(e)}"
152
 
153
  def answer_sync(self, question: str) -> str:
154
  """Synchronous version of answer method"""
 
 
 
 
 
 
 
 
 
 
155
  try:
156
+ return asyncio.run(self.answer(question))
 
157
  except Exception as e:
158
+ print(f"Error in sync answer: {e}")
159
+ return f"Error: {str(e)}"
160
 
161
async def process_questions_batch(agent, questions_batch, semaphore):
    """Answer a batch of questions concurrently, bounded by ``semaphore``.

    Answers are memoised in the module-level ``answer_cache`` under a key
    built from the task id and the hash of the question text.

    Args:
        agent: Object exposing an awaitable ``answer(question)``.
        questions_batch: Iterable of dicts read via ``"task_id"`` and
            ``"question"``. Items with a falsy task id or a ``None``
            question are logged and skipped.
        semaphore: Async context manager limiting how many questions are
            in flight at once.

    Returns:
        A list with one entry per processed item, in input order. Each entry
        is ``(task_id, question, answer, None)`` on success or
        ``(task_id, question, None, error_message)`` when answering raised.
        Because ``asyncio.gather`` is called with ``return_exceptions=True``
        an entry may also be an exception object. The list is empty when no
        item was processable.
    """
    # Prefix used by the agent's `answer` when it reports a failure as text.
    error_prefix = "Error processing question:"

    async def process_single_question(task_id, question):
        async with semaphore:
            try:
                cache_key = f"{task_id}_{hash(question)}"
                if cache_key in answer_cache:
                    print(f"Using cached answer for task {task_id}")
                    return task_id, question, answer_cache[cache_key], None

                answer = await agent.answer(question)

                # Do not memoise failure texts: caching them would replay a
                # transient error forever instead of retrying next run.
                failed = isinstance(answer, str) and answer.startswith(error_prefix)
                if not failed:
                    answer_cache[cache_key] = answer

                return task_id, question, answer, None
            except Exception as e:
                print(f"Error processing task {task_id}: {e}")
                return task_id, question, None, str(e)

    tasks = []
    for item in questions_batch:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        tasks.append(process_single_question(task_id, question_text))

    if not tasks:
        return []
    return await asyncio.gather(*tasks, return_exceptions=True)
196
 
197
def _clip_for_display(value, limit):
    """Return ``value`` as text, cut to ``limit`` characters plus "..." if longer."""
    text = value if isinstance(value, str) else str(value)
    return text[:limit] + "..." if len(text) > limit else text


async def run_agent_async_improved(agent, questions_data):
    """Run the agent over all questions in batches of ten.

    Each batch is handed to ``process_questions_batch`` with a shared
    semaphore sized by ``MAX_CONCURRENT_REQUESTS``. Between batches the
    coroutine sleeps for one second.

    Args:
        agent: Passed through to ``process_questions_batch``.
        questions_data: Sliceable sequence of question items.

    Returns:
        ``(results_log, answers_payload)``. ``results_log`` holds one row per
        processed question with the keys ``"Task ID"``, ``"Question"`` (at
        most 100 characters plus "...") and ``"Submitted Answer"`` (at most
        200 characters plus "...", or ``"ERROR: <message>"``).
        ``answers_payload`` holds ``{"task_id", "submitted_answer"}`` dicts
        with the untruncated answers of the successful questions only.
        Results that are exception objects are printed and skipped; a batch
        that raises is printed and the next batch is attempted.
    """
    results_log, answers_payload = [], []

    semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)

    batch_size = 10
    batches = [questions_data[i:i + batch_size] for i in range(0, len(questions_data), batch_size)]

    print(f"Processing {len(questions_data)} questions in {len(batches)} batches...")

    for i, batch in enumerate(batches):
        print(f"Processing batch {i+1}/{len(batches)} ({len(batch)} questions)...")

        try:
            batch_results = await process_questions_batch(agent, batch, semaphore)

            for result in batch_results:
                if isinstance(result, Exception):
                    print(f"Batch processing error: {result}")
                    continue

                task_id, question, answer, error = result

                if error:
                    print(f"Error in task {task_id}: {error}")
                    shown_answer = f"ERROR: {error}"
                else:
                    answers_payload.append({"task_id": task_id, "submitted_answer": answer})
                    # Only the display copy is clipped (and coerced to text,
                    # so an unexpected non-string value cannot abort the
                    # remaining results of this batch).
                    shown_answer = _clip_for_display(answer, 200)

                results_log.append({
                    "Task ID": task_id,
                    "Question": _clip_for_display(question, 100),
                    "Submitted Answer": shown_answer,
                })

            # Small delay between batches to be respectful
            if i < len(batches) - 1:
                await asyncio.sleep(1)

        except Exception as e:
            # Best effort: report and move on to the next batch.
            print(f"Error processing batch {i+1}: {e}")

    return results_log, answers_payload
248
 
249
def cache_answers(profile: gr.OAuthProfile | None):
    """Fetch the questions, run the agent, and keep the answers for later submission.

    Nothing is sent to the scoring endpoint. On success the answer payload
    is stored in the module-level ``answer_cache`` under ``"user_<username>"``.

    Args:
        profile: The logged-in user's profile, or ``None`` when nobody is
            logged in.

    Returns:
        ``(status_message, results)`` where ``results`` is a DataFrame of
        the per-question log, or ``None`` when there is nothing to show
        (not logged in, no questions, empty log, or an error).
    """
    if not profile:
        return "Please log in to Hugging Face first.", None

    username = profile.username
    print(f"Caching answers for user: {username}")

    questions_url = f"{DEFAULT_API_URL}/questions"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()

        if not questions_data:
            return "No questions found.", None

        print(f"Fetched {len(questions_data)} questions for caching.")

        agent = ImprovedAgent()
        results_log, answers_payload = asyncio.run(run_agent_async_improved(agent, questions_data))

        if not answers_payload:
            # Nothing usable was produced: keep any earlier cache intact and
            # do not claim the run is ready to submit.
            return "No answers generated.", pd.DataFrame(results_log) if results_log else None

        # Store in global cache with username
        answer_cache[f"user_{username}"] = answers_payload

        status = f"Cached {len(answers_payload)} answers for user {username}. Ready to submit!"
        return status, pd.DataFrame(results_log)

    except Exception as e:
        print(f"Error caching answers: {e}")
        return f"Error caching answers: {e}", None
288
+
289
def submit_cached_answers(profile: gr.OAuthProfile | None):
    """Submit the answers previously stored for the logged-in user.

    The payload is read from the module-level ``answer_cache`` under
    ``"user_<username>"`` and posted to ``<DEFAULT_API_URL>/submit``. The
    cache entry is removed only after a successful submission, so a failed
    attempt can be retried.

    Args:
        profile: The logged-in user's profile, or ``None`` when nobody is
            logged in.

    Returns:
        ``(status_message, None)``; the second element is always ``None``.
    """
    if not profile:
        return "Please log in to Hugging Face first.", None

    username = profile.username
    cache_key = f"user_{username}"

    try:
        answers_payload = answer_cache[cache_key]
    except KeyError:
        return "No cached answers found. Please run 'Cache Answers' first.", None

    if not answers_payload:
        return "No answers to submit.", None

    # Link to the Space's code, when running inside a Space.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"

    submit_url = f"{DEFAULT_API_URL}/submit"
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        print(f"Submitting {len(answers_payload)} cached answers...")
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )

        # Clear cache after successful submission
        answer_cache.pop(cache_key, None)

        return final_status, None

    except Exception as e:
        print(f"Submission error: {e}")
        return f"Submission failed: {e}", None
342
 
343
  def run_and_submit_all(profile: gr.OAuthProfile | None):
344
+ """Original function - now improved with better error handling"""
345
+ if not profile:
346
+ return "Please log in to Hugging Face first.", None
347
+
348
+ username = profile.username
349
+ print(f"User logged in: {username}")
 
 
 
 
 
 
 
350
 
351
  api_url = DEFAULT_API_URL
352
  questions_url = f"{api_url}/questions"
353
  submit_url = f"{api_url}/submit"
354
 
355
+ # Initialize agent
356
  try:
357
+ agent = ImprovedAgent()
358
  except Exception as e:
359
+ print(f"Error initializing agent: {e}")
360
  return f"Error initializing agent: {e}", None
 
 
 
 
361
 
362
+ # Get space info
363
+ space_id = os.getenv("SPACE_ID")
364
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
365
+
366
+ # Fetch questions
367
  try:
368
+ print(f"Fetching questions from: {questions_url}")
369
  response = requests.get(questions_url, timeout=15)
370
  response.raise_for_status()
371
  questions_data = response.json()
372
+
373
  if not questions_data:
374
+ return "No questions found.", None
375
+
376
  print(f"Fetched {len(questions_data)} questions.")
377
+ except Exception as e:
378
  print(f"Error fetching questions: {e}")
379
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
380
 
381
+ # Process questions
 
 
 
 
382
  try:
383
+ results_log, answers_payload = asyncio.run(run_agent_async_improved(agent, questions_data))
 
384
  except Exception as e:
385
+ print(f"Error processing questions: {e}")
386
+ return f"Error processing questions: {e}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
  if not answers_payload:
389
+ return "No answers generated.", pd.DataFrame(results_log) if results_log else None
 
390
 
391
+ # Submit answers
392
+ submission_data = {
393
+ "username": username.strip(),
394
+ "agent_code": agent_code,
395
+ "answers": answers_payload
396
+ }
397
 
 
 
398
  try:
399
+ print(f"Submitting {len(answers_payload)} answers...")
400
  response = requests.post(submit_url, json=submission_data, timeout=60)
401
  response.raise_for_status()
402
  result_data = response.json()
403
+
404
  final_status = (
405
  f"Submission Successful!\n"
406
  f"User: {result_data.get('username')}\n"
 
408
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
409
  f"Message: {result_data.get('message', 'No message received.')}"
410
  )
411
+
412
  results_df = pd.DataFrame(results_log)
413
  return final_status, results_df
414
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
  except Exception as e:
416
+ print(f"Submission error: {e}")
 
417
  results_df = pd.DataFrame(results_log)
418
+ return f"Submission failed: {e}", results_df
 
419
 
420
+ # --- Build Gradio Interface ---
421
+ with gr.Blocks(title="Improved Agent Evaluation") as demo:
422
+ gr.Markdown("# Improved Agent Evaluation Runner")
423
  gr.Markdown(
424
  """
425
  **Instructions:**
426
 
427
+ 1. Log in to your Hugging Face account using the button below.
428
+ 2. **Recommended**: Use "Cache Answers" to process all questions first, then "Submit Cached Answers" to submit them.
429
+ 3. **Alternative**: Use "Run & Submit All" for the original one-step process.
430
+
431
+ **Improvements:**
432
+ - ✅ Async processing with rate limiting
433
+ - ✅ Answer caching for faster resubmissions
434
+ - ✅ Better error handling and recovery
435
+ - ✅ Batch processing to avoid timeouts
436
+ - ✅ Improved tool usage and response parsing
437
 
438
  ---
 
 
 
439
  """
440
  )
441
 
442
  gr.LoginButton()
443
 
444
+ with gr.Row():
445
+ cache_button = gr.Button("🔄 Cache Answers", variant="secondary")
446
+ submit_button = gr.Button("📤 Submit Cached Answers", variant="primary")
447
+ run_all_button = gr.Button("🚀 Run & Submit All", variant="secondary")
448
 
449
+ status_output = gr.Textbox(label="Status", lines=6, interactive=False)
 
450
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
451
 
452
+ # Wire up the buttons
453
+ cache_button.click(
454
+ fn=cache_answers,
455
+ outputs=[status_output, results_table]
456
+ )
457
+
458
+ submit_button.click(
459
+ fn=submit_cached_answers,
460
+ outputs=[status_output, results_table]
461
+ )
462
+
463
+ run_all_button.click(
464
  fn=run_and_submit_all,
465
  outputs=[status_output, results_table]
466
  )
467
 
468
  if __name__ == "__main__":
469
+ print("\n" + "-"*30 + " Improved App Starting " + "-"*30)
470
+
471
+ space_host = os.getenv("SPACE_HOST")
472
+ space_id = os.getenv("SPACE_ID")
 
 
 
 
 
 
473
 
474
+ if space_host:
475
+ print(f"✅ SPACE_HOST: {space_host}")
476
+ print(f" Runtime URL: https://{space_host}.hf.space")
 
477
  else:
478
+ print("ℹ️ Running locally - SPACE_HOST not found.")
479
 
480
+ if space_id:
481
+ print(f"✅ SPACE_ID: {space_id}")
482
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id}")
483
+ else:
484
+ print("ℹ️ SPACE_ID not found.")
485
 
486
+ print("-" * 76 + "\n")
487
+ print("Launching Improved Gradio Interface...")
488
+ demo.launch(debug=True, share=False)
requirements.txt CHANGED
@@ -1,22 +1,30 @@
1
  # UI and OAuth
2
- gradio[oauth]
3
- requests
4
- pandas
5
 
6
  # LangChain and ecosystem
7
- langchain
8
- langchain-core
9
- langchain-community
10
- langgraph
11
 
12
  # Hugging Face integration
13
- huggingface_hub
14
- transformers
15
- accelerate # Needed for many transformer-based models
16
 
17
  # Environment config
18
- python-dotenv
19
 
20
  # Tools dependencies
21
- duckduckgo-search # Required for web_search tool
22
- pytz # Required for get_current_time_in_timezone tool
 
 
 
 
 
 
 
 
 
1
  # UI and OAuth
2
+ gradio[oauth]>=4.0.0
3
+ requests>=2.31.0
4
+ pandas>=2.0.0
5
 
6
  # LangChain and ecosystem
7
+ langchain>=0.1.0
8
+ langchain-core>=0.1.0
9
+ langchain-community>=0.0.20
10
+ langgraph>=0.0.30
11
 
12
  # Hugging Face integration
13
+ huggingface_hub>=0.19.0
14
+ transformers>=4.35.0
15
+ accelerate>=0.24.0 # Needed for many transformer-based models
16
 
17
  # Environment config
18
+ python-dotenv>=1.0.0
19
 
20
  # Tools dependencies
21
+ duckduckgo-search>=3.9.0 # Required for web_search tool
22
+ pytz>=2023.3 # Required for get_current_time_in_timezone tool
23
+
24
+ # Additional utilities for better error handling and performance
25
+ typing-extensions>=4.8.0
26
+ asyncio-throttle>=1.0.2 # For rate limiting (optional)
27
+ tenacity>=8.2.0 # For retry logic (optional)
28
+
29
+ # Optional: For better logging and monitoring
30
+ loguru>=0.7.0 # Better logging (optional)
tools.py CHANGED
@@ -1,5 +1,7 @@
1
  import datetime
2
  import math
 
 
3
 
4
  import pytz
5
  from langchain_community.tools import DuckDuckGoSearchRun
@@ -7,180 +9,212 @@ from langchain_core.tools import tool
7
 
8
 
9
  @tool
10
- def multiply(a: int, b:int) -> int:
11
- """Multiplies two integers and returns the product.
12
 
13
  Args:
14
- a (int): The first integer.
15
- b (int): The second integer.
16
 
17
  Returns:
18
- int: The product of the two input integers.
19
  """
20
- return a * b
 
 
 
 
21
 
22
 
23
  @tool
24
- def add(a: int, b:int) -> int:
25
- """Adds two integers and returns the sum.
26
 
27
  Args:
28
- a (int): The first integer.
29
- b (int): The second integer.
30
 
31
  Returns:
32
- int: The sum of the two input integers.
33
  """
34
- return a + b
 
 
 
 
35
 
36
 
37
  @tool
38
- def power(a: float, b: float) -> float:
39
  """Raises a number to the power of another.
40
 
41
  Args:
42
- a (float): The base number.
43
- b (float): The exponent.
44
 
45
  Returns:
46
- float: The result of raising `a` to the power of `b`.
47
  """
48
- return a ** b
 
 
 
 
 
 
 
 
49
 
50
 
51
  @tool
52
- def subtract(a: float, b: float) -> float:
53
  """Subtracts the second number from the first.
54
 
55
  Args:
56
- a (float): The number from which to subtract.
57
- b (float): The number to subtract.
58
 
59
  Returns:
60
- float: The result of `a` minus `b`.
61
  """
62
- return a - b
 
 
 
 
63
 
64
 
65
  @tool
66
- def divide(a: float, b: float) -> float:
67
  """Divides one number by another.
68
 
69
  Args:
70
- a (float): The numerator.
71
- b (float): The denominator.
72
 
73
  Returns:
74
- float: The result of `a` divided by `b`.
75
-
76
- Raises:
77
- ValueError: If `b` is zero.
78
  """
79
- if b == 0:
80
- raise ValueError("Divide by zero is not allowed")
81
- return a / b
 
 
 
82
 
83
 
84
  @tool
85
- def modulus(a: int, b: int) -> int:
86
  """Returns the remainder of the division of two integers.
87
 
88
  Args:
89
- a (int): The dividend.
90
- b (int): The divisor.
91
 
92
  Returns:
93
- int: The remainder when `a` is divided by `b`.
94
-
95
- Raises:
96
- ValueError: If `b` is zero.
97
  """
98
- if b == 0:
99
- raise ValueError("Modulus by zero is not allowed")
100
- return a % b
 
 
 
101
 
102
 
103
  @tool
104
- def square_root(x: float) -> float:
105
  """Returns the square root of a number.
106
 
107
  Args:
108
- x (float): The input number. Must be non-negative.
109
 
110
  Returns:
111
- float: The square root of `x`.
112
-
113
- Raises:
114
- ValueError: If `x` is negative.
115
  """
116
- if x < 0:
117
- raise ValueError("Square root of negative number is not allowed")
118
- return math.sqrt(x)
 
 
 
119
 
120
 
121
  @tool
122
- def floor_divide(a: int, b: int) -> int:
123
  """Performs integer division (floor division) of two numbers.
124
 
125
  Args:
126
- a (int): The dividend.
127
- b (int): The divisor.
128
 
129
  Returns:
130
- int: The floor of the quotient.
131
- Returns the quotient rounded down to the nearest integer.
132
-
133
- Raises:
134
- ValueError: If `b` is zero.
135
  """
136
- if b == 0:
137
- raise ValueError("Division by zero is not allowed")
138
- return a // b
 
 
 
139
 
140
 
141
  @tool
142
- def absolute(x: float) -> float:
143
  """Returns the absolute value of a number.
144
 
145
  Args:
146
- x (float): The input number.
147
 
148
  Returns:
149
- float: The absolute value of `x`.
150
  """
151
- return abs(x)
 
 
 
 
152
 
153
 
154
  @tool
155
- def logarithm(x: float, base: float = math.e) -> float:
156
  """Returns the logarithm of a number with a given base.
157
 
158
  Args:
159
- x (float): The number to take the logarithm of. Must be positive.
160
- base (float): The logarithmic base. Must be positive and not equal to 1.
161
 
162
  Returns:
163
- float: The logarithm of `x` to the given base.
164
-
165
- Raises:
166
- ValueError: If `x <= 0` or `base <= 0` or `base == 1`.
167
  """
168
- if x <= 0 or base <= 0 or base == 1:
169
- raise ValueError("Invalid input for logarithm")
170
- return math.log(x, base)
 
 
 
 
 
171
 
172
 
173
  @tool
174
- def exponential(x: float) -> float:
175
  """Returns e raised to the power of `x`.
176
 
177
  Args:
178
- x (float): The exponent.
179
 
180
  Returns:
181
- float: The value of e^x.
182
  """
183
- return math.exp(x)
 
 
 
 
 
 
 
184
 
185
 
186
  @tool
@@ -188,67 +222,333 @@ def web_search(query: str) -> str:
188
  """Performs a DuckDuckGo search for the given query and returns the results.
189
 
190
  Args:
191
- query (str): The search query.
192
 
193
  Returns:
194
- str: The top search results as a string.
195
  """
196
- search_tool = DuckDuckGoSearchRun()
197
- return search_tool.invoke(query)
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
 
200
  @tool
201
  def roman_calculator_converter(value1: int, value2: int, oper: str) -> str:
202
- """A tool that performs an operator on 2 numbers to calculate the result
203
- Args:
204
- value1: the first value
205
- value2: the second value
206
- oper: operator for the calculation, like "add", "subtract", "multiply", "divide"
207
- """
208
- roman_numerals = {
209
- 1000: "M", 900: "CM", 500: "D", 400: "CD",
210
- 100: "C", 90: "XC", 50: "L", 40: "XL",
211
- 10: "X", 9: "IX", 5: "V", 4: "IV", 1: "I"
212
- }
213
- roman_string = ""
214
-
215
- if oper == "add":
216
- result = value1 + value2
217
- elif oper == "subtract":
218
- result = value1 - value2 # Fixed: was value2 - value1
219
- elif oper == "divide":
220
- if value2 == 0:
221
- return "Error: Division by zero is not allowed"
222
- result = int(value1 / value2) # Convert to int for Roman numerals
223
- elif oper == "multiply":
224
- result = value1 * value2
225
- else:
226
- return "Unsupported operation. Please use 'add', 'subtract', 'multiply', or 'divide'."
227
-
228
- # Handle negative results
229
- if result <= 0:
230
- return f"Error: Roman numerals cannot represent zero or negative numbers. Result was: {result}"
231
 
232
- for value, numeral in roman_numerals.items():
233
- while result >= value:
234
- roman_string += numeral
235
- result -= value
236
 
237
- return f"The result of {oper} on the values {value1} and {value2} is the Roman numeral: {roman_string}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
 
240
  @tool
241
  def get_current_time_in_timezone(timezone: str) -> str:
242
- """A tool that fetches the current local time in a specified timezone.
 
243
  Args:
244
- timezone: A string representing a valid timezone (e.g., 'America/New_York').
 
 
 
245
  """
246
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  # Create timezone object
248
  tz = pytz.timezone(timezone)
 
249
  # Get current time in that timezone
250
- local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
251
- return f"The current local time in {timezone} is: {local_time}"
 
 
 
 
 
252
  except Exception as e:
253
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
 
1
  import datetime
2
  import math
3
+ import re
4
+ from typing import Union
5
 
6
  import pytz
7
  from langchain_community.tools import DuckDuckGoSearchRun
 
9
 
10
 
11
  @tool
12
def multiply(a: Union[int, float], b: Union[int, float]) -> Union[int, float]:
    """Return the product of two numbers.

    Args:
        a: The first factor.
        b: The second factor.

    Returns:
        The product ``a * b``. If the multiplication itself raises, an
        "Error in multiplication: ..." message string is returned instead.
    """
    try:
        product = a * b
    except Exception as exc:
        # Tools report failures as text so the calling agent can read them.
        return f"Error in multiplication: {str(exc)}"

    both_integers = isinstance(a, int) and isinstance(b, int)
    if both_integers:
        return int(product)
    return product
27
 
28
 
29
  @tool
30
def add(a: Union[int, float], b: Union[int, float]) -> Union[int, float]:
    """Return the sum of two numbers.

    Args:
        a: The first addend.
        b: The second addend.

    Returns:
        The sum ``a + b``. If the addition itself raises, an
        "Error in addition: ..." message string is returned instead.
    """
    try:
        total = a + b
    except Exception as exc:
        # Tools report failures as text so the calling agent can read them.
        return f"Error in addition: {str(exc)}"

    both_integers = isinstance(a, int) and isinstance(b, int)
    if both_integers:
        return int(total)
    return total
45
 
46
 
47
  @tool
48
def power(a: Union[int, float], b: Union[int, float]) -> Union[int, float, str]:
    """Raises a number to the power of another.

    Args:
        a: The base number.
        b: The exponent.

    Returns:
        The result of raising `a` to the power of `b`, or an "Error: ..."
        message string when the result is undefined, not a real number,
        or too large to compute.
    """
    try:
        if a == 0 and b < 0:
            return "Error: Cannot raise 0 to a negative power"
        result = a ** b
    except OverflowError:
        return "Error: Result too large to compute"
    except Exception as e:
        return f"Error in power calculation: {str(e)}"

    # In Python 3 a negative base with a fractional exponent silently yields
    # a complex number; report it instead of handing a complex to the caller.
    if isinstance(result, complex):
        return "Error: Result is not a real number (negative base with fractional exponent)"
    return result
67
 
68
 
69
  @tool
70
def subtract(a: Union[int, float], b: Union[int, float]) -> Union[int, float]:
    """Return the difference of two numbers.

    Args:
        a: The value to subtract from.
        b: The value to subtract.

    Returns:
        The difference ``a - b``. If the subtraction itself raises, an
        "Error in subtraction: ..." message string is returned instead.
    """
    try:
        difference = a - b
    except Exception as exc:
        # Tools report failures as text so the calling agent can read them.
        return f"Error in subtraction: {str(exc)}"

    both_integers = isinstance(a, int) and isinstance(b, int)
    if both_integers:
        return int(difference)
    return difference
85
 
86
 
87
  @tool
88
def divide(a: Union[int, float], b: Union[int, float]) -> float:
    """Return the true-division quotient of two numbers.

    Args:
        a: The numerator.
        b: The denominator.

    Returns:
        The quotient ``a / b``. A zero denominator, or any exception raised
        while dividing, produces an error message string instead.
    """
    try:
        denominator_is_zero = b == 0
        if not denominator_is_zero:
            return a / b
        return "Error: Division by zero is not allowed"
    except Exception as exc:
        return f"Error in division: {str(exc)}"
104
 
105
 
106
  @tool
107
+ def modulus(a: int, b: int) -> Union[int, str]:
108
  """Returns the remainder of the division of two integers.
109
 
110
  Args:
111
+ a: The dividend.
112
+ b: The divisor.
113
 
114
  Returns:
115
+ The remainder when `a` is divided by `b`.
 
 
 
116
  """
117
+ try:
118
+ if b == 0:
119
+ return "Error: Modulus by zero is not allowed"
120
+ return a % b
121
+ except Exception as e:
122
+ return f"Error in modulus operation: {str(e)}"
123
 
124
 
125
  @tool
126
def square_root(x: Union[int, float]) -> Union[float, str]:
    """Return the square root of a non-negative number.

    Args:
        x: The number whose root is wanted. Must be non-negative.

    Returns:
        ``math.sqrt(x)``. A negative input, or any exception raised while
        computing, produces an error message string instead.
    """
    try:
        is_negative = x < 0
        return (
            "Error: Square root of negative number is not allowed"
            if is_negative
            else math.sqrt(x)
        )
    except Exception as exc:
        return f"Error in square root calculation: {str(exc)}"
141
 
142
 
143
  @tool
144
def floor_divide(a: int, b: int) -> Union[int, str]:
    """Return the floor-division quotient of two integers.

    Args:
        a: The dividend.
        b: The divisor.

    Returns:
        The quotient ``a // b`` (rounded toward negative infinity). A zero
        divisor, or any exception raised while computing, produces an error
        message string instead.
    """
    try:
        divisor_is_zero = b == 0
        if not divisor_is_zero:
            return a // b
        return "Error: Division by zero is not allowed"
    except Exception as exc:
        return f"Error in floor division: {str(exc)}"
160
 
161
 
162
  @tool
163
def absolute(x: Union[int, float]) -> Union[int, float]:
    """Return the magnitude of a number.

    Args:
        x: The number whose absolute value is wanted.

    Returns:
        ``abs(x)``. If computing it raises, an
        "Error in absolute value calculation: ..." message string is
        returned instead.
    """
    try:
        magnitude = abs(x)
    except Exception as exc:
        return f"Error in absolute value calculation: {str(exc)}"

    if isinstance(x, int):
        return int(magnitude)
    return magnitude
177
 
178
 
179
  @tool
180
def logarithm(x: Union[int, float], base: Union[int, float] = math.e) -> Union[float, str]:
    """Returns the logarithm of a number with a given base.

    Args:
        x: The number to take the logarithm of. Must be positive.
        base: The logarithmic base. Must be positive and not equal to 1.
            Defaults to e (natural logarithm).

    Returns:
        The logarithm of `x` to the given base, or an "Error: ..." message
        string when the inputs are out of range or the computation fails.
    """
    try:
        if x <= 0:
            return "Error: Logarithm input must be positive"
        if base <= 0 or base == 1:
            return "Error: Logarithm base must be positive and not equal to 1"
        # math.log(x, base) computes log(x) / log(base), which is inexact for
        # common bases (e.g. log(1000, 10) -> 2.9999999999999996). The
        # dedicated functions are more accurate for bases 10 and 2.
        if base == 10:
            return math.log10(x)
        if base == 2:
            return math.log2(x)
        return math.log(x, base)
    except Exception as e:
        return f"Error in logarithm calculation: {str(e)}"
198
 
199
 
200
  @tool
201
def exponential(x: Union[int, float]) -> Union[float, str]:
    """Returns e raised to the power of `x`.

    Args:
        x: The exponent.

    Returns:
        The value of e^x, or an "Error: ..." message string when the result
        does not fit in a float or the computation fails.
    """
    overflow_message = "Error: Exponent too large, would cause overflow"
    try:
        result = math.exp(x)
    except OverflowError:
        # Let math.exp decide what overflows instead of a hard-coded cutoff,
        # so every exponent a float can actually hold is still computed.
        if x < 0:
            # A huge negative int cannot be converted to float, but e^x
            # underflows to zero there, just as math.exp(-1000.0) does.
            return 0.0
        return overflow_message
    except Exception as e:
        return f"Error in exponential calculation: {str(e)}"

    # math.exp(inf) returns inf without raising; keep reporting it as overflow.
    if math.isinf(result):
        return overflow_message
    return result
218
 
219
 
220
  @tool
 
222
  """Performs a DuckDuckGo search for the given query and returns the results.
223
 
224
  Args:
225
+ query: The search query.
226
 
227
  Returns:
228
+ The top search results as a string.
229
  """
230
+ try:
231
+ if not query or not query.strip():
232
+ return "Error: Search query cannot be empty"
233
+
234
+ search_tool = DuckDuckGoSearchRun()
235
+ results = search_tool.invoke(query.strip())
236
+
237
+ # Clean up the results a bit
238
+ if len(results) > 2000: # Truncate very long results
239
+ results = results[:2000] + "... (truncated)"
240
+
241
+ return results
242
+ except Exception as e:
243
+ return f"Error performing web search: {str(e)}"
244
 
245
 
246
  @tool
247
  def roman_calculator_converter(value1: int, value2: int, oper: str) -> str:
248
+ """Performs an operation on 2 numbers and returns the result as a Roman numeral.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
+ Args:
251
+ value1: The first value
252
+ value2: The second value
253
+ oper: Operator for the calculation ("add", "subtract", "multiply", "divide")
254
 
255
+ Returns:
256
+ The result as a Roman numeral string.
257
+ """
258
+ try:
259
+ # Input validation
260
+ if not isinstance(value1, int) or not isinstance(value2, int):
261
+ return "Error: Both values must be integers"
262
+
263
+ if oper not in ["add", "subtract", "multiply", "divide"]:
264
+ return "Error: Operator must be 'add', 'subtract', 'multiply', or 'divide'"
265
+
266
+ # Roman numeral mapping
267
+ roman_numerals = [
268
+ (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
269
+ (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
270
+ (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I")
271
+ ]
272
+
273
+ # Perform calculation
274
+ if oper == "add":
275
+ result = value1 + value2
276
+ elif oper == "subtract":
277
+ result = value1 - value2
278
+ elif oper == "multiply":
279
+ result = value1 * value2
280
+ elif oper == "divide":
281
+ if value2 == 0:
282
+ return "Error: Division by zero is not allowed"
283
+ result = int(value1 / value2) # Integer division for Roman numerals
284
+
285
+ # Handle invalid results for Roman numerals
286
+ if result <= 0:
287
+ return f"Error: Roman numerals cannot represent zero or negative numbers. Result was: {result}"
288
+
289
+ if result > 3999: # Roman numerals traditionally don't go beyond this
290
+ return f"Error: Result ({result}) is too large for standard Roman numeral representation"
291
+
292
+ # Convert to Roman numeral
293
+ roman_string = ""
294
+ for value, numeral in roman_numerals:
295
+ count = result // value
296
+ if count:
297
+ roman_string += numeral * count
298
+ result -= value * count
299
+
300
+ return f"The result of {oper}ing {value1} and {value2} is: {roman_string}"
301
+
302
+ except Exception as e:
303
+ return f"Error in Roman calculator: {str(e)}"
304
 
305
 
306
  @tool
307
  def get_current_time_in_timezone(timezone: str) -> str:
308
+ """Fetches the current local time in a specified timezone.
309
+
310
  Args:
311
+ timezone: A string representing a valid timezone (e.g., 'America/New_York', 'Europe/London').
312
+
313
+ Returns:
314
+ The current time in the specified timezone.
315
  """
316
  try:
317
+ if not timezone or not timezone.strip():
318
+ return "Error: Timezone cannot be empty"
319
+
320
+ # Clean the timezone string
321
+ timezone = timezone.strip()
322
+
323
+ # Handle common timezone aliases
324
+ timezone_aliases = {
325
+ 'EST': 'America/New_York',
326
+ 'PST': 'America/Los_Angeles',
327
+ 'MST': 'America/Denver',
328
+ 'CST': 'America/Chicago',
329
+ 'GMT': 'GMT',
330
+ 'UTC': 'UTC',
331
+ 'CET': 'Europe/Berlin',
332
+ 'JST': 'Asia/Tokyo',
333
+ }
334
+
335
+ if timezone.upper() in timezone_aliases:
336
+ timezone = timezone_aliases[timezone.upper()]
337
+
338
  # Create timezone object
339
  tz = pytz.timezone(timezone)
340
+
341
  # Get current time in that timezone
342
+ local_time = datetime.datetime.now(tz)
343
+ formatted_time = local_time.strftime("%Y-%m-%d %H:%M:%S %Z")
344
+
345
+ return f"The current local time in {timezone} is: {formatted_time}"
346
+
347
+ except pytz.exceptions.UnknownTimeZoneError:
348
+ return f"Error: Unknown timezone '{timezone}'. Please use a valid timezone like 'America/New_York' or 'Europe/London'"
349
  except Exception as e:
350
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
351
+
352
+
353
+ # Additional utility tools that might be helpful
354
+
355
+ @tool
356
def factorial(n: int) -> Union[int, str]:
    """Compute n! for a non-negative integer.

    Args:
        n: A non-negative integer no greater than 170.

    Returns:
        The factorial of n, or an "Error: ..." message string when n is not
        an integer, is negative, is above 170, or the computation fails.
    """
    try:
        if not isinstance(n, int):
            outcome = "Error: Input must be an integer"
        elif n < 0:
            outcome = "Error: Factorial is not defined for negative numbers"
        elif n > 170:
            # Upper bound on accepted input; keeps the result size bounded.
            outcome = "Error: Number too large for factorial calculation"
        else:
            outcome = math.factorial(n)
        return outcome
    except Exception as exc:
        return f"Error calculating factorial: {str(exc)}"
377
+
378
+
379
+ @tool
380
def greatest_common_divisor(a: int, b: int) -> Union[int, str]:
    """Compute the greatest common divisor of two integers.

    Args:
        a: First integer.
        b: Second integer.

    Returns:
        The non-negative GCD of a and b, or an "Error: ..." message string
        when either input is not an integer or the computation fails.
    """
    try:
        if isinstance(a, int) and isinstance(b, int):
            magnitude_a, magnitude_b = abs(a), abs(b)
            return math.gcd(magnitude_a, magnitude_b)
        return "Error: Both inputs must be integers"
    except Exception as exc:
        return f"Error calculating GCD: {str(exc)}"
396
+
397
+
398
+ @tool
399
def least_common_multiple(a: int, b: int) -> Union[int, str]:
    """Compute the least common multiple of two integers.

    Args:
        a: First integer.
        b: Second integer.

    Returns:
        The non-negative LCM of a and b (0 when either input is 0), or an
        "Error: ..." message string when either input is not an integer or
        the computation fails.
    """
    try:
        if not (isinstance(a, int) and isinstance(b, int)):
            return "Error: Both inputs must be integers"
        if a == 0 or b == 0:
            return 0
        product_magnitude = abs(a * b)
        divisor = math.gcd(abs(a), abs(b))
        return product_magnitude // divisor
    except Exception as exc:
        return f"Error calculating LCM: {str(exc)}"
417
+
418
+
419
+ @tool
420
def is_prime(n: int) -> Union[bool, str]:
    """Checks if a number is prime.

    Args:
        n: The number to check.

    Returns:
        True if n is prime, False otherwise, or an "Error: ..." message
        string when n is not an integer or the check fails.
    """
    try:
        if not isinstance(n, int):
            return "Error: Input must be an integer"
        if n < 2:
            return False
        if n % 2 == 0:
            # 2 is the only even prime.
            return n == 2

        # Trial division by odd numbers up to floor(sqrt(n)). math.isqrt is
        # exact for arbitrarily large ints, whereas int(math.sqrt(n)) goes
        # through a float and can be off by one for large n.
        for divisor in range(3, math.isqrt(n) + 1, 2):
            if n % divisor == 0:
                return False
        return True
    except Exception as e:
        return f"Error checking if prime: {str(e)}"
446
+
447
+
448
+ @tool
449
def percentage_calculator(part: Union[int, float], whole: Union[int, float]) -> Union[float, str]:
    """Express 'part' as a percentage of 'whole'.

    Args:
        part: The portion being measured.
        whole: The total the portion is measured against.

    Returns:
        The percentage rounded to two decimal places, or an "Error: ..."
        message string when 'whole' is zero or the computation fails.
    """
    try:
        whole_is_zero = whole == 0
        if whole_is_zero:
            return "Error: Cannot calculate percentage when whole is zero"
        ratio = part / whole
        return round(ratio * 100, 2)
    except Exception as exc:
        return f"Error calculating percentage: {str(exc)}"
466
+
467
+
468
+ @tool
469
def compound_interest(principal: Union[int, float], rate: Union[int, float],
                     time: Union[int, float], compound_frequency: int = 1) -> Union[float, str]:
    """Compute the final balance after compound interest.

    Uses A = P * (1 + r/n) ** (n * t), with r given as a percentage.

    Args:
        principal: The starting amount of money. Must be positive.
        rate: The annual interest rate as a percentage (e.g., 5 for 5%).
        time: The duration in years. Must not be negative.
        compound_frequency: Compounding periods per year (default: 1).

    Returns:
        The final amount rounded to two decimal places, or an "Error: ..."
        message string when an input is out of range or the computation fails.
    """
    try:
        # Checks run in this order so the first offending input is reported.
        if principal <= 0:
            return "Error: Principal must be positive"
        elif rate < 0:
            return "Error: Interest rate cannot be negative"
        elif time < 0:
            return "Error: Time cannot be negative"
        elif compound_frequency <= 0:
            return "Error: Compound frequency must be positive"

        annual_rate = rate / 100
        growth_per_period = 1 + annual_rate / compound_frequency
        total_periods = compound_frequency * time
        final_amount = principal * growth_per_period ** total_periods

        return round(final_amount, 2)
    except Exception as exc:
        return f"Error calculating compound interest: {str(exc)}"
501
+
502
+
503
+ @tool
504
def convert_temperature(value: Union[int, float], from_unit: str, to_unit: str) -> Union[float, str]:
    """Converts temperature between Celsius, Fahrenheit, and Kelvin.

    Args:
        value: The temperature value to convert.
        from_unit: The source unit ('C', 'F', or 'K'; full names such as
            'Celsius' are also accepted, case-insensitively).
        to_unit: The target unit, in the same forms as `from_unit`.

    Returns:
        The converted temperature rounded to two decimal places (or
        `float(value)` when both units are the same), or an "Error: ..."
        message string when a unit is not recognised or the conversion fails.
    """
    # Full unit names are reduced to their one-letter codes.
    unit_map = {'CELSIUS': 'C', 'FAHRENHEIT': 'F', 'KELVIN': 'K'}
    units = ('C', 'F', 'K')

    try:
        from_unit = from_unit.upper().strip()
        to_unit = to_unit.upper().strip()

        from_unit = unit_map.get(from_unit, from_unit)
        to_unit = unit_map.get(to_unit, to_unit)

        if from_unit not in units or to_unit not in units:
            return "Error: Units must be 'C' (Celsius), 'F' (Fahrenheit), or 'K' (Kelvin)"

        if from_unit == to_unit:
            return float(value)

        # Convert to Celsius first
        if from_unit == 'F':
            celsius = (value - 32) * 5/9
        elif from_unit == 'K':
            celsius = value - 273.15
        else:  # from_unit == 'C'
            celsius = value

        # Convert from Celsius to target unit
        if to_unit == 'F':
            result = celsius * 9/5 + 32
        elif to_unit == 'K':
            result = celsius + 273.15
        else:  # to_unit == 'C'
            result = celsius

        return round(result, 2)
    except Exception as e:
        return f"Error converting temperature: {str(e)}"
554