DragonProgrammer committed on
Commit
ad723a4
·
verified ·
1 Parent(s): 96d4bd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -259
app.py CHANGED
@@ -1,262 +1,171 @@
1
- # app.py
2
  import os
3
  import gradio as gr
4
  import pandas as pd
5
- from bs4 import BeautifulSoup # Keep this if your tools use it
6
  import datetime
7
  import pytz
8
  import math
9
  import re
10
  import requests
11
- from transformers import HfAgent # Your successful import
12
- from transformers.tools import Tool # Your successful import
13
- from transformers import pipeline # <<< --- MAKE SURE THIS IMPORT IS ADDED / PRESENT
14
  import traceback
15
-
16
  import sys
17
- print(f"--- Python version: {sys.version} ---")
18
- # print(f"--- Python sys.path (module search paths): {sys.path} ---") # Optional now
19
-
20
- import transformers
21
- from transformers.tools import Tool
22
- print(f"--- Expected Transformers Version: 4.36.0 ---")
23
- print(f"--- Actual Transformers Version: {transformers.__version__} ---")
24
- # print(f"--- Transformers module loaded from: {transformers.__file__} ---") # Optional now
25
- # print(f"--- Attributes of 'transformers' module (dir(transformers)): {dir(transformers)} ---") # Optional now
26
-
27
- try:
28
- from transformers import HfAgent # <<< --- THE CORRECT IMPORT!
29
- print("--- Successfully imported HfAgent directly from transformers! ---")
30
- except ImportError as e:
31
- print(f"--- FAILED to import HfAgent directly from transformers: {e} ---")
32
- # This should ideally not happen now
33
- raise
34
- except Exception as e_gen:
35
- print(f"--- Some other UNEXPECTED error during HfAgent import: {e_gen} ---")
36
- raise
37
-
38
- print("--- If no errors above, imports were successful. Proceeding with rest of app. ---")
39
-
40
- # (Keep Constants as is)
41
  # --- Constants ---
42
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
43
 
44
- # --- Tool Definitions ---
45
- def get_current_time_in_timezone(timezone: str) -> str:
46
- """Fetches the current local time in a specified IANA timezone (e.g., 'America/New_York', 'Europe/London', 'UTC').
47
- Args:
48
- timezone (str): A string representing a valid IANA timezone name.
49
- """
50
  print(f"--- Tool: Executing get_current_time_in_timezone for: {timezone} ---")
51
  try:
52
  tz = pytz.timezone(timezone)
53
- # Added %Z (timezone name) and %z (UTC offset)
54
  local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S %Z%z")
55
  return f"The current local time in {timezone} is: {local_time}"
56
  except pytz.exceptions.UnknownTimeZoneError:
57
- print(f"Error: Unknown timezone '{timezone}'")
58
- return f"Error: Unknown timezone '{timezone}'. Please use a valid IANA timezone name (e.g., 'America/Denver', 'UTC')."
59
  except Exception as e:
60
- print(f"Error fetching time for timezone '{timezone}': {str(e)}")
61
  return f"Error fetching time for timezone '{timezone}': {str(e)}"
62
 
63
- def web_search(query: str) -> str:
64
- """
65
- Performs a web search using DuckDuckGo (via HTML scraping) and returns the text content of the top result snippets.
66
- Use this tool to find up-to-date information about events, facts, or topics when the answer isn't already known.
67
-
68
- Args:
69
- query (str): The search query string.
70
-
71
- Returns:
72
- str: A string containing the summarized search results (titles and snippets of top hits), or an error message if the search fails.
73
- """
74
- print(f"--- Tool: Executing web_search with query: {query} ---")
75
- try:
76
- search_url = "https://html.duckduckgo.com/html/"
77
- params = {"q": query}
78
- headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'} # Common user agent
79
-
80
- response = requests.post(search_url, data=params, headers=headers, timeout=15) # Increased timeout
81
- response.raise_for_status() # Check for HTTP errors (4xx or 5xx)
82
-
83
- soup = BeautifulSoup(response.text, 'html.parser')
84
- results = soup.find_all('div', class_='result__body') # Find result containers
85
-
86
- snippets = []
87
- for i, result in enumerate(results[:3]): # Get top 3 results for brevity
88
- title_tag = result.find('a', class_='result__a')
89
- snippet_tag = result.find('a', class_='result__snippet')
90
- title = title_tag.get_text(strip=True) if title_tag else "No Title"
91
- snippet = snippet_tag.get_text(strip=True) if snippet_tag else "No Snippet"
92
- if snippet != "No Snippet": # Only include results with a snippet
93
- snippets.append(f"Result {i+1}: {title} - {snippet}")
94
-
95
- if not snippets:
96
- return "No search results with relevant snippets found."
97
-
98
- return "\n".join(snippets)
99
-
100
- except requests.exceptions.Timeout:
101
- print(f"Error during web search request: Timeout")
102
- return "Error: The web search request timed out."
103
- except requests.exceptions.RequestException as e:
104
- print(f"Error during web search request: {e}")
105
- return f"Error: Could not perform web search. Network issue: {e}"
106
- except Exception as e:
107
- print(f"Error processing web search results: {e}")
108
- return f"Error: Could not process search results. {e}"
109
-
110
- def safe_calculator(expression: str) -> str:
111
- """
112
- Evaluates a simple mathematical expression involving numbers, +, -, *, /, %, parentheses, and the math functions: sqrt, pow.
113
- Use this tool *only* for calculations. Do not use it to run other code.
114
-
115
- Args:
116
- expression (str): The mathematical expression string (e.g., "(2 + 3) * 4", "pow(2, 5)", "sqrt(16)").
117
 
118
- Returns:
119
- str: The numerical result of the calculation or a descriptive error message if the expression is invalid or unsafe.
120
- """
121
  print(f"--- Tool: Executing safe_calculator with expression: {expression} ---")
122
  try:
123
- # Basic check for allowed characters/patterns first
124
- # Allows numbers (including scientific notation), operators, parentheses, whitespace, and known function names
125
- pattern = r"^[0-9eE\.\+\-\*\/\%\(\)\s]*(sqrt|pow)?[0-9eE\.\+\-\*\/\%\(\)\s\,]*$"
126
- if not re.match(pattern, expression):
127
- # Fallback simple pattern check (less precise)
128
- allowed_chars_pattern = r"^[0-9eE\.\+\-\*\/\%\(\)\s\,sqrtpow]+$"
129
- if not re.match(allowed_chars_pattern, expression):
130
- raise ValueError(f"Expression '{expression}' contains disallowed characters.")
131
-
132
- # Define allowed functions/names for eval's context
133
- allowed_names = {
134
- "sqrt": math.sqrt,
135
- "pow": math.pow,
136
- # Add other safe math functions if needed e.g. "log": math.log
137
- }
138
- # Evaluate the expression in a restricted environment
139
- # Limited builtins, only allowed names are accessible.
140
  result = eval(expression, {"__builtins__": {}}, allowed_names)
141
-
142
- # Ensure the result is a number before converting to string
143
- if not isinstance(result, (int, float)):
144
- raise ValueError("Calculation did not produce a numerical result.")
145
-
146
  return str(result)
147
  except Exception as e:
148
- # Catch potential errors during eval (SyntaxError, NameError, TypeError etc.) or from the checks
149
  print(f"Error during calculation for '{expression}': {e}")
150
  return f"Error calculating '{expression}': Invalid expression or calculation error ({e})."
151
 
152
- # --- Custom Agent to Force Correct Behavior ---
153
- class MyCustomHfAgent(HfAgent):
154
- """
155
- A custom agent that inherits from HfAgent to override the text generation method,
156
- forcing it to use the local pipeline instead of attempting a faulty web request.
157
- """
158
- def generate_one(self, prompt: str, stop: list):
159
- print("--- INSIDE CUSTOM HfAgent's generate_one method ---")
160
-
161
- # This is the crucial check. We're logging what the agent thinks its state is.
162
- is_pipeline = self.llm.is_hf_pipeline if hasattr(self.llm, "is_hf_pipeline") else "LLM has no is_hf_pipeline attr"
163
- print(f"--> self.llm.is_hf_pipeline is: {is_pipeline}")
164
-
165
- # Regardless of what the agent thinks, we KNOW we gave it a pipeline.
166
- # So, we will force it to execute the code path for local pipelines.
167
- print("--> Forcing execution of the local pipeline path...")
168
 
169
- try:
170
- # This is the code from the 'if self.llm.is_hf_pipeline:' block in the original Agent.generate_one method
171
- processed_prompt = self.llm.processor.process_prompt(prompt, **self.tokenizer_kwargs)
172
- model_outputs = self.llm.pipeline(processed_prompt, stop_sequence=stop, **self.generate_kwargs)
173
- return self.llm.processor.process_outputs(model_outputs, stop_sequence=stop)
174
- except Exception as e:
175
- print(f"--- ERROR during forced pipeline execution: {e} ---")
176
- traceback.print_exc()
177
- # If this fails, we return an error string.
178
- return f"Error during custom pipeline execution: {e}"
179
-
180
- # --- Agent Definition using HfAgent ---
181
- class HfAgentWrapper:
182
  def __init__(self):
183
- print("Initializing HfAgentWrapper...")
184
- model_id_or_path = "bigcode/starcoderbase-1b" # A model compatible with transformers v4.36.0
185
-
 
 
 
 
186
  try:
187
- print(f"Strategy: Pre-creating pipeline for model: {model_id_or_path}")
188
- hf_auth_token = os.getenv("HF_TOKEN") # Secret should be named HF_TOKEN
189
  if not hf_auth_token:
190
- print("WARNING: HF_TOKEN secret not found. This may fail if model requires token.")
191
- # Starcoderbase is gated, so this is needed.
192
- raise ValueError("HF_TOKEN secret is missing and is required for this model.")
193
  else:
194
- print(f"HF_TOKEN secret found (length: {len(hf_auth_token)}).")
195
-
196
- # --- Step 1: Create the pipeline object FIRST ---
197
- # This allows us to handle errors from pipeline creation directly.
198
- llm_pipeline = pipeline(
199
- task="text-generation",
200
- model=model_id_or_path,
201
- token=hf_auth_token
202
- # trust_remote_code=True # Not generally needed for starcoder with this version
203
- )
204
- print("Successfully created LLM pipeline object.")
205
-
206
- # --- Step 2: Ensure your tools are created WITH proper names ---
207
- if not get_current_time_in_timezone.__doc__: raise ValueError("Tool 'get_current_time_in_timezone' is missing a docstring.")
208
- if not web_search.__doc__: raise ValueError("Tool 'web_search' is missing a docstring.")
209
- if not safe_calculator.__doc__: raise ValueError("Tool 'safe_calculator' is missing a docstring.")
210
-
211
- time_tool_obj = Tool(
212
- name=get_current_time_in_timezone.__name__, # Use the function's name
213
- func=get_current_time_in_timezone,
214
- description=get_current_time_in_timezone.__doc__
215
- )
216
- search_tool_obj = Tool(
217
- name=web_search.__name__, # Use the function's name
218
- func=web_search,
219
- description=web_search.__doc__
220
  )
221
- calculator_tool_obj = Tool(
222
- name=safe_calculator.__name__, # Use the function's name
223
- func=safe_calculator,
224
- description=safe_calculator.__doc__
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  )
226
- self.actual_tools_for_agent = [time_tool_obj, search_tool_obj, calculator_tool_obj]
227
- print(f"Prepared Tool objects with names: {[tool.name for tool in self.actual_tools_for_agent]}")
228
-
229
- # --- Step 3: Pass the PRE-INITIALIZED pipeline object to HfAgent ---
230
- print("Initializing HfAgent with the pre-created pipeline...")
231
- self.agent = MyCustomHfAgent(llm_pipeline, additional_tools=self.actual_tools_for_agent)
232
- print("HfAgent successfully instantiated with pre-initialized pipeline.")
233
 
234
  except Exception as e:
235
- print(f"CRITICAL ERROR: Failed to initialize HfAgent or Pipeline: {e}")
236
- print("Full traceback of HfAgent/Pipeline initialization error:")
237
  traceback.print_exc()
238
- raise RuntimeError(f"HfAgent/Pipeline initialization failed: {e}") from e
239
 
240
- # The __call__ method remains the same
241
  def __call__(self, question: str) -> str:
242
- print(f"\n--- HfAgentWrapper received question (first 100 chars): {question[:100]}... ---")
243
  try:
244
- answer = self.agent.run(question)
245
- print(f"--- HfAgentWrapper generated answer (first 100 chars): {str(answer)[:100]}... ---")
246
- return str(answer)
 
247
  except Exception as e:
248
- print(f"ERROR: HfAgent execution failed for question '{question[:50]}...': {e}")
249
- print("Full traceback of HfAgent execution error:")
250
  traceback.print_exc()
251
  return f"Agent Error: Failed to process the question. Details: {e}"
252
 
253
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
254
  """
255
- Fetches all questions, runs the BasicAgent on them, submits all answers,
256
  and displays the results.
257
  """
258
- # --- Determine HF Space Runtime URL and Repo URL ---
259
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
260
 
261
  if profile:
262
  username= f"{profile.username}"
@@ -269,38 +178,29 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
269
  questions_url = f"{api_url}/questions"
270
  submit_url = f"{api_url}/submit"
271
 
272
- # 1. Instantiate Agent ( modify this part to create your agent)
273
  try:
274
- agent = HfAgentWrapper()
 
275
  except Exception as e:
276
  print(f"Error instantiating agent: {e}")
277
  return f"Error initializing agent: {e}", None
278
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
279
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
280
  print(agent_code)
281
 
282
- # 2. Fetch Questions
283
  print(f"Fetching questions from: {questions_url}")
284
  try:
285
  response = requests.get(questions_url, timeout=15)
286
  response.raise_for_status()
287
  questions_data = response.json()
288
  if not questions_data:
289
- print("Fetched questions list is empty.")
290
- return "Fetched questions list is empty or invalid format.", None
291
  print(f"Fetched {len(questions_data)} questions.")
292
- except requests.exceptions.RequestException as e:
293
- print(f"Error fetching questions: {e}")
294
- return f"Error fetching questions: {e}", None
295
- except requests.exceptions.JSONDecodeError as e:
296
- print(f"Error decoding JSON response from questions endpoint: {e}")
297
- print(f"Response text: {response.text[:500]}")
298
- return f"Error decoding server response for questions: {e}", None
299
  except Exception as e:
300
  print(f"An unexpected error occurred fetching questions: {e}")
301
  return f"An unexpected error occurred fetching questions: {e}", None
302
 
303
- # 3. Run your Agent
304
  results_log = []
305
  answers_payload = []
306
  print(f"Running agent on {len(questions_data)} questions...")
@@ -315,19 +215,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
315
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
316
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
317
  except Exception as e:
318
- print(f"Error running agent on task {task_id}: {e}")
319
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
320
 
321
  if not answers_payload:
322
  print("Agent did not produce any answers to submit.")
323
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
324
 
325
- # 4. Prepare Submission
326
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
327
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
328
  print(status_update)
329
 
330
- # 5. Submit
331
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
332
  try:
333
  response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -343,34 +241,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
343
  print("Submission successful.")
344
  results_df = pd.DataFrame(results_log)
345
  return final_status, results_df
346
- except requests.exceptions.HTTPError as e:
347
- error_detail = f"Server responded with status {e.response.status_code}."
348
- try:
349
- error_json = e.response.json()
350
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
351
- except requests.exceptions.JSONDecodeError:
352
- error_detail += f" Response: {e.response.text[:500]}"
353
- status_message = f"Submission Failed: {error_detail}"
354
- print(status_message)
355
- results_df = pd.DataFrame(results_log)
356
- return status_message, results_df
357
- except requests.exceptions.Timeout:
358
- status_message = "Submission Failed: The request timed out."
359
- print(status_message)
360
- results_df = pd.DataFrame(results_log)
361
- return status_message, results_df
362
- except requests.exceptions.RequestException as e:
363
- status_message = f"Submission Failed: Network error - {e}"
364
- print(status_message)
365
- results_df = pd.DataFrame(results_log)
366
- return status_message, results_df
367
  except Exception as e:
368
  status_message = f"An unexpected error occurred during submission: {e}"
369
  print(status_message)
 
370
  results_df = pd.DataFrame(results_log)
371
  return status_message, results_df
372
 
373
-
374
  # --- Build Gradio Interface using Blocks ---
375
  with gr.Blocks() as demo:
376
  gr.Markdown("# Basic Agent Evaluation Runner")
@@ -381,20 +258,12 @@ with gr.Blocks() as demo:
381
  1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
382
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
383
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
384
-
385
- ---
386
- **Disclaimers:**
387
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
388
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
389
  """
390
  )
391
 
392
  gr.LoginButton()
393
-
394
  run_button = gr.Button("Run Evaluation & Submit All Answers")
395
-
396
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
397
- # Removed max_rows=10 from DataFrame constructor
398
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
399
 
400
  run_button.click(
@@ -404,9 +273,8 @@ with gr.Blocks() as demo:
404
 
405
  if __name__ == "__main__":
406
  print("\n" + "-"*30 + " App Starting " + "-"*30)
407
- # Check for SPACE_HOST and SPACE_ID at startup for information
408
  space_host_startup = os.getenv("SPACE_HOST")
409
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
410
 
411
  if space_host_startup:
412
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -414,7 +282,7 @@ if __name__ == "__main__":
414
  else:
415
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
416
 
417
- if space_id_startup: # Print repo URLs if SPACE_ID is found
418
  print(f"✅ SPACE_ID found: {space_id_startup}")
419
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
420
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
1
+ # app.py (New LangChain version)
2
  import os
3
  import gradio as gr
4
  import pandas as pd
5
+ from bs4 import BeautifulSoup
6
  import datetime
7
  import pytz
8
  import math
9
  import re
10
  import requests
 
 
 
11
  import traceback
 
12
  import sys
13
+
14
+ # --- LangChain and new Transformers imports ---
15
+ from langchain.agents import AgentExecutor, create_react_agent
16
+ from langchain_huggingface import HuggingFacePipeline
17
+ from langchain_core.prompts import PromptTemplate
18
+ from langchain.tools import Tool
19
+ from langchain_community.tools import DuckDuckGoSearchRun
20
+
21
+ # --- Other imports ---
22
+ import transformers # Still useful for version checking
23
+ print(f"--- Using transformers version: {transformers.__version__} ---")
24
+
 
 
 
 
 
 
 
 
 
 
 
 
25
  # --- Constants ---
26
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
27
 
28
+
29
+ # --- Tool Definitions (LangChain Style) ---
30
+ # For LangChain, we define the functions and then wrap them in LangChain's Tool class.
31
+
32
def get_current_time_in_timezone_func(timezone: str) -> str:
    """A tool that fetches the current local time in a specified IANA timezone. Always use this tool for questions about the current time. Input should be a valid timezone string (e.g., 'America/New_York', 'Europe/London')."""
    # NOTE: the docstring above is read through ``__doc__`` and handed to the
    # agent as the tool description, so it is deliberately left as one line.
    # Maintainer notes therefore live in comments:
    #
    #   timezone -- IANA zone name such as "Europe/London".
    #   returns  -- a sentence containing the formatted local time, or a
    #               sentence starting with "Error" when the lookup fails.
    #               The function never raises; failures are reported as text
    #               so the agent loop can read them as an observation.
    #
    # Agent-produced tool inputs often arrive wrapped in quotes (the
    # description itself shows quoted examples) or with a trailing newline.
    # Normalise before the lookup so a valid zone is not rejected for that.
    timezone = str(timezone).strip().strip("'\"").strip()

    print(f"--- Tool: Executing get_current_time_in_timezone for: {timezone} ---")
    try:
        tz = pytz.timezone(timezone)
        # %Z is the zone abbreviation, %z the numeric UTC offset.
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S %Z%z")
        return f"The current local time in {timezone} is: {local_time}"
    except pytz.exceptions.UnknownTimeZoneError:
        return f"Error: Unknown timezone '{timezone}'. Please use a valid IANA timezone name."
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
43
 
44
# Web search is delegated to LangChain's ready-made DuckDuckGo tool for stability.
# The agent decides when to call a tool from its description, so the text
# below spells out the situations in which searching is appropriate.
search_tool = DuckDuckGoSearchRun(
    name="web_search",
    description=(
        "A tool that performs a web search using DuckDuckGo. "
        "Use this to find up-to-date information about events, facts, or topics "
        "when the answer isn't already known."
    ),
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
def safe_calculator_func(expression: str) -> str:
    """A tool for evaluating simple mathematical expressions. Use this tool *only* for calculations involving numbers, +, -, *, /, %, parentheses, and the math functions: sqrt, pow. Do not use it to run other code."""
    # NOTE: the docstring above is read through ``__doc__`` and handed to the
    # agent as the tool description, so it is deliberately left as one line.
    # Maintainer notes therefore live in comments:
    #
    #   expression -- arithmetic text such as "(2 + 3) * 4" or "sqrt(16)".
    #   returns    -- ``str(result)`` on success, otherwise a sentence starting
    #                 with "Error calculating". The function never raises.
    print(f"--- Tool: Executing safe_calculator with expression: {expression} ---")
    try:
        if not isinstance(expression, str) or not expression.strip():
            raise ValueError("Expression must be a non-empty string.")

        # An empty __builtins__ does not sandbox eval(): attribute access on
        # any literal (e.g. ().__class__.__bases__) still reaches arbitrary
        # objects. No arithmetic expression needs a double underscore, so
        # reject it outright.
        if "__" in expression:
            raise ValueError("Expression contains disallowed characters.")

        # Only these names are visible to the evaluated expression.
        allowed_names = {"sqrt": math.sqrt, "pow": math.pow, "pi": math.pi}

        # SECURITY(review): this still eval()s model-generated text. The guards
        # here narrow the attack surface but do not make eval() safe (for
        # instance, a huge exponent can still stall the process). Consider
        # replacing it with an ast-based evaluator.
        result = eval(expression.strip(), {"__builtins__": {}}, allowed_names)

        # A calculator must yield a number; strings, lists, tuples and so on
        # mean the input was not plain arithmetic.
        if not isinstance(result, (int, float)):
            raise ValueError("Calculation did not produce a numerical result.")

        return str(result)
    except Exception as e:
        print(f"Error during calculation for '{expression}': {e}")
        return f"Error calculating '{expression}': Invalid expression or calculation error ({e})."
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
+ # --- LangChain Agent Definition ---
65
class LangChainAgentWrapper:
    """Callable wrapper around a LangChain ReAct agent backed by a local HF pipeline.

    Construction loads a text-generation pipeline, wraps it as a LangChain
    LLM, registers the time / web-search / calculator tools and builds an
    ``AgentExecutor``. Calling the instance with a question returns the
    agent's answer as a string.

    Attributes set by ``__init__``:
        llm: the ``HuggingFacePipeline`` wrapping the transformers pipeline.
        tools: the list of LangChain tools exposed to the agent.
        agent_executor: the ``AgentExecutor`` that runs the ReAct loop.
    """

    # Generation budget for each LLM call. When none is given, the pipeline
    # falls back to library/model defaults, which in some transformers
    # versions are short enough to cut a Thought/Action/Observation trace
    # off mid-step, so it is set explicitly.
    MAX_NEW_TOKENS = 512

    def __init__(self):
        """Load the model and assemble the agent.

        Raises:
            RuntimeError: if anything fails during setup (missing HF_TOKEN,
                model download/load error, agent construction error). The
                original exception is chained as the cause.
        """
        print("Initializing LangChainAgentWrapper...")

        # Using a newer, more capable instruction-tuned model.
        # This model is generally better at following the ReAct prompt format used by LangChain agents.
        model_id = "mistralai/Mistral-7B-Instruct-v0.1"
        # model_id = "bigcode/starcoderbase-1b" # You can still use starcoder if you prefer

        try:
            hf_auth_token = os.getenv("HF_TOKEN")
            if not hf_auth_token:
                raise ValueError("HF_TOKEN secret is missing. It is required for downloading models.")
            print("HF_TOKEN secret found.")

            # Create the Hugging Face pipeline
            print(f"Loading model pipeline for: {model_id}")
            llm_pipeline = transformers.pipeline(
                "text-generation",
                model=model_id,
                model_kwargs={"torch_dtype": "auto"},  # Use "auto" for dtype
                device_map="auto",  # Requires accelerate
                token=hf_auth_token,
                max_new_tokens=self.MAX_NEW_TOKENS,
            )
            print("Model pipeline loaded successfully.")

            # Wrap the pipeline in a LangChain LLM object
            self.llm = HuggingFacePipeline(pipeline=llm_pipeline)

            # Define the list of LangChain tools. Each function's docstring is
            # used as the description the agent reads when choosing a tool.
            self.tools = [
                Tool(
                    name="get_current_time_in_timezone",
                    func=get_current_time_in_timezone_func,
                    description=get_current_time_in_timezone_func.__doc__,
                ),
                search_tool,  # This is already a LangChain Tool instance
                Tool(
                    name="safe_calculator",
                    func=safe_calculator_func,
                    description=safe_calculator_func.__doc__,
                ),
            ]
            print(f"Tools prepared for agent: {[tool.name for tool in self.tools]}")

            # Create the ReAct agent prompt from a template.
            # The prompt teaches the agent how to think and use tools. Its
            # lines are kept flush-left so that no source indentation leaks
            # into the text sent to the model.
            react_prompt = PromptTemplate.from_template(
                """
You are a helpful assistant. Answer the following questions as best you can.
You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}
"""
            )

            # Create the agent
            agent = create_react_agent(self.llm, self.tools, react_prompt)

            # Create the agent executor, which runs the agent loop
            self.agent_executor = AgentExecutor(
                agent=agent,
                tools=self.tools,
                verbose=True,
                handle_parsing_errors=True,
            )
            print("LangChain agent created successfully.")

        except Exception as e:
            print(f"CRITICAL ERROR: Failed to initialize LangChain agent: {e}")
            traceback.print_exc()
            raise RuntimeError(f"LangChain agent initialization failed: {e}") from e

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Execution failures are not raised; they are printed with a traceback
        and reported as an "Agent Error: ..." string.
        """
        print(f"\n--- LangChainAgentWrapper received question: {question[:100]}... ---")
        try:
            # Invoke the agent executor
            response = self.agent_executor.invoke({"input": question})
            # The answer is in the 'output' key of the response dictionary.
            # Coerce to str so the declared return type holds even when the
            # executor hands back None or a non-string value.
            output = response.get("output")
            if output is None:
                return "No output found."
            return str(output)
        except Exception as e:
            print(f"ERROR: LangChain agent execution failed: {e}")
            traceback.print_exc()
            return f"Agent Error: Failed to process the question. Details: {e}"
161
 
162
+ # --- Main Evaluation Logic ---
163
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
164
  """
165
+ Fetches all questions, runs the agent on them, submits all answers,
166
  and displays the results.
167
  """
168
+ space_id = os.getenv("SPACE_ID")
 
169
 
170
  if profile:
171
  username= f"{profile.username}"
 
178
  questions_url = f"{api_url}/questions"
179
  submit_url = f"{api_url}/submit"
180
 
 
181
  try:
182
+ # Now instantiate our new LangChain agent
183
+ agent = LangChainAgentWrapper()
184
  except Exception as e:
185
  print(f"Error instantiating agent: {e}")
186
  return f"Error initializing agent: {e}", None
187
+
188
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
189
  print(agent_code)
190
 
 
191
  print(f"Fetching questions from: {questions_url}")
192
  try:
193
  response = requests.get(questions_url, timeout=15)
194
  response.raise_for_status()
195
  questions_data = response.json()
196
  if not questions_data:
197
+ print("Fetched questions list is empty.")
198
+ return "Fetched questions list is empty or invalid format.", None
199
  print(f"Fetched {len(questions_data)} questions.")
 
 
 
 
 
 
 
200
  except Exception as e:
201
  print(f"An unexpected error occurred fetching questions: {e}")
202
  return f"An unexpected error occurred fetching questions: {e}", None
203
 
 
204
  results_log = []
205
  answers_payload = []
206
  print(f"Running agent on {len(questions_data)} questions...")
 
215
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
216
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
217
  except Exception as e:
218
+ print(f"Error running agent on task {task_id}: {e}")
219
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
220
 
221
  if not answers_payload:
222
  print("Agent did not produce any answers to submit.")
223
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
224
 
 
225
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
226
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
227
  print(status_update)
228
 
 
229
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
230
  try:
231
  response = requests.post(submit_url, json=submission_data, timeout=60)
 
241
  print("Submission successful.")
242
  results_df = pd.DataFrame(results_log)
243
  return final_status, results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  except Exception as e:
245
  status_message = f"An unexpected error occurred during submission: {e}"
246
  print(status_message)
247
+ traceback.print_exc()
248
  results_df = pd.DataFrame(results_log)
249
  return status_message, results_df
250
 
 
251
  # --- Build Gradio Interface using Blocks ---
252
  with gr.Blocks() as demo:
253
  gr.Markdown("# Basic Agent Evaluation Runner")
 
258
  1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
259
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
260
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
 
 
261
  """
262
  )
263
 
264
  gr.LoginButton()
 
265
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
266
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
267
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
268
 
269
  run_button.click(
 
273
 
274
  if __name__ == "__main__":
275
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
276
  space_host_startup = os.getenv("SPACE_HOST")
277
+ space_id_startup = os.getenv("SPACE_ID")
278
 
279
  if space_host_startup:
280
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
282
  else:
283
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
284
 
285
+ if space_id_startup:
286
  print(f"✅ SPACE_ID found: {space_id_startup}")
287
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
288
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")