ATK20 committed on
Commit
8a642b5
·
verified ·
1 Parent(s): 0f0a208

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +393 -84
app.py CHANGED
@@ -1,118 +1,427 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
- from transformers import pipeline
 
 
 
 
6
 
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
- HF_MODEL_NAME = "facebook/bart-large-mnli" # Free model that works in Spaces
10
 
11
- # --- Agent Definition ---
12
- class BasicAgent:
13
- def __init__(self):
14
- print("Initializing Agent...")
 
 
15
  try:
16
- self.llm = pipeline(
17
- "text-generation",
18
- model=HF_MODEL_NAME,
19
- device_map="auto"
20
- )
 
 
 
 
 
 
 
21
  except Exception as e:
22
- print(f"LLM initialization failed: {e}")
23
- self.llm = None
24
-
25
- def __call__(self, question: str) -> str:
26
- if not self.llm:
27
- return "Default answer (LLM not available)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  try:
30
- response = self.llm(question, max_length=100)
31
- return response[0]['generated_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  except Exception as e:
33
  return f"Error: {str(e)}"
34
 
35
- def run_and_submit_all():
36
- """Simplified version that works with Gradio auth"""
37
- # Get username from Gradio's auth system
38
- username = os.getenv("GRADIO_AUTH_USERNAME")
39
- if not username:
40
- return "Please login first", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- space_id = os.getenv("SPACE_ID")
43
  api_url = DEFAULT_API_URL
44
- agent = BasicAgent()
 
 
 
 
 
 
 
 
45
 
46
- # Fetch questions
 
 
 
 
 
47
  try:
48
- response = requests.get(f"{api_url}/questions", timeout=15)
49
- questions = response.json()
 
 
 
 
 
 
 
 
 
 
 
 
50
  except Exception as e:
51
- return f"Failed to get questions: {str(e)}", None
 
52
 
53
- # Process questions
54
- results = []
55
- answers = []
56
- for q in questions:
 
 
 
 
 
 
57
  try:
58
- answer = agent(q.get("question", ""))
59
- answers.append({
60
- "task_id": q.get("task_id"),
61
- "submitted_answer": answer
62
- })
63
- results.append({
64
- "Task ID": q.get("task_id"),
65
- "Question": q.get("question"),
66
- "Answer": answer
67
- })
68
  except Exception as e:
69
- results.append({
70
- "Task ID": q.get("task_id"),
71
- "Question": q.get("question"),
72
- "Answer": f"Error: {str(e)}"
73
- })
 
74
 
75
- # Submit answers
 
 
 
 
 
 
76
  try:
77
- response = requests.post(
78
- f"{api_url}/submit",
79
- json={
80
- "username": username,
81
- "agent_code": f"https://huggingface.co/spaces/{space_id}",
82
- "answers": answers
83
- },
84
- timeout=60
85
- )
86
- result = response.json()
87
- return (
88
- f"Success! Score: {result.get('score', 'N/A')}%\n"
89
- f"Correct: {result.get('correct_count', 0)}/{result.get('total_attempted', 0)}",
90
- pd.DataFrame(results)
91
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  except Exception as e:
93
- return f"Submission failed: {str(e)}", pd.DataFrame(results)
 
 
 
94
 
95
- # --- Gradio Interface ---
 
96
  with gr.Blocks() as demo:
97
- gr.Markdown("# LLM Agent Evaluation")
98
-
99
- with gr.Accordion("Instructions", open=False):
100
- gr.Markdown("""
101
- 1. Click the login button
102
- 2. Authorize with your Hugging Face account
103
- 3. Click 'Run Evaluation'
104
- """)
105
-
 
 
 
 
 
106
  gr.LoginButton()
107
-
108
- run_btn = gr.Button("Run Evaluation", variant="primary")
109
- status = gr.Textbox(label="Status")
110
- results = gr.DataFrame(label="Results", wrap=True)
111
-
112
- run_btn.click(
 
113
  fn=run_and_submit_all,
114
- outputs=[status, results]
115
  )
116
 
117
  if __name__ == "__main__":
118
- demo.launch(auth_message="Please login with your Hugging Face account")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import inspect
5
  import pandas as pd
6
+ import re
7
+ import json
8
+ import math
9
+ import time
10
+ from typing import Dict, Any, List, Optional, Union
11
 
12
+ # (Keep Constants as is)
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
15
 
16
+ # --- Tool Definitions ---
17
class Tools:
    """Stateless helper tools the agent can dispatch to."""

    # Canned facts served by the mock ``search`` tool, keyed by trigger phrase.
    _KNOWLEDGE_BASE = {
        "population": "The current world population is approximately 8 billion people.",
        "capital of france": "The capital of France is Paris.",
        "largest planet": "Jupiter is the largest planet in our solar system.",
        "tallest mountain": "Mount Everest is the tallest mountain above sea level at 8,848.86 meters.",
        "deepest ocean": "The Mariana Trench is the deepest ocean trench, located in the Pacific Ocean.",
        "president": "The current president of the United States is Joe Biden (as of 2024).",
        "water boiling point": "Water boils at 100 degrees Celsius (212 degrees Fahrenheit) at standard pressure.",
        "pi": "The mathematical constant pi (π) is approximately 3.14159.",
        "speed of light": "The speed of light in vacuum is approximately 299,792,458 meters per second.",
        "human body temperature": "Normal human body temperature is around 37 degrees Celsius (98.6 degrees Fahrenheit).",
    }

    @staticmethod
    def calculator(expression: str) -> Union[float, str]:
        """Evaluate an arithmetic expression without calling ``eval``.

        Every character other than digits, ``+ - * / ( ) . %`` and whitespace
        is removed first; the remainder is parsed into an AST and only numeric
        literals, unary ``+``/``-`` and the binary operators
        ``+ - * / // % **`` are evaluated.

        Args:
            expression: Free-form text containing an arithmetic expression.

        Returns:
            The numeric result, or a string starting with
            ``"Error in calculation:"`` when the expression is empty, invalid,
            divides by zero, or would produce an unreasonably large power.
        """
        # Local imports keep the block self-contained; both are stdlib.
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
        # Upper bound on the size of an integer power, so input such as
        # "9**9**9**9" is rejected instead of hanging the process.
        max_result_bits = 100_000

        def evaluate(node):
            if isinstance(node, ast.Expression):
                return evaluate(node.body)
            if isinstance(node, ast.Constant) and type(node.value) in (int, float):
                return node.value
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = evaluate(node.left)
                right = evaluate(node.right)
                if (
                    isinstance(node.op, ast.Pow)
                    and isinstance(left, int)
                    and isinstance(right, int)
                    and left.bit_length() * right > max_result_bits
                ):
                    raise ValueError("result of exponentiation is too large")
                return binary_ops[type(node.op)](left, right)
            raise ValueError("unsupported expression")

        try:
            safe_expr = re.sub(r'[^0-9+\-*/().%\s]', '', expression).strip()
            return evaluate(ast.parse(safe_expr, mode="eval"))
        except Exception as e:
            # Best-effort tool: report the failure as text rather than raising.
            return f"Error in calculation: {str(e)}"

    @staticmethod
    def search(query: str) -> str:
        """Look up a canned answer for *query* (mock web search).

        A knowledge-base key matches only as a whole word or phrase (an
        optional plural "s" is tolerated), so the key "pi" no longer fires on
        words that merely contain it, such as "capital".

        Args:
            query: The user's question.

        Returns:
            The first matching fact in knowledge-base order, or a fixed
            "No relevant information" message when nothing matches.
        """
        query_lower = query.lower()
        for key, value in Tools._KNOWLEDGE_BASE.items():
            if re.search(rf'\b{re.escape(key)}s?\b', query_lower):
                return value

        return "No relevant information found in the knowledge base."

    @staticmethod
    def date_info() -> str:
        """Return the current local date formatted as ``YYYY-MM-DD``."""
        return time.strftime("%Y-%m-%d")
67
+
68
+ # --- LLM Interface ---
69
class LLMInterface:
    """Minimal client for a hosted text-generation model."""

    @staticmethod
    def query_llm(prompt: str) -> str:
        """Send *prompt* to the hosted FLAN-T5-XXL endpoint and return its text.

        Never raises: a non-200 response yields a fixed "unavailable" message
        and any exception is reported as an ``"Error: ..."`` string.
        """
        endpoint = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"
        request_headers = {"Content-Type": "application/json"}
        body = {
            "inputs": prompt,
            "parameters": {"max_length": 200, "temperature": 0.7},
        }

        try:
            reply = requests.post(endpoint, headers=request_headers, json=body, timeout=10)

            # Rate limits and other API problems all map to the same fallback.
            if reply.status_code != 200:
                return "The model is currently unavailable. Please try again later."

            data = reply.json()
            # The endpoint may answer with a list of generations or a single dict.
            if isinstance(data, list) and len(data) > 0:
                return data[0].get("generated_text", "").strip()
            if isinstance(data, dict):
                return data.get("generated_text", "").strip()
            return str(data).strip()
        except Exception as exc:
            return f"Error: {str(exc)}"
101
 
102
+ # --- Advanced Agent Implementation ---
103
class BasicAgent:
    """Rule-based agent that routes a question to a tool or to the LLM.

    Routing is keyword driven: arithmetic-looking questions go to the
    calculator tool, date questions to the date tool, factual questions to the
    mock search tool, and everything else to the LLM.
    """

    # Regexes (matched against the lower-cased question) that select a tool.
    _MATH_PATTERNS = (
        r'calculate', r'compute', r'what is \d+', r'how much is',
        r'sum of', r'multiply', r'divide', r'subtract', r'plus', r'minus',
        r'\d+\s*[\+\-\*\/\%]\s*\d+', r'squared', r'cubed', r'square root',
    )
    _DATE_PATTERNS = (r'what day is today', r'current date', r'today\'s date')
    _SEARCH_PATTERNS = (
        r'^what is', r'^who is', r'^where is', r'^when', r'^how many',
        r'capital of', r'largest', r'tallest', r'population', r'president',
        r'temperature', r'boiling point', r'freezing point', r'speed of',
    )

    # Spelled-out operators and the symbols the calculator understands.
    # Multi-word phrases come first so they win over their single-word parts.
    _WORD_OPERATORS = (
        (re.compile(r'square root of\s+(\d+(?:\.\d+)?)'), r'(\1)**0.5'),
        (re.compile(r'\bmultiplied by\b'), '*'),
        (re.compile(r'\bdivided by\b'), '/'),
        (re.compile(r'\bplus\b'), '+'),
        (re.compile(r'\bminus\b'), '-'),
        (re.compile(r'\btimes\b'), '*'),
        (re.compile(r'\bsquared\b'), '**2'),
        (re.compile(r'\bcubed\b'), '**3'),
    )

    # Lead-in phrases; group 1 captures the text up to "?" or end of string.
    _EXPRESSION_PATTERNS = (
        r'calculate\s+(.*?)(?:\?|$)',
        r'what is\s+(.*?)(?:\?|$)',
        r'compute\s+(.*?)(?:\?|$)',
        r'find\s+(.*?)(?:\?|$)',
        r'how much is\s+(.*?)(?:\?|$)',
    )

    # Canned replies for trivial questions, so no LLM round trip is needed.
    _COMMON_ANSWERS = {
        "what color is the sky": "Blue.",
        "how many days in a week": "7 days.",
        "how many months in a year": "12 months.",
        "what is the capital of france": "Paris.",
        "what is the capital of japan": "Tokyo.",
        "what is the capital of italy": "Rome.",
        "what is the capital of germany": "Berlin.",
        "what is the capital of spain": "Madrid.",
        "what is water made of": "H2O (hydrogen and oxygen).",
        "who wrote romeo and juliet": "William Shakespeare.",
        "who painted the mona lisa": "Leonardo da Vinci.",
        "what is the largest ocean": "The Pacific Ocean.",
        "what is the smallest planet": "Mercury.",
    }

    def __init__(self):
        """Register the available tools and the LLM interface."""
        print("Advanced Agent initialized.")
        self.tools = {
            "calculator": Tools.calculator,
            "search": Tools.search,
            "date": Tools.date_info
        }
        self.llm = LLMInterface()

    def __call__(self, question: str) -> str:
        """Answer *question* using the tool chosen by ``_analyze_question``.

        Args:
            question: The question text.

        Returns:
            The answer as a string; tool and LLM failures are reported as
            text rather than raised.
        """
        print(f"Agent received question: {question[:50]}...")

        tool_needed, tool_name = self._analyze_question(question)

        if tool_needed and tool_name == "calculator":
            answer = self._answer_with_calculator(question)
        elif tool_needed and tool_name == "search":
            result = self.tools["search"](question)
            answer = self._extract_direct_answer(question, result)
        elif tool_needed and tool_name == "date":
            answer = self.tools["date"]()
        else:
            # No tool (or an unknown one): let the LLM answer directly.
            answer = self._get_answer_from_llm(question)

        print(f"Agent returning answer: {answer[:50]}...")
        return answer

    def _answer_with_calculator(self, question: str) -> str:
        """Extract the arithmetic from *question*, evaluate and format it."""
        expression = self._extract_math_expression(question)
        if not expression:
            return "Unable to extract a mathematical expression from the question."

        result = self.tools["calculator"](expression)
        # Whole-valued floats are shown without the trailing ".0";
        # ``is_integer`` is False for inf/nan, so those never reach ``int()``.
        if isinstance(result, float) and result.is_integer():
            return str(int(result))
        return str(result)

    def _analyze_question(self, question: str) -> tuple:
        """Decide whether *question* needs a tool and which one.

        Date patterns are tested before the generic search patterns because
        the latter include ``^what is``, which would otherwise capture
        "What is the current date?".

        Returns:
            ``(True, tool_name)`` with ``tool_name`` in ``"calculator"``,
            ``"date"``, ``"search"``, or ``(False, None)`` when no tool applies.
        """
        text = question.lower()
        routing = (
            ("calculator", self._MATH_PATTERNS),
            ("date", self._DATE_PATTERNS),
            ("search", self._SEARCH_PATTERNS),
        )
        for tool_name, patterns in routing:
            if any(re.search(pattern, text) for pattern in patterns):
                return True, tool_name

        # Default to direct answer
        return False, None

    def _extract_math_expression(self, question: str) -> str:
        """Extract an arithmetic expression from *question*.

        Spelled-out operators ("plus", "divided by", "squared", ...) are first
        converted to symbols, then the text after a lead-in such as
        "calculate" or "what is" is reduced to calculator-safe characters.
        A lead-in whose remainder contains no digit is skipped so a later
        pattern, or the token fallback, can still succeed.

        Returns:
            The expression, or ``""`` when none could be found.
        """
        text = question.lower()
        for pattern, symbol in self._WORD_OPERATORS:
            text = pattern.sub(symbol, text)

        for pattern in self._EXPRESSION_PATTERNS:
            match = re.search(pattern, text)
            if match:
                expression = re.sub(r'[^0-9+\-*/().%\s]', '', match.group(1)).strip()
                if re.search(r'\d', expression):
                    return expression

        # If no clear pattern, attempt to extract any mathematical operation
        nums_and_ops = re.findall(r'(\d+(?:\.\d+)?|\+|\-|\*|\/|\(|\)|\%)', text)
        if nums_and_ops:
            return ''.join(nums_and_ops)

        return ""

    def _extract_direct_answer(self, question: str, search_result: str) -> str:
        """Return the search result unchanged (placeholder for future trimming)."""
        return search_result

    def _get_answer_from_llm(self, question: str) -> str:
        """Answer from the canned table when possible, otherwise ask the LLM."""
        normalized = question.lower().strip('?').strip()
        canned = self._COMMON_ANSWERS.get(normalized)
        if canned is not None:
            return canned

        prompt = f"""
        Answer the following question with a very concise, direct response:

        Question: {question}

        Answer in 1-2 sentences maximum, focusing only on the specific information requested.
        """
        return self.llm.query_llm(prompt)
254
+
255
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the BasicAgent on them, and submit the answers.

    Args:
        profile: The logged-in Hugging Face OAuth profile, or ``None`` when
            the user has not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a
        ``pandas.DataFrame`` of task id / question / submitted answer once the
        agent has processed at least one item, and ``None`` when the run
        aborts earlier (not logged in, agent or question fetch failure).
    """
    # SPACE_ID identifies the Space repo; it is used to link to the agent code.
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # When running as a Hugging Face Space this link points at the codebase.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError derives from RequestException, so it has to be listed
    # first or this handler would never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run the agent on every well-formed question
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # One failing question must not abort the run; it is logged in
            # the results table but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    # Shared tail for every failure path: report it and still show what the
    # agent answered.
    print(status_message)
    return status_message, pd.DataFrame(results_log)
375
 
376
+
377
+ # --- Build Gradio Interface using Blocks ---
378
# --- Gradio UI: header, instructions, login, run button and result widgets ---
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Agent Evaluation Runner")

    # Instruction text shown under the title.
    instructions_md = """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    gr.Markdown(instructions_md)

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only status box plus a table of per-question answers.
    status_output = gr.Textbox(interactive=False, lines=5, label="Run Status / Submission Result")
    results_table = gr.DataFrame(wrap=True, label="Questions and Agent Answers")

    # No explicit inputs are wired to the callback.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
404
 
405
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # SPACE_HOST and SPACE_ID are read here only to print startup diagnostics.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # NOTE(review): SPACE_HOST is understood to already include the
        # ".hf.space" domain - confirm against the Spaces docs. The suffix is
        # appended only when missing, so the URL is right in either case.
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Advanced Agent Evaluation...")
    demo.launch(debug=True, share=False)