wt002 committed on
Commit
a625210
·
verified ·
1 Parent(s): f7bb0d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -43
app.py CHANGED
@@ -183,52 +183,74 @@ class KeywordsExtractorTool(Tool):
183
  except Exception as e:
184
  return f"Error during extracting most common words: {e}"
185
 
186
- @tool
187
- def parse_excel_to_json(task_id: str) -> dict:
 
 
 
 
188
  """
189
- For a given task_id fetch and parse an Excel file and save parsed data in structured JSON file.
190
- Args:
191
- task_id: An task ID to fetch.
192
-
193
- Returns:
194
- {
195
- "task_id": str,
196
- "sheets": {
197
- "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
198
- ...
199
- },
200
- "status": "Success" | "Error"
201
- }
202
  """
203
- url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
204
-
205
- try:
206
- response = requests.get(url, timeout=100)
207
- if response.status_code != 200:
208
- return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}
209
-
210
- xls_content = pd.ExcelFile(BytesIO(response.content))
211
- json_sheets = {}
212
-
213
- for sheet in xls_content.sheet_names:
214
- df = xls_content.parse(sheet)
215
- df = df.dropna(how="all")
216
- rows = df.head(20).to_dict(orient="records")
217
- json_sheets[sheet] = rows
218
-
219
- return {
220
- "task_id": task_id,
221
- "sheets": json_sheets,
222
- "status": "Success"
223
  }
 
 
224
 
225
- except Exception as e:
226
- return {
227
- "task_id": task_id,
228
- "sheets": {},
229
- "status": f"Error in parsing Excel file: {str(e)}"
230
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
 
234
  import os
@@ -327,7 +349,7 @@ class ImageAnalysisTool(Tool):
327
  "description": "The URL of the image to be analyzed (e.g., 'https://example.com/image.jpg').",
328
  }
329
  }
330
-
331
  # You might consider making API_URL a class attribute if it's constant
332
  # or an instance attribute if it could vary per instance.
333
  # For this example, we'll keep it within the _run method for directness.
@@ -423,7 +445,9 @@ class BasicAgent:
423
  self.image_analysis_tool = ImageAnalysisTool() # Renamed for clarity
424
  self.analyse_attachment_tool = AnalyseAttachmentTool() # Renamed for clarity
425
  self.code_llama_tool = CodeLlamaTool() # Ensure this class is defined/imported
 
426
 
 
427
  system_prompt_template = """
428
  You are my general AI assistant. Your task is to answer the question I asked.
429
  First, provide an explanation of your reasoning, step by step, to arrive at the answer.
@@ -460,7 +484,8 @@ If the answer is a comma-separated list, apply the above rules for each element
460
  self.final_answer_tool,
461
  self.video_transcription_tool,
462
  self.code_llama_tool,
463
- self.image_analysis_tool_instance, # Use the initialized instance
 
464
  self.analyse_attachment_tool # Add the initialized attachment analysis tool
465
  ],
466
  add_base_tools=True # Consider what this adds, ensure it doesn't duplicate.
 
183
  except Exception as e:
184
  return f"Error during extracting most common words: {e}"
185
 
186
+ import requests
187
+ import pandas as pd
188
+ from io import BytesIO
189
+ from smolagents import Tool # Make sure to import the Tool base class
190
+
191
class ParseExcelToJsonTool(Tool):
    """Fetch a task's Excel attachment and return a preview of every sheet.

    The workbook is downloaded from the scoring service's
    ``/files/{task_id}`` endpoint and each sheet is converted to a list of
    row dictionaries. Failures are reported through the ``status`` field of
    the result rather than raised, so the caller always receives a dict.
    """

    name = "parse_excel_to_json"
    description = (
        "For a given task_id, fetches an Excel file from a remote URL, "
        "parses its sheets, and returns the data as a structured JSON object. "
        "Each sheet's data is returned as a list of dictionaries, with each dictionary "
        "representing a row (limited to the first 20 rows). "
        "Useful for extracting structured information from Excel files."
    )
    inputs = {
        "task_id": {
            "type": "string",
            "description": "The task ID used to construct the URL for fetching the Excel file.",
        }
    }
    # smolagents only accepts its own authorized type names; a dictionary
    # result is declared as "object" ("json" is not an authorized type).
    output_type = "object"

    # Endpoint template, request timeout (seconds) and per-sheet row cap.
    FILES_URL = "https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    REQUEST_TIMEOUT = 100
    MAX_ROWS_PER_SHEET = 20

    def forward(self, task_id: str) -> dict:
        """Download and parse the Excel file attached to ``task_id``.

        Args:
            task_id: Identifier substituted into the file-download URL.

        Returns:
            A dictionary with three keys:

            * ``"task_id"`` - the ID that was requested.
            * ``"sheets"`` - maps each sheet name to a list of row dicts
              (at most ``MAX_ROWS_PER_SHEET`` rows, fully empty rows
              dropped, empty cells as ``None``); empty on failure.
            * ``"status"`` - ``"Success"`` or a message describing the
              network/HTTP or parsing error.
        """
        url = self.FILES_URL.format(task_id=task_id)

        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            # Turn 4xx/5xx responses into an HTTPError handled below.
            response.raise_for_status()

            json_sheets = {}
            # The context manager closes the workbook handle even if a
            # sheet fails to parse.
            with pd.ExcelFile(BytesIO(response.content)) as workbook:
                for sheet_name in workbook.sheet_names:
                    frame = workbook.parse(sheet_name).dropna(how="all")
                    # Cap the preview so the result does not overwhelm
                    # the agent's context.
                    preview = frame.head(self.MAX_ROWS_PER_SHEET)
                    # NaN is not valid JSON; expose empty cells as None.
                    preview = preview.astype(object).where(preview.notna(), None)
                    json_sheets[sheet_name] = preview.to_dict(orient="records")

            return {
                "task_id": task_id,
                "sheets": json_sheets,
                "status": "Success",
            }

        except requests.exceptions.RequestException as e:
            return {
                "task_id": task_id,
                "sheets": {},
                "status": f"Network or HTTP error: {str(e)}",
            }
        except Exception as e:
            # Tool boundary: report any parsing failure to the agent as
            # data instead of aborting the run.
            return {
                "task_id": task_id,
                "sheets": {},
                "status": f"Error in parsing Excel file: {str(e)}",
            }

    def _run(self, task_id: str) -> dict:
        """Backward-compatible alias for :meth:`forward`."""
        return self.forward(task_id)
254
 
255
 
256
  import os
 
349
  "description": "The URL of the image to be analyzed (e.g., 'https://example.com/image.jpg').",
350
  }
351
  }
352
+ output_type = "string"
353
  # You might consider making API_URL a class attribute if it's constant
354
  # or an instance attribute if it could vary per instance.
355
  # For this example, we'll keep it within the _run method for directness.
 
445
  self.image_analysis_tool = ImageAnalysisTool() # Renamed for clarity
446
  self.analyse_attachment_tool = AnalyseAttachmentTool() # Renamed for clarity
447
  self.code_llama_tool = CodeLlamaTool() # Ensure this class is defined/imported
448
+ self.parse_excel_to_json_tool = ParseExcelToJsonTool()
449
 
450
+
451
  system_prompt_template = """
452
  You are my general AI assistant. Your task is to answer the question I asked.
453
  First, provide an explanation of your reasoning, step by step, to arrive at the answer.
 
484
  self.final_answer_tool,
485
  self.video_transcription_tool,
486
  self.code_llama_tool,
487
+ self.parse_excel_to_json_tool,
488
+ self.image_analysis_tool, # Use the initialized instance
489
  self.analyse_attachment_tool # Add the initialized attachment analysis tool
490
  ],
491
  add_base_tools=True # Consider what this adds, ensure it doesn't duplicate.