Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -183,52 +183,74 @@ class KeywordsExtractorTool(Tool):
|
|
183 |
except Exception as e:
|
184 |
return f"Error during extracting most common words: {e}"
|
185 |
|
186 |
-
|
187 |
-
|
|
|
|
|
|
|
|
|
188 |
"""
|
189 |
-
|
190 |
-
Args:
|
191 |
-
task_id: A task ID to fetch.
|
192 |
-
|
193 |
-
Returns:
|
194 |
-
{
|
195 |
-
"task_id": str,
|
196 |
-
"sheets": {
|
197 |
-
"SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
|
198 |
-
...
|
199 |
-
},
|
200 |
-
"status": "Success" | "Error"
|
201 |
-
}
|
202 |
"""
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
df = df.dropna(how="all")
|
216 |
-
rows = df.head(20).to_dict(orient="records")
|
217 |
-
json_sheets[sheet] = rows
|
218 |
-
|
219 |
-
return {
|
220 |
-
"task_id": task_id,
|
221 |
-
"sheets": json_sheets,
|
222 |
-
"status": "Success"
|
223 |
}
|
|
|
|
|
224 |
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
|
233 |
|
234 |
import os
|
@@ -327,7 +349,7 @@ class ImageAnalysisTool(Tool):
|
|
327 |
"description": "The URL of the image to be analyzed (e.g., 'https://example.com/image.jpg').",
|
328 |
}
|
329 |
}
|
330 |
-
|
331 |
# You might consider making API_URL a class attribute if it's constant
|
332 |
# or an instance attribute if it could vary per instance.
|
333 |
# For this example, we'll keep it within the _run method for directness.
|
@@ -423,7 +445,9 @@ class BasicAgent:
|
|
423 |
self.image_analysis_tool = ImageAnalysisTool() # Renamed for clarity
|
424 |
self.analyse_attachment_tool = AnalyseAttachmentTool() # Renamed for clarity
|
425 |
self.code_llama_tool = CodeLlamaTool() # Ensure this class is defined/imported
|
|
|
426 |
|
|
|
427 |
system_prompt_template = """
|
428 |
You are my general AI assistant. Your task is to answer the question I asked.
|
429 |
First, provide an explanation of your reasoning, step by step, to arrive at the answer.
|
@@ -460,7 +484,8 @@ If the answer is a comma-separated list, apply the above rules for each element
|
|
460 |
self.final_answer_tool,
|
461 |
self.video_transcription_tool,
|
462 |
self.code_llama_tool,
|
463 |
-
self.
|
|
|
464 |
self.analyse_attachment_tool # Add the initialized attachment analysis tool
|
465 |
],
|
466 |
add_base_tools=True # Consider what this adds, ensure it doesn't duplicate.
|
|
|
183 |
except Exception as e:
|
184 |
return f"Error during extracting most common words: {e}"
|
185 |
|
186 |
+
import requests
|
187 |
+
import pandas as pd
|
188 |
+
from io import BytesIO
|
189 |
+
from smolagents import Tool # Make sure to import the Tool base class
|
190 |
+
|
191 |
+
class ParseExcelToJsonTool(Tool):
    """
    A tool for fetching and parsing an Excel file into structured JSON data.

    The workbook is downloaded from the scoring service's ``/files/{task_id}``
    endpoint. Each sheet is converted to a list of row dictionaries after
    dropping rows that are entirely empty, keeping only the first 20 rows.
    Failures are reported through the ``status`` field of the returned
    dictionary rather than by raising.
    """
    name = "parse_excel_to_json"
    description = (
        "For a given task_id, fetches an Excel file from a remote URL, "
        "parses its sheets, and returns the data as a structured JSON object. "
        "Each sheet's data is returned as a list of dictionaries, with each dictionary "
        "representing a row (limited to the first 20 rows). "
        "Useful for extracting structured information from Excel files."
    )
    inputs = {
        "task_id": {
            "type": "string",
            "description": "The task ID used to construct the URL for fetching the Excel file.",
        }
    }
    # smolagents validates output_type against its own list of authorized
    # type names; a dictionary result is declared as "object" ("json" is not
    # an accepted name).
    output_type = "object"

    def forward(self, task_id: str) -> dict:
        """
        Fetch and parse the Excel file that belongs to ``task_id``.

        This is the entry point smolagents invokes; its parameter names must
        match the keys of ``inputs``.

        Args:
            task_id: The task ID used to build the download URL.

        Returns:
            A dictionary with the keys ``"task_id"``, ``"sheets"`` (sheet name
            mapped to a list of row dictionaries, empty on failure) and
            ``"status"`` (``"Success"`` or an error description).
        """
        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"

        try:
            response = requests.get(url, timeout=100)
            # Turn 4xx/5xx responses into an exception handled below.
            response.raise_for_status()

            xls_content = pd.ExcelFile(BytesIO(response.content))
            json_sheets = {}

            for sheet_name in xls_content.sheet_names:
                df = xls_content.parse(sheet_name)
                df = df.dropna(how="all")  # Drop rows that are entirely NaN

                # Limit to the first 20 rows to avoid overwhelming the
                # model's context with large sheets.
                json_sheets[sheet_name] = df.head(20).to_dict(orient="records")

            return {
                "task_id": task_id,
                "sheets": json_sheets,
                "status": "Success",
            }

        except requests.exceptions.RequestException as e:
            return {
                "task_id": task_id,
                "sheets": {},
                "status": f"Network or HTTP error: {str(e)}",
            }
        except Exception as e:
            # Anything else at this point comes from reading/parsing the
            # workbook; report it in-band so the agent can react to it.
            return {
                "task_id": task_id,
                "sheets": {},
                "status": f"Error in parsing Excel file: {str(e)}",
            }

    def _run(self, task_id: str) -> dict:
        """
        Backward-compatible alias that delegates to ``forward``.
        """
        return self.forward(task_id)
|
254 |
|
255 |
|
256 |
import os
|
|
|
349 |
"description": "The URL of the image to be analyzed (e.g., 'https://example.com/image.jpg').",
|
350 |
}
|
351 |
}
|
352 |
+
output_type = "string"
|
353 |
# You might consider making API_URL a class attribute if it's constant
|
354 |
# or an instance attribute if it could vary per instance.
|
355 |
# For this example, we'll keep it within the _run method for directness.
|
|
|
445 |
self.image_analysis_tool = ImageAnalysisTool() # Renamed for clarity
|
446 |
self.analyse_attachment_tool = AnalyseAttachmentTool() # Renamed for clarity
|
447 |
self.code_llama_tool = CodeLlamaTool() # Ensure this class is defined/imported
|
448 |
+
self.parse_excel_to_json_tool = ParseExcelToJsonTool()
|
449 |
|
450 |
+
|
451 |
system_prompt_template = """
|
452 |
You are my general AI assistant. Your task is to answer the question I asked.
|
453 |
First, provide an explanation of your reasoning, step by step, to arrive at the answer.
|
|
|
484 |
self.final_answer_tool,
|
485 |
self.video_transcription_tool,
|
486 |
self.code_llama_tool,
|
487 |
+
self.parse_excel_to_json_tool,
|
488 |
+
self.image_analysis_tool, # Use the initialized instance
|
489 |
self.analyse_attachment_tool # Add the initialized attachment analysis tool
|
490 |
],
|
491 |
add_base_tools=True # Consider what this adds, ensure it doesn't duplicate.
|