Final_Assignment_Project

Sleeping

App Files Community

wt002 committed on Jun 7

Commit

a625210

verified ·

1 Parent(s): f7bb0d5

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -43

app.py CHANGED Viewed

@@ -183,52 +183,74 @@ class KeywordsExtractorTool(Tool):
         except Exception as e:
             return f"Error during extracting most common words: {e}"
-@tool
-def parse_excel_to_json(task_id: str) -> dict:
     """
-    For a given task_id fetch and parse an Excel file and save parsed data in structured JSON file.
-    Args:
-        task_id: An task ID to fetch.
-    Returns:
-        {
-            "task_id": str,
-            "sheets": {
-                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
-                ...
-            },
-            "status": "Success" | "Error"
-        }
     """
-    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
-    try:
-        response = requests.get(url, timeout=100)
-        if response.status_code != 200:
-            return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}
-        xls_content = pd.ExcelFile(BytesIO(response.content))
-        json_sheets = {}
-        for sheet in xls_content.sheet_names:
-            df = xls_content.parse(sheet)
-            df = df.dropna(how="all")
-            rows = df.head(20).to_dict(orient="records")
-            json_sheets[sheet] = rows
-        return {
-            "task_id": task_id,
-            "sheets": json_sheets,
-            "status": "Success"
         }
-    except Exception as e:
-        return {
-            "task_id": task_id,
-            "sheets": {},
-            "status": f"Error in parsing Excel file: {str(e)}"
-        }
 import os
@@ -327,7 +349,7 @@ class ImageAnalysisTool(Tool):
             "description": "The URL of the image to be analyzed (e.g., 'https://example.com/image.jpg').",
         }
     }
     # You might consider making API_URL a class attribute if it's constant
     # or an instance attribute if it could vary per instance.
     # For this example, we'll keep it within the _run method for directness.
@@ -423,7 +445,9 @@ class BasicAgent:
         self.image_analysis_tool = ImageAnalysisTool() # Renamed for clarity
         self.analyse_attachment_tool = AnalyseAttachmentTool() # Renamed for clarity
         self.code_llama_tool = CodeLlamaTool() # Ensure this class is defined/imported
         system_prompt_template = """
 You are my general AI assistant. Your task is to answer the question I asked.
 First, provide an explanation of your reasoning, step by step, to arrive at the answer.
@@ -460,7 +484,8 @@ If the answer is a comma-separated list, apply the above rules for each element
                 self.final_answer_tool,
                 self.video_transcription_tool,
                 self.code_llama_tool,
-                self.image_analysis_tool_instance, # Use the initialized instance
                 self.analyse_attachment_tool # Add the initialized attachment analysis tool
             ],
             add_base_tools=True # Consider what this adds, ensure it doesn't duplicate.

         except Exception as e:
             return f"Error during extracting most common words: {e}"
+import requests
+import pandas as pd
+from io import BytesIO
+from smolagents import Tool # Make sure to import the Tool base class
+class ParseExcelToJsonTool(Tool):
     """
+    A tool for fetching and parsing an Excel file into structured JSON data.
     """
+    name = "parse_excel_to_json"
+    description = (
+        "For a given task_id, fetches an Excel file from a remote URL, "
+        "parses its sheets, and returns the data as a structured JSON object. "
+        "Each sheet's data is returned as a list of dictionaries, with each dictionary "
+        "representing a row (limited to the first 20 rows). "
+        "Useful for extracting structured information from Excel files."
+    )
+    inputs = {
+        "task_id": {
+            "type": "string",
+            "description": "The task ID used to construct the URL for fetching the Excel file.",
         }
+    }
+    output_type = "json" # The tool returns a dictionary, so "json" is the appropriate output_type
+    def _run(self, task_id: str) -> dict:
+        """
+        Fetches and parses an Excel file from a URL based on the task_id.
+        """
+        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+        try:
+            response = requests.get(url, timeout=100)
+            response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
+            xls_content = pd.ExcelFile(BytesIO(response.content))
+            json_sheets = {}
+            for sheet_name in xls_content.sheet_names:
+                df = xls_content.parse(sheet_name)
+                df = df.dropna(how="all") # Drop rows that are entirely NaN
+                # Limit to the first 20 rows for efficiency and to prevent overwhelming context
+                rows = df.head(20).to_dict(orient="records")
+                json_sheets[sheet_name] = rows
+            return {
+                "task_id": task_id,
+                "sheets": json_sheets,
+                "status": "Success"
+            }
+        except requests.exceptions.RequestException as e:
+            return {
+                "task_id": task_id,
+                "sheets": {},
+                "status": f"Network or HTTP error: {str(e)}"
+            }
+        except Exception as e:
+            return {
+                "task_id": task_id,
+                "sheets": {},
+                "status": f"Error in parsing Excel file: {str(e)}"
+            }
+    # Optional: You can keep __call__ for direct instance calling, but it's handled by Tool base class
+    # def __call__(self, task_id: str) -> dict:
+    #     return self._run(task_id)
 import os
             "description": "The URL of the image to be analyzed (e.g., 'https://example.com/image.jpg').",
         }
     }
+    output_type = "string"
     # You might consider making API_URL a class attribute if it's constant
     # or an instance attribute if it could vary per instance.
     # For this example, we'll keep it within the _run method for directness.
         self.image_analysis_tool = ImageAnalysisTool() # Renamed for clarity
         self.analyse_attachment_tool = AnalyseAttachmentTool() # Renamed for clarity
         self.code_llama_tool = CodeLlamaTool() # Ensure this class is defined/imported
+        self.parse_excel_to_json_tool = ParseExcelToJsonTool()
         system_prompt_template = """
 You are my general AI assistant. Your task is to answer the question I asked.
 First, provide an explanation of your reasoning, step by step, to arrive at the answer.
                 self.final_answer_tool,
                 self.video_transcription_tool,
                 self.code_llama_tool,
+                self.parse_excel_to_json_tool,
+                self.image_analysis_tool, # Use the initialized instance
                 self.analyse_attachment_tool # Add the initialized attachment analysis tool
             ],
             add_base_tools=True # Consider what this adds, ensure it doesn't duplicate.