agents_course_final_assignement

Paused

App Files Files Community

drAbreu committed on May 5

Commit

ab81a57

1 Parent(s): b74148d

Fixed bugs in Excel handling and added openpyxl dependency

Browse files

Files changed (2) hide show

requirements.txt +1 -1
tools/coding_tools.py +44 -48

requirements.txt CHANGED Viewed

@@ -7,4 +7,4 @@ llama-index-llms-anthropic
 llama-index-llms-openai
 llama-index-readers-whisper
 llama-index-readers-file
-llama-index-readers-pandas-ai

 llama-index-llms-openai
 llama-index-readers-whisper
 llama-index-readers-file
+openpyxl

tools/coding_tools.py CHANGED Viewed

@@ -6,7 +6,10 @@ from llama_index.core import SimpleDirectoryReader
 from llama_index.readers.file import (
     PandasCSVReader,
     CSVReader,
 )
 def execute_python_file(file_path: str) -> Dict[str, Any]:
@@ -69,9 +72,7 @@ def csv_excel_reader(file_path: str) -> list:
     """
     Read and parse CSV or Excel files using LlamaIndex document readers.
-    This function determines the file type by extension and uses the appropriate loader:
-    - For Excel files (.xlsx, .xls): Uses ExcelLoader
-    - For CSV files (.csv): Uses PandasCSVReader with fallback to CSVReader
     Args:
         file_path (str): Path to the CSV or Excel file to be read
@@ -82,16 +83,7 @@ def csv_excel_reader(file_path: str) -> list:
     Raises:
         FileNotFoundError: If the specified file doesn't exist
         ValueError: If the file cannot be parsed or has an unsupported extension
-    Examples:
-        >>> documents = csv_excel_reader("data/financial_report.csv")
-        >>> print(f"Loaded {len(documents)} documents")
-        >>>
-        >>> # Or with Excel files
-        >>> documents = csv_excel_reader("data/quarterly_reports.xlsx")
-        >>> print(f"Loaded {len(documents)} documents from Excel file")
     """
-    import os
     # Check if file exists
     if not os.path.exists(file_path):
@@ -100,46 +92,50 @@ def csv_excel_reader(file_path: str) -> list:
     # Get file extension
     file_ext = os.path.splitext(file_path)[1].lower()
-    # Use the appropriate loader based on file extension
     try:
         if file_ext in ['.xlsx', '.xls']:
-            # Use ExcelLoader for Excel files
-            from llama_index.readers.file.excel import ExcelLoader
-            loader = ExcelLoader(file_path)
-            return loader.load_data()
-        elif file_ext == '.csv':
-            # Use PandasCSVReader for CSV files
-            try:
-                from llama_index.readers.file.csv import PandasCSVReader
-                from llama_index.core import SimpleDirectoryReader
-                directory = os.path.dirname(file_path) or "."
-                filename = os.path.basename(file_path)
-                parser = PandasCSVReader()
-                file_extractor = {".csv": parser}
-                return SimpleDirectoryReader(
-                    input_dir=directory,
-                    input_files=[filename],
-                    file_extractor=file_extractor
-                ).load_data()
-            except Exception as e:
-                # Fall back to basic CSVReader
-                from llama_index.readers.file.csv import CSVReader
-                from llama_index.core import SimpleDirectoryReader
-                directory = os.path.dirname(file_path) or "."
-                filename = os.path.basename(file_path)
-                parser = CSVReader()
-                file_extractor = {".csv": parser}
-                return SimpleDirectoryReader(
-                    input_dir=directory,
-                    input_files=[filename],
-                    file_extractor=file_extractor
-                ).load_data()
         else:
             raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")
@@ -156,6 +152,6 @@ def csv_excel_reader(file_path: str) -> list:
 # Create a function tool for CSV/Excel reading
 csv_excel_reader_tool = FunctionTool.from_defaults(
     name="csv_excel_reader",
-    description="Reads CSV or Excel files and returns them as Document objects. Uses ExcelLoader for Excel files and PandasCSVReader for CSV files.",
     fn=csv_excel_reader
 )

 from llama_index.readers.file import (
     PandasCSVReader,
     CSVReader,
+    PandasExcelReader
 )
+import pandas as pd
+from llama_index.core import Document
 def execute_python_file(file_path: str) -> Dict[str, Any]:
     """
     Read and parse CSV or Excel files using LlamaIndex document readers.
+    This function processes both CSV and Excel files with proper path handling.
     Args:
         file_path (str): Path to the CSV or Excel file to be read
     Raises:
         FileNotFoundError: If the specified file doesn't exist
         ValueError: If the file cannot be parsed or has an unsupported extension
     """
     # Check if file exists
     if not os.path.exists(file_path):
     # Get file extension
     file_ext = os.path.splitext(file_path)[1].lower()
+    # Read file based on extension
     try:
         if file_ext in ['.xlsx', '.xls']:
+            # Read Excel file directly with pandas
+            excel = pd.ExcelFile(file_path)
+            documents = []
+            # Process each sheet
+            for sheet_name in excel.sheet_names:
+                df = pd.read_excel(file_path, sheet_name=sheet_name)
+                # Convert dataframe to string
+                content = df.to_string(index=False)
+                # Create a document with sheet metadata
+                doc = Document(
+                    text=content,
+                    metadata={
+                        "source": file_path,
+                        "sheet_name": sheet_name,
+                        "filename": os.path.basename(file_path)
+                    }
+                )
+                documents.append(doc)
+            return documents
+        elif file_ext == '.csv':
+            # Read CSV file directly with pandas
+            df = pd.read_csv(file_path)
+            # Convert dataframe to string
+            content = df.to_string(index=False)
+            # Create a document
+            doc = Document(
+                text=content,
+                metadata={
+                    "source": file_path,
+                    "filename": os.path.basename(file_path)
+                }
+            )
+            return [doc]
         else:
             raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")
 # Create a function tool for CSV/Excel reading
 csv_excel_reader_tool = FunctionTool.from_defaults(
     name="csv_excel_reader",
+    description="Reads CSV or Excel files and returns them as Document objects. Directly uses pandas to read the files rather than relying on SimpleDirectoryReader.",
     fn=csv_excel_reader
 )