drAbreu committed on
Commit
ab81a57
·
1 Parent(s): b74148d

Fixed bugs in Excel handling and added openpyxl dependency

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tools/coding_tools.py +44 -48
requirements.txt CHANGED
@@ -7,4 +7,4 @@ llama-index-llms-anthropic
7
  llama-index-llms-openai
8
  llama-index-readers-whisper
9
  llama-index-readers-file
10
- llama-index-readers-pandas-ai
 
7
  llama-index-llms-openai
8
  llama-index-readers-whisper
9
  llama-index-readers-file
10
+ openpyxl
tools/coding_tools.py CHANGED
@@ -6,7 +6,10 @@ from llama_index.core import SimpleDirectoryReader
6
  from llama_index.readers.file import (
7
  PandasCSVReader,
8
  CSVReader,
 
9
  )
 
 
10
 
11
 
12
  def execute_python_file(file_path: str) -> Dict[str, Any]:
@@ -69,9 +72,7 @@ def csv_excel_reader(file_path: str) -> list:
69
  """
70
  Read and parse CSV or Excel files using LlamaIndex document readers.
71
 
72
- This function determines the file type by extension and uses the appropriate loader:
73
- - For Excel files (.xlsx, .xls): Uses ExcelLoader
74
- - For CSV files (.csv): Uses PandasCSVReader with fallback to CSVReader
75
 
76
  Args:
77
  file_path (str): Path to the CSV or Excel file to be read
@@ -82,16 +83,7 @@ def csv_excel_reader(file_path: str) -> list:
82
  Raises:
83
  FileNotFoundError: If the specified file doesn't exist
84
  ValueError: If the file cannot be parsed or has an unsupported extension
85
-
86
- Examples:
87
- >>> documents = csv_excel_reader("data/financial_report.csv")
88
- >>> print(f"Loaded {len(documents)} documents")
89
- >>>
90
- >>> # Or with Excel files
91
- >>> documents = csv_excel_reader("data/quarterly_reports.xlsx")
92
- >>> print(f"Loaded {len(documents)} documents from Excel file")
93
  """
94
- import os
95
 
96
  # Check if file exists
97
  if not os.path.exists(file_path):
@@ -100,46 +92,50 @@ def csv_excel_reader(file_path: str) -> list:
100
  # Get file extension
101
  file_ext = os.path.splitext(file_path)[1].lower()
102
 
103
- # Use the appropriate loader based on file extension
104
  try:
105
  if file_ext in ['.xlsx', '.xls']:
106
- # Use ExcelLoader for Excel files
107
- from llama_index.readers.file.excel import ExcelLoader
108
- loader = ExcelLoader(file_path)
109
- return loader.load_data()
110
 
111
- elif file_ext == '.csv':
112
- # Use PandasCSVReader for CSV files
113
- try:
114
- from llama_index.readers.file.csv import PandasCSVReader
115
- from llama_index.core import SimpleDirectoryReader
116
-
117
- directory = os.path.dirname(file_path) or "."
118
- filename = os.path.basename(file_path)
119
 
120
- parser = PandasCSVReader()
121
- file_extractor = {".csv": parser}
122
- return SimpleDirectoryReader(
123
- input_dir=directory,
124
- input_files=[filename],
125
- file_extractor=file_extractor
126
- ).load_data()
127
 
128
- except Exception as e:
129
- # Fall back to basic CSVReader
130
- from llama_index.readers.file.csv import CSVReader
131
- from llama_index.core import SimpleDirectoryReader
132
-
133
- directory = os.path.dirname(file_path) or "."
134
- filename = os.path.basename(file_path)
135
-
136
- parser = CSVReader()
137
- file_extractor = {".csv": parser}
138
- return SimpleDirectoryReader(
139
- input_dir=directory,
140
- input_files=[filename],
141
- file_extractor=file_extractor
142
- ).load_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  else:
144
  raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")
145
 
@@ -156,6 +152,6 @@ def csv_excel_reader(file_path: str) -> list:
156
  # Create a function tool for CSV/Excel reading
157
  csv_excel_reader_tool = FunctionTool.from_defaults(
158
  name="csv_excel_reader",
159
- description="Reads CSV or Excel files and returns them as Document objects. Uses ExcelLoader for Excel files and PandasCSVReader for CSV files.",
160
  fn=csv_excel_reader
161
  )
 
6
  from llama_index.readers.file import (
7
  PandasCSVReader,
8
  CSVReader,
9
+ PandasExcelReader
10
  )
11
+ import pandas as pd
12
+ from llama_index.core import Document
13
 
14
 
15
  def execute_python_file(file_path: str) -> Dict[str, Any]:
 
72
  """
73
  Read and parse CSV or Excel files using LlamaIndex document readers.
74
 
75
+ This function processes both CSV and Excel files with proper path handling.
 
 
76
 
77
  Args:
78
  file_path (str): Path to the CSV or Excel file to be read
 
83
  Raises:
84
  FileNotFoundError: If the specified file doesn't exist
85
  ValueError: If the file cannot be parsed or has an unsupported extension
 
 
 
 
 
 
 
 
86
  """
 
87
 
88
  # Check if file exists
89
  if not os.path.exists(file_path):
 
92
  # Get file extension
93
  file_ext = os.path.splitext(file_path)[1].lower()
94
 
95
+ # Read file based on extension
96
  try:
97
  if file_ext in ['.xlsx', '.xls']:
98
+ # Read Excel file directly with pandas
99
+ excel = pd.ExcelFile(file_path)
100
+ documents = []
 
101
 
102
+ # Process each sheet
103
+ for sheet_name in excel.sheet_names:
104
+ df = pd.read_excel(file_path, sheet_name=sheet_name)
 
 
 
 
 
105
 
106
+ # Convert dataframe to string
107
+ content = df.to_string(index=False)
 
 
 
 
 
108
 
109
+ # Create a document with sheet metadata
110
+ doc = Document(
111
+ text=content,
112
+ metadata={
113
+ "source": file_path,
114
+ "sheet_name": sheet_name,
115
+ "filename": os.path.basename(file_path)
116
+ }
117
+ )
118
+ documents.append(doc)
119
+
120
+ return documents
121
+
122
+ elif file_ext == '.csv':
123
+ # Read CSV file directly with pandas
124
+ df = pd.read_csv(file_path)
125
+
126
+ # Convert dataframe to string
127
+ content = df.to_string(index=False)
128
+
129
+ # Create a document
130
+ doc = Document(
131
+ text=content,
132
+ metadata={
133
+ "source": file_path,
134
+ "filename": os.path.basename(file_path)
135
+ }
136
+ )
137
+ return [doc]
138
+
139
  else:
140
  raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")
141
 
 
152
  # Create a function tool for CSV/Excel reading
153
  csv_excel_reader_tool = FunctionTool.from_defaults(
154
  name="csv_excel_reader",
155
+ description="Reads CSV or Excel files and returns them as Document objects. Directly uses pandas to read the files rather than relying on SimpleDirectoryReader.",
156
  fn=csv_excel_reader
157
  )