|
import subprocess |
|
import os |
|
from typing import Optional, Dict, Any |
|
from llama_index.core.tools import FunctionTool |
|
from llama_index.core import SimpleDirectoryReader |
|
from llama_index.readers.file import ( |
|
PandasCSVReader, |
|
CSVReader, |
|
) |
|
|
|
|
|
def execute_python_file(file_path: str) -> Dict[str, Any]:
    """
    Execute a Python file in a child interpreter and return its output.

    The script is run with the interpreter that is running this module
    (``sys.executable``), so it sees the same environment and the call does
    not depend on a ``python3`` binary being discoverable on ``PATH``.

    Args:
        file_path: Path to the Python file to execute

    Returns:
        Dictionary with the same four keys on every path:
            success: True when the script exited with status 0
            error: Description of the failure, or None on success
            output: Stripped stdout on success, otherwise None
            stderr: Text the script wrote to stderr, or None when no
                stderr was captured (file missing, launch failure)
    """
    # Function-scope import so this block does not rely on a module-level
    # ``import sys`` being present.
    import sys

    if not os.path.exists(file_path):
        return {
            "success": False,
            "error": f"File not found at {file_path}",
            "output": None,
            "stderr": None,
        }

    # sys.executable can be empty in embedded interpreters; fall back to the
    # previously hard-coded command in that case.
    interpreter = sys.executable or "python3"

    try:
        result = subprocess.run(
            [interpreter, file_path],
            capture_output=True,
            text=True,
            check=True,  # non-zero exit status raises CalledProcessError
        )
    except subprocess.CalledProcessError as e:
        return {
            "success": False,
            "error": f"Execution error: {e}",
            "output": None,
            "stderr": e.stderr,
        }
    except Exception as e:
        # Boundary handler: this function reports failures through the
        # returned dict rather than raising (e.g. interpreter not launchable).
        return {
            "success": False,
            "error": f"Error: {str(e)}",
            "output": None,
            "stderr": None,
        }

    return {
        "success": True,
        "error": None,
        "output": result.stdout.strip(),
        # Kept even on success so warnings printed by the script are visible.
        "stderr": result.stderr,
    }
|
|
|
|
|
|
|
# Expose execute_python_file to LlamaIndex as a callable tool.
execute_python_file_tool = FunctionTool.from_defaults(
    fn=execute_python_file,
    name="execute_python_file",
    description="Execute a Python file and return its output.",
)
|
|
|
|
|
def _load_csv_with(parser: Any, file_path: str) -> list:
    """Load a single CSV file through SimpleDirectoryReader using ``parser``."""
    # Pass the full path: entries of ``input_files`` are opened as given and
    # are not joined onto ``input_dir``, so a bare basename only worked when
    # the file happened to sit in the current working directory.
    return SimpleDirectoryReader(
        input_files=[file_path],
        file_extractor={".csv": parser},
    ).load_data()


def csv_excel_reader(file_path: str) -> list:
    """
    Read and parse CSV or Excel files using LlamaIndex document readers.

    The file type is chosen from the (case-insensitive) extension:
    - Excel files (.xlsx, .xls): ExcelLoader
    - CSV files (.csv): PandasCSVReader, falling back to CSVReader if the
      pandas-based reader raises

    Args:
        file_path (str): Path to the CSV or Excel file to be read

    Returns:
        list: Document objects containing the parsed data from the file

    Raises:
        FileNotFoundError: If the specified file doesn't exist
        ValueError: If the extension is unsupported, or if the file cannot
            be loaded (the underlying exception is attached as
            ``__cause__``)

    Examples:
        >>> documents = csv_excel_reader("data/financial_report.csv")
        >>> print(f"Loaded {len(documents)} documents")
        >>>
        >>> # Or with Excel files
        >>> documents = csv_excel_reader("data/quarterly_reports.xlsx")
        >>> print(f"Loaded {len(documents)} documents from Excel file")
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at {file_path}")

    file_ext = os.path.splitext(file_path)[1].lower()

    # Reject unsupported types before the try block so this message reaches
    # the caller unchanged instead of being re-wrapped as a processing error.
    if file_ext not in (".csv", ".xlsx", ".xls"):
        raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")

    try:
        if file_ext == ".csv":
            try:
                return _load_csv_with(PandasCSVReader(), file_path)
            except Exception:
                # Deliberate best-effort fallback: retry with the plain CSV
                # reader. If it also fails, the pandas error remains
                # reachable through the exception's __context__.
                return _load_csv_with(CSVReader(), file_path)

        # NOTE(review): this import path is carried over unchanged and could
        # not be confirmed from this file — verify that the installed
        # llama-index-readers-file provides `excel.ExcelLoader`. An import
        # failure is reported as ValueError by the handler below.
        from llama_index.readers.file.excel import ExcelLoader

        return ExcelLoader(file_path).load_data()
    except Exception as e:
        # Chain the cause rather than pasting a formatted traceback into the
        # message; the full traceback stays available via __cause__.
        raise ValueError(f"Error processing file {file_path}: {e}") from e
|
|
|
|
|
|
|
# Expose csv_excel_reader to LlamaIndex as a callable tool.
csv_excel_reader_tool = FunctionTool.from_defaults(
    fn=csv_excel_reader,
    name="csv_excel_reader",
    description=(
        "Reads CSV or Excel files and returns them as Document objects. "
        "Uses ExcelLoader for Excel files and PandasCSVReader for CSV files."
    ),
)