Yago Bolivar
feat: implement SpreadsheetTool for parsing and querying Excel files with detailed summaries
108e7a1
#!/usr/bin/env python3 | |
# Testing the spreadsheet tool with a downloaded Excel file | |
import os | |
import sys | |
# Add the parent directory to sys.path to find the src module | |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
from src.spreadsheet_tool import SpreadsheetTool | |
def main(): | |
# Initialize the spreadsheet tool | |
spreadsheet_tool = SpreadsheetTool() | |
# Path to the downloaded Excel file | |
# Need to navigate up one level and then to downloaded_files | |
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | |
excel_file_path = os.path.join(project_root, "downloaded_files", "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx") | |
print(f"Testing SpreadsheetTool with file: {excel_file_path}") | |
print(f"File exists: {os.path.exists(excel_file_path)}") | |
# Parse the spreadsheet | |
print("\n--- PARSING SPREADSHEET ---") | |
parsed_data = spreadsheet_tool.parse_spreadsheet(excel_file_path) | |
if parsed_data.get("error"): | |
print(f"Error: {parsed_data['error']}") | |
return | |
# Display basic information about the spreadsheet | |
print(f"\nSpreadsheet contains {len(parsed_data['sheet_names'])} sheets:") | |
print(f"Sheet names: {parsed_data['sheet_names']}") | |
# Display a summary of each sheet | |
print("\n--- SHEET SUMMARIES ---") | |
for sheet_name, info in parsed_data["summary"].items(): | |
print(f"\nSheet: {sheet_name}") | |
print(f" Dimensions: {info['shape'][0]} rows × {info['shape'][1]} columns") | |
print(f" Column names: {info['columns']}") | |
print(f" Numeric columns: {info['numeric_columns']}") | |
print(f" Text columns: {info['text_columns']}") | |
print(f" Contains null values: {info['has_nulls']}") | |
# Display a sample of the first 3 rows | |
print(f"\n Sample data (first 3 rows):") | |
for i, row in enumerate(info['first_few_rows']): | |
print(f" Row {i+1}: {row}") | |
# Test the query_data method for numeric operations | |
print("\n--- TESTING QUERY OPERATIONS ---") | |
for query in ["sum", "average", "count"]: | |
print(f"\nTesting '{query}' operation:") | |
query_result = spreadsheet_tool.query_data(parsed_data, query) | |
if query_result.get("error"): | |
print(f" Error: {query_result['error']}") | |
else: | |
# Remove data_structure from output to keep it cleaner | |
if "data_structure" in query_result: | |
del query_result["data_structure"] | |
print(f" Result: {query_result}") | |
# Test extracting specific data | |
print("\n--- TESTING DATA EXTRACTION ---") | |
# We'll extract data from the first sheet | |
first_sheet = parsed_data["sheet_names"][0] | |
all_columns = parsed_data["summary"][first_sheet]["columns"] | |
# Extract first two columns from the first sheet | |
if len(all_columns) >= 2: | |
extract_columns = all_columns[:2] | |
print(f"\nExtracting columns {extract_columns} from sheet '{first_sheet}':") | |
extract_result = spreadsheet_tool.extract_specific_data( | |
parsed_data, | |
sheet_name=first_sheet, | |
column_names=extract_columns | |
) | |
if extract_result.get("error"): | |
print(f" Error: {extract_result['error']}") | |
else: | |
print(f" Extracted data shape: {extract_result['shape']}") | |
print(f" First few rows:") | |
for i, row in enumerate(extract_result['data'][:3]): | |
print(f" Row {i+1}: {row}") | |
if __name__ == "__main__": | |
main() | |