HF_Agents_Final_Project / tests /test_spreadsheet_tool.py
Yago Bolivar
feat: implement SpreadsheetTool for parsing and querying Excel files with detailed summaries
108e7a1
#!/usr/bin/env python3
# Testing the spreadsheet tool with a downloaded Excel file
import os
import sys
# Add the parent directory to sys.path to find the src module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.spreadsheet_tool import SpreadsheetTool
def main():
# Initialize the spreadsheet tool
spreadsheet_tool = SpreadsheetTool()
# Path to the downloaded Excel file
# Need to navigate up one level and then to downloaded_files
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
excel_file_path = os.path.join(project_root, "downloaded_files", "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx")
print(f"Testing SpreadsheetTool with file: {excel_file_path}")
print(f"File exists: {os.path.exists(excel_file_path)}")
# Parse the spreadsheet
print("\n--- PARSING SPREADSHEET ---")
parsed_data = spreadsheet_tool.parse_spreadsheet(excel_file_path)
if parsed_data.get("error"):
print(f"Error: {parsed_data['error']}")
return
# Display basic information about the spreadsheet
print(f"\nSpreadsheet contains {len(parsed_data['sheet_names'])} sheets:")
print(f"Sheet names: {parsed_data['sheet_names']}")
# Display a summary of each sheet
print("\n--- SHEET SUMMARIES ---")
for sheet_name, info in parsed_data["summary"].items():
print(f"\nSheet: {sheet_name}")
print(f" Dimensions: {info['shape'][0]} rows × {info['shape'][1]} columns")
print(f" Column names: {info['columns']}")
print(f" Numeric columns: {info['numeric_columns']}")
print(f" Text columns: {info['text_columns']}")
print(f" Contains null values: {info['has_nulls']}")
# Display a sample of the first 3 rows
print(f"\n Sample data (first 3 rows):")
for i, row in enumerate(info['first_few_rows']):
print(f" Row {i+1}: {row}")
# Test the query_data method for numeric operations
print("\n--- TESTING QUERY OPERATIONS ---")
for query in ["sum", "average", "count"]:
print(f"\nTesting '{query}' operation:")
query_result = spreadsheet_tool.query_data(parsed_data, query)
if query_result.get("error"):
print(f" Error: {query_result['error']}")
else:
# Remove data_structure from output to keep it cleaner
if "data_structure" in query_result:
del query_result["data_structure"]
print(f" Result: {query_result}")
# Test extracting specific data
print("\n--- TESTING DATA EXTRACTION ---")
# We'll extract data from the first sheet
first_sheet = parsed_data["sheet_names"][0]
all_columns = parsed_data["summary"][first_sheet]["columns"]
# Extract first two columns from the first sheet
if len(all_columns) >= 2:
extract_columns = all_columns[:2]
print(f"\nExtracting columns {extract_columns} from sheet '{first_sheet}':")
extract_result = spreadsheet_tool.extract_specific_data(
parsed_data,
sheet_name=first_sheet,
column_names=extract_columns
)
if extract_result.get("error"):
print(f" Error: {extract_result['error']}")
else:
print(f" Extracted data shape: {extract_result['shape']}")
print(f" First few rows:")
for i, row in enumerate(extract_result['data'][:3]):
print(f" Row {i+1}: {row}")
if __name__ == "__main__":
main()