HF_Agents_Final_Project

Sleeping

HF_Agents_Final_Project / tests /test_spreadsheet_tool.py

Yago Bolivar

feat: implement SpreadsheetTool for parsing and querying Excel files with detailed summaries

108e7a1 3 months ago

3.6 kB

	#!/usr/bin/env python3
	# Testing the spreadsheet tool with a downloaded Excel file

	import os
	import sys

	# Add the parent directory to sys.path to find the src module
	sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from src.spreadsheet_tool import SpreadsheetTool

	def main():
	# Initialize the spreadsheet tool
	spreadsheet_tool = SpreadsheetTool()

	# Path to the downloaded Excel file
	# Need to navigate up one level and then to downloaded_files
	project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	excel_file_path = os.path.join(project_root, "downloaded_files", "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx")

	print(f"Testing SpreadsheetTool with file: {excel_file_path}")
	print(f"File exists: {os.path.exists(excel_file_path)}")

	# Parse the spreadsheet
	print("\n--- PARSING SPREADSHEET ---")
	parsed_data = spreadsheet_tool.parse_spreadsheet(excel_file_path)

	if parsed_data.get("error"):
	print(f"Error: {parsed_data['error']}")
	return

	# Display basic information about the spreadsheet
	print(f"\nSpreadsheet contains {len(parsed_data['sheet_names'])} sheets:")
	print(f"Sheet names: {parsed_data['sheet_names']}")

	# Display a summary of each sheet
	print("\n--- SHEET SUMMARIES ---")
	for sheet_name, info in parsed_data["summary"].items():
	print(f"\nSheet: {sheet_name}")
	print(f" Dimensions: {info['shape'][0]} rows × {info['shape'][1]} columns")
	print(f" Column names: {info['columns']}")
	print(f" Numeric columns: {info['numeric_columns']}")
	print(f" Text columns: {info['text_columns']}")
	print(f" Contains null values: {info['has_nulls']}")

	# Display a sample of the first 3 rows
	print(f"\n Sample data (first 3 rows):")
	for i, row in enumerate(info['first_few_rows']):
	print(f" Row {i+1}: {row}")

	# Test the query_data method for numeric operations
	print("\n--- TESTING QUERY OPERATIONS ---")
	for query in ["sum", "average", "count"]:
	print(f"\nTesting '{query}' operation:")
	query_result = spreadsheet_tool.query_data(parsed_data, query)

	if query_result.get("error"):
	print(f" Error: {query_result['error']}")
	else:
	# Remove data_structure from output to keep it cleaner
	if "data_structure" in query_result:
	del query_result["data_structure"]
	print(f" Result: {query_result}")

	# Test extracting specific data
	print("\n--- TESTING DATA EXTRACTION ---")
	# We'll extract data from the first sheet
	first_sheet = parsed_data["sheet_names"][0]
	all_columns = parsed_data["summary"][first_sheet]["columns"]

	# Extract first two columns from the first sheet
	if len(all_columns) >= 2:
	extract_columns = all_columns[:2]
	print(f"\nExtracting columns {extract_columns} from sheet '{first_sheet}':")
	extract_result = spreadsheet_tool.extract_specific_data(
	parsed_data,
	sheet_name=first_sheet,
	column_names=extract_columns
	)

	if extract_result.get("error"):
	print(f" Error: {extract_result['error']}")
	else:
	print(f" Extracted data shape: {extract_result['shape']}")
	print(f" First few rows:")
	for i, row in enumerate(extract_result['data'][:3]):
	print(f" Row {i+1}: {row}")

	if __name__ == "__main__":
	main()