File size: 3,604 Bytes
108e7a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python3
# Testing the spreadsheet tool with a downloaded Excel file

import os
import sys

# Add the parent directory to sys.path to find the src module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from src.spreadsheet_tool import SpreadsheetTool

def main():
    # Initialize the spreadsheet tool
    spreadsheet_tool = SpreadsheetTool()
    
    # Path to the downloaded Excel file
    # Need to navigate up one level and then to downloaded_files
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    excel_file_path = os.path.join(project_root, "downloaded_files", "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx")
    
    print(f"Testing SpreadsheetTool with file: {excel_file_path}")
    print(f"File exists: {os.path.exists(excel_file_path)}")
    
    # Parse the spreadsheet
    print("\n--- PARSING SPREADSHEET ---")
    parsed_data = spreadsheet_tool.parse_spreadsheet(excel_file_path)
    
    if parsed_data.get("error"):
        print(f"Error: {parsed_data['error']}")
        return
        
    # Display basic information about the spreadsheet
    print(f"\nSpreadsheet contains {len(parsed_data['sheet_names'])} sheets:")
    print(f"Sheet names: {parsed_data['sheet_names']}")
    
    # Display a summary of each sheet
    print("\n--- SHEET SUMMARIES ---")
    for sheet_name, info in parsed_data["summary"].items():
        print(f"\nSheet: {sheet_name}")
        print(f"  Dimensions: {info['shape'][0]} rows × {info['shape'][1]} columns")
        print(f"  Column names: {info['columns']}")
        print(f"  Numeric columns: {info['numeric_columns']}")
        print(f"  Text columns: {info['text_columns']}")
        print(f"  Contains null values: {info['has_nulls']}")
        
        # Display a sample of the first 3 rows
        print(f"\n  Sample data (first 3 rows):")
        for i, row in enumerate(info['first_few_rows']):
            print(f"    Row {i+1}: {row}")
    
    # Test the query_data method for numeric operations
    print("\n--- TESTING QUERY OPERATIONS ---")
    for query in ["sum", "average", "count"]:
        print(f"\nTesting '{query}' operation:")
        query_result = spreadsheet_tool.query_data(parsed_data, query)
        
        if query_result.get("error"):
            print(f"  Error: {query_result['error']}")
        else:
            # Remove data_structure from output to keep it cleaner
            if "data_structure" in query_result:
                del query_result["data_structure"]
            print(f"  Result: {query_result}")
    
    # Test extracting specific data
    print("\n--- TESTING DATA EXTRACTION ---")
    # We'll extract data from the first sheet
    first_sheet = parsed_data["sheet_names"][0]
    all_columns = parsed_data["summary"][first_sheet]["columns"]
    
    # Extract first two columns from the first sheet
    if len(all_columns) >= 2:
        extract_columns = all_columns[:2]
        print(f"\nExtracting columns {extract_columns} from sheet '{first_sheet}':")
        extract_result = spreadsheet_tool.extract_specific_data(
            parsed_data, 
            sheet_name=first_sheet,
            column_names=extract_columns
        )
        
        if extract_result.get("error"):
            print(f"  Error: {extract_result['error']}")
        else:
            print(f"  Extracted data shape: {extract_result['shape']}")
            print(f"  First few rows:")
            for i, row in enumerate(extract_result['data'][:3]):
                print(f"    Row {i+1}: {row}")

if __name__ == "__main__":
    main()