File size: 3,296 Bytes
42dc069
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""Test the new filename formatting"""
import os
import sys
import datetime
import inspect

# Put the parent directory of this file's directory at the front of sys.path
# so the `import utils` below resolves there first (presumably the project
# root -- confirm against the repository layout).
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Import the top-level utils module under an explicit alias.
import utils as root_utils

# Report which file the module was loaded from and dump the source of the
# function under test, so the output shows exactly which implementation runs.
# NOTE: these statements execute at import time, not only when run as a script.
print(f"Imported utils from: {root_utils.__file__}")
print("Current create_descriptive_filename implementation:")
print(inspect.getsource(root_utils.create_descriptive_filename))

def main():
    """Print legacy-style and new-style filenames for a set of sample documents.

    Each hard-coded sample file is paired with a sample OCR result. For every
    pair the legacy name is rebuilt inline as
    ``<stem><_doc_type><_period><ext>`` and printed next to the value returned
    by ``root_utils.create_descriptive_filename``. All output goes to stdout;
    nothing is returned.
    """
    def as_tag(text):
        # Legacy tag form: leading underscore, lower-cased, spaces -> underscores.
        return f"_{text.lower().replace(' ', '_')}"

    # Substrings that mark a topic as a time period in the legacy format.
    # These are plain substring tests, so "era" also matches inside longer words.
    period_markers = ("century", "pre-", "era")

    # (original filename, sample OCR result) pairs used for the comparison.
    samples = [
        (
            "handwritten-letter.jpg",
            {
                "detected_document_type": "handwritten",
                "topics": ["Letter", "Handwritten", "19th Century", "Personal Correspondence"],
            },
        ),
        (
            "magician-or-bottle-cungerer.jpg",
            {
                "topics": ["Newspaper", "Print", "19th Century", "Illustration", "Advertisement"],
            },
        ),
        (
            "baldwin_15th_north.jpg",
            {
                "detected_document_type": "letter",
                "topics": ["Correspondence", "Early Modern", "English Language"],
            },
        ),
        (
            "harpers.pdf",
            {
                "detected_document_type": "magazine",
                "topics": ["Publication", "Late 19th Century", "Magazine", "Historical"],
            },
        ),
        (
            "recipe.jpg",
            {
                "detected_document_type": "recipe",
                "topics": ["Food", "Culinary", "Historical", "Instruction"],
            },
        ),
    ]

    print("\nIMPROVED FILENAME FORMATTING TEST")
    print("=" * 50)

    # The date is shown for reference only; it is not used to build any name here.
    now = datetime.datetime.now()
    current_date = now.strftime("%b %d, %Y")
    print(f"Current date for filenames: {current_date}")

    print("\nBEFORE vs AFTER Examples:\n")

    for number, (source_name, ocr_result) in enumerate(samples, start=1):
        stem, extension = os.path.splitext(source_name)
        topics = ocr_result.get('topics') or []

        # Document-type tag: an explicit detected type wins, else the first topic.
        if 'detected_document_type' in ocr_result:
            type_tag = as_tag(ocr_result['detected_document_type'])
        elif topics:
            type_tag = as_tag(topics[0])
        else:
            type_tag = ""

        # Period tag: the first topic containing any marker, or empty if none.
        period_tag = next(
            (
                as_tag(topic)
                for topic in topics
                if any(marker in topic.lower() for marker in period_markers)
            ),
            "",
        )

        legacy_name = f"{stem}{type_tag}{period_tag}{extension}"

        # Name produced by the formatter under test.
        descriptive_name = root_utils.create_descriptive_filename(source_name, ocr_result, extension)

        print(f"Example {number}:")
        print(f"  Original: {source_name}")
        print(f"  Old Format: {legacy_name}")
        print(f"  New Format: {descriptive_name}")
        print()

# Run the comparison only when this file is executed directly as a script.
if __name__ == "__main__":
    main()