Spaces:
Running
Running
"""Test the new filename formatting""" | |
import os | |
import sys | |
import datetime | |
import inspect | |
# Add the project root to the path so we can import modules | |
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | |
# Import the main utils.py file directly | |
import utils as root_utils | |
# Report which utils module was actually imported, then dump the source of the
# formatter under test so the output records the implementation being exercised.
utils_location = root_utils.__file__
print(f"Imported utils from: {utils_location}")
print("Current create_descriptive_filename implementation:")
# Fetched only after the header lines are printed, so a failure to read the
# source still leaves the import location visible in the output.
formatter_source = inspect.getsource(root_utils.create_descriptive_filename)
print(formatter_source)
def _legacy_filename(original_file, result):
    """Rebuild the old-style filename for one sample.

    The result is the original stem, followed by a document-type tag, a
    period tag, and the original extension. Tags are lower-cased with spaces
    turned into underscores and carry a leading underscore; a tag that does
    not apply is the empty string.

    Args:
        original_file: Name of the input file, including its extension.
        result: OCR result dict; the keys read here are
            'detected_document_type' and 'topics'.

    Returns:
        The filename in the old format.
    """

    def as_tag(text):
        # Shared tag formatting: "_" + lower-cased text with underscores.
        return f"_{text.lower().replace(' ', '_')}"

    stem, extension = os.path.splitext(original_file)

    # A missing or empty 'topics' entry is treated as "no topics".
    topics = result['topics'] if 'topics' in result else None

    # The detected document type wins; the first topic is the fallback.
    if 'detected_document_type' in result:
        type_tag = as_tag(result['detected_document_type'])
    elif topics:
        type_tag = as_tag(topics[0])
    else:
        type_tag = ""

    # The first topic containing any period marker (plain substring match)
    # becomes the period tag.
    period_markers = ("century", "pre-", "era")
    period_tag = next(
        (
            as_tag(topic)
            for topic in (topics or ())
            if any(marker in topic.lower() for marker in period_markers)
        ),
        "",
    )

    return f"{stem}{type_tag}{period_tag}{extension}"


def main():
    """Print old-format vs. new-format filenames for a fixed set of samples.

    For each sample the old-style name is rebuilt locally and the new name
    comes from root_utils.create_descriptive_filename; both are printed next
    to the original filename.
    """
    # Each entry pairs an input filename with a sample OCR result.
    samples = [
        (
            "handwritten-letter.jpg",
            {
                "detected_document_type": "handwritten",
                "topics": ["Letter", "Handwritten", "19th Century", "Personal Correspondence"],
            },
        ),
        (
            # No detected document type: the old format falls back to the first topic.
            "magician-or-bottle-cungerer.jpg",
            {
                "topics": ["Newspaper", "Print", "19th Century", "Illustration", "Advertisement"],
            },
        ),
        (
            "baldwin_15th_north.jpg",
            {
                "detected_document_type": "letter",
                "topics": ["Correspondence", "Early Modern", "English Language"],
            },
        ),
        (
            "harpers.pdf",
            {
                "detected_document_type": "magazine",
                "topics": ["Publication", "Late 19th Century", "Magazine", "Historical"],
            },
        ),
        (
            "recipe.jpg",
            {
                "detected_document_type": "recipe",
                "topics": ["Food", "Culinary", "Historical", "Instruction"],
            },
        ),
    ]

    print("\nIMPROVED FILENAME FORMATTING TEST")
    print("=" * 50)

    # The date is only displayed here; it is not passed to the formatter.
    today = datetime.datetime.now()
    current_date = today.strftime("%b %d, %Y")
    print(f"Current date for filenames: {current_date}")
    print("\nBEFORE vs AFTER Examples:\n")

    for number, (original_file, result) in enumerate(samples, start=1):
        # The formatter receives the original extension explicitly.
        file_ext = os.path.splitext(original_file)[1]

        old_filename = _legacy_filename(original_file, result)
        new_filename = root_utils.create_descriptive_filename(original_file, result, file_ext)

        print(f"Example {number}:")
        print(f" Original: {original_file}")
        print(f" Old Format: {old_filename}")
        print(f" New Format: {new_filename}")
        print()


if __name__ == "__main__":
    main()