#!/usr/bin/env python3
"""Manual check script: run ``process_file`` on one image and print its OCR contents.

The script reads an image from disk, wraps the bytes in a minimal stand-in
for an uploaded file, passes it to ``ocr_processing.process_file`` and dumps
the ``'ocr_contents'`` entry of the result to stdout for inspection.
"""

from pathlib import Path

import streamlit as st  # noqa: F401  (kept: was imported by the original script)

from ocr_processing import process_file

# Test image used when main() is called without an explicit path.
DEFAULT_IMAGE_PATH = 'input/magician-or-bottle-cungerer.jpg'

# Preview lengths (in characters) for the printed excerpts.
RAW_TEXT_PREVIEW_CHARS = 300
KEY_PREVIEW_CHARS = 150


class MockFile:
    """Minimal stand-in for an uploaded file.

    Exposes only what this script hands to ``process_file``: a ``name``
    attribute and a ``getvalue()`` method returning the raw content.
    """

    def __init__(self, name, content):
        self.name = name
        self._content = content

    def getvalue(self):
        """Return the content passed to the constructor, unchanged."""
        return self._content


def _preview(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def main(image_path=DEFAULT_IMAGE_PATH):
    """Process one image with ``process_file`` and print the OCR contents.

    Args:
        image_path: Path of the image to read. Defaults to the test image
            the script was originally written for.

    Raises:
        OSError: If the image cannot be read.
        KeyError: If the result has no ``'ocr_contents'`` entry; this is left
            unhandled on purpose so a malformed result is visible.
    """
    path = Path(image_path)

    # Mock the upload: same file name and raw bytes as the file on disk.
    uploaded_file = MockFile(path.name, path.read_bytes())

    result = process_file(uploaded_file)
    # Look the entry up once; every section below reads from it.
    ocr_contents = result['ocr_contents']

    # Display results
    print("\nDocument Content")
    print("Title")
    if 'title' in ocr_contents:
        print(ocr_contents['title'])

    print("\nMain")
    if 'main_text' in ocr_contents:
        print(ocr_contents['main_text'])

    print("\nRaw Text")
    if 'raw_text' in ocr_contents:
        # Same truncation rule as the per-key dump below: the ellipsis is
        # printed only when the text was actually shortened.
        print(_preview(ocr_contents['raw_text'], RAW_TEXT_PREVIEW_CHARS))

    # Debug: print all keys in ocr_contents
    print("\nAll OCR Content Keys:")
    for key in ocr_contents:
        print(f"- {key}")

    # Debug: display a short preview of the content behind every key
    print("\nContent of each key:")
    for key, content in ocr_contents.items():
        print(f"\n--- {key} ---")
        if isinstance(content, str):
            print(_preview(content, KEY_PREVIEW_CHARS))
        else:
            # Non-string values are only identified by type, not dumped.
            print(f"Type: {type(content)}")


if __name__ == "__main__":
    main()