Spaces:
Running
Running
#!/usr/bin/env python3 | |
import streamlit as st | |
from ocr_processing import process_file | |
# Mock a file upload
class MockFile:
    """In-memory stand-in for an uploaded file.

    Provides a ``name`` attribute and a ``getvalue()`` method.
    NOTE(review): presumably these are the members ``process_file`` reads
    from a real upload object -- confirm against ``ocr_processing``.
    """

    def __init__(self, name: str, content: bytes) -> None:
        """Remember the file name and its content.

        Args:
            name: File name exposed through the ``name`` attribute.
            content: Payload handed back unchanged by ``getvalue()``.
        """
        self.name = name
        self._content = content

    def getvalue(self) -> bytes:
        """Return the content given at construction time, unmodified."""
        return self._content
def _preview(value, limit):
    """Return a printable preview of *value*, at most *limit* characters.

    Strings longer than *limit* are cut and suffixed with "..."; shorter
    strings are returned unchanged. Non-string values are summarised by
    their type instead of being sliced.
    """
    if not isinstance(value, str):
        return f"Type: {type(value)}"
    return value[:limit] + "..." if len(value) > limit else value


def main(image_path='input/magician-or-bottle-cungerer.jpg'):
    """Run ``process_file`` on one local image and print its OCR contents.

    The image is read from disk, wrapped in a ``MockFile`` and passed to
    ``process_file``. The ``ocr_contents`` mapping of the result is then
    printed: title, main text, a raw-text preview, and finally a debug
    dump of every key with a short preview of its value.

    Args:
        image_path: Path of the image to process. Defaults to the image
            this script was originally written to investigate.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        KeyError: If the result has no ``'ocr_contents'`` entry.
    """
    import os  # local import: only needed to derive the mock file name

    # Load the test image
    with open(image_path, 'rb') as f:
        file_bytes = f.read()

    # Create mock file named after the image, without its directory part
    uploaded_file = MockFile(os.path.basename(image_path), file_bytes)

    # Process the file
    result = process_file(uploaded_file)
    ocr_contents = result['ocr_contents']

    # Display results; headers are always printed, values only if present
    print("\nDocument Content")
    print("Title")
    if 'title' in ocr_contents:
        print(ocr_contents['title'])

    print("\nMain")
    if 'main_text' in ocr_contents:
        print(ocr_contents['main_text'])

    print("\nRaw Text")
    if 'raw_text' in ocr_contents:
        # Ellipsis only when the text was actually truncated
        print(_preview(ocr_contents['raw_text'], 300))

    # Debug: Print all keys in ocr_contents
    print("\nAll OCR Content Keys:")
    for key in ocr_contents:
        print(f"- {key}")

    # Debug: Display content of all keys
    print("\nContent of each key:")
    for key, content in ocr_contents.items():
        print(f"\n--- {key} ---")
        print(_preview(content, 150))
# Run the OCR debug dump only when executed as a script, not on import.
if __name__ == "__main__":
    main()