File size: 1,647 Bytes
42dc069
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python3
import streamlit as st
from ocr_processing import process_file

# Mock a file upload
class MockFile:
    """Minimal in-memory stand-in for an uploaded file.

    Exposes only what this script relies on: a public ``name`` attribute
    and a ``getvalue()`` method that hands back the stored content.
    """

    def __init__(self, name, content):
        # The payload is kept private and read back through getvalue().
        self._content = content
        self.name = name

    def getvalue(self):
        """Return the content supplied at construction, unchanged."""
        return self._content

def _preview(text, limit):
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    return text[:limit] + "..." if len(text) > limit else text


def main():
    """Run ``process_file`` on the bundled test image and print its OCR contents.

    Reads ``input/magician-or-bottle-cungerer.jpg`` from the current working
    directory, wraps the bytes in a ``MockFile``, passes it to
    ``process_file`` and prints the ``'title'``, ``'main_text'`` and
    ``'raw_text'`` entries of ``result['ocr_contents']`` (when present),
    followed by a debug dump of every key and a short preview of each value.

    Raises:
        OSError: if the test image cannot be opened (e.g. it is missing).
    """
    # Load the test image - using the problematic image from the original task.
    # The file name is defined once so the path and the mock's name cannot drift.
    image_name = 'magician-or-bottle-cungerer.jpg'
    with open(f'input/{image_name}', 'rb') as f:
        file_bytes = f.read()

    # Create mock file
    uploaded_file = MockFile(image_name, file_bytes)

    # Process the file
    result = process_file(uploaded_file)
    # NOTE(review): assumes 'ocr_contents' is a dict-like mapping of section
    # name -> value; confirm against process_file.
    ocr_contents = result['ocr_contents']

    # Display results
    print("\nDocument Content")
    print("Title")
    if 'title' in ocr_contents:
        print(ocr_contents['title'])

    print("\nMain")
    if 'main_text' in ocr_contents:
        print(ocr_contents['main_text'])

    print("\nRaw Text")
    if 'raw_text' in ocr_contents:
        raw_text = ocr_contents['raw_text']
        if isinstance(raw_text, str):
            # Only mark the text as truncated when it really was.
            print(_preview(raw_text, 300))
        else:
            # Non-string values cannot be sliced/concatenated as text; report
            # the type instead, as the per-key dump below does.
            print(f"Type: {type(raw_text)}")

    # Debug: Print all keys in ocr_contents
    print("\nAll OCR Content Keys:")
    for key in ocr_contents:
        print(f"- {key}")

    # Debug: Display content of all keys
    print("\nContent of each key:")
    for key, content in ocr_contents.items():
        print(f"\n--- {key} ---")
        if isinstance(content, str):
            print(_preview(content, 150))
        else:
            print(f"Type: {type(content)}")

# Run the OCR check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()