File size: 4,970 Bytes
795183d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import torch
from transformers import pipeline, TrOCRProcessor, VisionEncoderDecoderModel
from transformers import AutoModel, AutoProcessor
from PIL import Image
import requests

# Method 1: Using TrOCR with pipeline (easiest approach)
# NOTE: these module-level loads download weights on first run and are
# executed at import time; the functions below rely on these globals.
print("Loading TrOCR model using pipeline...")
trocr_pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

# Method 2: Loading TrOCR model directly (more control)
# Processor handles image preprocessing + token decoding; model does generation.
print("Loading TrOCR model directly...")
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# Method 3: Loading LayoutLMv3 model (for document layout understanding)
# Base model only — emits embeddings; task heads require fine-tuning.
print("Loading LayoutLMv3 model...")
layoutlm_processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
layoutlm_model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")

def extract_text_with_trocr_pipeline(image_path):
    """
    Extract text from a handwritten image using the TrOCR pipeline.

    Args:
        image_path: Path to the image file to read.

    Returns:
        The recognized text string, or None if processing failed.
    """
    try:
        # Context manager ensures the underlying image file handle is
        # closed even if the pipeline raises (original leaked it).
        with Image.open(image_path) as image:
            result = trocr_pipe(image)
        return result[0]['generated_text']
    except Exception as e:
        # Best-effort API: report the error and signal failure with None.
        print(f"Error processing image with pipeline: {e}")
        return None

def extract_text_with_trocr_direct(image_path):
    """
    Extract text from a handwritten image using the TrOCR model directly.

    Gives more control than the pipeline wrapper: preprocessing via the
    processor, explicit `generate`, and manual decoding.

    Args:
        image_path: Path to the image file to read.

    Returns:
        The recognized text string, or None if processing failed.
    """
    try:
        # Close the file handle as soon as preprocessing is done
        # (original left the image file open).
        with Image.open(image_path) as image:
            pixel_values = trocr_processor(image, return_tensors="pt").pixel_values

        generated_ids = trocr_model.generate(pixel_values)
        # skip_special_tokens drops BOS/EOS/pad markers from the output.
        generated_text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return generated_text
    except Exception as e:
        # Best-effort API: report the error and signal failure with None.
        print(f"Error processing image with direct model: {e}")
        return None

def analyze_document_layout(image_path):
    """
    Analyze document layout using LayoutLMv3.

    Note: This is a basic example. LayoutLMv3 typically requires
    fine-tuning (and OCR-derived words + bounding boxes) for real tasks.

    Args:
        image_path: Path to the image file to read.

    Returns:
        The model's last hidden state tensor (embeddings usable for
        downstream tasks), or None if processing failed.
    """
    try:
        # Context manager releases the image file handle promptly
        # (original leaked it).
        with Image.open(image_path) as image:
            # Simplified: real usage would also pass OCR text + boxes.
            encoding = layoutlm_processor(image, return_tensors="pt")

        # Inference only — no gradients needed.
        with torch.no_grad():
            outputs = layoutlm_model(**encoding)

        return outputs.last_hidden_state
    except Exception as e:
        # Best-effort API: report the error and signal failure with None.
        print(f"Error analyzing document layout: {e}")
        return None

def process_bill_image(image_path):
    """
    Complete pipeline to process a Singtel bill image.

    Runs TrOCR text extraction and wraps the outcome in a small result
    dict with a 'status' flag.

    Args:
        image_path: Path to the bill image file.

    Returns:
        dict with keys 'extracted_text' (str or None) and 'status'
        ('success' or 'failed').
    """
    print(f"Processing bill image: {image_path}")

    # Extract text using TrOCR
    print("Extracting text with TrOCR...")
    extracted_text = extract_text_with_trocr_pipeline(image_path)

    # Guard clause: extraction failure (None or empty) ends processing.
    if not extracted_text:
        return {
            'extracted_text': None,
            'status': 'failed'
        }

    print(f"Extracted text: {extracted_text}")

    # You can add bill-specific parsing logic here
    # For example, looking for patterns like:
    # - Account numbers
    # - Amounts due
    # - Due dates
    # - Service charges

    return {
        'extracted_text': extracted_text,
        'status': 'success'
    }

# Example usage
if __name__ == "__main__":
    print("=== Singtel Bill Scanner Test ===")
    
    # Replace with your actual image path
    # image_path = "path/to/your/bill_image.jpg"
    
    # For testing with a sample image (you can download this)
    sample_url = "https://huggingface.co/microsoft/trocr-base-handwritten/resolve/main/images/example_1.jpg"
    
    print("\nTesting with sample image...")
    try:
        # Download sample image for testing.
        # timeout prevents hanging forever on a dead connection;
        # raise_for_status stops us writing an HTML error page to disk
        # as if it were a JPEG.
        response = requests.get(sample_url, timeout=30)
        response.raise_for_status()
        with open("sample_handwritten.jpg", "wb") as f:
            f.write(response.content)
        
        # Test the models
        result = process_bill_image("sample_handwritten.jpg")
        print(f"Result: {result}")
        
    except Exception as e:
        print(f"Error downloading sample image: {e}")
        print("Please provide your own image path to test the models")
    
    print("\n=== Usage Instructions ===")
    print("1. To use with your own image:")
    print("   result = process_bill_image('path/to/your/bill.jpg')")
    print("\n2. For pipeline approach:")
    print("   text = extract_text_with_trocr_pipeline('image.jpg')")
    print("\n3. For direct model approach:")
    print("   text = extract_text_with_trocr_direct('image.jpg')")
    print("\n4. For document layout analysis:")
    print("   layout = analyze_document_layout('image.jpg')")