import torch
from transformers import pipeline, TrOCRProcessor, VisionEncoderDecoderModel
from transformers import AutoModel, AutoProcessor
from PIL import Image
import requests
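
# These imports assume the usual dependencies are installed:
#   pip install torch transformers pillow requests
# (the LayoutLMv3 built-in OCR path additionally needs pytesseract and the
#  Tesseract binary)
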
# Method 1: Using TrOCR with pipeline (easiest approach)
print("Loading TrOCR model using pipeline...")
trocr_pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

# Method 2: Loading TrOCR model directly (more control)
print("Loading TrOCR model directly...")
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# Method 3: Loading LayoutLMv3 model (for document layout understanding)
print("Loading LayoutLMv3 model...")
layoutlm_processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
layoutlm_model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
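
# Note: all three models load on CPU by default. If a GPU is available, one
# option (not shown here) is to move trocr_model / layoutlm_model to "cuda"
# and move their inputs to the same device, or pass device=0 to pipeline().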

def extract_text_with_trocr_pipeline(image_path):
    """
    Extract text from a handwritten image using the TrOCR pipeline.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        result = trocr_pipe(image)
        return result[0]['generated_text']
    except Exception as e:
        print(f"Error processing image with pipeline: {e}")
        return None

def extract_text_with_trocr_direct(image_path):
    """
    Extract text from a handwritten image by calling the TrOCR model directly.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        # The processor resizes and normalizes the image into model-ready pixel values
        pixel_values = trocr_processor(image, return_tensors="pt").pixel_values
        generated_ids = trocr_model.generate(pixel_values)
        generated_text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return generated_text
    except Exception as e:
        print(f"Error processing image with direct model: {e}")
        return None
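
# Hedged note: trocr_model.generate() also accepts the standard generation
# kwargs (e.g. max_new_tokens, num_beams) if the default output is too short
# or too noisy for longer lines of handwriting.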

def analyze_document_layout(image_path):
    """
    Analyze document layout using LayoutLMv3.
    Note: this is a basic example. LayoutLMv3 typically requires fine-tuning for specific tasks.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        # With the default processor (apply_ocr=True) this runs Tesseract on the
        # image to obtain words and bounding boxes, so pytesseract must be
        # installed. In practice you would usually supply OCR results yourself.
        encoding = layoutlm_processor(image, return_tensors="pt")
        with torch.no_grad():
            outputs = layoutlm_model(**encoding)
        # The outputs contain embeddings that can be used for downstream tasks
        return outputs.last_hidden_state
    except Exception as e:
        print(f"Error analyzing document layout: {e}")
        return None
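
# Hedged sketch (added illustration, not part of the original script): in
# practice LayoutLMv3 is usually fed OCR words plus bounding boxes normalized
# to a 0-1000 scale rather than relying on its built-in OCR. The helper name
# and arguments below are assumptions for demonstration only.
def encode_layout_with_words(image_path, words, boxes):
    """
    Encode an image together with externally supplied OCR words and boxes.
    `words` is a list of strings; `boxes` is a list of [x0, y0, x1, y1] lists
    normalized to 0-1000.
    """
    # A separate processor with apply_ocr=False is needed so the caller-supplied
    # words/boxes are used instead of Tesseract output.
    processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
    image = Image.open(image_path).convert("RGB")
    encoding = processor(image, words, boxes=boxes, return_tensors="pt")
    with torch.no_grad():
        outputs = layoutlm_model(**encoding)
    return outputs.last_hidden_state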

def process_bill_image(image_path):
    """
    Complete pipeline to process a Singtel bill image.
    """
    print(f"Processing bill image: {image_path}")

    # Extract text using TrOCR
    print("Extracting text with TrOCR...")
    extracted_text = extract_text_with_trocr_pipeline(image_path)

    if extracted_text:
        print(f"Extracted text: {extracted_text}")
        # Bill-specific parsing (account numbers, amounts due, due dates,
        # service charges) can be added here; see the hedged
        # parse_bill_fields sketch below for one possible starting point.
        return {
            'extracted_text': extracted_text,
            'status': 'success'
        }
    else:
        return {
            'extracted_text': None,
            'status': 'failed'
        }
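
import re  # needed only for the sketch below

# Hedged sketch (illustrative only; the field names and regex patterns are
# assumptions, not taken from a real Singtel bill format): pull a few common
# bill fields out of the raw OCR text with simple regular expressions.
def parse_bill_fields(extracted_text):
    """
    Very rough field extraction from OCR output. Real bills would need
    layout-aware parsing (e.g. LayoutLMv3 fine-tuned for key-value extraction)
    rather than plain regexes.
    """
    fields = {}
    # Amounts like "$123.45" or "1,234.56"
    amount = re.search(r"\$?\s*(\d{1,3}(?:,\d{3})*\.\d{2})", extracted_text)
    if amount:
        fields['amount_due'] = amount.group(1)
    # Dates like "01/02/2024" or "1 Feb 2024"
    due_date = re.search(r"(\d{2}/\d{2}/\d{4}|\d{1,2}\s+[A-Za-z]{3,9}\s+\d{4})", extracted_text)
    if due_date:
        fields['due_date'] = due_date.group(1)
    # Account numbers such as "Account No: 12345678"
    account = re.search(r"(?:account|a/c)\s*(?:no\.?|number)?\s*[:#]?\s*(\d{6,})",
                        extracted_text, re.IGNORECASE)
    if account:
        fields['account_number'] = account.group(1)
    return fields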

# Example usage
if __name__ == "__main__":
    print("=== Singtel Bill Scanner Test ===")

    # Replace with your actual image path
    # image_path = "path/to/your/bill_image.jpg"

    # For testing with a sample image (you can download this)
    sample_url = "https://huggingface.co/microsoft/trocr-base-handwritten/resolve/main/images/example_1.jpg"

    print("\nTesting with sample image...")
    try:
        # Download the sample image for testing
        response = requests.get(sample_url, timeout=30)
        response.raise_for_status()
        with open("sample_handwritten.jpg", "wb") as f:
            f.write(response.content)

        # Test the models
        result = process_bill_image("sample_handwritten.jpg")
        print(f"Result: {result}")
    except Exception as e:
        print(f"Error downloading sample image: {e}")
        print("Please provide your own image path to test the models")

    print("\n=== Usage Instructions ===")
    print("1. To use with your own image:")
    print("   result = process_bill_image('path/to/your/bill.jpg')")
    print("\n2. For the pipeline approach:")
    print("   text = extract_text_with_trocr_pipeline('image.jpg')")
    print("\n3. For the direct model approach:")
    print("   text = extract_text_with_trocr_direct('image.jpg')")
    print("\n4. For document layout analysis:")
    print("   layout = analyze_document_layout('image.jpg')")