# SIngtel-Bill-Scanner / test_model.py
# Cosmo125's picture
# Upload 26 files
# 795183d verified
import torch
from transformers import pipeline, TrOCRProcessor, VisionEncoderDecoderModel
from transformers import AutoModel, AutoProcessor
from PIL import Image
import requests
# Checkpoint identifiers used by the loaders below.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"
_LAYOUTLM_CHECKPOINT = "microsoft/layoutlmv3-base"

# Method 1: TrOCR wrapped in a high-level pipeline (easiest approach).
print("Loading TrOCR model using pipeline...")
trocr_pipe = pipeline("image-to-text", model=_TROCR_CHECKPOINT)

# Method 2: TrOCR processor and model loaded separately (more control).
print("Loading TrOCR model directly...")
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

# Method 3: LayoutLMv3 processor and base model (document layout understanding).
print("Loading LayoutLMv3 model...")
layoutlm_processor = AutoProcessor.from_pretrained(_LAYOUTLM_CHECKPOINT)
layoutlm_model = AutoModel.from_pretrained(_LAYOUTLM_CHECKPOINT)
def extract_text_with_trocr_pipeline(image_path):
    """
    Extract text from a handwritten image using the TrOCR pipeline.

    Args:
        image_path: Path of the image file to open with PIL.

    Returns:
        The ``generated_text`` of the first pipeline result, or ``None`` if
        opening the image or running the pipeline raised an exception (the
        error is printed, not re-raised).
    """
    try:
        # Image.open keeps the underlying file open; the context manager
        # releases it as soon as the pipeline has consumed the image.
        with Image.open(image_path) as image:
            result = trocr_pipe(image)
        return result[0]['generated_text']
    except Exception as e:
        # Deliberate best-effort: callers treat None as "no text extracted".
        print(f"Error processing image with pipeline: {e}")
        return None
def extract_text_with_trocr_direct(image_path):
    """
    Extract text from a handwritten image using the TrOCR model directly.

    Args:
        image_path: Path of the image file to open with PIL.

    Returns:
        The first decoded string produced by the model, or ``None`` if any
        step raised an exception (the error is printed, not re-raised).
    """
    try:
        # Close the file handle promptly and normalise to 3-channel RGB, as
        # the TrOCR usage examples do; grayscale, palette or RGBA scans would
        # otherwise reach the processor in their native mode.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")
        pixel_values = trocr_processor(image, return_tensors="pt").pixel_values
        generated_ids = trocr_model.generate(pixel_values)
        generated_text = trocr_processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        return generated_text
    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with None.
        print(f"Error processing image with direct model: {e}")
        return None
def analyze_document_layout(image_path):
    """
    Analyze document layout using LayoutLMv3.

    Note: This is a basic example. LayoutLMv3 typically requires fine-tuning
    for specific tasks.

    Args:
        image_path: Path of the image file to open with PIL.

    Returns:
        ``outputs.last_hidden_state`` from the LayoutLMv3 model, or ``None``
        if any step raised an exception (the error is printed, not re-raised).
    """
    try:
        # Close the file handle promptly and hand the processor an RGB image
        # regardless of the mode the file was saved in.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")
        # For LayoutLMv3, you typically need text and bounding boxes.
        # This is a simplified example that passes only the image.
        encoding = layoutlm_processor(image, return_tensors="pt")
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = layoutlm_model(**encoding)
        # The outputs contain embeddings that can be used for downstream tasks.
        return outputs.last_hidden_state
    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with None.
        print(f"Error analyzing document layout: {e}")
        return None
def process_bill_image(image_path):
    """
    Run the full bill-processing flow on one image.

    Prints progress messages, extracts text through
    ``extract_text_with_trocr_pipeline`` and wraps the outcome in a dict
    with the keys ``'extracted_text'`` and ``'status'``. The status is
    ``'success'`` when the extracted text is truthy and ``'failed'``
    otherwise (in which case ``'extracted_text'`` is ``None``).
    """
    print(f"Processing bill image: {image_path}")

    # Text extraction step (TrOCR pipeline).
    print("Extracting text with TrOCR...")
    extracted_text = extract_text_with_trocr_pipeline(image_path)

    # Guard clause: nothing usable came back from OCR.
    if not extracted_text:
        failure = {
            'extracted_text': None,
            'status': 'failed'
        }
        return failure

    print(f"Extracted text: {extracted_text}")
    # Bill-specific parsing logic can be added here, for example to look for:
    # - Account numbers
    # - Amounts due
    # - Due dates
    # - Service charges
    success = {
        'extracted_text': extracted_text,
        'status': 'success'
    }
    return success
# Example usage: download a sample image, run it through the bill pipeline,
# then print usage instructions.
if __name__ == "__main__":
    print("=== Singtel Bill Scanner Test ===")
    # Replace with your actual image path
    # image_path = "path/to/your/bill_image.jpg"
    # For testing with a sample image (you can download this)
    sample_url = "https://huggingface.co/microsoft/trocr-base-handwritten/resolve/main/images/example_1.jpg"
    print("\nTesting with sample image...")
    try:
        # Download sample image for testing. The timeout stops the script
        # from hanging indefinitely on a stalled connection.
        response = requests.get(sample_url, timeout=30)
        # Fail here on an HTTP error instead of saving an error page as a .jpg.
        response.raise_for_status()
        with open("sample_handwritten.jpg", "wb") as f:
            f.write(response.content)
        # Test the models
        result = process_bill_image("sample_handwritten.jpg")
        print(f"Result: {result}")
    except Exception as e:
        # Top-level boundary of the demo script: report and carry on to the
        # usage instructions.
        print(f"Error downloading sample image: {e}")
        print("Please provide your own image path to test the models")
    print("\n=== Usage Instructions ===")
    print("1. To use with your own image:")
    print(" result = process_bill_image('path/to/your/bill.jpg')")
    print("\n2. For pipeline approach:")
    print(" text = extract_text_with_trocr_pipeline('image.jpg')")
    print("\n3. For direct model approach:")
    print(" text = extract_text_with_trocr_direct('image.jpg')")
    print("\n4. For document layout analysis:")
    print(" layout = analyze_document_layout('image.jpg')")