|
import torch
|
|
from transformers import pipeline, TrOCRProcessor, VisionEncoderDecoderModel
|
|
from transformers import AutoModel, AutoProcessor
|
|
from PIL import Image
|
|
import requests
|
|
|
|
|
|
# --- Module-level model loading (runs at import time) ---
# NOTE(review): all four loads below download weights on first run and are
# heavy network/disk side effects at import time — consider lazy loading if
# this module is ever imported rather than run as a script.

print("Loading TrOCR model using pipeline...")

# High-level pipeline wrapper: image in, {'generated_text': ...} out.
trocr_pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

print("Loading TrOCR model directly...")

# Lower-level pair used by extract_text_with_trocr_direct: the processor
# turns a PIL image into pixel_values; the model generates token ids.
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")

trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

print("Loading LayoutLMv3 model...")

# Base (not fine-tuned) LayoutLMv3 — produces hidden states only, no task head.
layoutlm_processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")

layoutlm_model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
|
|
|
|
def extract_text_with_trocr_pipeline(image_path):
    """Extract text from a handwritten image using the TrOCR pipeline.

    Args:
        image_path: Path to the image file to transcribe.

    Returns:
        The recognized text string, or None if processing failed.
    """
    try:
        # Context manager closes the file handle; convert("RGB") normalizes
        # grayscale/RGBA/palette images to the 3-channel input TrOCR expects.
        with Image.open(image_path) as image:
            result = trocr_pipe(image.convert("RGB"))
        return result[0]['generated_text']
    except Exception as e:
        # Best-effort: report and signal failure rather than crash the caller.
        print(f"Error processing image with pipeline: {e}")
        return None
|
|
|
|
def extract_text_with_trocr_direct(image_path):
    """Extract text from a handwritten image using the TrOCR model directly.

    Uses the module-level ``trocr_processor``/``trocr_model`` pair instead of
    the pipeline wrapper, which allows passing generation kwargs if needed.

    Args:
        image_path: Path to the image file to transcribe.

    Returns:
        The recognized text string, or None if processing failed.
    """
    try:
        # Context manager closes the file handle; convert("RGB") normalizes
        # grayscale/RGBA/palette images to the 3-channel input TrOCR expects.
        with Image.open(image_path) as image:
            pixel_values = trocr_processor(
                image.convert("RGB"), return_tensors="pt"
            ).pixel_values

        # Inference only — no gradients needed (consistent with
        # analyze_document_layout below).
        with torch.no_grad():
            generated_ids = trocr_model.generate(pixel_values)

        generated_text = trocr_processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        return generated_text
    except Exception as e:
        # Best-effort: report and signal failure rather than crash the caller.
        print(f"Error processing image with direct model: {e}")
        return None
|
|
|
|
def analyze_document_layout(image_path):
    """Analyze document layout using LayoutLMv3.

    Note: this is a basic example. LayoutLMv3 typically requires fine-tuning
    for specific tasks; the base model returns raw hidden states only.

    NOTE(review): the default LayoutLMv3 processor runs OCR internally
    (apply_ocr=True), which presumably requires pytesseract/tesseract to be
    installed — verify in the deployment environment.

    Args:
        image_path: Path to the document image.

    Returns:
        The model's last_hidden_state tensor, or None if processing failed.
    """
    try:
        # Context manager closes the file handle; convert("RGB") normalizes
        # the image to the 3-channel input the processor expects.
        with Image.open(image_path) as image:
            encoding = layoutlm_processor(image.convert("RGB"), return_tensors="pt")

        # Inference only — no gradients needed.
        with torch.no_grad():
            outputs = layoutlm_model(**encoding)

        return outputs.last_hidden_state
    except Exception as e:
        # Best-effort: report and signal failure rather than crash the caller.
        print(f"Error analyzing document layout: {e}")
        return None
|
|
|
|
def process_bill_image(image_path):
    """Complete pipeline to process a Singtel bill image.

    Runs TrOCR text extraction on the image and wraps the outcome in a
    small result dict.

    Args:
        image_path: Path to the bill image.

    Returns:
        dict with 'extracted_text' (str or None) and 'status'
        ('success' or 'failed').
    """
    print(f"Processing bill image: {image_path}")
    print("Extracting text with TrOCR...")

    text = extract_text_with_trocr_pipeline(image_path)

    # Guard clause: empty/None extraction counts as failure.
    if not text:
        return {'extracted_text': None, 'status': 'failed'}

    print(f"Extracted text: {text}")
    return {'extracted_text': text, 'status': 'success'}
|
|
|
|
|
|
if __name__ == "__main__":
    print("=== Singtel Bill Scanner Test ===")

    # Public sample image from the TrOCR model card, used as a smoke test.
    sample_url = "https://huggingface.co/microsoft/trocr-base-handwritten/resolve/main/images/example_1.jpg"

    print("\nTesting with sample image...")
    try:
        # timeout prevents an indefinite hang; raise_for_status() stops an
        # HTTP error page from being silently saved as a "jpg" and fed to
        # the model (both errors land in the except below).
        response = requests.get(sample_url, timeout=30)
        response.raise_for_status()
        with open("sample_handwritten.jpg", "wb") as f:
            f.write(response.content)

        result = process_bill_image("sample_handwritten.jpg")
        print(f"Result: {result}")
    except Exception as e:
        print(f"Error downloading sample image: {e}")
        print("Please provide your own image path to test the models")

    print("\n=== Usage Instructions ===")
    print("1. To use with your own image:")
    print("   result = process_bill_image('path/to/your/bill.jpg')")
    print("\n2. For pipeline approach:")
    print("   text = extract_text_with_trocr_pipeline('image.jpg')")
    print("\n3. For direct model approach:")
    print("   text = extract_text_with_trocr_direct('image.jpg')")
    print("\n4. For document layout analysis:")
    print("   layout = analyze_document_layout('image.jpg')")