import torch
from transformers import pipeline, TrOCRProcessor, VisionEncoderDecoderModel
from transformers import AutoModel, AutoProcessor
from PIL import Image
import requests
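
# These imports assume the usual dependencies are installed:
#   pip install torch transformers pillow requests
# (the LayoutLMv3 built-in OCR path additionally needs pytesseract and the
#  Tesseract binary)
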
# Method 1: Using TrOCR with pipeline (easiest approach)
print("Loading TrOCR model using pipeline...")
trocr_pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

# Method 2: Loading TrOCR model directly (more control)
print("Loading TrOCR model directly...")
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

# Method 3: Loading LayoutLMv3 model (for document layout understanding)
print("Loading LayoutLMv3 model...")
layoutlm_processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")
layoutlm_model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
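
# Note: all three models load on CPU by default. If a GPU is available, one
# option (not shown here) is to move trocr_model / layoutlm_model to "cuda"
# and move their inputs to the same device, or pass device=0 to pipeline().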

def extract_text_with_trocr_pipeline(image_path):
    """
    Extract text from a handwritten image using the TrOCR pipeline.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        result = trocr_pipe(image)
        return result[0]['generated_text']
    except Exception as e:
        print(f"Error processing image with pipeline: {e}")
        return None

def extract_text_with_trocr_direct(image_path):
    """
    Extract text from a handwritten image by calling the TrOCR model directly.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        # The processor resizes and normalizes the image into model-ready pixel values
        pixel_values = trocr_processor(image, return_tensors="pt").pixel_values
        generated_ids = trocr_model.generate(pixel_values)
        generated_text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return generated_text
    except Exception as e:
        print(f"Error processing image with direct model: {e}")
        return None
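
# Hedged note: trocr_model.generate() also accepts the standard generation
# kwargs (e.g. max_new_tokens, num_beams) if the default output is too short
# or too noisy for longer lines of handwriting.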

def analyze_document_layout(image_path):
    """
    Analyze document layout using LayoutLMv3.
    Note: this is a basic example. LayoutLMv3 typically requires fine-tuning for specific tasks.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        # With the default processor (apply_ocr=True) this runs Tesseract on the
        # image to obtain words and bounding boxes, so pytesseract must be
        # installed. In practice you would usually supply OCR results yourself.
        encoding = layoutlm_processor(image, return_tensors="pt")
        with torch.no_grad():
            outputs = layoutlm_model(**encoding)
        # The outputs contain embeddings that can be used for downstream tasks
        return outputs.last_hidden_state
    except Exception as e:
        print(f"Error analyzing document layout: {e}")
        return None
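
# Hedged sketch (added illustration, not part of the original script): in
# practice LayoutLMv3 is usually fed OCR words plus bounding boxes normalized
# to a 0-1000 scale rather than relying on its built-in OCR. The helper name
# and arguments below are assumptions for demonstration only.
def encode_layout_with_words(image_path, words, boxes):
    """
    Encode an image together with externally supplied OCR words and boxes.
    `words` is a list of strings; `boxes` is a list of [x0, y0, x1, y1] lists
    normalized to 0-1000.
    """
    # A separate processor with apply_ocr=False is needed so the caller-supplied
    # words/boxes are used instead of Tesseract output.
    processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
    image = Image.open(image_path).convert("RGB")
    encoding = processor(image, words, boxes=boxes, return_tensors="pt")
    with torch.no_grad():
        outputs = layoutlm_model(**encoding)
    return outputs.last_hidden_state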

def process_bill_image(image_path):
    """
    Complete pipeline to process a Singtel bill image.
    """
    print(f"Processing bill image: {image_path}")

    # Extract text using TrOCR
    print("Extracting text with TrOCR...")
    extracted_text = extract_text_with_trocr_pipeline(image_path)

    if extracted_text:
        print(f"Extracted text: {extracted_text}")
        # Bill-specific parsing (account numbers, amounts due, due dates,
        # service charges) can be added here; see the hedged
        # parse_bill_fields sketch below for one possible starting point.
        return {
            'extracted_text': extracted_text,
            'status': 'success'
        }
    else:
        return {
            'extracted_text': None,
            'status': 'failed'
        }
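
import re  # needed only for the sketch below

# Hedged sketch (illustrative only; the field names and regex patterns are
# assumptions, not taken from a real Singtel bill format): pull a few common
# bill fields out of the raw OCR text with simple regular expressions.
def parse_bill_fields(extracted_text):
    """
    Very rough field extraction from OCR output. Real bills would need
    layout-aware parsing (e.g. LayoutLMv3 fine-tuned for key-value extraction)
    rather than plain regexes.
    """
    fields = {}
    # Amounts like "$123.45" or "1,234.56"
    amount = re.search(r"\$?\s*(\d{1,3}(?:,\d{3})*\.\d{2})", extracted_text)
    if amount:
        fields['amount_due'] = amount.group(1)
    # Dates like "01/02/2024" or "1 Feb 2024"
    due_date = re.search(r"(\d{2}/\d{2}/\d{4}|\d{1,2}\s+[A-Za-z]{3,9}\s+\d{4})", extracted_text)
    if due_date:
        fields['due_date'] = due_date.group(1)
    # Account numbers such as "Account No: 12345678"
    account = re.search(r"(?:account|a/c)\s*(?:no\.?|number)?\s*[:#]?\s*(\d{6,})",
                        extracted_text, re.IGNORECASE)
    if account:
        fields['account_number'] = account.group(1)
    return fields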

# Example usage
if __name__ == "__main__":
    print("=== Singtel Bill Scanner Test ===")

    # Replace with your actual image path
    # image_path = "path/to/your/bill_image.jpg"

    # For testing with a sample image (you can download this)
    sample_url = "https://huggingface.co/microsoft/trocr-base-handwritten/resolve/main/images/example_1.jpg"

    print("\nTesting with sample image...")
    try:
        # Download the sample image for testing
        response = requests.get(sample_url, timeout=30)
        response.raise_for_status()
        with open("sample_handwritten.jpg", "wb") as f:
            f.write(response.content)

        # Test the models
        result = process_bill_image("sample_handwritten.jpg")
        print(f"Result: {result}")
    except Exception as e:
        print(f"Error downloading sample image: {e}")
        print("Please provide your own image path to test the models")

    print("\n=== Usage Instructions ===")
    print("1. To use with your own image:")
    print("   result = process_bill_image('path/to/your/bill.jpg')")
    print("\n2. For the pipeline approach:")
    print("   text = extract_text_with_trocr_pipeline('image.jpg')")
    print("\n3. For the direct model approach:")
    print("   text = extract_text_with_trocr_direct('image.jpg')")
    print("\n4. For document layout analysis:")
    print("   layout = analyze_document_layout('image.jpg')")