|
import torch
|
|
from transformers import pipeline, TrOCRProcessor, VisionEncoderDecoderModel
|
|
from transformers import AutoModel, AutoProcessor
|
|
from PIL import Image
|
|
import requests
|
|
|
|
|
|
# --- Module-level model loading (runs at import time) ---
# NOTE(review): all four loads below download weights on first run and are
# heavy network/disk side effects at import time — consider lazy loading if
# this module is ever imported rather than run as a script.

print("Loading TrOCR model using pipeline...")

# High-level pipeline wrapper: image in, {'generated_text': ...} out.
trocr_pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")

print("Loading TrOCR model directly...")

# Lower-level pair used by extract_text_with_trocr_direct: the processor
# turns a PIL image into pixel_values; the model generates token ids.
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")

trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

print("Loading LayoutLMv3 model...")

# Base (not fine-tuned) LayoutLMv3 — produces hidden states only, no task head.
layoutlm_processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base")

layoutlm_model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
|
|
|
|
def extract_text_with_trocr_pipeline(image_path):
    """Extract text from a handwritten image using the TrOCR pipeline.

    Args:
        image_path: Path to the image file to transcribe.

    Returns:
        The recognized text string, or None if processing failed.
    """
    try:
        # Context manager closes the file handle; convert("RGB") normalizes
        # grayscale/RGBA/palette images to the 3-channel input TrOCR expects.
        with Image.open(image_path) as image:
            result = trocr_pipe(image.convert("RGB"))
        return result[0]['generated_text']
    except Exception as e:
        # Best-effort: report and signal failure rather than crash the caller.
        print(f"Error processing image with pipeline: {e}")
        return None
|
|
|
|
def extract_text_with_trocr_direct(image_path):
    """Extract text from a handwritten image using the TrOCR model directly.

    Uses the module-level ``trocr_processor``/``trocr_model`` pair instead of
    the pipeline wrapper, which allows passing generation kwargs if needed.

    Args:
        image_path: Path to the image file to transcribe.

    Returns:
        The recognized text string, or None if processing failed.
    """
    try:
        # Context manager closes the file handle; convert("RGB") normalizes
        # grayscale/RGBA/palette images to the 3-channel input TrOCR expects.
        with Image.open(image_path) as image:
            pixel_values = trocr_processor(
                image.convert("RGB"), return_tensors="pt"
            ).pixel_values

        # Inference only — no gradients needed (consistent with
        # analyze_document_layout below).
        with torch.no_grad():
            generated_ids = trocr_model.generate(pixel_values)

        generated_text = trocr_processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        return generated_text
    except Exception as e:
        # Best-effort: report and signal failure rather than crash the caller.
        print(f"Error processing image with direct model: {e}")
        return None
|
|
|
|
def analyze_document_layout(image_path):
    """Analyze document layout using LayoutLMv3.

    Note: this is a basic example. LayoutLMv3 typically requires fine-tuning
    for specific tasks; the base model returns raw hidden states only.

    NOTE(review): the default LayoutLMv3 processor runs OCR internally
    (apply_ocr=True), which presumably requires pytesseract/tesseract to be
    installed — verify in the deployment environment.

    Args:
        image_path: Path to the document image.

    Returns:
        The model's last_hidden_state tensor, or None if processing failed.
    """
    try:
        # Context manager closes the file handle; convert("RGB") normalizes
        # the image to the 3-channel input the processor expects.
        with Image.open(image_path) as image:
            encoding = layoutlm_processor(image.convert("RGB"), return_tensors="pt")

        # Inference only — no gradients needed.
        with torch.no_grad():
            outputs = layoutlm_model(**encoding)

        return outputs.last_hidden_state
    except Exception as e:
        # Best-effort: report and signal failure rather than crash the caller.
        print(f"Error analyzing document layout: {e}")
        return None
|
|
|
|
def process_bill_image(image_path):
    """Complete pipeline to process a Singtel bill image.

    Runs TrOCR text extraction on the image and wraps the outcome in a
    small result dict.

    Args:
        image_path: Path to the bill image.

    Returns:
        dict with 'extracted_text' (str or None) and 'status'
        ('success' or 'failed').
    """
    print(f"Processing bill image: {image_path}")
    print("Extracting text with TrOCR...")

    text = extract_text_with_trocr_pipeline(image_path)

    # Guard clause: empty/None extraction counts as failure.
    if not text:
        return {'extracted_text': None, 'status': 'failed'}

    print(f"Extracted text: {text}")
    return {'extracted_text': text, 'status': 'success'}
|
|
|
|
|
|
if __name__ == "__main__":
    print("=== Singtel Bill Scanner Test ===")

    # Public sample image from the TrOCR model card, used as a smoke test.
    sample_url = "https://huggingface.co/microsoft/trocr-base-handwritten/resolve/main/images/example_1.jpg"

    print("\nTesting with sample image...")
    try:
        # timeout prevents an indefinite hang; raise_for_status() stops an
        # HTTP error page from being silently saved as a "jpg" and fed to
        # the model (both errors land in the except below).
        response = requests.get(sample_url, timeout=30)
        response.raise_for_status()
        with open("sample_handwritten.jpg", "wb") as f:
            f.write(response.content)

        result = process_bill_image("sample_handwritten.jpg")
        print(f"Result: {result}")
    except Exception as e:
        print(f"Error downloading sample image: {e}")
        print("Please provide your own image path to test the models")

    print("\n=== Usage Instructions ===")
    print("1. To use with your own image:")
    print("   result = process_bill_image('path/to/your/bill.jpg')")
    print("\n2. For pipeline approach:")
    print("   text = extract_text_with_trocr_pipeline('image.jpg')")
    print("\n3. For direct model approach:")
    print("   text = extract_text_with_trocr_direct('image.jpg')")
    print("\n4. For document layout analysis:")
    print("   layout = analyze_document_layout('image.jpg')")