BrailleMenuGenV2 / utils /image_preprocessing.py
Chamin09's picture
initial commit
93c4f75 verified
import cv2
import numpy as np
from PIL import Image
def preprocess_image(image, target_size=(1000, 1000)):
"""
Preprocess image for document analysis.
Args:
image: PIL Image object
target_size: Tuple of (width, height) to resize to
Returns:
Preprocessed image as numpy array
"""
# Convert PIL Image to numpy array if needed
if isinstance(image, Image.Image):
img_array = np.array(image)
else:
img_array = image
# Convert to RGB if grayscale
if len(img_array.shape) == 2:
img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
elif img_array.shape[2] == 4:
img_array = cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)
# Resize image
img_array = cv2.resize(img_array, target_size)
# Enhance contrast
lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
l, a, b = cv2.split(lab)
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
cl = clahe.apply(l)
enhanced_lab = cv2.merge((cl, a, b))
enhanced_img = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
return enhanced_img