import cv2
import numpy as np
import streamlit as st
from PIL import Image
import pytesseract


def enhance(img):
    """Denoise, binarize, and close gaps so text regions stand out."""
    # Apply a Gaussian blur to reduce noise
    blurred = cv2.GaussianBlur(img, (5, 5), 0)
    # Apply adaptive thresholding to enhance text
    thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 11, 2
    )
    # Morphological closing removes small noise and connects text components
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    return morph


# Crop regions of interest, each given as [(x1, y1), (x2, y2)] corner pairs
act = [(150, 240), (610, 260)]
act2 = [(130, 440), (590, 460)]
acts = [act, act2]


def align_images(ref_gray, input_gray, enh=False):
    """
    Align the input image to the reference image using a homography.

    Parameters:
        ref_gray (numpy.ndarray): The grayscale reference image.
        input_gray (numpy.ndarray): The grayscale input image to be aligned.
        enh (bool): If True, run both images through enhance() first.

    Returns:
        numpy.ndarray: The aligned version of the input image.
    """
    if enh:
        ref_gray = enhance(ref_gray)
        input_gray = enhance(input_gray)
        st.image(ref_gray)
        st.image(input_gray)

    # Detect ORB keypoints and descriptors
    orb = cv2.ORB_create(nfeatures=3000)
    keypoints1, descriptors1 = orb.detectAndCompute(ref_gray, None)
    keypoints2, descriptors2 = orb.detectAndCompute(input_gray, None)

    # Match descriptors using a brute-force Hamming matcher
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = bf.match(descriptors1, descriptors2)
    matches = sorted(matches, key=lambda m: m.distance)

    # Extract the locations of the matched keypoints
    ref_points = np.float32(
        [keypoints1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    input_points = np.float32(
        [keypoints2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

    # Compute the homography matrix, letting RANSAC reject outlier matches
    H, _ = cv2.findHomography(input_points, ref_points, cv2.RANSAC, 5.0)

    # Warp the input image into the reference image's frame
    height, width = ref_gray.shape
    aligned_image = cv2.warpPerspective(input_gray, H, (width, height))

    return aligned_image


def ocr_with_crop(aligned_image):
    """Crop each region of interest from the aligned image and OCR it."""

    def ocr(crop_coordinates):
        # Convert the corner pair to rectangular bounds (x1, y1, x2, y2)
        x1, y1 = crop_coordinates[0]
        x2, y2 = crop_coordinates[1]

        # Crop the region using NumPy slicing (rows are y, columns are x)
        cropped_img = aligned_image[y1:y2, x1:x2]
        st.image(cropped_img)

        # Perform OCR on the cropped region and display the extracted text
        text = pytesseract.image_to_string(cropped_img)
        st.write(text)

    for cor in acts:
        ocr(cor)


if __name__ == "__main__":
    ref = cv2.imread("r.png", cv2.IMREAD_GRAYSCALE)
    if inp := st.file_uploader("Upload your form in image format", type=["png"]):
        image = Image.open(inp)
        gray_image_pil = image.convert("L")
        image_array = np.array(gray_image_pil)
        st.image(image_array)
        aligned_image = align_images(ref, image_array)
        ocr_with_crop(aligned_image)
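
# Usage sketch, assuming this script is saved as app.py with the grayscale
# reference form "r.png" beside it, and the Tesseract binary installed and on
# PATH (pytesseract is only a thin wrapper around it):
#   streamlit run app.py
# The hard-coded regions in `acts` are specific to that reference form; new
# form layouts need their own (x1, y1)/(x2, y2) crop coordinates.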