# Spaces: Running  (appears to be hosting-page header text captured with the code; not part of the program)
import cv2
import cv2 as cv
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image
def enhance(img):
    """Binarize an image so that text-like detail stands out.

    The image is smoothed with a 5x5 Gaussian blur, binarized with Gaussian
    adaptive thresholding (max value 255, block size 11, constant 2), and
    finally passed through a morphological closing with a 3x3 rectangular
    kernel.

    Args:
        img: Input image as a NumPy array. ``cv2.adaptiveThreshold`` only
            accepts 8-bit single-channel input, so a grayscale ``uint8``
            array is expected.

    Returns:
        The thresholded and closed image as returned by
        ``cv2.morphologyEx``.
    """
    # Blur first so that pixel-level noise does not survive thresholding.
    blurred = cv2.GaussianBlur(img, (5, 5), 0)

    # Adaptive (per-neighbourhood) threshold copes with uneven lighting
    # better than a single global threshold would.
    thresh = cv2.adaptiveThreshold(
        blurred,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )

    # Closing (dilate, then erode) fills small dark gaps in the bright
    # regions of the thresholded image.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    return cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# Crop regions used for OCR. Each region is a pair of corner points
# [(x1, y1), (x2, y2)]; ocr_with_crop slices the image as [y1:y2, x1:x2].
act = [(150, 240), (610, 260)]
act2 = [(130, 440), (590, 460)]
# All regions, in the order they are processed.
acts = [act, act2]
def align_images(ref_gray, input_gray, enh=False):
    """Align an input image to a reference image using an ORB-based homography.

    ORB keypoints are detected in both images, matched with a cross-checked
    brute-force Hamming matcher, and a homography mapping input points onto
    reference points is estimated with RANSAC (reprojection threshold 5.0).
    The input image is then warped with that homography.

    Args:
        ref_gray: Reference image as a grayscale NumPy array.
        input_gray: Image to be aligned, as a grayscale NumPy array.
        enh: When true, both images are first passed through ``enhance``.
            The returned image is then the warped *enhanced* input.

    Returns:
        The input image warped into the reference frame; its width and
        height equal those of the reference image.

    Raises:
        ValueError: If no ORB descriptors are found in either image, fewer
            than four matches are available, or no homography can be
            estimated.
    """
    # A homography has 8 degrees of freedom, so at least 4 point pairs
    # are required to estimate it.
    min_matches = 4

    if enh:
        ref_gray = enhance(ref_gray)
        input_gray = enhance(input_gray)
        # NOTE(review): the original indentation was lost; these preview
        # calls are assumed to belong to the `enh` branch - confirm.
        st.image(ref_gray)
        st.image(input_gray)

    # Detect ORB keypoints and compute their binary descriptors.
    orb = cv2.ORB_create(nfeatures=3000)
    ref_keypoints, ref_descriptors = orb.detectAndCompute(ref_gray, None)
    input_keypoints, input_descriptors = orb.detectAndCompute(input_gray, None)
    if ref_descriptors is None or input_descriptors is None:
        raise ValueError(
            "No ORB features were found in the reference or input image."
        )

    # Hamming distance suits ORB's binary descriptors; crossCheck keeps only
    # mutual best matches.
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(
        matcher.match(ref_descriptors, input_descriptors),
        key=lambda match: match.distance,
    )
    if len(matches) < min_matches:
        raise ValueError(
            f"Only {len(matches)} feature matches found; at least "
            f"{min_matches} are needed to estimate a homography."
        )

    # Collect the matched point coordinates (all matches are used; outliers
    # are left for RANSAC to reject).
    ref_points = np.float32(
        [ref_keypoints[m.queryIdx].pt for m in matches]
    ).reshape(-1, 1, 2)
    input_points = np.float32(
        [input_keypoints[m.trainIdx].pt for m in matches]
    ).reshape(-1, 1, 2)

    # Homography that maps input coordinates onto reference coordinates.
    homography, _inlier_mask = cv2.findHomography(
        input_points, ref_points, cv2.RANSAC, 5.0
    )
    if homography is None:
        raise ValueError("Could not estimate a homography between the images.")

    # Warp the input into the reference frame; shape[:2] also tolerates a
    # multi-channel reference array.
    height, width = ref_gray.shape[:2]
    return cv2.warpPerspective(input_gray, homography, (width, height))
def ocr_with_crop(aligned_image, regions=None):
    """Crop rectangular regions out of an image and run OCR on each one.

    For every region the crop is shown with ``st.image`` and the text
    returned by ``pytesseract.image_to_string`` is written with ``st.write``.
    A region that yields an empty crop (for example because it lies outside
    the image) is skipped with a warning instead of being passed to OCR.

    Args:
        aligned_image: Image as a NumPy array indexed ``[row, column]``.
        regions: Optional iterable of ``[(x1, y1), (x2, y2)]`` corner pairs.
            Defaults to the module-level ``acts`` list.

    Returns:
        None. Results are only rendered in the Streamlit app.
    """
    if regions is None:
        regions = acts

    for (x1, y1), (x2, y2) in regions:
        # NumPy indexes rows (y) first, then columns (x).
        cropped_img = aligned_image[y1:y2, x1:x2]
        if cropped_img.size == 0:
            st.warning(
                f"Crop region ({x1}, {y1})-({x2}, {y2}) is empty for this image; skipped."
            )
            continue

        st.image(cropped_img)
        # Run OCR on the crop and show the extracted text.
        st.write(pytesseract.image_to_string(cropped_img))
if __name__ == "__main__":
    # Reference form that uploaded images are aligned to, read as grayscale.
    ref = cv2.imread("r.png", cv2.IMREAD_GRAYSCALE)
    if ref is None:
        # cv2.imread returns None instead of raising when the file is
        # missing or unreadable; report it and halt this script run.
        st.error('Reference image "r.png" could not be read.')
        st.stop()

    if inp := st.file_uploader("upload your form in image format", type=['png']):
        # Convert the upload to a single-channel (grayscale) array.
        image_array = np.array(Image.open(inp).convert('L'))
        st.image(image_array)

        # Warp the upload onto the reference, then OCR the fixed regions.
        align_image = align_images(ref, image_array)
        ocr_with_crop(align_image)