File size: 3,666 Bytes
e4b6ad9
 
 
 
 
 
 
7d65a14
f9e7031
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4b6ad9
 
0e15c8c
 
e4b6ad9
f9e7031
e4b6ad9
 
 
 
 
 
 
 
 
 
 
 
 
7d65a14
 
 
f9e7031
 
e4b6ad9
 
0e15c8c
e4b6ad9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e15c8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4b6ad9
 
c1cb031
e4b6ad9
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import cv2
import cv2 as cv
import numpy as np
import streamlit as st
from PIL import Image
import pytesseract

def enhance(img):
    """Preprocess a grayscale image for OCR: denoise, binarize, close gaps.

    Parameters:
        img (numpy.ndarray): Single-channel (grayscale) image.

    Returns:
        numpy.ndarray: Binary image with text strokes emphasized.
    """
    # Suppress high-frequency noise before thresholding.
    smoothed = cv2.GaussianBlur(img, (5, 5), 0)

    # Local (adaptive) binarization copes with uneven page illumination.
    binary = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )

    # Morphological closing bridges small gaps in glyphs and removes
    # pinhole speckle left over from thresholding.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

# Regions of interest on the *aligned* form, each as a corner pair
# [(x1, y1), (x2, y2)] = (top-left, bottom-right) in pixels, consumed by
# ocr_with_crop(). Coordinates are tied to the "r.png" reference layout —
# presumably two text-field rows on that form; TODO confirm which fields.
act =  [(150,240), (610,260)]
act2 =  [(130,440), (590,460)]
acts=[act,act2]

def align_images(ref_gray, input_gray, enh=False):
    """
    Align input_gray to ref_gray via ORB feature matching and homography.

    Parameters:
        ref_gray (numpy.ndarray): Reference image, single-channel grayscale.
        input_gray (numpy.ndarray): Image to align, single-channel grayscale.
        enh (bool): If True, run both images through enhance() first and
            display the intermediate results in the Streamlit UI.

    Returns:
        numpy.ndarray: input_gray warped into the reference image's frame.

    Raises:
        ValueError: If ORB finds no descriptors, fewer than 4 matches exist,
            or homography estimation fails — previously these cases crashed
            deep inside OpenCV with opaque errors.
    """
    if enh:
        ref_gray = enhance(ref_gray)
        input_gray = enhance(input_gray)
        st.image(ref_gray)
        st.image(input_gray)

    # Detect ORB keypoints and descriptors in both images.
    orb = cv2.ORB_create(nfeatures=3000)
    keypoints1, descriptors1 = orb.detectAndCompute(ref_gray, None)
    keypoints2, descriptors2 = orb.detectAndCompute(input_gray, None)
    if descriptors1 is None or descriptors2 is None:
        raise ValueError("Could not extract ORB descriptors from one of the images.")

    # Match descriptors with brute-force Hamming; crossCheck keeps only
    # mutually-best pairs. Sort so the strongest matches come first.
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(bf.match(descriptors1, descriptors2), key=lambda m: m.distance)
    if len(matches) < 4:
        # findHomography needs at least 4 point correspondences.
        raise ValueError("Not enough feature matches to estimate a homography.")

    # Extract the matched point coordinates in each image.
    ref_points = np.float32([keypoints1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    input_points = np.float32([keypoints2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

    # Robustly estimate the input -> reference homography (RANSAC rejects
    # outlier matches; 5.0 px reprojection threshold).
    H, _mask = cv2.findHomography(input_points, ref_points, cv2.RANSAC, 5.0)
    if H is None:
        raise ValueError("Homography estimation failed; images may not overlap.")

    # Warp the input image onto the reference image's pixel grid.
    height, width = ref_gray.shape
    return cv2.warpPerspective(input_gray, H, (width, height))

def ocr_with_crop(aligned_image):
    """
    Crop each region listed in the module-level `acts` out of the aligned
    form, OCR it, and display both the crop and the extracted text in the
    Streamlit UI.

    Parameters:
        aligned_image (numpy.ndarray): Grayscale form already aligned to
            the reference template, so the hard-coded `acts` regions
            line up with the fields they target.

    Returns:
        list[str]: Text extracted from each region, in `acts` order
            (previously nothing was returned; callers relying on the
            display side effects are unaffected).
    """
    texts = []
    for (x1, y1), (x2, y2) in acts:
        # NumPy indexes rows first, so the slice order is y then x.
        cropped = aligned_image[y1:y2, x1:x2]
        st.image(cropped)
        text = pytesseract.image_to_string(cropped)
        st.write(text)
        texts.append(text)
    return texts
        
if __name__== "__main__":
    ref = cv.imread("r.png",0)
    if inp:= st.file_uploader("upload your form in image format", type=['png']):
        image = Image.open(inp)
        gray_image_pil = image.convert('L')
        image_array = np.array(gray_image_pil)
        st.image(image_array)
        align_image = align_images(ref,image_array)
        ocr_with_crop(align_image)