aabdoo234 committed on
Commit
62b0def
·
verified ·
1 Parent(s): e27e719

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -0
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import pytesseract
5
+ import re
6
+ import google.generativeai as genai
7
+ from rapidfuzz.distance import Levenshtein
8
+ import os
9
+
10
# Configure the Google Generative AI (Gemini) client.
# NOTE(review): the key comes from the "API_KEY" environment variable and is a
# Google API key — the previous name OPENAI_API_KEY was misleading, so it is
# renamed here (the variable is only used on the next line).
GOOGLE_API_KEY = os.getenv("API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
# Module-level model instance shared by process_image().
model = genai.GenerativeModel("gemini-1.5-flash")
14
+
15
+ # Image processing functions
16
def threshold_image(img, threshold_value=None):
    """Binarize a grayscale image.

    When ``threshold_value`` is None, Gaussian adaptive thresholding is used;
    otherwise a fixed global threshold at ``threshold_value`` is applied.
    """
    if threshold_value is not None:
        # Fixed global threshold; cv2.threshold returns (retval, image).
        return cv2.threshold(img, threshold_value, 255, cv2.THRESH_BINARY)[1]
    # Per-pixel threshold computed from an 11x11 Gaussian-weighted
    # neighborhood, offset by a constant of 2.
    return cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
23
+
24
def bm3d_denoising(img, sigma_psd=55):
    """Denoise ``img`` with OpenCV's fast non-local means filter.

    NOTE(review): despite the name, this is NOT BM3D — the call is
    ``cv2.fastNlMeansDenoising`` (non-local means), and ``sigma_psd`` is
    passed as its filter-strength parameter ``h``.
    """
    filter_strength = sigma_psd
    return cv2.fastNlMeansDenoising(img, None, filter_strength)
26
+
27
def remove_noise(img, kernel_size=3):
    """Smooth ``img`` with a mean (box) filter followed by a 3x3 median blur."""
    # Normalized box kernel: every tap weighs 1 / kernel_size^2.
    box = np.full((kernel_size, kernel_size), 1.0 / kernel_size**2, dtype=np.float32)
    averaged = cv2.filter2D(img, -1, box)
    # The median pass knocks out residual salt-and-pepper noise.
    return cv2.medianBlur(averaged, 3)
31
+
32
def sharpen_image(img):
    """Sharpen ``img`` with a 3x3 high-boost convolution kernel."""
    # Center weight 9 with -1 neighbors amplifies each pixel against
    # its surroundings (weights sum to 1, so brightness is preserved).
    sharpen_kernel = np.array([
        [-1, -1, -1],
        [-1,  9, -1],
        [-1, -1, -1],
    ])
    return cv2.filter2D(img, -1, sharpen_kernel)
35
+
36
def remove_extra_spaces_and_lines(text):
    """Collapse every run of whitespace in ``text`` to a single space.

    Args:
        text: arbitrary OCR output, possibly with repeated spaces/newlines.

    Returns:
        The stripped, single-line result ("" for all-whitespace input).

    The previous implementation ran a second substitution
    (``re.sub(r'\\n\\s*\\n', '\\n\\n', text)``) afterwards, but that step was
    dead code: the first pattern ``\\s+`` already replaces every newline, so
    no '\\n' could survive to the second pass. It has been removed without
    changing behavior.
    """
    return re.sub(r'\s+', ' ', text).strip()
40
+
41
def calculate_accuracy(text1, text2):
    """Return a similarity score in [0, 1] based on Levenshtein distance.

    Args:
        text1: candidate text (e.g. an OCR result).
        text2: reference text to compare against.

    Returns:
        1.0 when the strings are identical, decreasing toward 0.0 as the
        edit distance approaches the length of the longer string.

    Fixes a ZeroDivisionError the previous version raised when both
    strings were empty (max_length == 0).
    """
    max_length = max(len(text1), len(text2))
    if max_length == 0:
        # Two empty strings are identical by definition.
        return 1.0
    distance = Levenshtein.distance(text1, text2)
    return 1 - (distance / max_length)
48
+
49
+ # Gradio app
50
def process_image(image, threshold_value=None, correct_transcription=None):
    """Run the full preprocessing + OCR + Gemini pipeline on one image.

    Args:
        image: image as a numpy array. Gradio's ``gr.Image(type="numpy")``
            supplies RGB channel order.
        threshold_value: fixed binarization threshold, or None to use
            adaptive thresholding.
        correct_transcription: optional ground-truth text; when empty/None,
            the model's own prediction is used as the accuracy reference.

    Returns:
        A 12-tuple: (original, thresholded, bm3d-denoised, denoised,
        sharpened) images, the five corresponding OCR texts, the model's
        reconstructed text, and a formatted accuracy report string.
    """
    # Gradio delivers RGB; the previous code used COLOR_BGR2GRAY, which
    # swaps the red/blue luminance weights. Fixed to COLOR_RGB2GRAY.
    img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Build the processed variants.
    thresholded = threshold_image(img, threshold_value)
    bm3d_denoised_image = bm3d_denoising(thresholded)
    denoised = remove_noise(thresholded)
    # NOTE(review): sharpening runs on the NLM-denoised image, not directly
    # on the thresholded one — presumably intentional; confirm.
    sharpened_image = sharpen_image(bm3d_denoised_image)

    def _ocr(im):
        # OCR a variant, then collapse whitespace so texts compare cleanly.
        return remove_extra_spaces_and_lines(pytesseract.image_to_string(im))

    original_text = _ocr(img)
    thresholded_text = _ocr(thresholded)
    bm3d_denoised_text = _ocr(bm3d_denoised_image)
    denoised_text = _ocr(denoised)
    sharpened_text = _ocr(sharpened_image)

    # Ask Gemini to reconstruct the original letter from the OCR variants.
    # (Removed a harmless duplicated assignment: `user_prompt = user_prompt = ...`.)
    user_prompt = f"""
    below are the output texts of OCR on multiple image processing techniques of a faded letter written in English, can you predict the original text, provide only the original text.
    Pre-Processing Image Text:
    {original_text}
    Sharpened Image Text:
    {sharpened_text}
    Thresholded Image Text:
    {thresholded_text}
    BM3D Denoised Image Text:
    {bm3d_denoised_text}
    Denoised Image Text:
    {denoised_text}
    """
    response = model.generate_content(user_prompt)
    model_text = response.text

    # Fall back to the model's own output as the reference transcription,
    # so accuracy metrics are always computed when the model returned text.
    if not correct_transcription:
        correct_transcription = model_text

    if correct_transcription:
        original_accuracy = calculate_accuracy(original_text, correct_transcription)
        thresholded_accuracy = calculate_accuracy(thresholded_text, correct_transcription)
        bm3d_denoised_accuracy = calculate_accuracy(bm3d_denoised_text, correct_transcription)
        denoised_accuracy = calculate_accuracy(denoised_text, correct_transcription)
        sharpened_accuracy = calculate_accuracy(sharpened_text, correct_transcription)
        model_accuracy = calculate_accuracy(model_text, correct_transcription)
        accuracy_metrics = f"""
        Original Image Accuracy: {original_accuracy:.2%}
        Thresholded Image Accuracy: {thresholded_accuracy:.2%}
        BM3D Denoised Image Accuracy: {bm3d_denoised_accuracy:.2%}
        Denoised Image Accuracy: {denoised_accuracy:.2%}
        Sharpened Image Accuracy: {sharpened_accuracy:.2%}
        Model Response Accuracy: {model_accuracy:.2%}
        """
    else:
        # Only reachable when the model returned an empty string AND no
        # transcription was supplied.
        accuracy_metrics = "No correct transcription provided."

    return (
        image, thresholded, bm3d_denoised_image, denoised, sharpened_image,
        original_text, thresholded_text, bm3d_denoised_text, denoised_text, sharpened_text,
        model_text, accuracy_metrics
    )
117
+
118
+ # Interface
119
# ---------------------------------------------------------------------------
# Gradio UI: upload an image, tune the threshold, then inspect each
# processing variant's OCR output, the Gemini reconstruction, and the
# accuracy report. Component creation order below defines the layout;
# the outputs list in process_button.click must stay aligned with the
# 12-tuple returned by process_image.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("## Image Preprocessing and OCR App")
    with gr.Row():
        gr.Markdown("""
        ### Legend
        - **Model Response**: Text generated by the Generative AI model.
        - **Accuracy Metrics**: Comparison of OCR results with the provided correct transcription if provided, otherwise with the model response.
        """)
    with gr.Row():
        # Left column: inputs and controls.
        with gr.Column():
            image_input = gr.Image(label="Upload Image", type="numpy")
            threshold_slider = gr.Slider(label="Threshold Value", minimum=0, maximum=255, step=1, value=242)
            adaptive_checkbox = gr.Checkbox(label="Use Adaptive Thresholding", value=False)
            transcription_input = gr.Textbox(label="Correct Transcription (Optional)")
            process_button = gr.Button("Process Image")

        # Right column: one tab per processing variant, plus summary boxes.
        with gr.Column():
            tabs = gr.Tabs()
            with tabs:
                with gr.TabItem("Original"):
                    original_image_display = gr.Image(label="Original Image")
                    original_text_display = gr.Textbox(label="Original Image Text")
                with gr.TabItem("Thresholded"):
                    thresholded_image_display = gr.Image(label="Thresholded Image")
                    thresholded_text_display = gr.Textbox(label="Thresholded Image Text", lines=1)
                with gr.TabItem("BM3D Denoised"):
                    bm3d_denoised_image_display = gr.Image(label="BM3D Denoised Image")
                    bm3d_denoised_text_display = gr.Textbox(label="BM3D Denoised Image Text")
                with gr.TabItem("Denoised"):
                    denoised_image_display = gr.Image(label="Denoised Image")
                    denoised_text_display = gr.Textbox(label="Denoised Image Text")
                with gr.TabItem("Sharpened"):
                    sharpened_image_display = gr.Image(label="Sharpened Image")
                    sharpened_text_display = gr.Textbox(label="Sharpened Image Text")
            # NOTE(review): placement of these two boxes (inside the right
            # column, below the tabs) is reconstructed from the flattened
            # source — confirm against the deployed layout.
            accuracy_output = gr.Textbox(label="Accuracy Metrics")
            model_text_display = gr.Textbox(label="Model Response Text")

    # Link button to processing function
    def update_process(image, threshold_value, use_adaptive, correct_transcription):
        # When adaptive thresholding is selected, the slider value is ignored
        # (process_image treats threshold_value=None as "adaptive").
        threshold_value = None if use_adaptive else threshold_value
        return process_image(image, threshold_value, correct_transcription)

    process_button.click(
        update_process,
        inputs=[image_input, threshold_slider, adaptive_checkbox, transcription_input],
        outputs=[
            original_image_display, thresholded_image_display,
            bm3d_denoised_image_display, denoised_image_display,
            sharpened_image_display, original_text_display,
            thresholded_text_display, bm3d_denoised_text_display,
            denoised_text_display, sharpened_text_display,
            model_text_display, accuracy_output
        ],
    )

# Launch app
demo.launch()