File size: 5,597 Bytes
7d1906d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab90a8a
 
7d1906d
 
ab90a8a
7d1906d
 
 
 
ab90a8a
 
 
 
 
 
7d1906d
 
 
ab90a8a
 
 
 
 
 
 
 
 
7d1906d
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# app.py
import os
import shutil
import tempfile
import cv2
import numpy as np
import gradio as gr
from paddleocr import PaddleOCR

from PIL import Image

def is_valid_image(path):
    """Return True if *path* can be opened and verified as an image.

    Args:
        path: Value handed straight to ``Image.open`` (typically a
            filesystem path).

    Returns:
        bool: True when both ``Image.open`` and ``verify()`` succeed,
        False when either of them raises.
    """
    try:
        # The context manager releases the file handle that a bare
        # ``Image.open`` call would otherwise leave open.
        with Image.open(path) as img:
            img.verify()
    except Exception:
        # Any failure while opening/verifying means "not a usable image".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
    return True


# Module-level OCR engine, constructed once at import time and used by
# detect_text_boxes(). English models with the angle classifier enabled;
# model files are read from the local ``models/`` directory.
ocr = PaddleOCR(
    use_angle_cls=True,
    lang='en',
    det_model_dir='models/det',
    rec_model_dir='models/rec',
    cls_model_dir='models/cls',
)

def classify_background_color(avg_color, white_thresh=230, black_thresh=50, yellow_thresh=100):
    r, g, b = avg_color
    if r >= white_thresh and g >= white_thresh and b >= white_thresh:
        return (255, 255, 255)
    if r <= black_thresh and g <= black_thresh and b <= black_thresh:
        return (0, 0, 0)
    if r >= yellow_thresh and g >= yellow_thresh and b < yellow_thresh:
        return (255, 255, 0)
    return None

def sample_border_color(image, box, padding=2):
    """Return the median colour of thin strips along the edges of a padded box.

    The box is grown by ``padding`` on every side and clamped to the image.
    Four strips, each ``padding`` pixels thick, are then taken along the
    top, bottom, left and right edges of the grown box, and the per-channel
    median over all of their pixels is returned.

    Args:
        image: Array indexed as ``image[row, col]`` whose pixels reshape to
            three channels.
        box: ``(x_min, y_min, x_max, y_max)`` in pixel coordinates.
        padding: Amount the box is grown by, and the strip thickness.

    Returns:
        A 3-tuple of ints; ``(255, 255, 255)`` when no pixels were sampled.
    """
    img_h, img_w = image.shape[:2]
    left_x, top_y, right_x, bottom_y = box

    # Grow the box, clamping to the first/last valid pixel index.
    left_x = max(0, left_x - padding)
    top_y = max(0, top_y - padding)
    right_x = min(img_w - 1, right_x + padding)
    bottom_y = min(img_h - 1, bottom_y + padding)

    strips = [
        image[top_y:top_y + padding, left_x:right_x],        # top edge
        image[bottom_y - padding:bottom_y, left_x:right_x],  # bottom edge
        image[top_y:bottom_y, left_x:left_x + padding],      # left edge
        image[top_y:bottom_y, right_x - padding:right_x],    # right edge
    ]
    pixels = np.vstack([strip.reshape(-1, 3) for strip in strips])

    if pixels.size == 0:
        return (255, 255, 255)

    per_channel_median = np.median(pixels, axis=0)
    return tuple(int(channel) for channel in per_channel_median)

def detect_text_boxes(image, max_dim=1280):
    """Run OCR on *image* and return axis-aligned boxes for recognised text.

    Images whose longest side exceeds ``max_dim`` are downscaled before OCR
    to limit memory use. The returned coordinates are mapped back to the
    coordinate system of the image that was passed in, so callers can use
    them directly on their full-size array.

    Args:
        image: Image array exposing ``shape`` (2-D grayscale or 3-D with a
            channel axis).
        max_dim: Maximum length, in pixels, of the longest side fed to OCR.

    Returns:
        list: ``((x_min, y_min, x_max, y_max), text, confidence)`` tuples,
        one per recognised line with non-blank text. An empty list is
        returned for invalid input, when nothing is detected, or when any
        step raises.
    """
    try:
        if image is None or not hasattr(image, 'shape'):
            print("Invalid image. Skipping...")
            return []

        height, width = image.shape[:2]
        scale_x = scale_y = 1.0
        if max(height, width) > max_dim:
            scale = max_dim / float(max(height, width))
            # Never let a very elongated image collapse to a 0-pixel side.
            new_w = max(1, int(width * scale))
            new_h = max(1, int(height * scale))
            image = cv2.resize(image, (new_w, new_h))
            # Use the real per-axis ratios (after integer truncation) so
            # boxes map back onto the original pixels exactly.
            scale_x = new_w / float(width)
            scale_y = new_h / float(height)

        # Make sure OCR receives a 3-channel array.
        if len(image.shape) == 2 or image.shape[2] == 1:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.shape[2] == 3:
            # NOTE(review): remove_text_dynamic_fill() already applies
            # BGR->RGB before calling this function, so this swap runs a
            # second time on that path - confirm which channel order the
            # OCR model expects.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        results = ocr.ocr(image, cls=True)

        # Covers None, an empty list, and a first page that is None/empty.
        if not results or not results[0]:
            print("No OCR results found or OCR returned None.")
            return []

        boxes = []
        for box, (text, confidence) in results[0]:
            if not text.strip():
                continue
            # Undo the OCR downscale so coordinates refer to the input image.
            xs = [pt[0] / scale_x for pt in box]
            ys = [pt[1] / scale_y for pt in box]
            rect = (int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys)))
            boxes.append((rect, text, confidence))
        return boxes

    except Exception as e:
        # Best-effort by design: a failure on one image must not abort the
        # whole batch, so report it and return "no boxes".
        print(f"OCR failed on image: {e}")
        return []



def remove_text_dynamic_fill(img_path, output_path, min_confidence=0.4):
    """Paint over detected text in an image and save the result.

    Each text box reported by ``detect_text_boxes`` is padded (more padding
    for taller text) and filled with a flat colour: pure white, black or
    yellow when the padded region's mean colour matches one of those,
    otherwise the median colour sampled around the box border.

    Args:
        img_path: Path of the image to read.
        output_path: Path the cleaned image is written to.
        min_confidence: Detections with a confidence below this value are
            left untouched.

    Returns:
        None. Nothing is written when the input cannot be read.
    """
    image = cv2.imread(img_path)
    if image is None:
        # Report instead of failing silently so a missing output file can
        # be traced back to its input.
        print(f"Could not read image: {img_path}")
        return

    if len(image.shape) == 2 or image.shape[2] == 1:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    else:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    img_h, img_w = image.shape[:2]

    for bbox, text, confidence in detect_text_boxes(image):
        if confidence < min_confidence or not text.strip():
            continue

        x_min, y_min, x_max, y_max = bbox
        height = y_max - y_min

        # Taller text gets a wider margin.
        if height <= 30:
            padding = 2
        elif height <= 60:
            padding = 4
        else:
            padding = 6

        x_min_p = max(0, x_min - padding)
        y_min_p = max(0, y_min - padding)
        x_max_p = min(img_w - 1, x_max + padding)
        y_max_p = min(img_h - 1, y_max + padding)

        sample_crop = image[y_min_p:y_max_p, x_min_p:x_max_p]
        if sample_crop.size == 0:
            # Degenerate box: nothing to sample, and the mean of an empty
            # array would be NaN.
            continue
        avg_color = np.mean(sample_crop.reshape(-1, 3), axis=0)

        fill_color = classify_background_color(avg_color)
        if fill_color is None:
            fill_color = sample_border_color(image, bbox)

        cv2.rectangle(image, (x_min_p, y_min_p), (x_max_p, y_max_p), fill_color, -1)

    # Back to OpenCV's BGR channel order before writing.
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(output_path, image):
        print(f"Could not write image: {output_path}")

import uuid

def process_folder(input_files):
    """Clean every uploaded image and bundle the results into a zip archive.

    Args:
        input_files: Iterable of uploads. Each item may be a path
            (``str`` / ``os.PathLike``) or an object whose ``name``
            attribute is the path. ``None`` is treated as "no files".

    Returns:
        str: Path of the created zip archive in the system temp directory.
        The archive is scheduled for deletion via ``delayed_cleanup``.
    """
    # The staging directory is removed as soon as the archive has been
    # built, so processed images do not pile up on disk.
    with tempfile.TemporaryDirectory() as temp_output:
        for file in input_files or []:
            if isinstance(file, (str, os.PathLike)):
                src_path = os.fspath(file)
            else:
                src_path = file.name
            output_path = os.path.join(temp_output, os.path.basename(src_path))
            remove_text_dynamic_fill(src_path, output_path)

        unique_name = str(uuid.uuid4())[:8]
        # make_archive appends ".zip" itself and returns the final path.
        archive_base = os.path.join(
            tempfile.gettempdir(), f"cleaned_output_{unique_name}"
        )
        zip_path = shutil.make_archive(archive_base, 'zip', temp_output)

    delayed_cleanup(zip_path)
    return zip_path


import threading
import time

def delayed_cleanup(path, delay=30):
    """Delete the file at *path* after *delay* seconds, in the background.

    Args:
        path: File to remove.
        delay: Seconds to wait before removing it.

    Returns:
        threading.Timer: The already-started timer, so callers may
        ``cancel()`` or ``join()`` it. Callers that ignore the return value
        behave as before.
    """
    def cleanup():
        try:
            # Remove directly rather than exists()-then-remove(), which can
            # race with another deleter.
            os.remove(path)
        except FileNotFoundError:
            # Already gone - nothing left to do.
            pass
        except OSError as exc:
            print(f"Could not remove {path}: {exc}")

    timer = threading.Timer(delay, cleanup)
    timer.start()
    return timer



# Gradio front end: accepts multiple image uploads, passes them to
# process_folder(), and offers the resulting zip for download.
demo = gr.Interface(
    fn=process_folder,
    inputs=gr.File(
        file_types=[".jpg", ".jpeg", ".png"],
        file_count="multiple",
        label="Upload Comic Images",
    ),
    outputs=gr.File(label="Download Cleaned Zip"),
    title="Comic Text Cleaner",
    description="Upload comic images and get a zip of cleaned versions (text removed). Uses PaddleOCR for detection.",
)

# Start the web server when this module is executed.
demo.launch()