File size: 5,156 Bytes
b0763cb
 
7d1906d
 
b0763cb
 
 
 
 
 
 
 
 
 
7d1906d
b0763cb
 
 
 
 
 
 
 
 
 
 
 
 
 
7d1906d
b0763cb
7d1906d
b0763cb
7d1906d
 
 
b0763cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d1906d
b0763cb
 
7d1906d
b0763cb
 
 
 
7d1906d
b0763cb
 
 
 
7d1906d
 
b0763cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d1906d
 
b0763cb
 
 
7d1906d
 
b0763cb
 
 
 
 
 
 
 
 
7d1906d
 
b0763cb
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# app.py  –  2025‑06 update
import os, cv2, time, psutil, shutil, tempfile, zipfile, numpy as np, gradio as gr
from paddleocr import PaddleOCR

# ------------------------------------------------------------------------
# 1. PaddleOCR – latest lightweight v5‑mobile with angle‑cls disabled
# ------------------------------------------------------------------------
# Module-level OCR engine: built once at import time and shared by every
# ocr.ocr(...) call made further down in this file.
# NOTE(review): det_model_dir / rec_model_dir are relative paths, so they are
# presumably resolved against the process working directory — confirm the
# model folders are shipped next to this script.
# NOTE(review): use_angle_cls / show_log look like PaddleOCR 2.x keyword
# names — confirm the installed PaddleOCR release still accepts them.
ocr = PaddleOCR(
    lang="en",
    det_model_dir="ppocr_v5_det",
    rec_model_dir="ppocr_v5_rec",
    use_angle_cls=False,            # comic bubbles are already horizontal
    show_log=False
)

# ------------------------------------------------------------------------
# 2. Utility helpers
# ------------------------------------------------------------------------
def wait_for_cpu(th=90, interval=3, timeout=30):
    """Block while CPU usage stays above ``th`` percent.

    Every round samples ``psutil.cpu_percent`` over a one-second window.
    When the reading is above ``th`` the function sleeps ``interval`` seconds
    and then gives up once more than ``timeout`` seconds have elapsed since
    it was entered.  Always returns ``None``.
    """
    began = time.time()
    while True:
        load = psutil.cpu_percent(interval=1)
        if not (load > th):
            return
        time.sleep(interval)
        waited = time.time() - began
        if waited > timeout:
            return

def classify_bg(avg, w=230, b=50, y=100):
    """Map an average colour onto one of three canonical fill colours.

    Args:
        avg: Three-element colour, unpacked as (red, green, blue).
        w: Every channel must be at least this value to count as white.
        b: Every channel must be at most this value to count as black.
        y: Red and green at or above this value with blue below it counts
            as yellow.

    Returns:
        ``(255, 255, 255)``, ``(0, 0, 0)`` or ``(255, 255, 0)`` for the first
        rule that matches (checked in that order), otherwise ``None``.
    """
    red, green, blue = avg
    channels = (red, green, blue)

    if all(c >= w for c in channels):
        return (255, 255, 255)
    if all(c <= b for c in channels):
        return (0, 0, 0)

    yellowish = red >= y and green >= y and blue < y
    return (255, 255, 0) if yellowish else None

def sample_border(img, box, pad=2):
    """Return the median colour of a thin frame around ``box``.

    The box is grown by ``pad`` pixels on every side (clamped to the image)
    and the four ``pad``-thick edge strips of the grown rectangle are pooled.

    Args:
        img: Image array indexed as ``img[row, col]``; pixels are regrouped
            with ``reshape(-1, 3)``, so three colour channels are assumed.
        box: ``(x1, y1, x2, y2)`` rectangle in pixel coordinates.
        pad: Thickness of the sampled frame in pixels.

    Returns:
        A 3-tuple of plain Python ``int`` values in the channel order of
        ``img``.  If the frame is empty the whole clamped rectangle is used
        instead; if that is empty as well, ``(255, 255, 255)`` is returned.
    """
    h, w = img.shape[:2]
    x1, y1, x2, y2 = box
    x1, x2 = max(0, x1 - pad), min(w - 1, x2 + pad)
    y1, y2 = max(0, y1 - pad), min(h - 1, y2 + pad)

    # max(...) keeps the slice start from going negative, which would make
    # numpy index from the opposite end of the image.
    strips = (
        img[y1:y1 + pad, x1:x2],               # top
        img[max(y1, y2 - pad):y2, x1:x2],      # bottom
        img[y1:y2, x1:x1 + pad],               # left
        img[y1:y2, max(x1, x2 - pad):x2],      # right
    )
    # Horizontal and vertical strips have different 2-D shapes, so each one
    # is flattened to a list of pixels before they are joined.
    pixels = np.concatenate([s.reshape(-1, 3) for s in strips], axis=0)

    if pixels.size == 0:
        pixels = img[y1:y2 + 1, x1:x2 + 1].reshape(-1, 3)
    if pixels.size == 0:
        return (255, 255, 255)

    # Plain ints (not numpy scalars) so the tuple can be handed straight to
    # drawing APIs that expect native numbers.
    return tuple(int(v) for v in np.median(pixels, axis=0))

# ------------------------------------------------------------------------
# 3. Bubble‑mask (simple heuristic: near‑white regions, gaps closed morphologically)
# ------------------------------------------------------------------------
def make_bubble_mask(rgb):
    """Build a 0/1 mask marking the very bright areas of an RGB image.

    The image is converted to grayscale, pixels brighter than 230 are kept,
    and a 5x5 elliptical morphological closing (two iterations) bridges
    small gaps such as dotted outlines.

    NOTE(review): only grayscale brightness is tested here; saturated yellow
    narration boxes presumably fall below the 230 cut-off — confirm.
    """
    luminance = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    bright = cv2.threshold(luminance, 230, 255, cv2.THRESH_BINARY)[1]
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    closed = cv2.morphologyEx(bright, cv2.MORPH_CLOSE, ellipse, iterations=2)
    # Scale 0/255 down to 0/1.
    return closed // 255

# ------------------------------------------------------------------------
# 4. Detect & clean
# ------------------------------------------------------------------------
def remove_text_in_bubbles(img_path, dst_path):
    """Paint over OCR-detected text that lies inside a bubble and save the page.

    The image at ``img_path`` is read, a bubble mask is derived from it, and
    OCR is run on the RGB version.  Each detection with confidence of at
    least 0.4 and non-blank text whose box centre falls on the bubble mask is
    covered by a filled rectangle in a background colour.  The result is
    written to ``dst_path``.

    Args:
        img_path: Path of the source image.
        dst_path: Path the cleaned image is written to.

    Returns:
        ``None``.  Nothing is written when the source image cannot be read.
    """
    bgr = cv2.imread(img_path)
    if bgr is None:  # skip unreadable
        return
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    bubble_mask = make_bubble_mask(rgb)
    img_h, img_w = rgb.shape[:2]

    results = ocr.ocr(rgb, cls=False)
    if not results or not results[0]:
        # No text found: pass the page through unchanged.
        cv2.imwrite(dst_path, bgr)
        return

    for line in results[0]:
        box = line[0]
        text, conf = line[1]
        if conf < 0.4 or not text.strip():
            continue

        # axis-aligned bounding box of the detected polygon
        xs, ys = zip(*box)
        x1, x2 = int(min(xs)), int(max(xs))
        y1, y2 = int(min(ys)), int(max(ys))

        # Skip if the box centre is outside the bubble mask.  The centre is
        # clamped so a box reaching past the image edge cannot index outside
        # the mask (or wrap around through a negative index).
        cx = min(max((x1 + x2) // 2, 0), img_w - 1)
        cy = min(max((y1 + y2) // 2, 0), img_h - 1)
        if bubble_mask[cy, cx] == 0:
            continue  # text is not inside a bubble

        # dynamic padding by height
        h_box = y2 - y1
        pad = 2 if h_box <= 30 else 4 if h_box <= 60 else 6
        x1p, y1p = max(0, x1 - pad), max(0, y1 - pad)
        x2p, y2p = min(img_w - 1, x2 + pad), min(img_h - 1, y2 + pad)

        crop = rgb[y1p:y2p, x1p:x2p]
        if crop.size == 0:
            # Degenerate box: no pixels to average and nothing to erase.
            continue

        # choose fill colour (computed in RGB channel order)
        fill = classify_bg(np.mean(crop.reshape(-1, 3), axis=0))
        if fill is None:
            fill = sample_border(rgb, (x1, y1, x2, y2))

        # The canvas is the BGR image, so the RGB fill is reversed before
        # drawing; plain ints keep the colour a native numeric tuple.
        fill_bgr = tuple(int(c) for c in fill[::-1])
        cv2.rectangle(bgr, (x1p, y1p), (x2p, y2p), fill_bgr, thickness=-1)

    cv2.imwrite(dst_path, bgr)

# ------------------------------------------------------------------------
# 5. Gradio batch wrapper
# ------------------------------------------------------------------------
def process_folder(files):
    """Clean every uploaded page and return the path of a zip of the results.

    Args:
        files: Iterable of uploaded files, each either a filesystem path or
            an object exposing the path as ``.name``.  ``None`` is treated
            as an empty upload.

    Returns:
        Path of the ``.zip`` archive holding the cleaned images.  Files that
        ``remove_text_in_bubbles`` did not write are simply absent from it.
    """
    wait_for_cpu()
    out_dir = tempfile.mkdtemp()
    try:
        for f in files or []:
            src = f if isinstance(f, (str, os.PathLike)) else f.name
            fname = os.path.basename(src)
            dst = os.path.join(out_dir, fname)

            # Two uploads may share a basename; add a numeric suffix instead
            # of silently overwriting the earlier result.
            stem, ext = os.path.splitext(fname)
            n = 1
            while os.path.exists(dst):
                dst = os.path.join(out_dir, f"{stem}_{n}{ext}")
                n += 1

            remove_text_in_bubbles(src, dst)

        # The archive is created next to out_dir (as "<out_dir>.zip"), so it
        # survives the cleanup below.
        return shutil.make_archive(out_dir, 'zip', out_dir)
    finally:
        # The loose images are no longer needed once they are zipped.
        shutil.rmtree(out_dir, ignore_errors=True)

# ------------------------------------------------------------------------
# 6. Gradio UI
# ------------------------------------------------------------------------
# Web UI definition: a multi-file image upload feeds process_folder, whose
# return value (the path of a zip archive) is offered as a download.
demo = gr.Interface(
    fn=process_folder,
    inputs=gr.File(file_types=[".jpg", ".jpeg", ".png"],
                   label="Upload comic page images",
                   file_count="multiple"),
    outputs=gr.File(label="Download cleaned .zip"),
    title="Comic Bubble Text Cleaner – PP‑OCRv5",
    description=("Removes speech/thought/narration bubble text only, "
                 "leaving outside FX or captions untouched. "
                 "Powered by PaddleOCR PP‑OCRv5‑mobile."),
    # NOTE(review): presumably restricts the app to one running request at a
    # time — confirm against the installed Gradio version.
    concurrency_limit=1
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()