Spaces:
Build error
Build error
File size: 5,156 Bytes
b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb 7d1906d b0763cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# app.py – 2025‑06 update
import os, cv2, time, psutil, shutil, tempfile, zipfile, numpy as np, gradio as gr
from paddleocr import PaddleOCR
# ------------------------------------------------------------------------
# 1. PaddleOCR – latest lightweight v5‑mobile with angle‑cls disabled
# ------------------------------------------------------------------------
# Shared OCR engine: constructed once at import time and reused for every page.
# NOTE(review): `use_angle_cls`, `show_log` and the `*_model_dir` keywords look
# like PaddleOCR 2.x-era names -- confirm the installed release accepts them.
ocr = PaddleOCR(
    det_model_dir="ppocr_v5_det",
    rec_model_dir="ppocr_v5_rec",
    lang="en",
    # Comic bubbles are already horizontal, so no angle classification.
    use_angle_cls=False,
    show_log=False,
)
# ------------------------------------------------------------------------
# 2. Utility helpers
# ------------------------------------------------------------------------
def wait_for_cpu(th=90, interval=3, timeout=30):
    """Block while CPU usage is above ``th`` percent, for at most ``timeout`` s.

    Args:
        th: CPU usage percentage above which the function keeps waiting.
        interval: Seconds to sleep between two usage readings.
        timeout: Approximate upper bound, in seconds, on the total wait.

    Returns:
        None. The function returns as soon as a reading is at or below
        ``th`` or the time budget is used up, whichever comes first.
    """
    # A monotonic clock keeps the budget immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    # Each reading is requested with interval=1, as in the original call.
    while psutil.cpu_percent(interval=1) > th:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline, so the wait cannot overshoot the
        # timeout by a whole ``interval``.
        time.sleep(min(interval, remaining))
def classify_bg(avg, w=230, b=50, y=100):
    """Snap an average colour to a canonical background colour, if it fits.

    Args:
        avg: Three channel values; they are unpacked in red, green, blue
            order.
        w: Every channel must be at least this value to count as white.
        b: Every channel must be at most this value to count as black.
        y: Red and green must be at least this value, and blue below it,
            to count as yellow.

    Returns:
        ``(255, 255, 255)``, ``(0, 0, 0)`` or ``(255, 255, 0)`` for the first
        rule that matches (checked in that order), otherwise ``None``.
    """
    red, green, blue = avg
    channels = (red, green, blue)

    if all(value >= w for value in channels):
        return (255, 255, 255)  # white bubble

    if all(value <= b for value in channels):
        return (0, 0, 0)  # black bubble

    if all(value >= y for value in (red, green)) and blue < y:
        return (255, 255, 0)  # yellowish narration box

    return None
def sample_border(img, box, pad=2):
    """Return the median colour of a thin frame of pixels around ``box``.

    The box is grown by ``pad`` pixels on every side (clipped to the image)
    and the four ``pad``-thick strips along its edges are pooled together.

    Args:
        img: Image array indexed as ``img[row, col]`` with three values per
            pixel (each strip is flattened to rows of three).
        box: ``(x1, y1, x2, y2)`` corner coordinates.
        pad: Frame thickness and box growth, in pixels.

    Returns:
        A tuple of three plain Python ints: the per-channel median of the
        frame pixels, in the same channel order as ``img``.

    Raises:
        ValueError: If the frame contains no pixels (for example ``pad`` is 0
            or the box does not overlap the image).
    """
    h, w = img.shape[:2]
    x1, y1, x2, y2 = box
    x1, x2 = max(0, x1 - pad), min(w - 1, x2 + pad)
    y1, y2 = max(0, y1 - pad), min(h - 1, y2 + pad)

    strips = (
        img[y1:y1 + pad, x1:x2],           # top edge
        img[max(y1, y2 - pad):y2, x1:x2],  # bottom edge (start clamped)
        img[y1:y2, x1:x1 + pad],           # left edge
        img[y1:y2, max(x1, x2 - pad):x2],  # right edge (start clamped)
    )
    # Horizontal and vertical strips have different 2-D shapes, so each one
    # is flattened to a pixel list before pooling; concatenating the raw
    # strips along axis 0 fails unless width happens to equal ``pad``.
    border = np.concatenate([s.reshape(-1, 3) for s in strips], axis=0)
    if border.size == 0:
        raise ValueError("sample_border: no border pixels inside the image")

    # Plain ints (truncated, like ``astype(int)``) are safe to hand to APIs
    # that reject NumPy scalar types.
    return tuple(int(c) for c in np.median(border, axis=0))
# ------------------------------------------------------------------------
# 3. Bubble‑mask (simple heuristic: very‑light regions enclosed)
# ------------------------------------------------------------------------
def make_bubble_mask(rgb):
    """Build a 0/1 mask of the very light regions of an RGB image.

    The image is converted to grey, pixels brighter than 230 are kept, and a
    morphological closing joins nearby light areas.

    NOTE(review): only the grey level is thresholded, so coloured (e.g.
    yellow) boxes are included only when their grey value exceeds 230 --
    confirm this matches the intended narration-box handling.

    Args:
        rgb: Image in RGB channel order.

    Returns:
        A single-channel array holding 1 where the closed light mask is set
        and 0 elsewhere.
    """
    luminance = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

    # Binary map: 255 for near-white pixels, 0 for everything else.
    light = cv2.threshold(luminance, 230, 255, cv2.THRESH_BINARY)[1]

    # A small elliptical closing bridges gaps such as dotted outlines.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    closed = cv2.morphologyEx(
        light, cv2.MORPH_CLOSE, closing_kernel, iterations=2
    )

    # Rescale 0/255 to 0/1.
    return closed // 255
# ------------------------------------------------------------------------
# 4. Detect & clean
# ------------------------------------------------------------------------
def remove_text_in_bubbles(img_path, dst_path):
    """Paint over OCR-detected text that sits inside a bubble and save it.

    Every OCR line with confidence of at least 0.4 and non-blank text whose
    box centre lies on the bubble mask is covered with a filled rectangle.
    The fill is a canonical colour from ``classify_bg`` when the region's
    mean colour matches one, otherwise the colour from ``sample_border``.

    Args:
        img_path: Path of the image to read.
        dst_path: Path the (possibly unchanged) image is written to.

    Returns:
        None. Nothing is written when ``img_path`` cannot be read.
    """
    bgr = cv2.imread(img_path)
    if bgr is None:  # skip unreadable
        return

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    img_h, img_w = rgb.shape[:2]
    bubble_mask = make_bubble_mask(rgb)

    results = ocr.ocr(rgb, cls=False)
    # No detections: fall through with nothing to paint and save as-is.
    lines = results[0] if results and results[0] else []

    for line in lines:
        # Each entry is unpacked as [box_points, (text, confidence)].
        box = line[0]
        text, conf = line[1]
        if conf < 0.4 or not text.strip():
            continue

        # Axis-aligned bounding box of the detected polygon.
        xs, ys = zip(*box)
        x1, x2 = int(min(xs)), int(max(xs))
        y1, y2 = int(min(ys)), int(max(ys))

        # Skip text whose centre is outside the bubble mask. The centre is
        # clamped so boxes touching or exceeding the image edge cannot index
        # out of bounds (or wrap around through a negative index).
        cx = min(max(int((x1 + x2) / 2), 0), img_w - 1)
        cy = min(max(int((y1 + y2) / 2), 0), img_h - 1)
        if bubble_mask[cy, cx] == 0:
            continue

        # Taller text gets a larger margin around it.
        h_box = y2 - y1
        pad = 2 if h_box <= 30 else 4 if h_box <= 60 else 6
        x1p, y1p = max(0, x1 - pad), max(0, y1 - pad)
        x2p, y2p = min(img_w - 1, x2 + pad), min(img_h - 1, y2 + pad)

        crop = rgb[y1p:y2p, x1p:x2p]
        if crop.size == 0:
            # Degenerate box: an empty region has no meaningful mean colour.
            continue

        # Choose the fill colour (computed in RGB).
        fill = classify_bg(np.mean(crop.reshape(-1, 3), axis=0))
        if fill is None:
            fill = sample_border(rgb, (x1, y1, x2, y2))

        # The rectangle is drawn on the BGR image, so reverse the channel
        # order; plain ints avoid OpenCV rejecting NumPy scalar colours.
        fill_bgr = tuple(int(c) for c in reversed(fill))
        cv2.rectangle(bgr, (x1p, y1p), (x2p, y2p), fill_bgr, thickness=-1)

    cv2.imwrite(dst_path, bgr)
# ------------------------------------------------------------------------
# 5. Gradio batch wrapper
# ------------------------------------------------------------------------
def process_folder(files):
    """Clean every uploaded image and return the path of a zip of the results.

    Args:
        files: Iterable of uploaded files. Each item may be a path (``str``
            or path-like) or an object exposing its path as ``.name``.
            ``None`` is treated as an empty upload.

    Returns:
        Path of the created ``.zip`` archive. Images that could not be read
        are absent from it.
    """
    wait_for_cpu()
    out_dir = tempfile.mkdtemp()
    try:
        for f in files or []:
            # Accept plain paths as well as upload objects carrying `.name`.
            src = f if isinstance(f, (str, os.PathLike)) else f.name
            src = os.fspath(src)
            fname = os.path.basename(src)
            remove_text_in_bubbles(src, os.path.join(out_dir, fname))
        # The archive is written next to the working directory as
        # "<out_dir>.zip", so it survives the cleanup below.
        return shutil.make_archive(out_dir, 'zip', out_dir)
    finally:
        # The per-call working directory is no longer needed once archived;
        # removing it keeps temp storage from growing with every request.
        shutil.rmtree(out_dir, ignore_errors=True)
# ------------------------------------------------------------------------
# 6. Gradio UI
# ------------------------------------------------------------------------
# Single-function interface: several page images in, one zip archive out.
demo = gr.Interface(
    title="Comic Bubble Text Cleaner – PP‑OCRv5",
    description=(
        "Removes speech/thought/narration bubble text only, "
        "leaving outside FX or captions untouched. "
        "Powered by PaddleOCR PP‑OCRv5‑mobile."
    ),
    fn=process_folder,
    inputs=gr.File(
        label="Upload comic page images",
        file_count="multiple",
        file_types=[".jpg", ".jpeg", ".png"],
    ),
    outputs=gr.File(label="Download cleaned .zip"),
    concurrency_limit=1,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|