SohomToom committed on
Commit
b0763cb
·
verified ·
1 Parent(s): 7e3d56c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -125
app.py CHANGED
@@ -1,144 +1,136 @@
1
- # app.py
2
- import os
3
- import shutil
4
- import tempfile
5
- import cv2
6
- import numpy as np
7
- import gradio as gr
8
  from paddleocr import PaddleOCR
9
- import psutil
10
- import time
11
- #import spaces
12
 
13
- #ocr = PaddleOCR(use_angle_cls=True, lang='en')
14
- #ocr = PaddleOCR(use_angle_cls=True, lang='en', det_model_dir='models/det', rec_model_dir='models/rec')
15
- ocr = PaddleOCR(use_angle_cls=True, lang='en')
 
 
 
 
 
 
 
16
 
17
- def classify_background_color(avg_color, white_thresh=230, black_thresh=50, yellow_thresh=100):
18
- r, g, b = avg_color
19
- if r >= white_thresh and g >= white_thresh and b >= white_thresh:
 
 
 
 
 
 
 
 
 
 
 
20
  return (255, 255, 255)
21
- if r <= black_thresh and g <= black_thresh and b <= black_thresh:
22
  return (0, 0, 0)
23
- if r >= yellow_thresh and g >= yellow_thresh and b < yellow_thresh:
24
  return (255, 255, 0)
25
  return None
26
 
27
- def sample_border_color(image, box, padding=2):
28
- h, w = image.shape[:2]
29
- x_min, y_min, x_max, y_max = box
30
- x_min = max(0, x_min - padding)
31
- x_max = min(w-1, x_max + padding)
32
- y_min = max(0, y_min - padding)
33
- y_max = min(h-1, y_max + padding)
34
-
35
- top = image[y_min:y_min+padding, x_min:x_max]
36
- bottom = image[y_max-padding:y_max, x_min:x_max]
37
- left = image[y_min:y_max, x_min:x_min+padding]
38
- right = image[y_min:y_max, x_max-padding:x_max]
39
-
40
- border_pixels = np.vstack((top.reshape(-1, 3), bottom.reshape(-1, 3),
41
- left.reshape(-1, 3), right.reshape(-1, 3)))
42
- if border_pixels.size == 0:
43
- return (255, 255, 255)
44
- median_color = np.median(border_pixels, axis=0)
45
- return tuple(map(int, median_color))
46
-
47
-
48
- def detect_text_boxes(image):
49
- results = ocr.ocr(image, cls=True)
50
- boxes = []
51
- if results and results[0]:
52
- for line in results[0]:
53
- box, (text, confidence) = line
54
- if text.strip():
55
- x_min = int(min(pt[0] for pt in box))
56
- x_max = int(max(pt[0] for pt in box))
57
- y_min = int(min(pt[1] for pt in box))
58
- y_max = int(max(pt[1] for pt in box))
59
- boxes.append(((x_min, y_min, x_max, y_max), text, confidence))
60
- else:
61
- print("No text detected in the image.")
62
- return boxes
63
-
64
- def remove_text_dynamic_fill(img_path, output_path):
65
- image = cv2.imread(img_path)
66
- if image is None:
67
  return
 
 
68
 
69
- if len(image.shape) == 2:
70
- image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
71
- elif image.shape[2] == 1:
72
- image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
73
- else:
74
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
75
-
76
- boxes = detect_text_boxes(image)
77
 
78
- for (bbox, text, confidence) in boxes:
79
- if confidence < 0.4 or not text.strip():
 
 
80
  continue
81
 
82
- x_min, y_min, x_max, y_max = bbox
83
- height = y_max - y_min
84
-
85
- if height <= 30:
86
- padding = 2
87
- elif height <= 60:
88
- padding = 4
89
- else:
90
- padding = 6
91
-
92
- x_min_p = max(0, x_min - padding)
93
- y_min_p = max(0, y_min - padding)
94
- x_max_p = min(image.shape[1]-1, x_max + padding)
95
- y_max_p = min(image.shape[0]-1, y_max + padding)
96
-
97
- sample_crop = image[y_min_p:y_max_p, x_min_p:x_max_p]
98
- avg_color = np.mean(sample_crop.reshape(-1, 3), axis=0)
99
-
100
- fill_color = classify_background_color(avg_color)
101
- if fill_color is None:
102
- fill_color = sample_border_color(image, (x_min, y_min, x_max, y_max))
103
-
104
- cv2.rectangle(image, (x_min_p, y_min_p), (x_max_p, y_max_p), fill_color, -1)
105
-
106
- image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
107
- cv2.imwrite(output_path, image)
108
-
109
- #@spaces.GPU
110
- def process_folder(input_files):
111
- temp_output = tempfile.mkdtemp()
112
-
113
-
114
- wait_for_cpu();
115
- for file in input_files:
116
- filename = os.path.basename(file.name)
117
- output_path = os.path.join(temp_output, filename)
118
- remove_text_dynamic_fill(file.name, output_path)
119
-
120
- zip_path = shutil.make_archive(temp_output, 'zip', temp_output)
121
  return zip_path
122
 
123
-
124
-
125
- def wait_for_cpu(threshold=90, interval=3, timeout=30):
126
- start = time.time()
127
- while psutil.cpu_percent(interval=1) > threshold:
128
- print("High CPU usage detected, waiting...")
129
- time.sleep(interval)
130
- if time.time() - start > timeout:
131
- print("Timed out waiting for CPU to cool down.")
132
- break
133
-
134
-
135
  demo = gr.Interface(
136
  fn=process_folder,
137
- inputs=gr.File(file_types=[".jpg", ".jpeg", ".png"], file_count="multiple", label="Upload Comic Images"),
138
- outputs=gr.File(label="Download Cleaned Zip"),
139
- concurrency_limit=1,
140
- title="Comic Text Cleaner",
141
- description="Upload comic images and get a zip of cleaned versions (text removed). Uses PaddleOCR for detection."
 
 
 
 
142
  )
143
 
144
- demo.launch()
 
 
1
+ # app.py – 2025‑06 update
2
+ import os, cv2, time, psutil, shutil, tempfile, zipfile, numpy as np, gradio as gr
 
 
 
 
 
3
  from paddleocr import PaddleOCR
 
 
 
4
 
5
+ # ------------------------------------------------------------------------
6
+ # 1. PaddleOCR latest lightweight v5‑mobile with angle‑cls disabled
7
+ # ------------------------------------------------------------------------
8
# Single OCR engine, constructed once at import time and shared by every call
# that needs text detection.
# NOTE(review): both model directories are relative paths, so they are
# presumably resolved against the process working directory — confirm that
# "ppocr_v5_det" and "ppocr_v5_rec" actually exist there at start-up.
ocr = PaddleOCR(
    use_angle_cls=False,  # comic bubbles are already horizontal
    lang="en",
    det_model_dir="ppocr_v5_det",
    rec_model_dir="ppocr_v5_rec",
    show_log=False,
)
15
 
16
+ # ------------------------------------------------------------------------
17
+ # 2. Utility helpers
18
+ # ------------------------------------------------------------------------
19
def wait_for_cpu(th=90, interval=3, timeout=30):
    """Hold the caller back while the CPU is busier than ``th`` percent.

    Each round samples CPU usage over one second via ``psutil.cpu_percent``.
    If the sample is not above ``th`` the function returns immediately;
    otherwise it sleeps ``interval`` seconds and tries again, giving up once
    more than ``timeout`` seconds have passed since the call started.

    Args:
        th: CPU usage percentage above which the function keeps waiting.
        interval: Seconds to sleep after each over-threshold sample.
        timeout: Approximate upper bound, in seconds, on the total wait.

    Returns:
        None.
    """
    began = time.time()
    while True:
        load = psutil.cpu_percent(interval=1)
        if not (load > th):
            return
        time.sleep(interval)
        # The timeout is only checked after a sleep, so the real wait can
        # overshoot ``timeout`` by up to one sample plus one sleep.
        if time.time() - began > timeout:
            return
26
+
27
def classify_bg(avg, w=230, b=50, y=100):
    """Snap an average colour to pure white, black or yellow when it is close.

    Args:
        avg: Three-element sequence unpacked as ``(red, green, blue)``.
        w: Every channel must be at least this value to count as white.
        b: Every channel must be at most this value to count as black.
        y: Red and green must be at least this value, and blue strictly below
            it, to count as yellow.

    Returns:
        ``(255, 255, 255)``, ``(0, 0, 0)`` or ``(255, 255, 0)`` for the first
        rule that matches (checked in that order), or ``None`` when none does.
    """
    red, green, blue = avg
    channels = (red, green, blue)

    if all(c >= w for c in channels):  # white
        return (255, 255, 255)
    if all(c <= b for c in channels):  # black
        return (0, 0, 0)

    # Yellowish narration box: strong red and green, weak blue.
    looks_yellow = red >= y and green >= y and blue < y
    return (255, 255, 0) if looks_yellow else None
36
 
37
def sample_border(img, box, pad=2):
    """Return the median colour of a thin frame of pixels around ``box``.

    The box is grown by ``pad`` pixels on every side (clamped to the image),
    and the top, bottom, left and right strips of that grown box, each
    ``pad`` pixels thick, are pooled into one list of pixels.

    Args:
        img: Image array indexed as ``img[row, col]``; each pixel is reshaped
            to three values, so a 3-channel image is expected.
        box: ``(x1, y1, x2, y2)`` integer box in pixel coordinates.
        pad: Thickness of the sampled frame and amount the box is grown by.

    Returns:
        A 3-tuple of Python ints holding the per-channel median of the frame,
        or ``(255, 255, 255)`` when the frame contains no pixels.
    """
    h, w = img.shape[:2]
    x1, y1, x2, y2 = box
    x1, x2 = max(0, x1 - pad), min(w - 1, x2 + pad)
    y1, y2 = max(0, y1 - pad), min(h - 1, y2 + pad)

    strips = (
        img[y1:y1 + pad, x1:x2],          # top
        img[max(0, y2 - pad):y2, x1:x2],  # bottom
        img[y1:y2, x1:x1 + pad],          # left
        img[y1:y2, max(0, x2 - pad):x2],  # right
    )
    # Horizontal and vertical strips have different shapes, so each one is
    # flattened to a list of pixels before they are joined.
    pixels = np.concatenate([s.reshape(-1, 3) for s in strips], axis=0)
    if pixels.size == 0:
        return (255, 255, 255)

    # Plain ints keep the result usable as a drawing colour.
    return tuple(int(c) for c in np.median(pixels, axis=0))
47
+
48
+ # ------------------------------------------------------------------------
49
+ # 3. Bubble‑mask (simple heuristic: very‑light regions enclosed)
50
+ # ------------------------------------------------------------------------
51
def make_bubble_mask(rgb):
    """Build a 0/1 mask of the very light regions of an RGB image.

    The image is converted to greyscale, binarised with
    ``cv2.threshold(..., 230, 255, cv2.THRESH_BINARY)``, and then closed
    morphologically (5x5 elliptical kernel, two iterations) to join dotted
    edges.

    Args:
        rgb: Image in RGB channel order, as accepted by
            ``cv2.cvtColor(..., cv2.COLOR_RGB2GRAY)``.

    Returns:
        The closed mask integer-divided by 255, i.e. values 0 or 1.
    """
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    luminance = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

    # NOTE(review): an earlier comment here mentioned "narration-yellow", but
    # only this single greyscale brightness cut is applied — confirm whether
    # yellow narration boxes are meant to be part of the mask.
    light = cv2.threshold(luminance, 230, 255, cv2.THRESH_BINARY)[1]

    closed = cv2.morphologyEx(light, cv2.MORPH_CLOSE, closing_kernel, iterations=2)
    return closed // 255  # 0/1
59
+
60
+ # ------------------------------------------------------------------------
61
+ # 4. Detect & clean
62
+ # ------------------------------------------------------------------------
63
def remove_text_in_bubbles(img_path, dst_path):
    """Paint over OCR-detected text that sits inside a light bubble and save.

    The image is read with OpenCV (BGR), converted to RGB for OCR and colour
    analysis, and every accepted text box is filled with a flat colour on the
    BGR image, which is then written to ``dst_path``.

    A detection is skipped when its confidence is below 0.4, its text is
    blank, or the centre of its bounding box falls outside the bubble mask.

    Args:
        img_path: Path of the image to read.
        dst_path: Path the cleaned image is written to.

    Returns:
        None. Nothing is written when the input cannot be read; the image is
        written unchanged when OCR finds no text.
    """
    bgr = cv2.imread(img_path)
    if bgr is None:  # skip unreadable
        return
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    bubble_mask = make_bubble_mask(rgb)
    img_h, img_w = rgb.shape[:2]

    results = ocr.ocr(rgb, cls=False)
    if not results or not results[0]:
        cv2.imwrite(dst_path, bgr)
        return

    for line in results[0]:
        box = line[0]
        text, conf = line[1]
        if conf < 0.4 or not text.strip():
            continue

        # Axis-aligned bounding box of the detected quadrilateral.
        xs, ys = zip(*box)
        x1, x2 = int(min(xs)), int(max(xs))
        y1, y2 = int(min(ys)), int(max(ys))

        # Clamp the centre so a box touching or crossing the image edge
        # cannot index outside the mask (or wrap around via a negative index).
        cx = min(max((x1 + x2) // 2, 0), img_w - 1)
        cy = min(max((y1 + y2) // 2, 0), img_h - 1)
        if bubble_mask[cy, cx] == 0:
            continue  # text is not inside a bubble

        # Taller text gets a wider margin around the box.
        h_box = y2 - y1
        pad = 2 if h_box <= 30 else 4 if h_box <= 60 else 6
        x1p, y1p = max(0, x1 - pad), max(0, y1 - pad)
        x2p, y2p = min(img_w - 1, x2 + pad), min(img_h - 1, y2 + pad)

        crop = rgb[y1p:y2p, x1p:x2p]
        if crop.size == 0:  # degenerate box: no pixels to average or fill
            continue

        # Choose the fill colour in RGB space...
        fill_rgb = classify_bg(np.mean(crop.reshape(-1, 3), axis=0))
        if fill_rgb is None:
            fill_rgb = sample_border(rgb, (x1, y1, x2, y2))

        # ...then reverse it to BGR, because the rectangle is drawn on the
        # BGR image that gets saved. Plain ints keep it a valid colour.
        fill_bgr = tuple(int(c) for c in fill_rgb)[::-1]
        cv2.rectangle(bgr, (x1p, y1p), (x2p, y2p), fill_bgr, thickness=-1)

    cv2.imwrite(dst_path, bgr)
106
+
107
+ # ------------------------------------------------------------------------
108
+ # 5. Gradio batch wrapper
109
+ # ------------------------------------------------------------------------
110
def process_folder(files):
    """Clean every uploaded image and return the path of a zip of the results.

    Args:
        files: Iterable of uploaded files, each either a filesystem path or an
            object exposing the path as ``.name``. May be ``None`` (for
            example when submitted with nothing uploaded), which is treated
            as an empty upload.

    Returns:
        Path of the ``.zip`` archive built from the temporary output
        directory.
    """
    wait_for_cpu()
    out_dir = tempfile.mkdtemp()
    for f in files or []:
        # Accept both plain paths and file wrappers that carry ``.name``.
        src = f if isinstance(f, (str, os.PathLike)) else f.name
        fname = os.path.basename(src)
        remove_text_in_bubbles(src, os.path.join(out_dir, fname))
    zip_path = shutil.make_archive(out_dir, 'zip', out_dir)
    return zip_path
118
 
119
+ # ------------------------------------------------------------------------
120
+ # 6. Gradio UI
121
+ # ------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
122
# Gradio front end: takes several image uploads, runs process_folder on them
# and offers the resulting archive for download.
demo = gr.Interface(
    fn=process_folder,
    inputs=gr.File(
        label="Upload comic page images",
        file_count="multiple",
        file_types=[".jpg", ".jpeg", ".png"],
    ),
    outputs=gr.File(label="Download cleaned .zip"),
    concurrency_limit=1,
    title="Comic Bubble Text Cleaner – PP‑OCRv5",
    description=(
        "Removes speech/thought/narration bubble text only, "
        "leaving outside FX or captions untouched. "
        "Powered by PaddleOCR PP‑OCRv5‑mobile."
    ),
)

# Start the server only when the file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()