# app.py
"""Gradio app that removes text from comic images.

Text regions are located with PaddleOCR and painted over with a solid colour
chosen from the surrounding background; the cleaned images are returned to
the user as a single zip archive.
"""
import os
import shutil
import tempfile
import time

import cv2
import gradio as gr
import numpy as np
import psutil
from paddleocr import PaddleOCR
# import spaces

# Alternative initialisation that loads detection/recognition models from
# local directories instead of the default ones:
# ocr = PaddleOCR(use_angle_cls=True, lang='en', det_model_dir='models/det', rec_model_dir='models/rec')
ocr = PaddleOCR(use_angle_cls=True, lang='en')


def classify_background_color(avg_color, white_thresh=230, black_thresh=50, yellow_thresh=100):
    """Snap an average colour to pure white, black or yellow when it is close.

    Args:
        avg_color: Three channel values, unpacked as ``(r, g, b)``.
        white_thresh: All channels at or above this value count as white.
        black_thresh: All channels at or below this value count as black.
        yellow_thresh: Red and green at or above this value with blue below
            it count as yellow.

    Returns:
        ``(255, 255, 255)``, ``(0, 0, 0)`` or ``(255, 255, 0)`` when the
        colour matches one of the classes (tested in that order), otherwise
        ``None``.
    """
    r, g, b = avg_color
    if r >= white_thresh and g >= white_thresh and b >= white_thresh:
        return (255, 255, 255)
    if r <= black_thresh and g <= black_thresh and b <= black_thresh:
        return (0, 0, 0)
    if r >= yellow_thresh and g >= yellow_thresh and b < yellow_thresh:
        return (255, 255, 0)
    return None


def sample_border_color(image, box, padding=2):
    """Return the median colour of a thin frame of pixels around *box*.

    The box is first grown by *padding* pixels on every side (clamped to the
    image), then four strips of thickness *padding* along the edges of the
    grown box are collected and their per-channel median is taken.

    Args:
        image: Image array indexed as ``[row, column]`` with three values per
            pixel (the strips are reshaped to ``(-1, 3)``).
        box: ``(x_min, y_min, x_max, y_max)`` in pixel coordinates.
        padding: Strip thickness and the amount the box is grown by.

    Returns:
        A tuple of three ints, or ``(255, 255, 255)`` when no pixels could be
        sampled.
    """
    h, w = image.shape[:2]
    x_min, y_min, x_max, y_max = box
    x_min = max(0, x_min - padding)
    x_max = min(w - 1, x_max + padding)
    y_min = max(0, y_min - padding)
    y_max = min(h - 1, y_max + padding)

    strips = (
        image[y_min:y_min + padding, x_min:x_max],   # top
        image[y_max - padding:y_max, x_min:x_max],   # bottom
        image[y_min:y_max, x_min:x_min + padding],   # left
        image[y_min:y_max, x_max - padding:x_max],   # right
    )
    border_pixels = np.vstack([strip.reshape(-1, 3) for strip in strips])
    if border_pixels.size == 0:
        return (255, 255, 255)

    median_color = np.median(border_pixels, axis=0)
    return tuple(map(int, median_color))


def detect_text_boxes(image):
    """Run OCR on *image* and return axis-aligned boxes for non-blank text.

    Each OCR line is unpacked as ``(box, (text, confidence))`` where ``box``
    is a sequence of ``(x, y)`` points; the points are reduced to their
    bounding rectangle.

    Returns:
        A list of ``((x_min, y_min, x_max, y_max), text, confidence)`` tuples.
        Empty when the OCR engine reports nothing (a message is printed in
        that case).
    """
    results = ocr.ocr(image, cls=True)
    boxes = []
    if results and results[0]:
        for line in results[0]:
            box, (text, confidence) = line
            if not text.strip():
                continue
            xs = [pt[0] for pt in box]
            ys = [pt[1] for pt in box]
            bounds = (int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys)))
            boxes.append((bounds, text, confidence))
    else:
        print("No text detected in the image.")
    return boxes


def _padding_for_height(height):
    """Pick how far to grow a text box, based on the text height in pixels."""
    if height <= 30:
        return 2
    if height <= 60:
        return 4
    return 6


def remove_text_dynamic_fill(img_path, output_path, min_confidence=0.4):
    """Paint over detected text in one image and write the result.

    For every detected text box with sufficient confidence, the box is grown
    by a height-dependent padding and filled with a solid colour: white, black
    or yellow when the region's mean colour is close to one of those,
    otherwise the median colour sampled just outside the box.

    Args:
        img_path: Path of the image to read.
        output_path: Path the cleaned image is written to.
        min_confidence: Detections with a confidence below this value are
            left untouched.

    Returns:
        None. Nothing is written when the input image cannot be read.
    """
    image = cv2.imread(img_path)
    if image is None:
        # Report the skip instead of silently dropping the file.
        print(f"Could not read image, skipping: {img_path}")
        return

    # Work in RGB so the fill colours below are expressed as (r, g, b).
    if len(image.shape) == 2 or image.shape[2] == 1:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    else:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    img_h, img_w = image.shape[:2]
    for bbox, text, confidence in detect_text_boxes(image):
        if confidence < min_confidence or not text.strip():
            continue

        x_min, y_min, x_max, y_max = bbox
        padding = _padding_for_height(y_max - y_min)
        x_min_p = max(0, x_min - padding)
        y_min_p = max(0, y_min - padding)
        x_max_p = min(img_w - 1, x_max + padding)
        y_max_p = min(img_h - 1, y_max + padding)

        sample_crop = image[y_min_p:y_max_p, x_min_p:x_max_p]
        fill_color = None
        if sample_crop.size:
            # An empty crop would yield a NaN mean (and a warning); it falls
            # through to border sampling just like an unclassified colour.
            avg_color = np.mean(sample_crop.reshape(-1, 3), axis=0)
            fill_color = classify_background_color(avg_color)
        if fill_color is None:
            fill_color = sample_border_color(image, bbox)

        cv2.rectangle(image, (x_min_p, y_min_p), (x_max_p, y_max_p), fill_color, -1)

    # cv2.imwrite expects BGR channel order.
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(output_path, image):
        print(f"Could not write cleaned image: {output_path}")


def _unique_output_path(directory, filename):
    """Return a path for *filename* in *directory* that does not exist yet."""
    candidate = os.path.join(directory, filename)
    stem, ext = os.path.splitext(filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{stem}_{counter}{ext}")
        counter += 1
    return candidate


# @spaces.GPU
def process_folder(input_files):
    """Clean every uploaded image and bundle the results into a zip archive.

    Args:
        input_files: Uploaded files; each item is either a path string or an
            object whose ``name`` attribute is the path.

    Returns:
        Path of the created zip archive, or ``None`` when nothing was
        uploaded.
    """
    if not input_files:
        return None

    wait_for_cpu()
    temp_output = tempfile.mkdtemp()
    try:
        for file in input_files:
            src_path = getattr(file, "name", file)
            # Uploads with the same basename must not overwrite each other.
            output_path = _unique_output_path(temp_output, os.path.basename(src_path))
            remove_text_dynamic_fill(src_path, output_path)
        # The archive is written next to the directory (temp_output + ".zip"),
        # so it survives the cleanup below.
        return shutil.make_archive(temp_output, 'zip', temp_output)
    finally:
        shutil.rmtree(temp_output, ignore_errors=True)


def wait_for_cpu(threshold=90, interval=3, timeout=30):
    """Block while CPU usage is above *threshold* percent.

    CPU usage is measured over a one-second window; while it stays above the
    threshold the function sleeps *interval* seconds and measures again. It
    gives up once more than *timeout* seconds have passed since the call.
    """
    start = time.time()
    while psutil.cpu_percent(interval=1) > threshold:
        print("High CPU usage detected, waiting...")
        time.sleep(interval)
        if time.time() - start > timeout:
            print("Timed out waiting for CPU to cool down.")
            break


demo = gr.Interface(
    fn=process_folder,
    inputs=gr.File(
        file_types=[".jpg", ".jpeg", ".png", ".JPG", ".JPEG", ".PNG"],
        file_count="multiple",
        label="Upload Comic Images",
    ),
    outputs=gr.File(label="Download Cleaned Zip"),
    concurrency_limit=1,
    title="Comic Text Cleaner",
    description=(
        "Upload comic images and get a zip of cleaned versions (text removed). "
        "Uses PaddleOCR for detection."
    ),
)

demo.launch()