import os
import tempfile

import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Font file used for the overlay; resolved by Pillow's normal font lookup.
FONT_PATH = "华文琥珀.ttf"
# Mask pixels brighter than this value are treated as part of the text region.
MASK_THRESHOLD = 200
# RGBA colour of the rendered text (opaque yellow).
TEXT_FILL = (255, 255, 0, 255)
# Frame rate used when the source container reports none.
FALLBACK_FPS = 25.0


def is_horizontal_angle(angle, threshold=15):
    """Return True when *angle* (degrees) is within *threshold* of 0 or 180."""
    angle = angle % 180  # normalise into [0, 180)
    return angle < threshold or (180 - angle) < threshold


def _largest_mask_rect(mask_frame):
    """Return ``cv2.minAreaRect`` of the largest bright mask region, or None."""
    if mask_frame.ndim == 3:
        mask_frame = cv2.cvtColor(mask_frame, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(
        mask_frame, MASK_THRESHOLD, 255, cv2.THRESH_BINARY
    )
    contours, _ = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        return None
    return cv2.minAreaRect(max(contours, key=cv2.contourArea))


def _scan_reference(cap_video, cap_mask, text):
    """Scan both streams for the font size and rotation angle to reuse.

    The font size comes from the first frame whose mask has a contour. The
    angle is the first near-horizontal one; failing that, the first angle
    seen at all; failing that, 0.

    Returns:
        ``(font_size, angle)`` where ``font_size`` is None if no mask frame
        contained a contour.
    """
    font_size = None
    first_angle = None
    while True:
        # The video frame itself is not needed here; grab() only advances the
        # stream so both captures stay in step.
        if not cap_video.grab():
            break
        ok_mask, mask_frame = cap_mask.read()
        if not ok_mask:
            break

        rect = _largest_mask_rect(mask_frame)
        if rect is None:
            continue

        (rect_w, rect_h), angle = rect[1], rect[2]
        if font_size is None:
            font_size = int(
                min(rect_w * 0.8 / max(1, len(text)), rect_h * 0.8)
            )
        if first_angle is None:
            first_angle = angle
        if is_horizontal_angle(angle):
            return font_size, angle

    if first_angle is None:
        first_angle = 0
    return font_size, first_angle


def _load_font(font_size):
    """Load the overlay font, falling back to Pillow's built-in default."""
    if font_size is not None and font_size > 0:
        try:
            return ImageFont.truetype(FONT_PATH, font_size)
        except (OSError, ImportError):
            # Font file missing/unreadable or FreeType unavailable.
            pass
    return ImageFont.load_default()


def _render_text_overlay(text, font, rect_size, angle):
    """Render *text* centred on a transparent canvas and rotate it.

    Returns None when the mask rectangle is too small to hold a canvas.
    """
    canvas_w, canvas_h = int(rect_size[0]), int(rect_size[1])
    if canvas_w <= 0 or canvas_h <= 0:
        return None

    canvas = Image.new("RGBA", (canvas_w, canvas_h), (0, 0, 0, 0))
    draw = ImageDraw.Draw(canvas)

    # textbbox reports where the ink lands relative to the anchor; subtract
    # its origin so the ink itself (not the anchor) ends up centred.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    pos_x = (rect_size[0] - (right - left)) / 2 - left
    pos_y = (rect_size[1] - (bottom - top)) / 2 - top
    draw.text((pos_x, pos_y), text, fill=TEXT_FILL, font=font)

    # NOTE(review): OpenCV documents minAreaRect angles as clockwise while
    # Image.rotate turns counter-clockwise -- confirm the sign is intended.
    return canvas.rotate(
        angle, expand=True, center=(rect_size[0] / 2, rect_size[1] / 2)
    )


def process_video_with_mask(video_path, mask_path, text="Joker"):
    """Overlay *text* onto a video at the location given by a mask video.

    For every frame the largest bright region of the mask is located and the
    text is pasted centred on it, using one font size and one rotation angle
    chosen up front so the text does not jitter between frames.

    Args:
        video_path: Path of the source video.
        mask_path: Path of the mask video (bright pixels mark the text area).
        text: Text to draw.

    Returns:
        Path of the generated ``.mp4`` file inside the system temp directory.

    Raises:
        gr.Error: If either input is missing or cannot be opened.
    """
    if not video_path or not mask_path:
        raise gr.Error("请同时上传原始视频和Mask视频")
    text = text or ""

    cap_video = cv2.VideoCapture(video_path)
    cap_mask = cv2.VideoCapture(mask_path)
    out = None
    try:
        if not cap_video.isOpened() or not cap_mask.isOpened():
            raise gr.Error("无法打开输入视频或Mask视频")

        fps = cap_video.get(cv2.CAP_PROP_FPS)
        if not fps or fps <= 0:
            fps = FALLBACK_FPS
        width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Pass 1: pick the font size and rotation angle reused for all frames.
        fixed_font_size, fixed_angle = _scan_reference(
            cap_video, cap_mask, text
        )
        font = _load_font(fixed_font_size)  # loaded once, not per frame

        # Rewind both streams for the rendering pass.
        cap_video.set(cv2.CAP_PROP_POS_FRAMES, 0)
        cap_mask.set(cv2.CAP_PROP_POS_FRAMES, 0)

        # One output file per call so concurrent requests cannot overwrite
        # each other's result.
        fd, output_path = tempfile.mkstemp(prefix="output_", suffix=".mp4")
        os.close(fd)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        # Pass 2: draw the text on every frame.
        while True:
            ok_video, frame = cap_video.read()
            ok_mask, mask_frame = cap_mask.read()
            if not ok_video or not ok_mask:
                break

            rect = _largest_mask_rect(mask_frame)
            overlay = None
            if rect is not None and text:
                overlay = _render_text_overlay(
                    text, font, rect[1], fixed_angle
                )

            if overlay is not None:
                center = rect[0]
                pil_image = Image.fromarray(
                    cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                )
                paste_x = int(center[0] - overlay.width / 2)
                paste_y = int(center[1] - overlay.height / 2)
                # The overlay doubles as its own alpha mask.
                pil_image.paste(overlay, (paste_x, paste_y), overlay)
                frame = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

            out.write(frame)
    finally:
        cap_video.release()
        cap_mask.release()
        if out is not None:
            out.release()

    return output_path


# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## 视频文字替换工具")

    with gr.Row():
        with gr.Column():
            video_input = gr.Video(label="原始视频")
            mask_input = gr.Video(label="Mask视频")
            text_input = gr.Textbox(label="要添加的文字", value="示例文字")
            submit_btn = gr.Button("处理视频")
        with gr.Column():
            video_output = gr.Video(label="处理结果")

    submit_btn.click(
        fn=process_video_with_mask,
        inputs=[video_input, mask_input, text_input],
        outputs=video_output,
    )

    gr.Examples(
        [
            [
                "examples/20250511-013442_3331156457_.mp4",
                "examples/20250511-013442_3331156457__temp.mp4_alpha_temp.mp4",
                "今天天气怎么样?",
            ],
            [
                "examples/20250510-123229_332745243_.mp4",
                "examples/20250510-123229_332745243__temp.mp4_alpha_temp.mp4",
                "今天天气怎么样?",
            ],
            [
                "examples/20250510-121949_3109246688_.mp4",
                "examples/20250510-121949_3109246688__temp.mp4_alpha_temp.mp4",
                "今天天气怎么样?",
            ],
        ],
        [video_input, mask_input, text_input],
    )


if __name__ == "__main__":
    demo.launch(share=True)