|
import gradio as gr |
|
import torch |
|
import yaml |
|
import os |
|
from pathlib import Path |
|
from modules.fslip import FastLip |
|
from modules.base_model import BaseModel |
|
import numpy as np |
|
import cv2 |
|
from moviepy.editor import VideoFileClip |
|
import tempfile |
|
|
|
|
|
def load_config(config_path='configs/lipgen/grid/lipgen_grid.yaml'):
    """Load the lip-generation YAML configuration.

    Args:
        config_path: Location of the YAML file. Defaults to the path this
            function previously hard-coded, so existing zero-argument
            callers behave as before.

    Returns:
        The parsed YAML document, exactly as produced by ``yaml.safe_load``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        yaml.YAMLError: If the file is not valid YAML.
    """
    # Explicit UTF-8 keeps parsing independent of the platform's default
    # text encoding.
    with open(config_path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)
|
|
|
|
|
def init_model():
    """Construct the FastLip network and restore its saved weights.

    The architecture comes from the ``'arch'`` entry of the configuration
    returned by ``load_config()``. Weights are read from
    ``checkpoints/lipgen_grid.pt`` (mapped onto the CPU) under the
    ``'state_dict'`` key, and the model is switched to evaluation mode
    before being returned.

    Returns:
        The ``FastLip`` instance, in eval mode.
    """
    arch = load_config()['arch']
    network = FastLip(arch=arch, dictionary=None, out_dims=None)

    # NOTE(review): torch.load unpickles the file, so only point this at
    # checkpoints from a trusted source.
    saved = torch.load('checkpoints/lipgen_grid.pt', map_location='cpu')
    network.load_state_dict(saved['state_dict'])

    network.eval()
    return network
|
|
|
|
|
def process_video(video_path, target_language):
    """Run the lip model over a video and write its output frames to an MP4.

    Args:
        video_path: Path of the input video (``str`` or path-like).
        target_language: Accepted for interface compatibility; this function
            does not currently use it.

    Returns:
        Path (``str``) of ``output.mp4`` inside a freshly created temporary
        directory. The directory is not removed here; the caller owns it.

    Raises:
        ValueError: If the input video yields no frames, or the model output
            does not have the expected 4-D per-clip layout.
        RuntimeError: If OpenCV cannot open the output file for writing.
    """
    # NOTE(review): the model is rebuilt from disk on every call.
    model = init_model()

    # str() so that pathlib.Path inputs are handed over as plain strings.
    video = VideoFileClip(str(video_path))
    try:
        # cv2.resize takes (width, height): frames become 160 wide, 80 high.
        frames = [cv2.resize(frame, (160, 80)) for frame in video.iter_frames()]
    finally:
        # Release the underlying reader even if decoding fails.
        video.close()

    if not frames:
        raise ValueError("Input video contains no frames")

    # (T, H, W, C) uint8 -> (T, C, H, W) float scaled to [0, 1].
    frames = torch.FloatTensor(np.array(frames)).permute(0, 3, 1, 2) / 255.0

    with torch.no_grad():
        # Add a leading batch dimension for the model call.
        output = model(frames.unsqueeze(0))

    output_frames = output['lip_out'].squeeze(0).cpu().numpy()
    if output_frames.ndim != 4:
        raise ValueError(
            f"Expected model output of shape (T, C, H, W), got {output_frames.shape}"
        )

    # Clip before the uint8 cast: values outside [0, 1] would otherwise wrap
    # around instead of saturating.
    output_frames = (np.clip(output_frames, 0.0, 1.0) * 255).astype(np.uint8)

    # (T, C, H, W) -> (T, H, W, C), the per-frame layout OpenCV writes.
    output_frames = output_frames.transpose(0, 2, 3, 1)
    height, width, channels = output_frames.shape[1:4]

    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, 'output.mp4')

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, 25.0, (width, height))
    try:
        if not out.isOpened():
            raise RuntimeError(f"Could not open video writer for {output_path}")
        for frame in output_frames:
            frame = np.ascontiguousarray(frame)
            if channels == 3:
                # MoviePy supplies RGB frames, while VideoWriter expects BGR.
                # NOTE(review): assumes the model keeps the channel order of
                # its input - confirm against the training pipeline.
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            out.write(frame)
    finally:
        out.release()

    return output_path
|
|
|
|
|
def create_interface():
    """Build the Gradio Blocks UI for the video dubbing demo.

    The layout has a video upload, a target-language dropdown and a
    "Dub Video" button on the left, and a status box plus the resulting
    video on the right. Clicking the button runs ``process_video`` on the
    uploaded file.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="ParaLip Video Dubbing") as demo:
        gr.Markdown("""
        # ParaLip Video Dubbing
        Upload a video and select a target language to create a dubbed version.
        """)

        with gr.Row():
            with gr.Column():
                video_input = gr.Video(label="Upload Video")
                language = gr.Dropdown(
                    choices=["spanish", "french", "german", "italian", "portuguese"],
                    value="spanish",
                    label="Target Language"
                )
                dub_button = gr.Button("Dub Video")

            with gr.Column():
                status = gr.Textbox(label="Status")
                video_output = gr.Video(label="Dubbed Video")

        def process_video_wrapper(video_file, target_lang):
            """Click handler: return ``(status message, output video path)``."""
            if video_file is None:
                return "Please upload a video file", None

            # gr.Video hands the callback a filepath string, not an open
            # file, so use it directly instead of copying through a shared
            # "temp_video.mp4". Fall back to ``.name`` for file-like inputs.
            if isinstance(video_file, str):
                video_path = video_file
            else:
                video_path = getattr(video_file, "name", video_file)

            try:
                output_path = process_video(str(video_path), target_lang)
            except Exception as e:
                # UI boundary: report the failure in the status box rather
                # than crashing the request.
                return f"Error during dubbing: {str(e)}", None

            return "Dubbing completed successfully!", output_path

        dub_button.click(
            fn=process_video_wrapper,
            inputs=[video_input, language],
            outputs=[status, video_output]
        )

    return demo
|
|
|
if __name__ == "__main__":
    # Script entry point: assemble the UI, then launch it.
    demo = create_interface()
    demo.launch()