paralips / app.py
arsh121's picture
Add ParaLip dubbing interface with Gradio
5421a47
import gradio as gr
import torch
import yaml
import os
from pathlib import Path
from modules.fslip import FastLip
from modules.base_model import BaseModel
import numpy as np
import cv2
from moviepy.editor import VideoFileClip
import tempfile
# Load configuration
def load_config():
    """Parse the lip-generation YAML configuration file.

    Returns:
        The object produced by ``yaml.safe_load`` for
        ``configs/lipgen/grid/lipgen_grid.yaml`` (resolved relative to the
        current working directory).
    """
    config_file = Path('configs/lipgen/grid/lipgen_grid.yaml')
    with config_file.open('r') as stream:
        return yaml.safe_load(stream)
# Initialize model
def init_model():
    """Construct a FastLip model, load its checkpoint and switch it to eval mode.

    The architecture is taken from the ``arch`` entry of the loaded config;
    the weights come from the ``state_dict`` entry of
    ``checkpoints/lipgen_grid.pt``, mapped onto the CPU.

    Returns:
        The FastLip instance with weights loaded, in evaluation mode.
    """
    arch = load_config()['arch']

    # We'll need to implement a simple dictionary; until then none is passed.
    net = FastLip(arch=arch, dictionary=None, out_dims=None)

    # NOTE(review): torch.load unpickles the file, so the checkpoint must come
    # from a trusted source.
    state = torch.load('checkpoints/lipgen_grid.pt', map_location='cpu')['state_dict']
    net.load_state_dict(state)

    net.eval()
    return net
# Process video frames
def process_video(video_path, target_language):
    """Run the lip model over a video and write the result to an MP4 file.

    Args:
        video_path: Location of the input video (``str`` or ``os.PathLike``).
        target_language: Requested dubbing language. Currently unused: text
            processing for the target language is not implemented yet.

    Returns:
        Path (``str``) to ``output.mp4`` inside a freshly created temporary
        directory. The directory is not removed here; the caller owns it.

    Raises:
        ValueError: If the input video yields no frames.
        RuntimeError: If the output video writer cannot be opened.
    """
    model = init_model()

    # Older moviepy releases call string methods on the filename, so a
    # pathlib.Path is converted up front.
    video = VideoFileClip(str(video_path))
    try:
        # Keep the source frame rate so playback speed is preserved; fall back
        # to the previous fixed value if the clip does not report one.
        fps = video.fps or 25.0
        # cv2.resize takes (width, height): frames become 160 wide, 80 high.
        frames = [cv2.resize(frame, (160, 80)) for frame in video.iter_frames()]
    finally:
        # Always release the clip's underlying reader, even on failure.
        video.close()

    if not frames:
        raise ValueError("Input video contains no frames")

    # (T, H, W, C) -> (T, C, H, W), scaled by 1/255.
    frames = torch.FloatTensor(np.array(frames)).permute(0, 3, 1, 2) / 255.0

    with torch.no_grad():
        # TODO: Implement text processing for target language
        # For now the frames alone are fed to the model, with a leading
        # batch dimension added.
        output = model(frames.unsqueeze(0))

    output_frames = output['lip_out'].squeeze(0).cpu().numpy()
    # Clamp before the cast: values outside [0, 1] would otherwise wrap
    # around when converted to uint8.
    output_frames = np.clip(output_frames * 255, 0, 255).astype(np.uint8)

    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, 'output.mp4')

    # assumes 'lip_out' is (T, C, H, W) after the squeeze - TODO confirm
    height, width = output_frames.shape[2:4]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        out.release()
        raise RuntimeError(f"Could not open video writer for {output_path}")
    try:
        # NOTE(review): moviepy yields RGB frames while cv2.VideoWriter expects
        # BGR; if 'lip_out' keeps the input channel order the colours will be
        # swapped - confirm against the model's output format.
        for frame in output_frames:
            out.write(frame.transpose(1, 2, 0))  # (C, H, W) -> (H, W, C)
    finally:
        out.release()

    return output_path
# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for video dubbing.

    The layout has an input column (video upload, target-language dropdown,
    "Dub Video" button) and an output column (status text, dubbed video).
    Clicking the button runs ``process_video`` on the uploaded video.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """

    def process_video_wrapper(video_file, target_lang):
        """Run ``process_video`` and translate the outcome into UI values.

        Args:
            video_file: Value delivered by the video component. Gradio passes
                a filepath string; a file-like object with ``read()`` is also
                accepted and spooled to a temporary file first.
            target_lang: Language selected in the dropdown.

        Returns:
            ``(status_message, output_video_path_or_None)``.
        """
        if video_file is None:
            return "Please upload a video file", None

        spooled_path = None  # set only when we create a temp copy ourselves
        try:
            if hasattr(video_file, "read"):
                # Unique temp file instead of a fixed name in the working
                # directory, so concurrent requests cannot clobber each other.
                with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
                    tmp.write(video_file.read())
                spooled_path = Path(tmp.name)
                source_path = str(spooled_path)
            else:
                # Already a path on disk: use the upload in place.
                source_path = str(video_file)

            output_path = process_video(source_path, target_lang)
            return "Dubbing completed successfully!", output_path
        except Exception as e:
            # UI boundary: report the failure in the status box rather than
            # letting the exception escape the callback.
            return f"Error during dubbing: {str(e)}", None
        finally:
            # Remove only the copy made here, on success and on failure alike.
            if spooled_path is not None and spooled_path.exists():
                spooled_path.unlink()

    with gr.Blocks(title="ParaLip Video Dubbing") as demo:
        gr.Markdown("""
# ParaLip Video Dubbing
Upload a video and select a target language to create a dubbed version.
""")
        with gr.Row():
            with gr.Column():
                video_input = gr.Video(label="Upload Video")
                language = gr.Dropdown(
                    choices=["spanish", "french", "german", "italian", "portuguese"],
                    value="spanish",
                    label="Target Language"
                )
                dub_button = gr.Button("Dub Video")
            with gr.Column():
                status = gr.Textbox(label="Status")
                video_output = gr.Video(label="Dubbed Video")

        # The wrapper's return order matches the outputs list below.
        dub_button.click(
            fn=process_video_wrapper,
            inputs=[video_input, language],
            outputs=[status, video_output]
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the Blocks app and launch it.
    demo = create_interface()
    demo.launch()