# paralips/app.py
import gradio as gr
import torch
import yaml
import os
from pathlib import Path
from modules.fslip import FastLip
from modules.base_model import BaseModel
import numpy as np
import cv2
from moviepy.editor import VideoFileClip
import tempfile
# Load configuration
def load_config():
    with open('configs/lipgen/grid/lipgen_grid.yaml', 'r') as f:
        config = yaml.safe_load(f)
    return config
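
# Note: of the values loaded from lipgen_grid.yaml, only config['arch'] is
# consumed by this script (see init_model below); the trained weights come
# from the checkpoint, not the config.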
# Initialize model
def init_model():
    config = load_config()
    model = FastLip(
        arch=config['arch'],
        dictionary=None,  # We'll need to implement a simple dictionary
        out_dims=None
    )
    # Load checkpoint
    checkpoint = torch.load('checkpoints/lipgen_grid.pt', map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model
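
# Note: process_video() below calls init_model() on every request, so the
# checkpoint is reloaded for each dubbing job. For a long-running app the
# model could instead be initialized once at module import and reused.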
# Process video frames
def process_video(video_path, target_language):
    model = init_model()

    # Load video
    video = VideoFileClip(video_path)
    frames = []
    for frame in video.iter_frames():
        # Resize frame to match model input size (80x160)
        frame = cv2.resize(frame, (160, 80))
        frames.append(frame)
    fps = video.fps or 25.0  # preserve the source frame rate, fall back to 25 fps
    video.close()

    # Convert frames to a tensor: (T, H, W, C) -> (T, C, H, W), scaled to [0, 1]
    frames = torch.FloatTensor(np.array(frames)).permute(0, 3, 1, 2) / 255.0

    # Process with model
    with torch.no_grad():
        # TODO: Implement text processing for target language
        # For now, we'll just return the processed frames
        output = model(frames.unsqueeze(0))

    # Convert output to video
    output_frames = output['lip_out'].squeeze(0).cpu().numpy()
    output_frames = np.clip(output_frames * 255, 0, 255).astype(np.uint8)

    # Save to temporary file
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, 'output.mp4')

    # Create video from frames
    height, width = output_frames.shape[2:4]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    for frame in output_frames:
        # (C, H, W) -> (H, W, C), then RGB -> BGR for OpenCV
        frame = np.ascontiguousarray(frame.transpose(1, 2, 0))
        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    out.release()
    return output_path
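
# Example (hypothetical local path):
#   dubbed_path = process_video("samples/speaker1.mp4", "spanish")
#   # dubbed_path points to an mp4 written under a fresh temporary directory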
# Create Gradio interface
def create_interface():
    with gr.Blocks(title="ParaLip Video Dubbing") as demo:
        gr.Markdown("""
        # ParaLip Video Dubbing
        Upload a video and select a target language to create a dubbed version.
        """)

        with gr.Row():
            with gr.Column():
                video_input = gr.Video(label="Upload Video")
                language = gr.Dropdown(
                    choices=["spanish", "french", "german", "italian", "portuguese"],
                    value="spanish",
                    label="Target Language"
                )
                dub_button = gr.Button("Dub Video")
            with gr.Column():
                status = gr.Textbox(label="Status")
                video_output = gr.Video(label="Dubbed Video")
        def process_video_wrapper(video_file, target_lang):
            if video_file is None:
                return "Please upload a video file", None
            try:
                # gr.Video hands the upload over as a path to a temporary file
                # on disk, so it can be passed to process_video directly.
                output_path = process_video(video_file, target_lang)
                return "Dubbing completed successfully!", output_path
            except Exception as e:
                return f"Error during dubbing: {e}", None
        dub_button.click(
            fn=process_video_wrapper,
            inputs=[video_input, language],
            outputs=[status, video_output]
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
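
# Running `python app.py` starts the Gradio server; by default Gradio serves
# the UI at http://127.0.0.1:7860 (hosting platforms such as Hugging Face
# Spaces typically override the host/port via environment variables).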