# paralips/app.py
import gradio as gr
import torch
import yaml
import os
from pathlib import Path
from modules.fslip import FastLip
from modules.base_model import BaseModel
import numpy as np
import cv2
from moviepy.editor import VideoFileClip
import tempfile
# Load configuration
def load_config():
    with open('configs/lipgen/grid/lipgen_grid.yaml', 'r') as f:
        config = yaml.safe_load(f)
    return config
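
# Note: of the values loaded from lipgen_grid.yaml, only config['arch'] is
# consumed by this script (see init_model below); the trained weights come
# from the checkpoint, not the config.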
# Initialize model
def init_model():
    config = load_config()
    model = FastLip(
        arch=config['arch'],
        dictionary=None,  # We'll need to implement a simple dictionary
        out_dims=None
    )
    # Load checkpoint
    checkpoint = torch.load('checkpoints/lipgen_grid.pt', map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model
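
# Note: process_video() below calls init_model() on every request, so the
# checkpoint is reloaded for each dubbing job. For a long-running app the
# model could instead be initialized once at module import and reused.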
# Process video frames
def process_video(video_path, target_language):
    model = init_model()

    # Load video
    video = VideoFileClip(video_path)
    frames = []
    for frame in video.iter_frames():
        # Resize frame to match model input size (80x160)
        frame = cv2.resize(frame, (160, 80))
        frames.append(frame)
    fps = video.fps or 25.0  # preserve the source frame rate, fall back to 25 fps
    video.close()

    # Convert frames to a tensor: (T, H, W, C) -> (T, C, H, W), scaled to [0, 1]
    frames = torch.FloatTensor(np.array(frames)).permute(0, 3, 1, 2) / 255.0

    # Process with model
    with torch.no_grad():
        # TODO: Implement text processing for target language
        # For now, we'll just return the processed frames
        output = model(frames.unsqueeze(0))

    # Convert output to video
    output_frames = output['lip_out'].squeeze(0).cpu().numpy()
    output_frames = np.clip(output_frames * 255, 0, 255).astype(np.uint8)

    # Save to temporary file
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, 'output.mp4')

    # Create video from frames
    height, width = output_frames.shape[2:4]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    for frame in output_frames:
        # (C, H, W) -> (H, W, C), then RGB -> BGR for OpenCV
        frame = np.ascontiguousarray(frame.transpose(1, 2, 0))
        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    out.release()
    return output_path
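
# Example (hypothetical local path):
#   dubbed_path = process_video("samples/speaker1.mp4", "spanish")
#   # dubbed_path points to an mp4 written under a fresh temporary directory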
# Create Gradio interface
def create_interface():
    with gr.Blocks(title="ParaLip Video Dubbing") as demo:
        gr.Markdown("""
        # ParaLip Video Dubbing
        Upload a video and select a target language to create a dubbed version.
        """)

        with gr.Row():
            with gr.Column():
                video_input = gr.Video(label="Upload Video")
                language = gr.Dropdown(
                    choices=["spanish", "french", "german", "italian", "portuguese"],
                    value="spanish",
                    label="Target Language"
                )
                dub_button = gr.Button("Dub Video")
            with gr.Column():
                status = gr.Textbox(label="Status")
                video_output = gr.Video(label="Dubbed Video")
        def process_video_wrapper(video_file, target_lang):
            if video_file is None:
                return "Please upload a video file", None
            try:
                # gr.Video hands the upload over as a path to a temporary file
                # on disk, so it can be passed to process_video directly.
                output_path = process_video(video_file, target_lang)
                return "Dubbing completed successfully!", output_path
            except Exception as e:
                return f"Error during dubbing: {e}", None
        dub_button.click(
            fn=process_video_wrapper,
            inputs=[video_input, language],
            outputs=[status, video_output]
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
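
# Running `python app.py` starts the Gradio server; by default Gradio serves
# the UI at http://127.0.0.1:7860 (hosting platforms such as Hugging Face
# Spaces typically override the host/port via environment variables).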