Spaces:
Build error
Build error
| import streamlit as st | |
| import torch | |
| from transformers import pipeline | |
| from moviepy.editor import * | |
| from moviepy.video.tools.subtitles import SubtitlesClip | |
| from TTS.api import TTS | |
| import tempfile | |
| import os | |
| # Initialize Hugging Face models | |
@st.cache_resource
def load_models():
    """Load the text-to-video pipeline and the voice-cloning TTS model.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Caching the result as a shared resource means the models
    are instantiated once per server process instead of on every rerun.

    Returns:
        tuple: ``(video_gen, tts_model)`` -- the object returned by
        ``transformers.pipeline`` and the ``TTS`` instance.
    """
    # NOTE(review): confirm that 'text-to-video-generation' is a task name
    # transformers.pipeline() accepts -- if it is not, this call raises at
    # startup. Zeroscope checkpoints are usually loaded through diffusers;
    # verify against the model card before relying on this.
    video_gen = pipeline('text-to-video-generation', model='cerspense/zeroscope_v2_576w')
    tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False)
    return video_gen, tts_model
video_gen, tts_model = load_models()

# Streamlit app
st.title("Text-to-Video with Voice Cloning")

# User input
input_text = st.text_area("Enter text to generate video:", height=150)
voice_file = st.file_uploader("Upload your voice sample (WAV format):", type=["wav"])

if st.button("Generate Video"):
    if not (input_text and voice_file):
        # Tell the user why nothing happened instead of silently ignoring
        # the click.
        st.warning("Please enter some text and upload a WAV voice sample first.")
    else:
        with st.spinner("Generating video..."):
            # All intermediate files live in one private directory that is
            # removed even when a step raises. This replaces the deprecated,
            # race-prone tempfile.mktemp() and the success-path-only cleanup.
            with tempfile.TemporaryDirectory() as work_dir:
                voice_path = os.path.join(work_dir, "voice_sample.wav")
                audio_filename = os.path.join(work_dir, "speech.wav")
                final_video_path = os.path.join(work_dir, "final.mp4")

                # Generate video frames
                video_output = video_gen(input_text, num_frames=30)
                video_tensor = video_output["video"]
                # NOTE(review): assumes the pipeline returns a NumPy array of
                # frames scaled to [0, 1]; a torch.Tensor has no .astype() --
                # confirm against the pipeline's actual output.
                video_np = (video_tensor * 255).astype('uint8')

                # Generate cloned voice audio from the uploaded sample
                with open(voice_path, 'wb') as f:
                    f.write(voice_file.getvalue())
                tts_model.tts_to_file(
                    text=input_text,
                    speaker_wav=voice_path,
                    language='en',
                    file_path=audio_filename,
                )

                # 0.1 s per frame matches the 10 fps used for encoding.
                clips = [ImageClip(frame).set_duration(0.1) for frame in video_np]
                video_clip = concatenate_videoclips(clips, method="compose")
                audio_clip = AudioFileClip(audio_filename)
                try:
                    # Attach the audio and encode once, rather than writing a
                    # silent file, re-reading it and encoding a second time.
                    video_clip.set_audio(audio_clip).write_videofile(final_video_path, fps=10)
                finally:
                    # Release ffmpeg readers/file handles so the temporary
                    # directory can be deleted on every platform.
                    audio_clip.close()
                    video_clip.close()

                # Read the result into memory before the directory is removed
                # so the player never depends on a file that no longer exists.
                with open(final_video_path, 'rb') as f:
                    video_bytes = f.read()

            # Display video
            st.video(video_bytes)