ZeeAI1 committed
Commit a43841a · verified · 1 Parent(s): e57204f

Update app.py

Files changed (1): app.py +5 -55
app.py CHANGED
@@ -1,62 +1,12 @@
 import streamlit as st
-import torch
-from transformers import pipeline
-from moviepy.editor import *
-from moviepy.video.tools.subtitles import SubtitlesClip
-from TTS.api import TTS
-import tempfile
-import os
 
-# Initialize Hugging Face models
-@st.cache_resource
+@st.cache_resource()
 def load_models():
-    video_gen = pipeline('Zee_text-to-video-generation', model='cerspense/zeroscope_v2_576w')
+    from transformers import pipeline
+    from TTS.api import TTS
+
+    video_gen = pipeline('text-to-video-generation', model='cerspense/zeroscope_v2_576w')
     tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False)
     return video_gen, tts_model
 
 video_gen, tts_model = load_models()
-
-# Streamlit app
-st.title("Zee_Text-to-Video with Voice Cloning")
-
-# User input
-input_text = st.text_area("Enter text to generate video:", height=150)
-voice_file = st.file_uploader("Upload your voice sample (WAV format):", type=["wav"])
-
-if st.button("Generate Video") and input_text and voice_file:
-    with st.spinner("Generating video..."):
-        # Generate video frames
-        video_output = video_gen(input_text, num_frames=30)
-        video_tensor = video_output["video"]
-        video_np = (video_tensor * 255).astype('uint8')
-
-        # Save video
-        video_filename = tempfile.mktemp(suffix=".mp4")
-        clips = [ImageClip(frame).set_duration(0.1) for frame in video_np]
-        video_clip = concatenate_videoclips(clips, method="compose")
-        video_clip.write_videofile(video_filename, fps=10)
-
-        # Generate cloned voice audio
-        audio_filename = tempfile.mktemp(suffix=".wav")
-        voice_path = tempfile.mktemp(suffix=".wav")
-        with open(voice_path, 'wb') as f:
-            f.write(voice_file.getvalue())
-
-        tts_model.tts_to_file(text=input_text, speaker_wav=voice_path, language='en', file_path=audio_filename)
-
-        # Combine audio and video
-        final_video_path = tempfile.mktemp(suffix=".mp4")
-        video_clip = VideoFileClip(video_filename)
-        audio_clip = AudioFileClip(audio_filename)
-        video_clip = video_clip.set_audio(audio_clip)
-        video_clip.write_videofile(final_video_path, fps=10)
-
-        # Display video
-        st.video(final_video_path)
-
-        # Cleanup
-        os.remove(video_filename)
-        os.remove(audio_filename)
-        os.remove(voice_path)
-        os.remove(final_video_path)
-
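
Note on the committed code: transformers' pipeline() does not expose a text-to-video task, so both the old 'Zee_text-to-video-generation' string and the new 'text-to-video-generation' string will raise at load time. cerspense/zeroscope_v2_576w is a diffusers checkpoint, and the usual loading path goes through DiffusionPipeline instead. A minimal sketch, assuming diffusers and torch are installed and a CUDA GPU is available; the load_video_pipeline helper is hypothetical and not part of this commit:

import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video

def load_video_pipeline():
    # Assumption: zeroscope is a diffusers text-to-video model, not a
    # transformers pipeline task, so it is loaded via DiffusionPipeline.
    pipe = DiffusionPipeline.from_pretrained(
        "cerspense/zeroscope_v2_576w", torch_dtype=torch.float16
    )
    return pipe.to("cuda")  # the 576w model is impractical on CPU

pipe = load_video_pipeline()
result = pipe("an astronaut riding a horse", num_frames=24)
frames = result.frames  # recent diffusers versions nest this: result.frames[0]
video_path = export_to_video(frames)  # writes an .mp4 and returns its path

As for the change itself: moving the transformers and TTS imports inside the @st.cache_resource-decorated loader defers the heavy imports until the cached loader first runs, and the cache ensures the models are constructed once per process rather than on every Streamlit rerun.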