# Hugging Face Spaces app: avatar video generation.
# (Extraction artifact: the captured page header showed the Space's build
# status banner reading "Build error"; the lines below are the app source.)
| import streamlit as st | |
| import os | |
| from glob import glob | |
| from pathlib import Path | |
| # from TTS.TTS.api import TTS | |
| from TTS.utils.synthesizer import Synthesizer | |
| from Wav2Lip.video_generator import create_video | |
| from diffusers import StableDiffusionPipeline | |
| from diffusers import LMSDiscreteScheduler | |
# ---- TTS configuration (module-level, read by get_audio / Synthesizer) ----
gpu = False  # run the synthesizer on CPU
model_path = Path(r"tss_model/model_file.pth")  # TTS model checkpoint
config_path = Path(r"tss_model/config.json")  # matching model config
vocoder_path = None  # no separate vocoder checkpoint
vocoder_config_path = None  # no separate vocoder config
model_dir = None  # model given by explicit paths, not a directory
language = "en"  # language name passed to the TTS model
file_path = "generated_audio.wav"  # where synthesized speech is written
speaker = None  # no speaker name (single-speaker model, presumably — TODO confirm)
split_sentences = True  # let the model split long text into sentences
pipe_out = None  # no extra output stream for save_wav
# Build the TTS synthesizer once at import time; it is reused by every
# "Generate audio from text" click (see generate_audio below).
# def get_synthesizer(model_path, config_path, vocoder_path, vocoder_config_path, model_dir, gpu):
synthesizer = Synthesizer(
    tts_checkpoint=model_path,
    tts_config_path=config_path,
    tts_speakers_file=None,  # no multi-speaker metadata file
    tts_languages_file=None,  # no multi-language metadata file
    vocoder_checkpoint=vocoder_path,
    vocoder_config=vocoder_config_path,
    encoder_checkpoint=None,  # no speaker-encoder model
    encoder_config=None,
    model_dir=model_dir,
    use_cuda=gpu,
)
# return synthesizer
# synthesizer = get_synthesizer(model_path, config_path, vocoder_path, vocoder_config_path, model_dir, gpu)
def get_audio(synthesizer, speaker, language, speaker_wav, split_sentences, text,
              file_path="generated_audio.wav", pipe_out=None):
    """Synthesize speech for *text* with a cloned voice and save it to disk.

    Parameters
    ----------
    synthesizer : object exposing ``tts`` and ``save_wav`` (TTS Synthesizer).
    speaker : speaker name passed to the model, or None.
    language : language name passed to the model (e.g. "en").
    speaker_wav : path to the reference audio used for voice cloning.
    split_sentences : whether the model should split *text* into sentences.
    text : the text to synthesize.
    file_path : output WAV path. Default matches the module-level constant
        the original code read implicitly, so existing callers are unchanged.
    pipe_out : optional extra stream ``save_wav`` also writes to.
    """
    wav = synthesizer.tts(
        text=text,
        speaker_name=speaker,
        language_name=language,
        speaker_wav=speaker_wav,
        reference_wav=None,
        style_wav=None,
        style_text=None,
        reference_speaker_name=None,
        split_sentences=split_sentences,
    )
    synthesizer.save_wav(wav=wav, path=file_path, pipe_out=pipe_out)
# Gallery layout: enumerate the stock avatars and voices, and work out how
# many rows a fixed-width grid needs for each (ceiling of len / n_cols).
avatar_images_list = os.listdir('avatar_images')
avatar_names_list = [fname.split('.')[0] for fname in avatar_images_list]
n_cols_avatars = 3
n_rows_avatars = len(avatar_images_list) // n_cols_avatars
if len(avatar_images_list) % n_cols_avatars:
    n_rows_avatars += 1

voice_audio_list = os.listdir('voice_audios')
voice_names_list = [fname.split('.')[0] for fname in voice_audio_list]
n_cols_voices = 3
n_rows_voices = len(voice_audio_list) // n_cols_voices
if len(voice_audio_list) % n_cols_voices:
    n_rows_voices += 1
# ---- Page layout -----------------------------------------------------------
st.set_page_config(
    page_title='Avatar service',
    layout='wide'
)
# Centered white title (st.title cannot be centered, hence raw HTML).
st.markdown("<h1 style='text-align: center; color: white;'>Avatar video generation</h1>", unsafe_allow_html=True)
# st.title('Avatar video generation')
st.subheader('Step 1: Avatar Selection')
with st.expander('Available avatars'):
    # Render the stock avatars row by row in a grid of n_cols_avatars columns.
    n_images_shown = 0
    for i in range(n_rows_avatars):
        avatar_cols_list = st.columns(n_cols_avatars)
        for j in range(n_cols_avatars):
            # Consistency fix: the flat-list index was hard-coded `j + i*3`;
            # use n_cols_avatars so changing the column count cannot desync
            # the index from the grid width (behavior identical while 3).
            idx = j + i * n_cols_avatars
            avatar_cols_list[j].image(
                os.path.join('avatar_images', avatar_images_list[idx]),
                width=150,
                caption=avatar_names_list[idx]
            )
            n_images_shown += 1
            # Stop after the last image so a partial final row isn't indexed
            # past the end of the list.
            if n_images_shown == len(avatar_images_list):
                break
def avatar_callback():
    """Remember the avatar picked in the selectbox and drop any previously
    generated avatar so the gallery choice takes effect in the preview."""
    if not st.session_state.avatar_image:
        return
    st.session_state.selected_avatar = st.session_state.avatar_image
    if os.path.isfile('generated_avatar.jpg'):
        os.remove('generated_avatar.jpg')
def uploaded_avatar_callback():
    """Persist the uploaded avatar image into the working directory.

    The file keeps its original extension so the later
    ``glob('uploaded_avatar_image.*')`` lookups find it whether it was a
    png, jpg, or jpeg.
    """
    uploaded = st.session_state.uploaded_avatar_image
    # Idiom fix: guard clause instead of the original `if ...: pass / else:`.
    if uploaded is None:
        return
    image_path = "uploaded_avatar_image" + os.path.splitext(uploaded.name)[-1]
    with open(image_path, "wb") as f:
        f.write(uploaded.getvalue())
step1_col1, step1_col2 = st.columns(2)
with step1_col1:
    # Three mutually exclusive ways to choose an avatar: pick one from the
    # gallery, upload an image, or generate one from a text prompt.
    selected_avatar = st.selectbox(
        label='Please select an avatar',
        options=avatar_names_list,
        key='avatar_image',
        on_change=avatar_callback
    )
    st.write('or')
    uploaded_image = st.file_uploader(
        label='Please upload an avatar',
        type=['png', 'jpg', 'jpeg'],
        on_change=uploaded_avatar_callback,
        key='uploaded_avatar_image'
    )
    st.write('or')
    st.text_area(
        label='Please type a prompt to generate an image for the avatar',
        key='image_prompt'
    )
    def generate_avatar():
        # Button callback: run Stable Diffusion on the typed prompt and save
        # the result where the preview and Step 3 look for it.
        if st.session_state.avatar_generator:
            # if not os.path.exists('generated_avatars'):
            # os.mkdir('generated_avatars')
            pipe = StableDiffusionPipeline.from_pretrained(pretrained_model_name_or_path='diffusion_model')
            pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
            pipe_output = pipe(
                prompt=st.session_state.image_prompt,  # What to generate
                negative_prompt="Oversaturated, blurry, low quality, do not show head",  # What NOT to generate
                height=480,
                width=640,  # Specify the image size
                guidance_scale=13,  # How strongly to follow the prompt
                num_inference_steps=40,  # How many steps to take
                # generator=generator, # Fixed random seed
            )
            pipe_output.images[0].save('generated_avatar.jpg')
        else:
            pass
    st.button(
        label='generate_avatar',
        key='avatar_generator',
        on_click=generate_avatar
    )
    # st.write(st.session_state.avatar_generator)
with step1_col2:
    # Avatar preview; priority: uploaded image > generated image > gallery pick.
    if uploaded_image is not None:
        uploaded_avatar_image_path = glob('uploaded_avatar_image.*')[0]
        st.image(uploaded_avatar_image_path, width=300)
    elif len(glob('generated_avatar.*')) != 0:
        # Consistency fix: display the glob match instead of a hard-coded
        # 'generated_avatar.jpg', so the existence check and the displayed
        # file can never disagree (generate_avatar currently saves .jpg only,
        # so behavior is unchanged).
        st.image(glob('generated_avatar.*')[0], width=300)
    else:
        st.image(os.path.join('avatar_images', avatar_images_list[avatar_names_list.index(selected_avatar)]), width=300)
st.subheader('Step 2: Audio Selection')
# st.markdown("<div title='Opa'>Option 1</div>", unsafe_allow_html=True)
# Option 1: clone a voice and synthesize speech from typed text.
option1_expander = st.expander('Option 1')
option1_expander.write(
    '''Please select or upload an audio with a voice you want to be used in the video.
    Then provide a text that will be used in the video. Afterwards click on
    <Generate audio from text> button to get the audio which will be used in the video:
    please, take into account that depending on the size of the text it may take some time.
    '''
)
with st.expander('Available voice audio'):
    # Render the stock voices row by row in a grid of n_cols_voices columns.
    n_voices_shown = 0
    for i in range(n_rows_voices):
        voice_cols_list = st.columns(n_cols_voices)
        # Bug fix: the inner loop iterated over n_cols_avatars (the avatar
        # grid's width) instead of n_cols_voices. It only worked because both
        # happen to equal 3; diverging values would mis-index or crash.
        for j in range(n_cols_voices):
            idx = j + i * n_cols_voices  # was hard-coded `j + i*3`
            voice_cols_list[j].audio(
                os.path.join('voice_audios', voice_audio_list[idx])
            )
            voice_cols_list[j].write(voice_names_list[idx])
            n_voices_shown += 1
            # Stop after the last voice so a partial final row isn't indexed
            # past the end of the list.
            if n_voices_shown == len(voice_audio_list):
                break
def voice_callback():
    """Store the gallery voice chosen in the selectbox in session state."""
    chosen = st.session_state.voice_audio
    if chosen:
        st.session_state.selected_voice = chosen
def uploaded_voice_callback():
    """Persist the uploaded cloning voice into the working directory.

    The file keeps its original extension so
    ``glob('uploaded_voice_audio.*')`` finds it whether it is .mp3 or .wav.
    """
    uploaded = st.session_state.uploaded_voice_audio
    # Idiom fix: guard clause instead of the original `if ...: pass / else:`.
    if uploaded is None:
        return
    audio_path = "uploaded_voice_audio" + os.path.splitext(uploaded.name)[-1]
    with open(audio_path, "wb") as f:
        f.write(uploaded.getvalue())
step21_col1, step21_col2 = st.columns(2)
with step21_col1:
    # Choose the voice to clone: either a stock voice or an uploaded file.
    selected_voice = st.selectbox(
        label='Please select a voice to clone',
        options=voice_names_list,
        key='voice_audio',
        on_change=voice_callback
    )
    st.write('or')
    uploaded_voice = st.file_uploader(
        "Upload a voice to clone",
        type=['mp3', 'wav'],
        key='uploaded_voice_audio',
        on_change=uploaded_voice_callback
    )
with step21_col2:
    st.markdown('<br>', unsafe_allow_html=True)
    # Preview the voice that will be cloned (an uploaded file wins over the
    # gallery selection).
    if uploaded_voice is None:
        st.audio(os.path.join('voice_audios', voice_audio_list[voice_names_list.index(selected_voice)]))
    else:
        uploaded_voice_audio_path = glob('uploaded_voice_audio.*')[0]
        st.audio(uploaded_voice_audio_path)
step21txt_col1, step21txt_col2 = st.columns(2)
with step21txt_col1:
    uploaded_txt = st.text_area(
        label='Please input text for avatar',
        key='txt4audio'
    )
def generate_audio():
    """Button callback: synthesize ``st.session_state.txt4audio`` with the
    selected (or uploaded) voice, writing generated_audio.wav."""
    if st.session_state.audio_button:
        if uploaded_voice is None:
            speaker_wav = os.path.join('voice_audios', voice_audio_list[voice_names_list.index(selected_voice)])
        else:
            # Bug fix: this was hard-coded to "uploaded_voice_audio.mp3", but
            # the uploader also accepts .wav and uploaded_voice_callback saves
            # the file under its original extension -- a .wav upload would
            # have pointed TTS at a nonexistent path. Resolve via glob, the
            # same way the preview column already does.
            speaker_wav = glob('uploaded_voice_audio.*')[0]
        get_audio(
            synthesizer, speaker, language,
            speaker_wav, split_sentences,
            text=st.session_state.txt4audio
        )
with step21txt_col2:
    st.markdown('<br>', unsafe_allow_html=True)
    st.button(
        label='Generate audio from text',
        key='audio_button',
        on_click=generate_audio
    )
# After the button callback has run, play back the freshly generated audio.
if st.session_state.audio_button:
    gen_audio_col1, _ = st.columns(2)
    gen_audio_col1.audio("generated_audio.wav")
# st.subheader('Step 2 - Option 2')
# Option 2: skip synthesis and use a ready-made audio file directly.
# NOTE(review): the name `option1_expander` is reused for the Option 2
# expander; it works (the old reference is not used again) but a distinct
# name would be clearer.
option1_expander = st.expander('Option 2')
option1_expander.write(
    '''Please, just upload an audio that will be reproduced in the video.
    '''
)
def uploaded_audio_callback():
    """Persist the directly uploaded video audio into the working directory.

    The file keeps its original extension so ``glob('uploaded_audio.*')``
    can locate it later (preview and Step 3).
    """
    uploaded = st.session_state.uploaded_audio
    # Idiom fix: guard clause instead of the original `if ...: pass / else:`.
    if uploaded is None:
        return
    audio_path = "uploaded_audio" + os.path.splitext(uploaded.name)[-1]
    with open(audio_path, "wb") as f:
        f.write(uploaded.getvalue())
step22_col1, step22_col2 = st.columns(2)
with step22_col1:
    uploaded_audio = st.file_uploader(
        "Please, upload an audio",
        type=['mp3', 'wav'],
        key='uploaded_audio',
        on_change=uploaded_audio_callback
    )
with step22_col2:
    st.markdown('<br>', unsafe_allow_html=True)
    # Preview the uploaded audio once the callback has saved it to disk.
    if uploaded_audio is None:
        pass
    else:
        st.audio(glob('uploaded_audio.*')[0])
st.subheader('Step 3')
def generate_video():
    # Button callback: pick the audio track and the face image according to
    # what the user provided, then run Wav2Lip to produce the final video.
    if st.session_state.video_button:
        # Audio priority: the Option-2 upload wins; otherwise use the audio
        # synthesized in Option 1.
        # NOTE(review): if neither an upload nor a generated audio exists,
        # glob(...)[0] raises IndexError -- confirm the UI flow prevents that.
        if uploaded_audio is None:
            voice_audio = glob('generated_audio.*')[0]
        else:
            voice_audio = glob('uploaded_audio.*')[0]
        # if st.session_state.audio_button:
        # voice_audio = glob('generated_audio.*')[0]
        # else:
        # voice_audio = os.path.join('voice_audios', voice_audio_list[voice_names_list.index(selected_voice)])
        # Face priority mirrors the Step-1 preview:
        # uploaded image > generated image > gallery pick.
        if uploaded_image is not None:
            face = glob('uploaded_avatar_image.*')[0]
        elif len(glob('generated_avatar.*')) != 0:
            face = glob('generated_avatar.*')[0]
        else:
            face = os.path.join('avatar_images', avatar_images_list[avatar_names_list.index(selected_avatar)])
        create_video(voice_audio, face)
step3_button_col1, _, _ = st.columns([3, 4, 5])
with step3_button_col1:
    st.button(
        label='Generate video',
        key='video_button',
        on_click=generate_video
    )
# After the callback has produced generated_video.mp4, show it inline.
if st.session_state.video_button:
    step3_col1, _, _ = st.columns([4, 3, 5])
    with step3_col1:
        st.video(
            # os.path.join('avatar_videos', 'generated_video.mp4')
            'generated_video.mp4'
        )
# with step3_col2:
# # st.markdown('<br>', unsafe_allow_html=True)
# # with open(os.path.join('avatar_videos', 'generated_video.mp4'), 'rb') as file:
# with open('generated_video.mp4', 'rb') as file:
# st.download_button(
# label='Download generated video',
# data=file,
# file_name='avatar_video.mp4',
# mime='video/mp4'
# )