Spaces:
Paused
Paused
| import streamlit as st | |
| from kokoro import KPipeline | |
| import soundfile as sf | |
| import io | |
| import os | |
| from textblob import TextBlob # We will use TextBlob for translation | |
| # Install espeak-ng if not installed | |
| # if not os.system("which espeak-ng"): | |
| # st.text("espeak-ng already installed.") | |
| # else: | |
| # os.system("apt-get -qq -y install espeak-ng") | |
| # st.text("Installing espeak-ng...") | |
# Streamlit App UI Setup
st.title("Text-to-Speech with Kokoro")

# Collapsible list of sample sentences in several of the supported languages
# that users can copy into the text area below.
# Each entry is labelled with the *language* it is written in (the Italian
# entry previously said "In Italy", which names a country, not a language).
with st.expander("Sample Prompt!"):
    st.markdown("""
- My name is Shukdev. (In English)
- Mi nombre es Shukdev. (In Spanish)
- Je m'appelle Choukdev. (In French)
- मेरा नाम शुकदेव है. (In Hindi)
- Il mio nome è Shukdev. (In Italian)
- Meu nome é Sukhdev. (In Portuguese, Brazil)
- 我叫苏赫德夫。(In Chinese)
- 私の名前はスクデフです。(In Japanese)
""")
# Sidebar: a heading followed by step-by-step usage instructions.
# The markdown is kept in a named constant so the rendering calls stay short.
_SIDEBAR_INSTRUCTIONS = """
### How to Use the Text-to-Speech App:
1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
2. **Select Language**:
   - Choose the language of the text you are entering. Available options include:
     - 🇺🇸 American English (`a`)
     - 🇬🇧 British English (`b`)
     - 🇪🇸 Spanish (`e`)
     - 🇫🇷 French (`f`)
     - 🇮🇳 Hindi (`h`)
     - 🇮🇹 Italian (`i`)
     - 🇧🇷 Brazilian Portuguese (`p`)
     - 🇨🇳 Mandarin Chinese (`z`)
     - 🇯🇵 Japanese (`j`)
3. **Select Voice**:
   - Choose the voice style for the speech. You can pick different voices based on tone and gender, such as `af_heart`, `af_joy`, etc.
4. **Adjust Speed**:
   - Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
5. **Generate Speech**:
   - After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
6. **Download**:
   - Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.
Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
"""

_sidebar = st.sidebar
_sidebar.header("Configuration & Instructions")
_sidebar.markdown(_SIDEBAR_INSTRUCTIONS)
# User input for text, language, and voice settings
input_text = st.text_area(
    "Enter your text here",
    placeholder="The sky above the port was the color of television...",
)

# Display names for the single-letter language codes passed to KPipeline.
# Insertion order is the order shown in the dropdown (same as before).
LANGUAGE_NAMES = {
    'a': "American English",
    'b': "British English",
    'e': "Spanish",
    'f': "French",
    'h': "Hindi",
    'i': "Italian",
    'p': "Brazilian Portuguese",
    'z': "Mandarin Chinese",
    'j': "Japanese",
}

# The selectbox still *returns* the bare code; format_func only changes the
# label the user sees, so downstream code is unaffected.
lang_code = st.selectbox(
    "Select Language",
    list(LANGUAGE_NAMES),
    format_func=lambda code: f"{LANGUAGE_NAMES[code]} ({code})",
)

# Voice identifiers; the first letter of each matches a language code above
# and the second letter is 'f' or 'm'.
# Change voice options as per model
voice = st.selectbox(
    "Select Voice",
    [
        'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
        'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
        'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
        'am_onyx', 'am_puck', 'am_santa',
        'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
        'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
        'ef_dora',
        'em_alex', 'em_santa',
        'ff_siwis',
        'hf_alpha', 'hf_beta',
        'hm_omega', 'hm_psi',
        'if_sara',
        'im_nicola',
        'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
        'jm_kumo',
        'pf_dora',
        'pm_alex', 'pm_santa',
        'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
        'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
    ],
)
speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)


@st.cache_resource
def _load_pipeline(code):
    """Return a KPipeline for language *code*, built once per distinct code.

    Streamlit re-executes this script on every widget interaction; without
    caching a new pipeline would be constructed on each rerun.
    """
    return KPipeline(lang_code=code)


# Initialize the TTS pipeline with user-selected language
pipeline = _load_pipeline(lang_code)
# Generate Audio function
def generate_audio(text, lang_code, voice, speed):
    """Synthesize *text* and return it as an in-memory WAV file.

    Args:
        text: Text to speak. It is split into segments on runs of newlines.
        lang_code: Single-letter language code the pipeline must be built for.
        voice: Voice identifier passed through to the pipeline.
        speed: Playback speed multiplier passed through to the pipeline.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing WAV data for all
        segments joined in order, or ``None`` if the pipeline produced no
        audio (for example when *text* is empty).
    """
    # Local import: this file does not import numpy at module level.
    import numpy as np

    # Reuse the module-level pipeline only when it was built for the
    # requested language; otherwise build one for *lang_code*. Previously
    # the argument was ignored, so e.g. English text was voiced with a
    # non-English pipeline.
    # NOTE(review): relies on KPipeline exposing a ``lang_code`` attribute;
    # if it does not, a fresh (correct, but slower) pipeline is built.
    if getattr(pipeline, "lang_code", None) == lang_code:
        tts = pipeline
    else:
        tts = KPipeline(lang_code=lang_code)

    # The pipeline yields one (graphemes, phonemes, audio) triple per text
    # segment; keep every segment instead of only a single one.
    # NOTE(review): assumes each ``audio`` is a 1-D array-like convertible
    # with np.asarray (e.g. a CPU tensor) -- confirm against kokoro.
    segments = [
        np.asarray(audio)
        for _, _, audio in tts(text, voice=voice, speed=speed, split_pattern=r'\n+')
    ]
    if not segments:
        return None

    # Save audio to in-memory buffer; the format must be given explicitly
    # because a BytesIO has no file extension to infer it from.
    buffer = io.BytesIO()
    sf.write(buffer, np.concatenate(segments), 24000, format='WAV')
    buffer.seek(0)
    return buffer
# Translate text to English using TextBlob
def translate_to_english(text, lang_code):
    """Return *text* translated to English, or unchanged if no translation is needed.

    Args:
        text: The text entered by the user.
        lang_code: Single-letter language code of *text*.

    Returns:
        *text* itself when it is already English (codes ``'a'`` American and
        ``'b'`` British) or when it is empty/whitespace-only; otherwise the
        English translation as a ``str``.

    Raises:
        Whatever ``TextBlob.translate`` raises for the non-English path.
        NOTE(review): ``TextBlob.translate`` appears to have been removed in
        recent TextBlob releases -- confirm the pinned version supports it.
    """
    # Both English variants need no translation (previously only 'a' was
    # skipped, so British English was sent to the translator).
    if lang_code in ('a', 'b'):
        return text

    # Nothing to translate; avoid calling the translator with blank input.
    if not text or not text.strip():
        return text

    translated = TextBlob(text).translate(to='en')
    return str(translated)
# Generate and display the audio file
def _show_audio(buffer, label, file_name):
    """Render an audio player and a download button for *buffer*.

    Shows an error message instead when *buffer* is None (no audio produced).
    """
    if buffer is None:
        st.error("No audio was produced for the given text.")
        return
    # Display Audio player in the app
    st.audio(buffer, format='audio/wav')
    # Offer the same WAV data as a download
    st.download_button(
        label=label,
        data=buffer,
        file_name=file_name,
        mime="audio/wav"
    )


if st.button('Generate Audio'):
    # Guard: there is nothing to synthesize for blank input, so stop this
    # script run with a hint instead of calling the pipeline.
    if not input_text or not input_text.strip():
        st.warning("Please enter some text before generating audio.")
        st.stop()

    st.write("Generating speech...")

    # Audio for the text exactly as entered, in the selected language.
    audio_buffer = generate_audio(input_text, lang_code, voice, speed)
    _show_audio(
        audio_buffer,
        "Download Audio (Original Text)",
        "generated_speech_original.wav",
    )

    # Translate the input text to English. This is the UI boundary, so any
    # translation failure is reported to the user rather than crashing the
    # page after the first audio clip has already been rendered.
    try:
        translated_text = translate_to_english(input_text, lang_code)
    except Exception as exc:
        st.error(f"Translation failed: {exc}")
        st.stop()

    # Generate audio for the translated English text ('a' = American English)
    translated_audio_buffer = generate_audio(translated_text, 'a', voice, speed)

    # Display the translated text, its audio, and a download option
    st.write(f"Translated Text: {translated_text}")
    _show_audio(
        translated_audio_buffer,
        "Download Audio (Translated to English)",
        "generated_speech_translated.wav",
    )