Spaces:

shukdevdatta123
/

Kokoro-TTS

Paused

App Files Files Community

Kokoro-TTS / app.py

shukdevdatta123

Update app.py

f0897e2 verified 9 months ago

raw

history blame

5.55 kB

	import streamlit as st
	from kokoro import KPipeline
	import soundfile as sf
	import io
	import os
	from textblob import TextBlob # We will use TextBlob for translation

	# Install espeak-ng if not installed
	# if not os.system("which espeak-ng"):
	# st.text("espeak-ng already installed.")
	# else:
	# os.system("apt-get -qq -y install espeak-ng")
	# st.text("Installing espeak-ng...")

	# Streamlit App UI Setup: page title followed by a collapsible list of
	# sample sentences, one per language offered in the language selector.
	st.title("Text-to-Speech with Kokoro")
	# Each sample is labelled with the language it is written in, so the label
	# for the Italian sentence names the language ("Italian"), not the country.
	with st.expander("Sample Prompt!"):
	    st.markdown("""
	- My name is Shukdev. (In English)
	- Mi nombre es Shukdev. (In Spanish)
	- Je m'appelle Choukdev. (In French)
	- मेरा नाम शुकदेव है. (In Hindi)
	- Il mio nome è Shukdev. (In Italian)
	- Meu nome é Sukhdev. (In Portuguese, Brazil)
	- 我叫苏赫德夫。(In Chinese)
	- 私の名前はスクデフです。(In Japanese)
	""")
	# Sidebar: a heading followed by the step-by-step usage guide.
	# The guide is kept in a named variable so the rendering calls stay short.
	_usage_guide = """
	### How to Use the Text-to-Speech App:
	1. Enter Text: In the main text area, input any text that you want the model to convert to speech.

	2. Select Language:
	- Choose the language of the text you are entering. Available options include:
	- 🇺🇸 American English (`a`)
	- 🇬🇧 British English (`b`)
	- 🇪🇸 Spanish (`e`)
	- 🇫🇷 French (`f`)
	- 🇮🇳 Hindi (`h`)
	- 🇮🇹 Italian (`i`)
	- 🇧🇷 Brazilian Portuguese (`p`)
	- 🇨🇳 Mandarin Chinese (`z`)
	- 🇯🇵 Japanese (`j`)

	3. Select Voice:
	- Choose the voice style for the speech. You can pick different voices based on tone and gender, such as `af_heart`, `af_joy`, etc.

	4. Adjust Speed:
	- Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
	5. Generate Speech:
	- After configuring the settings, click on the "Generate Audio" button. The app will process your text and produce speech audio accordingly.

	6. Download:
	- Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the "Download Audio" button.
	Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
	"""
	_sidebar = st.sidebar
	_sidebar.header("Configuration & Instructions")
	_sidebar.markdown(_usage_guide)

	# Widgets collecting the text to speak and the synthesis settings.
	# Voice names are "<prefix>_<name>"; they are listed per prefix and expanded
	# below into the flat, ordered list of options shown in the voice selector.
	_voice_groups = [
	    ('af', ['alloy', 'aoede', 'bella', 'heart', 'jessica', 'kore', 'nicole',
	            'nova', 'river', 'sarah', 'sky']),
	    ('am', ['adam', 'echo', 'eric', 'fenrir', 'liam', 'michael', 'onyx',
	            'puck', 'santa']),
	    ('bf', ['alice', 'emma', 'isabella', 'lily']),
	    ('bm', ['daniel', 'fable', 'george', 'lewis']),
	    ('ef', ['dora']),
	    ('em', ['alex', 'santa']),
	    ('ff', ['siwis']),
	    ('hf', ['alpha', 'beta']),
	    ('hm', ['omega', 'psi']),
	    ('if', ['sara']),
	    ('im', ['nicola']),
	    ('jf', ['alpha', 'gongitsune', 'nezumi', 'tebukuro']),
	    ('jm', ['kumo']),
	    ('pf', ['dora']),
	    ('pm', ['alex', 'santa']),
	    ('zf', ['xiaobei', 'xiaoni', 'xiaoxiao', 'xiaoyi']),
	    ('zm', ['yunjian', 'yunxi', 'yunxia', 'yunyang']),
	]
	_voice_options = [
	    f"{prefix}_{name}" for prefix, names in _voice_groups for name in names
	]
	_language_options = ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'z', 'j']

	input_text = st.text_area(
	    "Enter your text here",
	    placeholder="The sky above the port was the color of television...",
	)
	lang_code = st.selectbox("Select Language", _language_options)
	# Change voice options as per model
	voice = st.selectbox("Select Voice", _voice_options)
	speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)

	# Initialize the TTS pipeline with user-selected language
	@st.cache_resource
	def _load_pipeline(lang_code):
	    """Return the Kokoro pipeline for ``lang_code``, building it only once.

	    Streamlit re-executes the script on every widget interaction; caching the
	    pipeline as a resource avoids constructing a new ``KPipeline`` each time.
	    """
	    return KPipeline(lang_code=lang_code)


	# Kept as a module-level name for any code that refers to ``pipeline``.
	pipeline = _load_pipeline(lang_code)


	# Generate Audio function
	def generate_audio(text, lang_code, voice, speed):
	    """Synthesize ``text`` and return the result as an in-memory WAV file.

	    Args:
	        text: Text to speak; the pipeline splits it on runs of newlines.
	        lang_code: Language code selecting which pipeline performs synthesis.
	        voice: Voice name passed through to the pipeline.
	        speed: Speed value passed through to the pipeline.

	    Returns:
	        An ``io.BytesIO`` rewound to offset 0 containing WAV data written
	        with a sample rate of 24000.

	    Raises:
	        ValueError: If the pipeline produced no audio (e.g. blank text).
	    """
	    import numpy as np

	    # Use the pipeline for the *requested* language. Callers pass 'a' for
	    # translated English text, which must not be run through the pipeline
	    # built for the language selected in the UI.
	    tts = _load_pipeline(lang_code)
	    generator = tts(text, voice=voice, speed=speed, split_pattern=r'\n+')

	    # The generator yields one (graphemes, phonemes, audio) item per text
	    # segment; keep every segment so multi-line input is spoken in full.
	    # Defensive: items without audio are skipped.
	    segments = [
	        np.asarray(audio) for _, _, audio in generator if audio is not None
	    ]
	    if not segments:
	        raise ValueError("No audio was generated; the input text may be empty.")

	    # Save audio to in-memory buffer, explicitly as WAV because a BytesIO
	    # has no file extension from which the format could be inferred.
	    buffer = io.BytesIO()
	    sf.write(buffer, np.concatenate(segments), 24000, format='WAV')
	    buffer.seek(0)
	    return buffer

	# Translate text to English using TextBlob
	def translate_to_english(text, lang_code):
	    """Return ``text`` translated to English, or unchanged if not needed.

	    Args:
	        text: The text entered by the user.
	        lang_code: Language code of ``text``. ``'a'`` (American English) and
	            ``'b'`` (British English) are both treated as already English.

	    Returns:
	        The English translation as a ``str``. ``text`` itself is returned
	        when it is already English or contains only whitespace.

	    Raises:
	        Whatever ``TextBlob.translate`` raises; errors are not handled here.
	    """
	    # Skip the translator for English input and for blank text: there is
	    # nothing to translate in either case.
	    if lang_code in ('a', 'b') or not text.strip():
	        return text
	    return str(TextBlob(text).translate(to='en'))

	# Generate and display the audio file when the button is pressed.
	# Flow: validate input -> synthesize the original text -> for non-English
	# input, translate to English and synthesize the translation as well.
	if st.button('Generate Audio'):
	    if not input_text.strip():
	        # Nothing to synthesize; tell the user instead of failing later.
	        st.warning("Please enter some text before generating audio.")
	    else:
	        st.write("Generating speech...")
	        audio_buffer = generate_audio(input_text, lang_code, voice, speed)

	        # Display Audio player in the app
	        st.audio(audio_buffer, format='audio/wav')

	        # Offer the generated audio file for download
	        st.download_button(
	            label="Download Audio (Original Text)",
	            data=audio_buffer,
	            file_name="generated_speech_original.wav",
	            mime="audio/wav"
	        )

	        # 'a' and 'b' are the two English options, so a translated version
	        # would only repeat the audio produced above.
	        if lang_code not in ('a', 'b'):
	            try:
	                # Translate the input text to English
	                translated_text = translate_to_english(input_text, lang_code)
	            except Exception as exc:
	                # UI boundary: report the failure and keep the original
	                # audio usable rather than aborting the whole run.
	                # NOTE(review): narrow this once the exception types of the
	                # installed TextBlob version are confirmed.
	                st.error(f"Translation to English failed: {exc}")
	            else:
	                # Generate audio for the translated English text
	                translated_audio_buffer = generate_audio(translated_text, 'a', voice, speed)

	                # Display the translation and its audio
	                st.write(f"Translated Text: {translated_text}")
	                st.audio(translated_audio_buffer, format='audio/wav')

	                # Download option for the translated audio
	                st.download_button(
	                    label="Download Audio (Translated to English)",
	                    data=translated_audio_buffer,
	                    file_name="generated_speech_translated.wav",
	                    mime="audio/wav"
	                )