Spaces:

shukdevdatta123
/

Kokoro-TTS

Paused

App Files Files Community

Kokoro-TTS / app.py

shukdevdatta123

Update app.py

666cd48 verified 9 months ago

raw

history blame

3.34 kB

	import io
	import os
	import shutil
	import subprocess

	import numpy as np
	import soundfile as sf
	import streamlit as st
	from kokoro import KPipeline

	# Make sure the espeak-ng binary is on PATH, installing it with apt-get
	# when it is missing (presumably required by the kokoro pipeline used
	# below -- confirm against the kokoro documentation).
	if shutil.which("espeak-ng"):
	    st.text("espeak-ng already installed.")
	else:
	    # Show the status line *before* the blocking install so the user sees
	    # it while apt-get is running rather than only after it has finished.
	    st.text("Installing espeak-ng...")
	    try:
	        # List-form argv: no shell is involved in running the command.
	        subprocess.run(
	            ["apt-get", "-qq", "-y", "install", "espeak-ng"],
	            check=True,
	        )
	    except (OSError, subprocess.CalledProcessError) as exc:
	        # OSError: apt-get itself is not available on this system.
	        # CalledProcessError: apt-get ran but exited with a non-zero status.
	        # Best effort only: report the problem and let the app continue.
	        st.warning(f"Could not install espeak-ng automatically: {exc}")

	# Page title for the main area of the app.
	st.title("Text-to-Speech with Kokoro")

	# Markdown usage guide rendered in the sidebar; kept in a named constant so
	# the rendering calls below stay short.
	SIDEBAR_INSTRUCTIONS = """
	### How to Use the Text-to-Speech App:

	1. Enter Text: In the main text area, input any text that you want the model to convert to speech.

	2. Select Language:
	- Choose the language of the text you are entering. Available options include:
	- 🇺🇸 American English (`a`)
	- 🇬🇧 British English (`b`)
	- 🇪🇸 Spanish (`e`)
	- 🇫🇷 French (`f`)
	- 🇮🇳 Hindi (`h`)
	- 🇮🇹 Italian (`i`)
	- 🇧🇷 Brazilian Portuguese (`p`)
	- 🇯🇵 Japanese (`j`)
	- 🇨🇳 Mandarin Chinese (`z`)

	3. Select Voice:
	- Choose the voice style for the speech. You can pick different voices based on tone and gender, such as `af_heart`, `af_joy`, etc.

	4. Adjust Speed:
	- Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.

	5. Generate Speech:
	- After configuring the settings, click on the "Generate Audio" button. The app will process your text and produce speech audio accordingly.

	6. Download:
	- Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the "Download Audio" button.

	Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
	"""

	# Sidebar: section header first, then the usage guide.
	sidebar = st.sidebar
	sidebar.header("Configuration & Instructions")
	sidebar.markdown(SIDEBAR_INSTRUCTIONS)

	# User input for text, language, and voice settings
	input_text = st.text_area("Enter your text here", "The sky above the port was the color of television...")
	lang_code = st.selectbox("Select Language", ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'j', 'z'])
	# NOTE(review): confirm these voice names against the voices actually
	# published for the model; an unknown name will presumably fail at synthesis.
	voice = st.selectbox("Select Voice", ['af_heart', 'af_joy', 'af_female', 'af_male'])  # Change voice options as per model
	speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)


	@st.cache_resource
	def _load_pipeline(lang_code):
	    """Return the KPipeline for ``lang_code``, building it at most once.

	    Streamlit re-executes this script on every widget interaction, so
	    constructing the pipeline at module level would rebuild it each time.
	    ``st.cache_resource`` keeps one instance per distinct ``lang_code`` and
	    shares it across reruns and sessions.
	    """
	    return KPipeline(lang_code=lang_code)


	# Initialize the TTS pipeline with user-selected language
	pipeline = _load_pipeline(lang_code)

	# Generate Audio function
	def generate_audio(text, lang_code, voice, speed):
	    """Synthesize ``text`` and return the result as an in-memory WAV file.

	    Args:
	        text: Text to speak. The pipeline splits it on runs of newlines
	            (``split_pattern=r'\\n+'``) and yields audio per segment.
	        lang_code: Not used inside this function; the module-level
	            ``pipeline`` is already built for the selected language. The
	            parameter is kept so existing callers keep working.
	        voice: Voice name forwarded to the pipeline.
	        speed: Speed multiplier forwarded to the pipeline.

	    Returns:
	        io.BytesIO: WAV data written at a 24000 Hz sample rate, with the
	        stream position rewound to 0.

	    Raises:
	        ValueError: If the pipeline yields no audio at all (for example
	            when ``text`` is blank).
	    """
	    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')

	    # The pipeline yields one (gs, ps, audio) tuple per text segment. Keep
	    # every segment so multi-paragraph input is not truncated to a single
	    # chunk; np.asarray is the same conversion sf.write applies itself.
	    segments = [
	        np.asarray(audio)
	        for _, _, audio in generator
	        if audio is not None
	    ]
	    if not segments:
	        raise ValueError("No audio was generated for the given text.")

	    # Save audio to in-memory buffer. A BytesIO has no file name to infer
	    # the container from, so the WAV format must be given explicitly.
	    buffer = io.BytesIO()
	    sf.write(buffer, np.concatenate(segments), 24000, format='WAV')
	    buffer.seek(0)
	    return buffer

	# Generate and display the audio file
	if st.button('Generate Audio'):
	    if not input_text.strip():
	        # Nothing to synthesize; tell the user instead of calling the model.
	        st.warning("Please enter some text to convert to speech.")
	    else:
	        audio_buffer = None
	        failure = None
	        try:
	            # The spinner is shown only while synthesis is running.
	            with st.spinner("Generating speech..."):
	                audio_buffer = generate_audio(input_text, lang_code, voice, speed)
	        except Exception as exc:
	            # UI boundary: surface the failure as a readable message rather
	            # than a raw traceback. Streamlit's own stop/rerun signals do
	            # not derive from Exception, so they are not swallowed here.
	            failure = str(exc)

	        if audio_buffer is None:
	            st.error(f"Speech generation failed: {failure or 'no audio was produced.'}")
	        else:
	            # Take the bytes once so the player and the download button do
	            # not depend on the position of a shared stream.
	            audio_bytes = audio_buffer.getvalue()

	            # Display Audio player in the app
	            st.audio(audio_bytes, format='audio/wav')

	            # Offer the generated audio file for download
	            st.download_button(
	                label="Download Audio",
	                data=audio_bytes,
	                file_name="generated_speech.wav",
	                mime="audio/wav",
	            )