kokoro-tts-english

Running

App Files Files Community

kokoro-tts-english / app.py

leonelhs

Update app.py

8f6f3d7 verified about 1 month ago

raw

history blame

4.64 kB

	#######################################################################################
	#
	# MIT License
	#
	# Copyright (c) [2025] [[email protected]]
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in all
	# copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	# SOFTWARE.
	#
	#######################################################################################

	# This file implements an API endpoint for the English Kokoro Text-to-Speech (TTS) system.
	# It provides functionality to generate TTS audio from input English text using the Kokoro voice model.


	# Source code is based on or inspired by several projects.
	# For more details and proper attribution, please refer to the following resources:
	#
	# - [Kokoro] - [https://github.com/hexgrad/kokoro]
	# - [Misaki] - [https://github.com/hexgrad/misaki]
	# - [Kokoro-82M] - [https://huggingface.co/hexgrad/Kokoro-82M]
	# - [Kokoro-onnx] - [https://github.com/thewh1teagle/kokoro-onnx]



	import os

	import gradio as gr
	from huggingface_hub import snapshot_download
	from kokoro_onnx import Kokoro
	from misaki import en, espeak

	# Hugging Face Hub repository that the model snapshot is downloaded from.
	KOKORO_REPO_ID = "leonelhs/kokoro-thewh1teagle"

	# Maps the label shown in the voice dropdown to the Kokoro voice ID.
	# Insertion order is the order in which the choices are listed.
	# The ID prefixes mirror the labels: af_/am_ for 🇺🇸 female/male voices,
	# bf_/bm_ for 🇬🇧 female/male voices.
	VOICES = {
	    # 🇺🇸 female voices
	    "🇺🇸 🚺 Heart ❤️": "af_heart",
	    "🇺🇸 🚺 Bella 🔥": "af_bella",
	    "🇺🇸 🚺 Nicole 🎧": "af_nicole",
	    "🇺🇸 🚺 Aoede": "af_aoede",
	    "🇺🇸 🚺 Kore": "af_kore",
	    "🇺🇸 🚺 Sarah": "af_sarah",
	    "🇺🇸 🚺 Nova": "af_nova",
	    "🇺🇸 🚺 Sky": "af_sky",
	    "🇺🇸 🚺 Alloy": "af_alloy",
	    "🇺🇸 🚺 Jessica": "af_jessica",
	    "🇺🇸 🚺 River": "af_river",
	    # 🇺🇸 male voices
	    "🇺🇸 🚹 Michael": "am_michael",
	    "🇺🇸 🚹 Fenrir": "am_fenrir",
	    "🇺🇸 🚹 Puck": "am_puck",
	    "🇺🇸 🚹 Echo": "am_echo",
	    "🇺🇸 🚹 Eric": "am_eric",
	    "🇺🇸 🚹 Liam": "am_liam",
	    "🇺🇸 🚹 Onyx": "am_onyx",
	    "🇺🇸 🚹 Santa": "am_santa",
	    "🇺🇸 🚹 Adam": "am_adam",
	    # 🇬🇧 female voices
	    "🇬🇧 🚺 Emma": "bf_emma",
	    "🇬🇧 🚺 Isabella": "bf_isabella",
	    "🇬🇧 🚺 Alice": "bf_alice",
	    "🇬🇧 🚺 Lily": "bf_lily",
	    # 🇬🇧 male voices
	    "🇬🇧 🚹 George": "bm_george",
	    "🇬🇧 🚹 Fable": "bm_fable",
	    "🇬🇧 🚹 Lewis": "bm_lewis",
	    "🇬🇧 🚹 Daniel": "bm_daniel",
	}

	# Download (or reuse) the model repository from the Hugging Face Hub.
	# `snapshot` is the local directory the model files below are resolved against.
	snapshot = snapshot_download(repo_id=KOKORO_REPO_ID)

	# Misaki G2P with espeak-ng fallback
	# Both the fallback and the G2P front end are created with british=False, and
	# this single converter is the one predict() uses for every voice.
	# NOTE(review): trf=False presumably disables misaki's transformer-based
	# pipeline - confirm against the misaki documentation.
	fallback = espeak.EspeakFallback(british=False)
	g2p = en.G2P(trf=False, british=False, fallback=fallback)

	# Kokoro
	# The ONNX model and the voice pack are loaded once at import time from the
	# downloaded snapshot and shared by all requests through the `kokoro` global.
	model_path = os.path.join(snapshot, "kokoro-v1.0.onnx")
	voices_path = os.path.join(snapshot, "voices-v1.0.bin")
	kokoro = Kokoro(model_path, voices_path)

	def predict(text, voice='af_heart', speed=1):
	    """
	    Generate speech audio from English text input.

	    The text is converted to phonemes with the module-level G2P and then
	    synthesized by the module-level Kokoro model. A ValueError is raised when
	    the text is empty or contains only whitespace.

	    Parameters:
	        text (string): The text to be converted into speech.
	        voice (string): The selected male or female voice profile (specific voice ID, e.g. 'af_heart').
	        speed (float): The speaking rate multiplier (e.g., 1.0 = normal speed, 0.8 = slower, 1.2 = faster).

	    Returns:
	        audio (tuple): The (sample_rate, samples) pair of the generated speech, in the order the audio output expects.
	    """
	    # Reject blank input up front so the caller gets a clear message instead
	    # of whatever the G2P / synthesizer does with an empty phoneme string.
	    if not text or not text.strip():
	        raise ValueError("Input text is empty; please provide some English text.")

	    # NOTE(review): the shared g2p is built with british=False, so the
	    # bf_*/bm_* voices are also phonemized with it - confirm this is intended.
	    phonemes, _ = g2p(text)

	    # Kokoro returns (samples, sample_rate); the pair is swapped on return.
	    samples, sample_rate = kokoro.create(phonemes, voice, speed, is_phonemes=True)
	    return sample_rate, samples

	# Gradio interface exposing predict(): a text box, a voice dropdown whose
	# choices are the (label, voice ID) pairs from VOICES, and a speed slider,
	# with the generated audio as the single output.
	app = gr.Interface(
	    fn=predict,
	    inputs=[
	        gr.Textbox(label='Input Text'),
	        gr.Dropdown(choices=list(VOICES.items()), value='af_heart', label='Voice'),
	        gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='Speed'),
	    ],
	    outputs=gr.Audio(label='Output Audio', interactive=False, streaming=False, autoplay=True),
	    description="Kokoro TTS 🇺🇸 🇬🇧 API Endpoint",
	)

	# The queue has to be configured before launch(): launch(debug=True) blocks
	# the main thread, so a queue() call placed after it never takes effect while
	# the server is running.
	app.queue()
	app.launch(share=False, debug=True, show_error=True, mcp_server=True)