Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,43 +1,90 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
from kittentts import KittenTTS
|
3 |
|
4 |
-
# Initialize the KittenTTS model
|
5 |
-
# This model
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
def text_to_speech(text):
|
9 |
-
"""
|
10 |
-
Generates audio from the input text using the KittenTTS model.
|
11 |
|
12 |
-
|
13 |
-
|
|
|
14 |
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
17 |
"""
|
18 |
-
#
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
#
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
# Launch the Gradio app
|
43 |
-
|
|
|
1 |
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
from kittentts import KittenTTS
|
4 |
|
5 |
+
# 1. Initialize the KittenTTS model.
# This will download the model from Hugging Face on the first run.
print("Loading KittenTTS model...")
try:
    # Keep the try body to the single call that can fail.
    tts_model = KittenTTS("KittenML/kitten-tts-nano-0.1")
except Exception as e:
    # Nothing below can work without a model, so stop the process here.
    # Exit with a non-zero status so the failure is visible to whatever
    # started the app; the bare exit() helper reports success (status 0)
    # and only exists when the site module has been loaded.
    print(f"Error loading model: {e}")
    raise SystemExit(1) from e
else:
    print("Model loaded successfully.")
|
15 |
|
|
|
|
|
|
|
16 |
|
17 |
+
# 2. Get the list of available voices directly from the model instance.
AVAILABLE_VOICES = tts_model.available_voices
if len(AVAILABLE_VOICES) == 0:
    # Fail with a clear message instead of an IndexError from the fallback
    # lookup below.
    raise RuntimeError("KittenTTS model reported no available voices.")

# Use "expr-voice-5-m" as the initial selection when the model offers it;
# otherwise fall back to the first voice the model lists.
DEFAULT_VOICE = (
    "expr-voice-5-m" if "expr-voice-5-m" in AVAILABLE_VOICES else AVAILABLE_VOICES[0]
)
|
20 |
|
21 |
+
# 3. Define the core function that Gradio will call.
|
22 |
+
# This function now accepts 'voice' and 'speed' as arguments.
|
23 |
+
def synthesize_speech(text, voice, speed):
    """Generate speech audio for the given text, voice, and speed.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only text
            produces an empty clip without calling the model.
        voice: Voice identifier forwarded to the model's ``generate`` call.
        speed: Speed value forwarded to the model's ``generate`` call.

    Returns:
        A ``(sample_rate, samples)`` tuple, which is the format Gradio
        expects for audio output. The sample rate is always 24000.
    """
    sample_rate = 24000

    # Handle missing or blank input gracefully. ``text`` can be None (for
    # instance when the function is called programmatically), and None has
    # no .strip() method.
    if text is None or not text.strip():
        # Return a silent, empty audio clip
        return (sample_rate, np.zeros(0, dtype=np.int16))

    # Call the model's generate method with all the parameters
    audio_data = tts_model.generate(text, voice=voice, speed=speed)

    # Return the audio in the format Gradio expects: (sample_rate, numpy_array)
    return (sample_rate, audio_data)
|
37 |
+
|
38 |
+
# 4. Create the Gradio UI with the new controls.
# NOTE(review): the component nesting below was reconstructed from a view
# that had lost its indentation -- confirm the layout against the live app.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page heading and short usage instructions.
    gr.Markdown(
        """
        # 🐱 Enhanced KittenTTS UI
        A user-friendly interface for the KittenTTS text-to-speech model.
        Select a voice, adjust the speed, and type your text to generate audio.
        """
    )

    with gr.Row():
        # Wide column: the text to be spoken.
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                label="Input Text",
                placeholder="Type something here...",
                lines=5,
            )

        # Narrow column: voice and speed controls plus the trigger button.
        with gr.Column(scale=1):
            voice_dropdown = gr.Dropdown(
                label="Voice Selection",
                choices=AVAILABLE_VOICES,
                value=DEFAULT_VOICE,
            )
            speed_slider = gr.Slider(
                label="Speech Speed",
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
            )
            generate_button = gr.Button("Generate Audio", variant="primary")

    audio_output = gr.Audio(label="Generated Speech", autoplay=True)

    # The examples and the button feed the same three components, in the
    # order synthesize_speech takes its arguments.
    synthesis_inputs = [text_input, voice_dropdown, speed_slider]

    gr.Examples(
        examples=[
            ["This is an example of a female voice.", "expr-voice-5-f", 1.0],
            ["This is an example of a male voice, speaking a bit faster.", "expr-voice-5-m", 1.2],
            ["The speed can also be slowed down for clarity.", "expr-voice-4-f", 0.8],
        ],
        inputs=synthesis_inputs,
    )

    # Connect the UI components to the function
    generate_button.click(
        fn=synthesize_speech,
        inputs=synthesis_inputs,
        outputs=audio_output,
    )

# Launch the Gradio app
demo.launch()
|