Spaces:

broadfield-dev
/

kitten-tts

Running

App Files Files Community

broadfield-dev committed 4 days ago

Commit

55105f5

verified ·

1 Parent(s): 78e02b1

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -13

app.py CHANGED Viewed

@@ -2,40 +2,29 @@ import gradio as gr
 import numpy as np
 from kittentts import KittenTTS
-# 1. Initialize the KittenTTS model.
-# This will download the model from Hugging Face on the first run.
 print("Loading KittenTTS model...")
 try:
     tts_model = KittenTTS("KittenML/kitten-tts-nano-0.1")
     print("Model loaded successfully.")
 except Exception as e:
     print(f"Error loading model: {e}")
-    # You might want to handle this more gracefully
     exit()
-# 2. Get the list of available voices directly from the model instance.
 AVAILABLE_VOICES = tts_model.available_voices
 DEFAULT_VOICE = "expr-voice-5-m" if "expr-voice-5-m" in AVAILABLE_VOICES else AVAILABLE_VOICES[0]
-# 3. Define the core function that Gradio will call.
-# This function now accepts 'voice' and 'speed' as arguments.
 def synthesize_speech(text, voice, speed):
     """
     Generates audio using the selected text, voice, and speed.
     """
-    # Handle empty input gracefully
     if not text.strip():
-        # Return a silent, empty audio clip
         return (24000, np.zeros(0, dtype=np.int16))
-    # Call the model's generate method with all the parameters
     audio_data = tts_model.generate(text, voice=voice, speed=speed)
-    # Return the audio in the format Gradio expects: (sample_rate, numpy_array)
     return (24000, audio_data)
-# 4. Create the Gradio UI with the new controls.
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
@@ -79,12 +68,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         inputs=[text_input, voice_dropdown, speed_slider]
     )
-    # Connect the UI components to the function
     generate_button.click(
         fn=synthesize_speech,
         inputs=[text_input, voice_dropdown, speed_slider],
         outputs=audio_output
     )
-# Launch the Gradio app
 demo.launch()

 import numpy as np
 from kittentts import KittenTTS
 # Load the TTS model once at start-up; the synthesis function reuses this
 # module-level instance for every request.
 print("Loading KittenTTS model...")
 try:
     tts_model = KittenTTS("KittenML/kitten-tts-nano-0.1")
     print("Model loaded successfully.")
 except Exception as e:
     print(f"Error loading model: {e}")
     # The app cannot do anything without a model, so stop here. Raise
     # SystemExit directly instead of calling the bare exit() helper: exit()
     # is injected by the `site` module for interactive use and, called with
     # no argument, terminates with status 0, which hides the failure.
     raise SystemExit(1)
 # Voice identifiers reported by the loaded model instance.
 AVAILABLE_VOICES = tts_model.available_voices
 # Use "expr-voice-5-m" when the model offers it; otherwise fall back to the
 # first reported voice.
 # NOTE(review): the fallback indexes [0], so this assumes the model reports
 # at least one voice - confirm against the kittentts package.
 DEFAULT_VOICE = "expr-voice-5-m" if "expr-voice-5-m" in AVAILABLE_VOICES else AVAILABLE_VOICES[0]
 def synthesize_speech(text, voice, speed):
     """
     Generate audio for the given text using the selected voice and speed.

     Args:
         text: Text to synthesize. ``None``, an empty string, or a string
             containing only whitespace produces an empty clip.
         voice: Voice identifier forwarded to ``tts_model.generate``.
         speed: Speed value forwarded to ``tts_model.generate``.

     Returns:
         A ``(sample_rate, samples)`` tuple. The sample rate is always 24000;
         ``samples`` is whatever ``tts_model.generate`` returns, or an empty
         ``int16`` array when there is nothing to synthesize.
     """
     sample_rate = 24000
     # Guard against None as well as blank text: calling .strip() on None
     # would raise AttributeError instead of returning an empty clip.
     if text is None or not text.strip():
         return (sample_rate, np.zeros(0, dtype=np.int16))
     audio_data = tts_model.generate(text, voice=voice, speed=speed)
     return (sample_rate, audio_data)
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
         inputs=[text_input, voice_dropdown, speed_slider]
     )
     generate_button.click(
         fn=synthesize_speech,
         inputs=[text_input, voice_dropdown, speed_slider],
         outputs=audio_output
     )
 demo.launch()