default speed for Kokoro is 1.25x
Browse files
app.py
CHANGED
@@ -499,7 +499,7 @@ def _init_kokoro() -> None:
|
|
499 |
|
500 |
def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
|
501 |
text: Annotated[str, "The text to synthesize (English)."],
|
502 |
-
speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.0,
|
503 |
voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
|
504 |
) -> Tuple[int, np.ndarray]:
|
505 |
"""
|
@@ -510,9 +510,12 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
|
|
510 |
tool is created for each function wired into your app; docstrings and type
|
511 |
hints are used to describe the tool).
|
512 |
|
|
|
|
|
|
|
513 |
Args:
|
514 |
text: The text to synthesize (English).
|
515 |
-
speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed.
|
516 |
voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).
|
517 |
|
518 |
Returns:
|
@@ -668,7 +671,7 @@ kokoro_interface = gr.Interface(
|
|
668 |
fn=Generate_Speech,
|
669 |
inputs=[
|
670 |
gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
|
671 |
-
|
672 |
gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
|
673 |
],
|
674 |
outputs=gr.Audio(label="Audio", type="numpy"),
|
|
|
499 |
|
500 |
def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
|
501 |
text: Annotated[str, "The text to synthesize (English)."],
|
502 |
+
speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.25,
|
503 |
voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
|
504 |
) -> Tuple[int, np.ndarray]:
|
505 |
"""
|
|
|
510 |
tool is created for each function wired into your app; docstrings and type
|
511 |
hints are used to describe the tool).
|
512 |
|
513 |
+
Default behavior:
|
514 |
+
- Speed defaults to 1.25 (slightly brisk cadence) for clearer, snappier delivery.
|
515 |
+
|
516 |
Args:
|
517 |
text: The text to synthesize (English).
|
518 |
+
speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed. Default: 1.25 (slightly brisk).
|
519 |
voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).
|
520 |
|
521 |
Returns:
|
|
|
671 |
fn=Generate_Speech,
|
672 |
inputs=[
|
673 |
gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
|
674 |
+
gr.Slider(minimum=0.5, maximum=2.0, value=1.25, step=0.1, label="Speed"),
|
675 |
gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
|
676 |
],
|
677 |
outputs=gr.Audio(label="Audio", type="numpy"),
|