Nymbo committed on
Commit
a455050
·
verified ·
1 Parent(s): 86c62a8

default speed for Kokoro is 1.25x

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -499,7 +499,7 @@ def _init_kokoro() -> None:
499
 
500
  def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
501
  text: Annotated[str, "The text to synthesize (English)."],
502
- speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.0,
503
  voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
504
  ) -> Tuple[int, np.ndarray]:
505
  """
@@ -510,9 +510,12 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
510
  tool is created for each function wired into your app; docstrings and type
511
  hints are used to describe the tool).
512
 
 
 
 
513
  Args:
514
  text: The text to synthesize (English).
515
- speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed.
516
  voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).
517
 
518
  Returns:
@@ -668,7 +671,7 @@ kokoro_interface = gr.Interface(
668
  fn=Generate_Speech,
669
  inputs=[
670
  gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
671
- gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed"),
672
  gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
673
  ],
674
  outputs=gr.Audio(label="Audio", type="numpy"),
 
499
 
500
  def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
501
  text: Annotated[str, "The text to synthesize (English)."],
502
+ speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.25,
503
  voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
504
  ) -> Tuple[int, np.ndarray]:
505
  """
 
510
  tool is created for each function wired into your app; docstrings and type
511
  hints are used to describe the tool).
512
 
513
+ Default behavior:
514
+ - Speed defaults to 1.25 (slightly brisk cadence) for clearer, snappier delivery.
515
+
516
  Args:
517
  text: The text to synthesize (English).
518
+ speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed. Default: 1.25 (slightly brisk).
519
  voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).
520
 
521
  Returns:
 
671
  fn=Generate_Speech,
672
  inputs=[
673
  gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
674
+ gr.Slider(minimum=0.5, maximum=2.0, value=1.25, step=0.1, label="Speed"),
675
  gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
676
  ],
677
  outputs=gr.Audio(label="Audio", type="numpy"),