Athspi committed (verified)
Commit e567eaf · Parent(s): 6c54982

Update app.py

Files changed (1)
  1. app.py +11 -7
app.py CHANGED
@@ -11,8 +11,6 @@ from huggingface_hub import hf_hub_download
 repo_id = "Athspi/Gg"
 
 # Download the ONNX model file from the repository.
-# This will download "mms_tts_eng.onnx" from:
-# https://huggingface.co/Athspi/Gg/resolve/main/mms_tts_eng.onnx
 onnx_model_path = hf_hub_download(repo_id=repo_id, filename="mms_tts_eng.onnx")
 
 # Load the tokenizer from the repository.
@@ -34,8 +32,8 @@ def tts_inference(text: str):
         text (str): Input text to synthesize.
 
     Returns:
-        waveform (np.ndarray): Synthesized audio waveform in float32 format.
-        sampling_rate (int): The sampling rate of the waveform.
+        Tuple[int, np.ndarray]: A tuple containing the sampling rate (int) and the synthesized
+        audio waveform (np.ndarray in float32 format).
     """
     # Tokenize the input text.
     inputs = tokenizer(text, return_tensors="pt")
@@ -57,15 +55,21 @@ def tts_inference(text: str):
     # Remove any extra dimensions.
     waveform = np.squeeze(waveform)
 
-    return waveform, sampling_rate
+    return (sampling_rate, waveform)  # Return as a tuple
+
 
 # Build the Gradio interface.
 iface = gr.Interface(
     fn=tts_inference,
     inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
-    outputs=gr.Audio(type="numpy"),
+    outputs=gr.Audio(type="numpy", label="Generated Speech"),
     title="ONNX TTS Demo",
-    description="Text-to-Speech synthesis using an ONNX model from the Athspi/Gg repository on Hugging Face."
+    description="Text-to-Speech synthesis using an ONNX model from the Athspi/Gg repository on Hugging Face.",
+    examples=[
+        ["Hello, this is an example of text-to-speech."],
+        ["This model uses ONNX Runtime for fast inference."],
+        ["You can try your own sentences here."]
+    ]
 )
 
  if __name__ == "__main__":
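
Note on the main behavioral change: Gradio's gr.Audio(type="numpy") output component expects the wrapped function to return a (sample_rate, waveform) tuple, which is why the return order is flipped in this commit. Below is a minimal, self-contained sketch of that contract; it is not the repository's code. The sine-tone function and the 16 kHz rate are placeholders standing in for the real ONNX synthesis.

import numpy as np
import gradio as gr

def fake_tts(text: str):
    # Stand-in for real synthesis: a one-second 440 Hz sine tone.
    sampling_rate = 16000
    t = np.linspace(0, 1.0, sampling_rate, endpoint=False)
    waveform = (0.2 * np.sin(2 * np.pi * 440.0 * t)).astype(np.float32)
    # gr.Audio(type="numpy") expects (sample_rate, data), in that order.
    return (sampling_rate, waveform)

demo = gr.Interface(
    fn=fake_tts,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs=gr.Audio(type="numpy", label="Generated Speech"),
)

if __name__ == "__main__":
    demo.launch()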