Spaces:

Athspi
/

Tttt

Sleeping

Athspi committed on Mar 19

Commit

6c54982

verified ·

1 Parent(s): b6b39ee

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,7 +11,8 @@ from huggingface_hub import hf_hub_download
 repo_id = "Athspi/Gg"
 # Download the ONNX model file from the repository.
-# This will download "mms_tts_eng.onnx" from: https://huggingface.co/Athspi/Gg/resolve/main/mms_tts_eng.onnx
 onnx_model_path = hf_hub_download(repo_id=repo_id, filename="mms_tts_eng.onnx")
 # Load the tokenizer from the repository.
@@ -33,7 +34,7 @@ def tts_inference(text: str):
         text (str): Input text to synthesize.
     Returns:
-        waveform (np.ndarray): Synthesized audio waveform.
         sampling_rate (int): The sampling rate of the waveform.
     """
     # Tokenize the input text.
@@ -46,16 +47,19 @@ def tts_inference(text: str):
     onnx_outputs = ort_session.run(None, {"input_ids": input_ids})
     waveform = onnx_outputs[0]
-    # Ensure waveform is in float32 format (required by Gradio).
     waveform = waveform.astype(np.float32)
-    # Remove unnecessary dimensions.
     waveform = np.squeeze(waveform)
-    # Return the waveform and its sampling rate.
     return waveform, sampling_rate
-# Build a Gradio interface.
 iface = gr.Interface(
     fn=tts_inference,
     inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),

 repo_id = "Athspi/Gg"
 # Download the ONNX model file from the repository.
+# This will download "mms_tts_eng.onnx" from:
+# https://huggingface.co/Athspi/Gg/resolve/main/mms_tts_eng.onnx
 onnx_model_path = hf_hub_download(repo_id=repo_id, filename="mms_tts_eng.onnx")
 # Load the tokenizer from the repository.
         text (str): Input text to synthesize.
     Returns:
+        waveform (np.ndarray): Synthesized audio waveform in float32 format.
         sampling_rate (int): The sampling rate of the waveform.
     """
     # Tokenize the input text.
     onnx_outputs = ort_session.run(None, {"input_ids": input_ids})
     waveform = onnx_outputs[0]
+    # Ensure the output is a NumPy array.
+    if not isinstance(waveform, np.ndarray):
+        waveform = np.array(waveform)
+    # Convert waveform to float32 (required by Gradio's Audio component).
     waveform = waveform.astype(np.float32)
+    # Remove any extra dimensions.
     waveform = np.squeeze(waveform)
     return waveform, sampling_rate
+# Build the Gradio interface.
 iface = gr.Interface(
     fn=tts_inference,
     inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),