Spaces:

Athspi
/

Tg

Sleeping

App Files Files Community

Athspi commited on Mar 19

Commit

b805946

verified ·

1 Parent(s): 87a1ec1

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -72

app.py CHANGED Viewed

@@ -4,90 +4,76 @@ import numpy as np
 from transformers import AutoTokenizer
 import onnxruntime
 from huggingface_hub import hf_hub_download
 # --- Configuration ---
 repo_id = "Athspi/Gg"  # Your Hugging Face Hub repository ID
-onnx_filename = "mms_tts_eng.onnx"  # Name of the ONNX file in the repository
-sampling_rate = 16000  # Sampling rate of the model (adjust if needed)
-# --- Load Model and Tokenizer ---
-# Download the ONNX model (using hf_hub_download for caching)
 onnx_model_path = hf_hub_download(repo_id=repo_id, filename=onnx_filename)
-# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(repo_id)
-# --- ONNX Runtime Session Setup with Optimization ---
 session_options = onnxruntime.SessionOptions()
-# Optimization level: Use all available optimizations
 session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
-# Threading:  Set intra_op_num_threads to the number of *physical* cores
-#              (You'll need to determine this for your system).  Here's a
-#              way to get it programmatically (but it might not be 100%
-#              reliable on all systems).
 try:
     import psutil
     num_physical_cores = psutil.cpu_count(logical=False)
 except ImportError:
-    print("psutil not installed.  You can install it with: pip install psutil")
-    num_physical_cores = 4  # Set a reasonable default (e.g., 4)
-    print(f"Using default number of physical cores: {num_physical_cores}")
 session_options.intra_op_num_threads = num_physical_cores
-session_options.inter_op_num_threads = 1 # Usually best for TTS to be 1 or 2
-# Create the ONNX Runtime inference session
 ort_session = onnxruntime.InferenceSession(
     onnx_model_path,
-    providers=['CPUExecutionProvider'],  # You can try other providers if available
     sess_options=session_options,
 )
 # --- IO Binding Setup ---
 io_binding = ort_session.io_binding()
-# Get input/output metadata
 input_meta = ort_session.get_inputs()[0]
 output_meta = ort_session.get_outputs()[0]
-# Dummy input for shape/type
 dummy_input = tokenizer("a", return_tensors="pt")["input_ids"].to(torch.long)
 input_shape = tuple(dummy_input.shape)
 input_type = dummy_input.numpy().dtype
-# Pre-allocate input tensor (CPU, contiguous)
 input_tensor = torch.empty(input_shape, dtype=torch.int64, device="cpu").contiguous()
-# Pre-allocate output tensor (CPU, contiguous) - estimate max size
-max_output_length = input_shape[1] * 10  # Adjust factor as needed
 output_shape = (1, 1, max_output_length)
 output_tensor = torch.empty(output_shape, dtype=torch.float32, device="cpu").contiguous()
-# Bind the pre-allocated tensors
 io_binding.bind_input(
-    name=input_meta.name,
-    device_type="cpu",
-    device_id=0,
-    element_type=input_type,
-    shape=input_shape,
-    buffer_ptr=input_tensor.data_ptr(),
 )
 io_binding.bind_output(
-    name=output_meta.name,
-    device_type="cpu",
-    device_id=0,
-    element_type=np.float32,
-    shape=output_shape,
-    buffer_ptr=output_tensor.data_ptr(),
 )
-# --- Inference Function (with IO Binding) ---
 def tts_inference_io_binding(text: str):
     """TTS inference with IO Binding."""
@@ -97,62 +83,47 @@ def tts_inference_io_binding(text: str):
     input_ids = inputs.input_ids.to(torch.long)
     current_input_shape = tuple(input_ids.shape)
-    # Resize input tensor if necessary
     if current_input_shape[1] > input_tensor.shape[1]:
         input_tensor = torch.empty(current_input_shape, dtype=torch.int64, device="cpu").contiguous()
         io_binding.bind_input(
-            name=input_meta.name,
-            device_type="cpu",
-            device_id=0,
-            element_type=input_type,
-            shape=current_input_shape,
             buffer_ptr=input_tensor.data_ptr(),
         )
-    # Copy input data
     input_tensor[:current_input_shape[0], :current_input_shape[1]].copy_(input_ids)
-    # Resize output tensor if necessary
     required_output_length = current_input_shape[1] * 10
     if required_output_length > output_tensor.shape[2]:
         output_shape = (1, 1, required_output_length)
         output_tensor = torch.empty(output_shape, dtype=torch.float32, device="cpu").contiguous()
         io_binding.bind_output(
-            name=output_meta.name,
-            device_type="cpu",
-            device_id=0,
-            element_type=np.float32,
-            shape=output_shape,
             buffer_ptr=output_tensor.data_ptr(),
         )
-    # Clear binding
-    io_binding.clear_binding_outputs()
-    # Run inference
     ort_session.run_with_iobinding(io_binding)
-    # Get output
     ort_outputs = io_binding.get_outputs()
     output_data = ort_outputs[0].numpy()
     return (sampling_rate, output_data.squeeze())
 # --- Gradio Interface ---
 iface = gr.Interface(
     fn=tts_inference_io_binding,
-    inputs=gr.Textbox(lines=3, placeholder="Enter text here..."),  # Slightly larger textbox
     outputs=gr.Audio(type="numpy", label="Generated Speech"),
-    title="Optimized MMS-TTS (English) with ONNX Runtime",
-    description="Fast Text-to-Speech using the facebook/mms-tts-eng model, optimized with ONNX Runtime and IO Binding.  Model loaded from Hugging Face Hub.",
     examples=[
-        ["Hello, this is a demonstration of optimized text-to-speech."],
-        ["This model uses ONNX Runtime and IO Binding for fast CPU inference."],
         ["The quick brown fox jumps over the lazy dog."],
-        ["Try entering your own text to hear how it sounds!"]
     ],
-    cache_examples=False,  # Disable example caching (important for dynamic TTS)
 )
 if __name__ == "__main__":

 from transformers import AutoTokenizer
 import onnxruntime
 from huggingface_hub import hf_hub_download
+import os  # Import the 'os' module
 # --- Configuration ---
 repo_id = "Athspi/Gg"  # Your Hugging Face Hub repository ID
+onnx_filename = "mms_tts_eng.onnx"  # Name of the ONNX file
+sampling_rate = 16000
+# --- Download ONNX Model (and handle location) ---
+# Option 1: Use the cached path (Recommended)
 onnx_model_path = hf_hub_download(repo_id=repo_id, filename=onnx_filename)
+print(f"ONNX model downloaded to (cache): {onnx_model_path}")
+# Option 2: Download to a specific directory (e.g., the current working directory)
+# output_dir = "."  # Current directory
+# onnx_model_path = hf_hub_download(repo_id=repo_id, filename=onnx_filename, cache_dir=output_dir)
+# print(f"ONNX model downloaded to: {onnx_model_path}")
+# Option 3:  Download to a custom directory:
+# output_dir = "models"  # Or any directory you want
+# os.makedirs(output_dir, exist_ok=True) # Create directory if it doesn't exist
+# onnx_model_path = hf_hub_download(repo_id=repo_id, filename=onnx_filename, cache_dir=output_dir)
+# print(f"ONNX model downloaded to: {onnx_model_path}")
+# --- Load Tokenizer ---
 tokenizer = AutoTokenizer.from_pretrained(repo_id)
+# --- ONNX Runtime Session Setup ---
 session_options = onnxruntime.SessionOptions()
 session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
 try:
     import psutil
     num_physical_cores = psutil.cpu_count(logical=False)
 except ImportError:
+    print("psutil not installed. Install with: pip install psutil")
+    num_physical_cores = 4
+    print(f"Using default: {num_physical_cores}")
 session_options.intra_op_num_threads = num_physical_cores
+session_options.inter_op_num_threads = 1
 ort_session = onnxruntime.InferenceSession(
     onnx_model_path,
+    providers=['CPUExecutionProvider'],
     sess_options=session_options,
 )
 # --- IO Binding Setup ---
 io_binding = ort_session.io_binding()
 input_meta = ort_session.get_inputs()[0]
 output_meta = ort_session.get_outputs()[0]
 dummy_input = tokenizer("a", return_tensors="pt")["input_ids"].to(torch.long)
 input_shape = tuple(dummy_input.shape)
 input_type = dummy_input.numpy().dtype
 input_tensor = torch.empty(input_shape, dtype=torch.int64, device="cpu").contiguous()
+max_output_length = input_shape[1] * 10
 output_shape = (1, 1, max_output_length)
 output_tensor = torch.empty(output_shape, dtype=torch.float32, device="cpu").contiguous()
 io_binding.bind_input(
+    name=input_meta.name, device_type="cpu", device_id=0,
+    element_type=input_type, shape=input_shape, buffer_ptr=input_tensor.data_ptr(),
 )
 io_binding.bind_output(
+    name=output_meta.name, device_type="cpu", device_id=0,
+    element_type=np.float32, shape=output_shape, buffer_ptr=output_tensor.data_ptr(),
 )
+# --- Inference Function ---
 def tts_inference_io_binding(text: str):
     """TTS inference with IO Binding."""
     input_ids = inputs.input_ids.to(torch.long)
     current_input_shape = tuple(input_ids.shape)
     if current_input_shape[1] > input_tensor.shape[1]:
         input_tensor = torch.empty(current_input_shape, dtype=torch.int64, device="cpu").contiguous()
         io_binding.bind_input(
+            name=input_meta.name, device_type="cpu", device_id=0,
+            element_type=input_type, shape=current_input_shape,
             buffer_ptr=input_tensor.data_ptr(),
         )
     input_tensor[:current_input_shape[0], :current_input_shape[1]].copy_(input_ids)
     required_output_length = current_input_shape[1] * 10
     if required_output_length > output_tensor.shape[2]:
         output_shape = (1, 1, required_output_length)
         output_tensor = torch.empty(output_shape, dtype=torch.float32, device="cpu").contiguous()
         io_binding.bind_output(
+            name=output_meta.name, device_type="cpu", device_id=0,
+            element_type=np.float32, shape=output_shape,
             buffer_ptr=output_tensor.data_ptr(),
         )
+    io_binding.clear_binding_outputs()
     ort_session.run_with_iobinding(io_binding)
     ort_outputs = io_binding.get_outputs()
     output_data = ort_outputs[0].numpy()
     return (sampling_rate, output_data.squeeze())
 # --- Gradio Interface ---
 iface = gr.Interface(
     fn=tts_inference_io_binding,
+    inputs=gr.Textbox(lines=3, placeholder="Enter text here..."),
     outputs=gr.Audio(type="numpy", label="Generated Speech"),
+    title="Optimized MMS-TTS (English)",
+    description="Fast TTS with ONNX Runtime and IO Binding (Hugging Face Hub).",
     examples=[
+        ["Hello, this is a demonstration."],
+        ["This uses ONNX Runtime and IO Binding."],
         ["The quick brown fox jumps over the lazy dog."],
+        ["Try your own text!"]
     ],
+    cache_examples=False,
 )
 if __name__ == "__main__":