import gradio as gr
import torch
import numpy as np
from transformers import AutoTokenizer
import onnxruntime
from huggingface_hub import hf_hub_download
import os  # Used for directory handling below and as a CPU-count fallback

# --- Configuration ---
repo_id = "Athspi/Gg"  # Your Hugging Face Hub repository ID
onnx_filename = "mms_tts_eng.onnx"  # Name of the ONNX file
sampling_rate = 16000
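# MMS-TTS models generate 16 kHz mono audio; this rate is returned to Gradio for playback.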

# --- Download ONNX Model (and handle location) ---

# Option 1: Use the cached path (Recommended)
onnx_model_path = hf_hub_download(repo_id=repo_id, filename=onnx_filename)
print(f"ONNX model downloaded to (cache): {onnx_model_path}")

# Option 2: Download a plain copy into the current working directory
# (local_dir gives a plain file copy; cache_dir would instead relocate the Hub cache layout)
# output_dir = "."  # Current directory
# onnx_model_path = hf_hub_download(repo_id=repo_id, filename=onnx_filename, local_dir=output_dir)
# print(f"ONNX model downloaded to: {onnx_model_path}")

# Option 3: Download a plain copy into a custom directory
# output_dir = "models"  # Or any directory you want
# os.makedirs(output_dir, exist_ok=True)  # Create the directory if it doesn't exist
# onnx_model_path = hf_hub_download(repo_id=repo_id, filename=onnx_filename, local_dir=output_dir)
# print(f"ONNX model downloaded to: {onnx_model_path}")


# --- Load Tokenizer ---
tokenizer = AutoTokenizer.from_pretrained(repo_id)
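# The tokenizer hosted in the same repo converts raw text into the input_ids tensor the ONNX graph expects.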

# --- ONNX Runtime Session Setup ---

session_options = onnxruntime.SessionOptions()
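# Enable all graph optimizations (constant folding, node fusion, layout optimizations).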
session_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
try:
    import psutil
    # Prefer the physical core count; psutil may return None on some platforms.
    num_physical_cores = psutil.cpu_count(logical=False) or os.cpu_count() or 4
except ImportError:
    print("psutil not installed. Install with: pip install psutil")
    num_physical_cores = os.cpu_count() or 4
    print(f"Using default: {num_physical_cores}")
session_options.intra_op_num_threads = num_physical_cores
session_options.inter_op_num_threads = 1

ort_session = onnxruntime.InferenceSession(
    onnx_model_path,
    providers=['CPUExecutionProvider'],
    sess_options=session_options,
)

# --- IO Binding Setup ---
io_binding = ort_session.io_binding()
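# IO binding pre-allocates input/output buffers once and lets ONNX Runtime read from and
# write into them directly, avoiding per-request allocations and copies.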
input_meta = ort_session.get_inputs()[0]
output_meta = ort_session.get_outputs()[0]
dummy_input = tokenizer("a", return_tensors="pt")["input_ids"].to(torch.long)
input_shape = tuple(dummy_input.shape)
input_type = dummy_input.numpy().dtype
input_tensor = torch.empty(input_shape, dtype=torch.int64, device="cpu").contiguous()
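# Rough initial size for the output buffer; it is grown and re-bound at inference time if needed.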
max_output_length = input_shape[1] * 10
output_shape = (1, 1, max_output_length)
output_tensor = torch.empty(output_shape, dtype=torch.float32, device="cpu").contiguous()

io_binding.bind_input(
    name=input_meta.name, device_type="cpu", device_id=0,
    element_type=input_type, shape=input_shape, buffer_ptr=input_tensor.data_ptr(),
)
io_binding.bind_output(
    name=output_meta.name, device_type="cpu", device_id=0,
    element_type=np.float32, shape=output_shape, buffer_ptr=output_tensor.data_ptr(),
)

# --- Inference Function ---

def tts_inference_io_binding(text: str):
    """TTS inference with IO Binding."""
    global input_tensor, output_tensor, io_binding

    inputs = tokenizer(text, return_tensors="pt")
    input_ids = inputs.input_ids.to(torch.long)
    current_input_shape = tuple(input_ids.shape)

    # Grow the pre-allocated input buffer if this text is longer than any seen so far.
    if current_input_shape[1] > input_tensor.shape[1]:
        input_tensor = torch.empty(current_input_shape, dtype=torch.int64, device="cpu").contiguous()

    input_tensor[:current_input_shape[0], :current_input_shape[1]].copy_(input_ids)

    # Re-bind the input on every call with the current shape, so stale tokens left in the
    # buffer by a previous, longer request are never fed to the model.
    io_binding.bind_input(
        name=input_meta.name, device_type="cpu", device_id=0,
        element_type=input_type, shape=current_input_shape,
        buffer_ptr=input_tensor.data_ptr(),
    )

    required_output_length = current_input_shape[1] * 10
    if required_output_length > output_tensor.shape[2]:
        output_shape = (1, 1, required_output_length)
        output_tensor = torch.empty(output_shape, dtype=torch.float32, device="cpu").contiguous()
        io_binding.bind_output(
            name=output_meta.name, device_type="cpu", device_id=0,
            element_type=np.float32, shape=output_shape,
            buffer_ptr=output_tensor.data_ptr(),
        )

    # Note: io_binding.clear_binding_outputs() must not be called here; it would drop the
    # output binding created above and leave get_outputs() with nothing to return.
    ort_session.run_with_iobinding(io_binding)
    ort_outputs = io_binding.get_outputs()
    output_data = ort_outputs[0].numpy()
    return (sampling_rate, output_data.squeeze())

# --- Gradio Interface ---

iface = gr.Interface(
    fn=tts_inference_io_binding,
    inputs=gr.Textbox(lines=3, placeholder="Enter text here..."),
    outputs=gr.Audio(type="numpy", label="Generated Speech"),
    title="Optimized MMS-TTS (English)",
    description="Fast TTS with ONNX Runtime and IO Binding (Hugging Face Hub).",
    examples=[
        ["Hello, this is a demonstration."],
        ["This uses ONNX Runtime and IO Binding."],
        ["The quick brown fox jumps over the lazy dog."],
        ["Try your own text!"]
    ],
    cache_examples=False,
)

if __name__ == "__main__":
    iface.launch()
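
# To run locally (a sketch; the dependency list below is inferred from the imports above):
#   pip install gradio torch numpy transformers onnxruntime huggingface_hub psutil
#   python <this_file>.py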