Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,10 +9,11 @@ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5Hif
|
|
9 |
from speechbrain.pretrained import EncoderClassifier
|
10 |
from datasets import load_dataset
|
11 |
|
12 |
-
|
13 |
-
import
|
14 |
-
|
15 |
-
|
|
|
16 |
|
17 |
|
18 |
|
@@ -115,19 +116,31 @@ def text_to_speech(text, audio_file=None):
|
|
115 |
# Normalize the input text
|
116 |
normalized_text = normalize_text(text)
|
117 |
|
118 |
-
# Prepare the input for the model
|
119 |
-
inputs = processor(text=normalized_text, return_tensors="pt").to(device)
|
120 |
|
121 |
-
# Use the default speaker embedding
|
122 |
-
speaker_embeddings = default_embedding
|
123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
# Generate speech
|
125 |
-
|
126 |
-
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
|
127 |
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
-
return (16000, speech_np)
|
131 |
|
132 |
iface = gr.Interface(
|
133 |
fn=text_to_speech,
|
|
|
9 |
from speechbrain.pretrained import EncoderClassifier
|
10 |
from datasets import load_dataset
|
11 |
|
12 |
+
|
13 |
+
from piper_tts import Piper
|
14 |
+
|
15 |
+
# Initialize the Piper TTS engine
|
16 |
+
tts = Piper()
|
17 |
|
18 |
|
19 |
|
|
|
116 |
# Normalize the input text
|
117 |
normalized_text = normalize_text(text)
|
118 |
|
119 |
+
# # Prepare the input for the model
|
120 |
+
# inputs = processor(text=normalized_text, return_tensors="pt").to(device)
|
121 |
|
122 |
+
# # Use the default speaker embedding
|
123 |
+
# speaker_embeddings = default_embedding
|
124 |
|
125 |
+
# # Generate speech
|
126 |
+
# with torch.no_grad():
|
127 |
+
# speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
|
128 |
+
|
129 |
+
# speech_np = speech.cpu().numpy()
|
130 |
+
|
131 |
+
# return (16000, speech_np)
|
132 |
+
|
133 |
# Generate speech
|
134 |
+
audio = tts.synthesize(normalized_text)
|
|
|
135 |
|
136 |
+
# Save the audio to a file
|
137 |
+
# with open("output.wav", "wb") as f:
|
138 |
+
# f.write(audio)
|
139 |
+
|
140 |
+
# return audio_file
|
141 |
+
|
142 |
+
return audio
|
143 |
|
|
|
144 |
|
145 |
iface = gr.Interface(
|
146 |
fn=text_to_speech,
|