fahadqazi committed on
Commit
0629797
·
verified ·
1 Parent(s): f484f3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -12
app.py CHANGED
@@ -9,10 +9,11 @@ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5Hif
9
  from speechbrain.pretrained import EncoderClassifier
10
  from datasets import load_dataset
11
 
12
- import time
13
- import wave
14
- from pathlib import Path
15
- from piper.PiperVoice import PiperVoice # Ensure you have PiperVoice imported correctly
 
16
 
17
 
18
 
@@ -115,19 +116,31 @@ def text_to_speech(text, audio_file=None):
115
  # Normalize the input text
116
  normalized_text = normalize_text(text)
117
 
118
- # Prepare the input for the model
119
- inputs = processor(text=normalized_text, return_tensors="pt").to(device)
120
 
121
- # Use the default speaker embedding
122
- speaker_embeddings = default_embedding
123
 
 
 
 
 
 
 
 
 
124
  # Generate speech
125
- with torch.no_grad():
126
- speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
127
 
128
- speech_np = speech.cpu().numpy()
 
 
 
 
 
 
129
 
130
- return (16000, speech_np)
131
 
132
  iface = gr.Interface(
133
  fn=text_to_speech,
 
9
  from speechbrain.pretrained import EncoderClassifier
10
  from datasets import load_dataset
11
 
12
+
13
+ from piper_tts import Piper
14
+
15
+ # Initialize the Piper TTS engine
16
+ tts = Piper()
17
 
18
 
19
 
 
116
  # Normalize the input text
117
  normalized_text = normalize_text(text)
118
 
119
+ # # Prepare the input for the model
120
+ # inputs = processor(text=normalized_text, return_tensors="pt").to(device)
121
 
122
+ # # Use the default speaker embedding
123
+ # speaker_embeddings = default_embedding
124
 
125
+ # # Generate speech
126
+ # with torch.no_grad():
127
+ # speech = model.generate_speech(inputs["input_ids"], speaker_embeddings.unsqueeze(0), vocoder=vocoder)
128
+
129
+ # speech_np = speech.cpu().numpy()
130
+
131
+ # return (16000, speech_np)
132
+
133
  # Generate speech
134
+ audio = tts.synthesize(normalized_text)
 
135
 
136
+ # Save the audio to a file
137
+ # with open("output.wav", "wb") as f:
138
+ # f.write(audio)
139
+
140
+ # return audio_file
141
+
142
+ return audio
143
 
 
144
 
145
  iface = gr.Interface(
146
  fn=text_to_speech,