Spaces:
Build error
Build error
hashhac
committed on
Commit
·
be00791
1
Parent(s):
a70a34d
fixes
Browse files
app.py
CHANGED
|
@@ -15,17 +15,23 @@ asr_model = SpeechT5ForSpeechToText.from_pretrained("microsoft/speecht5_asr").to
|
|
| 15 |
tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(device)
|
| 16 |
|
| 17 |
# Function to convert speech to text
|
| 18 |
-
def speech_to_text(
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
with torch.no_grad():
|
| 21 |
logits = asr_model(inputs).logits
|
|
|
|
| 22 |
predicted_ids = torch.argmax(logits, dim=-1)
|
| 23 |
transcription = processor.batch_decode(predicted_ids)[0]
|
| 24 |
return transcription
|
| 25 |
|
| 26 |
# Function to convert text to speech
|
| 27 |
def text_to_speech(text):
|
| 28 |
-
inputs = processor(text, return_tensors="pt").input_ids.to(device)
|
| 29 |
with torch.no_grad():
|
| 30 |
speech = tts_model.generate_speech(inputs)
|
| 31 |
return speech
|
|
|
|
| 15 |
tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(device)
|
| 16 |
|
| 17 |
# Function to convert speech to text
|
| 18 |
+
def speech_to_text(audio_dict):
    """Transcribe an audio clip to text with the SpeechT5 ASR model.

    Args:
        audio_dict: Mapping whose ``"array"`` entry holds the raw waveform
            samples. The samples are handed to the processor as 16 kHz
            audio; NOTE(review): any ``"sampling_rate"`` entry in the
            mapping is ignored -- confirm callers resample beforehand.

    Returns:
        The decoded transcription of the first item in the batch.

    Raises:
        KeyError: If ``audio_dict`` has no ``"array"`` entry.
    """
    audio_array = audio_dict["array"]

    # Keep the processor's full output (not just ``input_values``) so that
    # any attention mask it produces is forwarded to the model as well.
    features = processor(
        audio=audio_array, sampling_rate=16000, return_tensors="pt"
    ).to(device)

    # SpeechT5ForSpeechToText is an encoder-decoder model: a bare forward
    # pass has no decoder inputs to score, so token ids must be produced
    # autoregressively with generate() rather than by argmax over logits.
    with torch.no_grad():
        predicted_ids = asr_model.generate(
            **features,
            # Bound decoding by the text decoder's positional limit.
            max_length=asr_model.config.max_text_positions,
        )

    # Drop special tokens so they do not leak into the transcription.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
| 31 |
|
| 32 |
# Function to convert text to speech
|
| 33 |
def text_to_speech(text):
    """Synthesize speech for ``text`` with the SpeechT5 TTS model.

    Args:
        text: The text to tokenize and feed to the TTS model.

    Returns:
        Whatever ``tts_model.generate_speech`` returns for the token ids.
        NOTE(review): no speaker embeddings or vocoder are passed here, so
        this is presumably a spectrogram rather than a waveform -- confirm
        against the caller.
    """
    encoded = processor(text=text, return_tensors="pt")
    token_ids = encoded.input_ids.to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        return tts_model.generate_speech(token_ids)
|