import gradio as gr
import torch
import torchaudio

# Target language for the translation output; "eng" (English) is used here as an example code.
TGT_LANG = "eng"

def speech_to_text(audio_path):
    # Load the uploaded recording; the exported model expects 16 kHz mono audio.
    audio_input, _ = torchaudio.load(audio_path)
    # Load the TorchScript speech-to-text model exported for on-device use.
    s2t_model = torch.jit.load("unity_on_device_s2t.ptl")
    with torch.no_grad():
        text = s2t_model(audio_input, tgt_lang=TGT_LANG)
    return text

def speech_to_speech_translation(audio_path):
    audio_input, _ = torchaudio.load(audio_path)
    # Load the TorchScript speech-to-speech translation model.
    s2st_model = torch.jit.load("unity_on_device.ptl")
    with torch.no_grad():
        text, units, waveform = s2st_model(audio_input, tgt_lang=TGT_LANG)
    # Write the generated 16 kHz waveform to a temporary file for the audio output component.
    output_file = "/tmp/result.wav"
    torchaudio.save(output_file, waveform.unsqueeze(0), sample_rate=16000)
    return text, output_file
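For a quick sanity check outside the web UI, the two functions above can be called directly on a local file. The sketch below is illustrative only: "sample_input.wav" is a placeholder for any 16 kHz mono recording, and the two exported .ptl models are assumed to sit in the working directory.

# Quick local test of both inference functions (no Gradio involved).
print("S2T:", speech_to_text("sample_input.wav"))

text, wav_path = speech_to_speech_translation("sample_input.wav")
print("S2ST text:", text)
print("Translated audio written to:", wav_path)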
# Gradio interfaces
iface_s2t = gr.Interface(
    fn=speech_to_text,
    inputs=gr.Audio(type="filepath", label="Upload Audio for Speech to Text"),
    outputs="text",
    title="Speech to Text",
)

iface_s2st = gr.Interface(
    fn=speech_to_speech_translation,
    inputs=gr.Audio(type="filepath", label="Upload Audio for Speech to Speech Translation"),
    outputs=["text", "audio"],
    title="Speech to Speech Translation",
)

# Combine the two interfaces into collapsible accordion sections.
with gr.Blocks() as demo:
    with gr.Accordion("Speech to Text"):
        iface_s2t.render()
    with gr.Accordion("Speech to Speech Translation", open=False):
        iface_s2st.render()

# Launch the application
demo.launch()
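If collapsible sections are not essential, Gradio's built-in gr.TabbedInterface is a more compact way to combine the two interfaces; a minimal sketch of that variant:

# Alternative layout: expose the two interfaces as tabs instead of accordions.
demo = gr.TabbedInterface(
    [iface_s2t, iface_s2st],
    tab_names=["Speech to Text", "Speech to Speech Translation"],
)
demo.launch()

Either way, launch() serves the app locally (at http://127.0.0.1:7860 by default); passing share=True creates a temporary public link, and on Hugging Face Spaces the script is started automatically when saved as app.py.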