Spaces:

TeamTonic
/

SeamlessOnDevice

Running

App Files Community

Tonic committed on Nov 20, 2023

Commit

678c468

1 Parent(s): 8fa6df8

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -8

app.py CHANGED Viewed

@@ -2,19 +2,27 @@ import gradio as gr
 import torchaudio
 import torch
-def speech_to_text(audio_file):
-    audio_input, _ = torchaudio.load(audio_file.name)
     s2t_model = torch.jit.load("unity_on_device_s2t.ptl")
     with torch.no_grad():
-        text = s2t_model(audio_input, tgt_lang=TGT_LANG)
     return text
-def speech_to_speech_translation(audio_file):
-    audio_input, _ = torchaudio.load(audio_file.name)
     s2st_model = torch.jit.load("unity_on_device.ptl")
     with torch.no_grad():
-        text, units, waveform = s2st_model(audio_input, tgt_lang=TGT_LANG)
     output_file = "/tmp/result.wav"
     torchaudio.save(output_file, waveform.unsqueeze(0), sample_rate=16000)
     return text, output_file
@@ -22,14 +30,20 @@ def speech_to_speech_translation(audio_file):
 # Gradio interfaces
 iface_s2t = gr.Interface(
     fn=speech_to_text,
-    inputs=gr.Audio(type="file", label="Upload Audio for Speech to Text"),
     outputs="text",
     title="Speech to Text"
 )
 iface_s2st = gr.Interface(
     fn=speech_to_speech_translation,
-    inputs=gr.Audio(type="file", label="Upload Audio for Speech to Speech Translation"),
     outputs=["text", "audio"],
     title="Speech to Speech Translation"
 )

 import torchaudio
 import torch
+# Define the list of target languages
+languages = {
+    "English": "eng",
+    "Hindi": "hin",
+    "Portuguese": "por",
+    "Russian": "rus",
+    "Spanish": "spa"
+}
+def speech_to_text(audio_data, tgt_lang):
+    audio_input, _ = torchaudio.load(audio_data)
     s2t_model = torch.jit.load("unity_on_device_s2t.ptl")
     with torch.no_grad():
+        text = s2t_model(audio_input, tgt_lang=languages[tgt_lang])
     return text
+def speech_to_speech_translation(audio_data, tgt_lang):
+    audio_input, _ = torchaudio.load(audio_data)
     s2st_model = torch.jit.load("unity_on_device.ptl")
     with torch.no_grad():
+        text, units, waveform = s2st_model(audio_input, tgt_lang=languages[tgt_lang])
     output_file = "/tmp/result.wav"
     torchaudio.save(output_file, waveform.unsqueeze(0), sample_rate=16000)
     return text, output_file
 # Gradio interfaces
 iface_s2t = gr.Interface(
     fn=speech_to_text,
+    inputs=[
+        gr.inputs.Audio(label="Upload or Record Audio for Speech to Text"),
+        gr.inputs.Dropdown(list(languages.keys()), label="Select Target Language")
+    ],
     outputs="text",
     title="Speech to Text"
 )
 iface_s2st = gr.Interface(
     fn=speech_to_speech_translation,
+    inputs=[
+        gr.inputs.Audio(label="Upload or Record Audio for Speech to Speech Translation"),
+        gr.inputs.Dropdown(list(languages.keys()), label="Select Target Language")
+    ],
     outputs=["text", "audio"],
     title="Speech to Speech Translation"
 )