Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ from streaming_stt_nemo import Model
|
|
| 9 |
import torch
|
| 10 |
import random
|
| 11 |
from openai import OpenAI
|
| 12 |
-
|
| 13 |
|
| 14 |
default_lang = "en"
|
| 15 |
|
|
@@ -91,22 +91,41 @@ async def respond(audio, model, seed):
|
|
| 91 |
await communicate.save(tmp_path)
|
| 92 |
yield tmp_path
|
| 93 |
|
| 94 |
-
#
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
def translate_speech(
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
DESCRIPTION = """ # <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>"""
|
| 112 |
|
|
@@ -146,18 +165,21 @@ with gr.Blocks(css="style.css") as demo:
|
|
| 146 |
)
|
| 147 |
|
| 148 |
with gr.TabItem("Speech Translation"):
|
| 149 |
-
input_audio = gr.Audio(label="
|
| 150 |
target_lang = gr.Dropdown(
|
| 151 |
-
choices=
|
| 152 |
-
value="
|
| 153 |
label="Target Language"
|
| 154 |
)
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
gr.Interface(
|
| 158 |
fn=translate_speech,
|
| 159 |
inputs=[input_audio, target_lang],
|
| 160 |
-
outputs=[
|
| 161 |
live=True
|
| 162 |
)
|
| 163 |
|
|
|
|
| 9 |
import torch
|
| 10 |
import random
|
| 11 |
from openai import OpenAI
|
| 12 |
+
import subprocess
|
| 13 |
|
| 14 |
default_lang = "en"
|
| 15 |
|
|
|
|
| 91 |
await communicate.save(tmp_path)
|
| 92 |
yield tmp_path
|
| 93 |
|
| 94 |
+
# Languages offered in the "Target Language" dropdown, mapped to the
# three-letter code handed to the `expressivity_predict` CLI via --tgt_lang.
LANGUAGE_CODES = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
    "Italian": "ita",
    "Chinese": "cmn",
}


def translate_speech(audio_file, target_language):
    """Translate a recorded audio file into speech in the target language.

    Runs the external ``expressivity_predict`` command on ``audio_file`` and
    returns the path of the WAV file it produced.

    Args:
        audio_file: Path of the input audio file. May be ``None`` or empty
            when the callback fires before anything has been recorded.
        target_language: A key of ``LANGUAGE_CODES`` (e.g. ``"Spanish"``).

    Returns:
        Path of the translated WAV file, or ``None`` when there is no input
        audio to translate.

    Raises:
        KeyError: ``target_language`` is not a key of ``LANGUAGE_CODES``.
        subprocess.CalledProcessError: the command exited with a non-zero
            status.
        FileNotFoundError: the command succeeded but wrote no output file.
    """
    # Imported locally so the function does not depend on module-level
    # imports that may not be present in the rest of the file.
    import os
    import tempfile

    if target_language not in LANGUAGE_CODES:
        supported = ", ".join(LANGUAGE_CODES)
        raise KeyError(
            f"Unsupported target language {target_language!r}; "
            f"expected one of: {supported}"
        )
    language_code = LANGUAGE_CODES[target_language]

    # Nothing recorded yet: return "no audio" instead of passing None to
    # subprocess (which would raise TypeError).
    if not audio_file:
        return None

    # Write into a fresh per-call directory. A fixed file name in the working
    # directory would be overwritten by concurrent requests, and a stale file
    # from an earlier run could be mistaken for this run's output.
    output_dir = tempfile.mkdtemp(prefix="translated_")
    output_file = os.path.join(output_dir, "translated_audio.wav")

    command = [
        "expressivity_predict",
        audio_file,
        "--tgt_lang", language_code,
        "--model_name", "seamless_expressivity",
        "--vocoder_name", "vocoder_pretssel",
        "--gated-model-dir", "seamlessmodel",
        "--output_path", output_file,
    ]

    # Argument list (no shell) keeps the user-supplied path from being
    # interpreted by a shell; check=True surfaces a failed run as an error.
    subprocess.run(command, check=True)

    if not os.path.exists(output_file):
        # Fail loudly rather than returning a path that does not exist.
        raise FileNotFoundError(f"File not found: {output_file}")

    print(f"File created successfully: {output_file}")
    return output_file
|
| 129 |
|
| 130 |
DESCRIPTION = """ # <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>"""
|
| 131 |
|
|
|
|
| 165 |
)
|
| 166 |
|
| 167 |
with gr.TabItem("Speech Translation"):
|
| 168 |
+
input_audio = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
|
| 169 |
target_lang = gr.Dropdown(
|
| 170 |
+
choices=list(LANGUAGE_CODES.keys()),
|
| 171 |
+
value="Spanish",
|
| 172 |
label="Target Language"
|
| 173 |
)
|
| 174 |
+
output_audio = gr.Audio(label="Translated Audio",
|
| 175 |
+
interactive=False,
|
| 176 |
+
autoplay=True,
|
| 177 |
+
elem_classes="audio")
|
| 178 |
|
| 179 |
gr.Interface(
|
| 180 |
fn=translate_speech,
|
| 181 |
inputs=[input_audio, target_lang],
|
| 182 |
+
outputs=[output_audio],
|
| 183 |
live=True
|
| 184 |
)
|
| 185 |
|