blasisd committed on
Commit
58a3604
·
1 Parent(s): 349f722

Removed thread lock

Browse files
Files changed (1) hide show
  1. src/app.py +21 -23
src/app.py CHANGED
@@ -18,7 +18,7 @@ from fastrtc import (
18
  WebRTC,
19
  ReplyOnPause,
20
  )
21
- from gradio.utils import get_space
22
  from transformers import AutoProcessor, SeamlessM4Tv2Model
23
 
24
 
@@ -53,30 +53,27 @@ def translate_audio(
53
  :yield: the tuple containing the sampling rate and the audio array
54
  :rtype: tuple[int, np.ndarray]
55
  """
56
- with translate_lock:
57
- orig_freq, np_array = audio
58
- waveform = torch.from_numpy(np_array)
59
- waveform = waveform.to(torch.float32)
60
- waveform = waveform / 32768.0 # normalize int16 to [-1, 1]
61
-
62
- audio = torchaudio.functional.resample(
63
- waveform, orig_freq=orig_freq, new_freq=default_sampling_rate
64
- ) # must be a 16 kHz waveform array
65
-
66
- audio_inputs = processor(
67
- audios=audio,
68
- return_tensors="pt",
69
- sampling_rate=default_sampling_rate,
70
- )
71
 
72
- audio_array_from_audio = (
73
- model.generate(**audio_inputs, tgt_lang=tgt_language)[0]
74
- .cpu()
75
- .numpy()
76
- .squeeze()
77
- )
78
 
79
- yield (default_sampling_rate, audio_array_from_audio)
80
 
81
 
82
  # Supported target languages for speech
@@ -164,6 +161,7 @@ css = """
164
  }
165
  """
166
 
 
167
  with gr.Blocks(
168
  theme=gr.themes.Glass(),
169
  css=css,
 
18
  WebRTC,
19
  ReplyOnPause,
20
  )
21
+
22
  from transformers import AutoProcessor, SeamlessM4Tv2Model
23
 
24
 
 
53
  :yield: the tuple containing the sampling rate and the audio array
54
  :rtype: tuple[int, np.ndarray]
55
  """
56
+ # with translate_lock:
57
+ orig_freq, np_array = audio
58
+ waveform = torch.from_numpy(np_array)
59
+ waveform = waveform.to(torch.float32)
60
+ waveform = waveform / 32768.0 # normalize int16 to [-1, 1]
61
+
62
+ audio = torchaudio.functional.resample(
63
+ waveform, orig_freq=orig_freq, new_freq=default_sampling_rate
64
+ ) # must be a 16 kHz waveform array
65
+
66
+ audio_inputs = processor(
67
+ audios=audio,
68
+ return_tensors="pt",
69
+ sampling_rate=default_sampling_rate,
70
+ )
71
 
72
+ audio_array_from_audio = (
73
+ model.generate(**audio_inputs, tgt_lang=tgt_language)[0].cpu().numpy().squeeze()
74
+ )
 
 
 
75
 
76
+ yield (default_sampling_rate, audio_array_from_audio)
77
 
78
 
79
  # Supported target languages for speech
 
161
  }
162
  """
163
 
164
+
165
  with gr.Blocks(
166
  theme=gr.themes.Glass(),
167
  css=css,