blasisd committed on
Commit
58a3604
·
1 Parent(s): 349f722

Removed thread lock

Browse files
Files changed (1) hide show
  1. src/app.py +21 -23
src/app.py CHANGED
@@ -18,7 +18,7 @@ from fastrtc import (
18
  WebRTC,
19
  ReplyOnPause,
20
  )
21
- from gradio.utils import get_space
22
  from transformers import AutoProcessor, SeamlessM4Tv2Model
23
 
24
 
@@ -53,30 +53,27 @@ def translate_audio(
53
  :yield: the tuple containing the sampling rate and the audio array
54
  :rtype: tuple[int, np.ndarray]
55
  """
56
- with translate_lock:
57
- orig_freq, np_array = audio
58
- waveform = torch.from_numpy(np_array)
59
- waveform = waveform.to(torch.float32)
60
- waveform = waveform / 32768.0 # normalize int16 to [-1, 1]
61
-
62
- audio = torchaudio.functional.resample(
63
- waveform, orig_freq=orig_freq, new_freq=default_sampling_rate
64
- ) # must be a 16 kHz waveform array
65
-
66
- audio_inputs = processor(
67
- audios=audio,
68
- return_tensors="pt",
69
- sampling_rate=default_sampling_rate,
70
- )
71
 
72
- audio_array_from_audio = (
73
- model.generate(**audio_inputs, tgt_lang=tgt_language)[0]
74
- .cpu()
75
- .numpy()
76
- .squeeze()
77
- )
78
 
79
- yield (default_sampling_rate, audio_array_from_audio)
80
 
81
 
82
  # Supported target languages for speech
@@ -164,6 +161,7 @@ css = """
164
  }
165
  """
166
 
 
167
  with gr.Blocks(
168
  theme=gr.themes.Glass(),
169
  css=css,
 
18
  WebRTC,
19
  ReplyOnPause,
20
  )
21
+
22
  from transformers import AutoProcessor, SeamlessM4Tv2Model
23
 
24
 
 
53
  :yield: the tuple containing the sampling rate and the audio array
54
  :rtype: tuple[int, np.ndarray]
55
  """
56
+ # with translate_lock:
57
+ orig_freq, np_array = audio
58
+ waveform = torch.from_numpy(np_array)
59
+ waveform = waveform.to(torch.float32)
60
+ waveform = waveform / 32768.0 # normalize int16 to [-1, 1]
61
+
62
+ audio = torchaudio.functional.resample(
63
+ waveform, orig_freq=orig_freq, new_freq=default_sampling_rate
64
+ ) # must be a 16 kHz waveform array
65
+
66
+ audio_inputs = processor(
67
+ audios=audio,
68
+ return_tensors="pt",
69
+ sampling_rate=default_sampling_rate,
70
+ )
71
 
72
+ audio_array_from_audio = (
73
+ model.generate(**audio_inputs, tgt_lang=tgt_language)[0].cpu().numpy().squeeze()
74
+ )
 
 
 
75
 
76
+ yield (default_sampling_rate, audio_array_from_audio)
77
 
78
 
79
  # Supported target languages for speech
 
161
  }
162
  """
163
 
164
+
165
  with gr.Blocks(
166
  theme=gr.themes.Glass(),
167
  css=css,