blasisd committed on
Commit
349f722
·
1 Parent(s): bf8155b

Added thread lock to prevent overlapping executions

Browse files
Files changed (1) hide show
  1. src/app.py +26 -20
src/app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
 
3
  from pathlib import Path
 
4
 
5
  import pandas as pd
6
 
@@ -30,6 +31,7 @@ processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
30
  model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
31
  default_sampling_rate = 16_000
32
 
 
33
 
34
  HF_TOKEN = os.getenv("HF_TOKEN")
35
 
@@ -51,26 +53,30 @@ def translate_audio(
51
  :yield: the tuple containing the sampling rate and the audio array
52
  :rtype: tuple[int, np.ndarray]
53
  """
54
- orig_freq, np_array = audio
55
- waveform = torch.from_numpy(np_array)
56
- waveform = waveform.to(torch.float32)
57
- waveform = waveform / 32768.0 # normalize int16 to [-1, 1]
58
-
59
- audio = torchaudio.functional.resample(
60
- waveform, orig_freq=orig_freq, new_freq=default_sampling_rate
61
- ) # must be a 16 kHz waveform array
62
-
63
- audio_inputs = processor(
64
- audios=audio,
65
- return_tensors="pt",
66
- sampling_rate=default_sampling_rate,
67
- )
68
-
69
- audio_array_from_audio = (
70
- model.generate(**audio_inputs, tgt_lang=tgt_language)[0].cpu().numpy().squeeze()
71
- )
72
-
73
- yield (default_sampling_rate, audio_array_from_audio)
 
 
 
 
74
 
75
 
76
  # Supported target languages for speech
 
1
  import os
2
 
3
  from pathlib import Path
4
+ from threading import Lock
5
 
6
  import pandas as pd
7
 
 
31
  model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
32
  default_sampling_rate = 16_000
33
 
34
+ translate_lock = Lock()
35
 
36
  HF_TOKEN = os.getenv("HF_TOKEN")
37
 
 
53
  :yield: the tuple containing the sampling rate and the audio array
54
  :rtype: tuple[int, np.ndarray]
55
  """
56
+ with translate_lock:
57
+ orig_freq, np_array = audio
58
+ waveform = torch.from_numpy(np_array)
59
+ waveform = waveform.to(torch.float32)
60
+ waveform = waveform / 32768.0 # normalize int16 to [-1, 1]
61
+
62
+ audio = torchaudio.functional.resample(
63
+ waveform, orig_freq=orig_freq, new_freq=default_sampling_rate
64
+ ) # must be a 16 kHz waveform array
65
+
66
+ audio_inputs = processor(
67
+ audios=audio,
68
+ return_tensors="pt",
69
+ sampling_rate=default_sampling_rate,
70
+ )
71
+
72
+ audio_array_from_audio = (
73
+ model.generate(**audio_inputs, tgt_lang=tgt_language)[0]
74
+ .cpu()
75
+ .numpy()
76
+ .squeeze()
77
+ )
78
+
79
+ yield (default_sampling_rate, audio_array_from_audio)
80
 
81
 
82
  # Supported target languages for speech