blasisd committed
Commit 86f88a9 · Parent: 4145f35

Removed thread lock, changed concurrency limit and added time limit

Files changed (1):
  src/app.py  +20 -28
src/app.py CHANGED
@@ -1,14 +1,11 @@
 import os

 from pathlib import Path
-from threading import Lock

 import pandas as pd
-
 import torchaudio
 import torch
 import numpy as np
-
 import gradio as gr

 from dotenv import load_dotenv
@@ -18,7 +15,6 @@ from fastrtc import (
     WebRTC,
     ReplyOnPause,
 )
-
 from transformers import AutoProcessor, SeamlessM4Tv2Model


@@ -31,7 +27,6 @@ processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
 model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
 default_sampling_rate = 16_000

-translate_lock = Lock()

 HF_TOKEN = os.getenv("HF_TOKEN")

@@ -53,30 +48,26 @@ def translate_audio(
     :yield: the tuple containing the sampling rate and the audio array
     :rtype: tuple[int, np.ndarray]
     """
-    with translate_lock:
-        orig_freq, np_array = audio
-        waveform = torch.from_numpy(np_array)
-        waveform = waveform.to(torch.float32)
-        waveform = waveform / 32768.0  # normalize int16 to [-1, 1]
-
-        audio = torchaudio.functional.resample(
-            waveform, orig_freq=orig_freq, new_freq=default_sampling_rate
-        )  # must be a 16 kHz waveform array
-
-        audio_inputs = processor(
-            audios=audio,
-            return_tensors="pt",
-            sampling_rate=default_sampling_rate,
-        )
+    orig_freq, np_array = audio
+    waveform = torch.from_numpy(np_array)
+    waveform = waveform.to(torch.float32)
+    waveform = waveform / 32768.0  # normalize int16 to [-1, 1]
+
+    audio = torchaudio.functional.resample(
+        waveform, orig_freq=orig_freq, new_freq=default_sampling_rate
+    )  # must be a 16 kHz waveform array
+
+    audio_inputs = processor(
+        audios=audio,
+        return_tensors="pt",
+        sampling_rate=default_sampling_rate,
+    )

-        audio_array_from_audio = (
-            model.generate(**audio_inputs, tgt_lang=tgt_language)[0]
-            .cpu()
-            .numpy()
-            .squeeze()
-        )
+    audio_array_from_audio = (
+        model.generate(**audio_inputs, tgt_lang=tgt_language)[0].cpu().numpy().squeeze()
+    )

-        yield (default_sampling_rate, audio_array_from_audio)
+    yield (default_sampling_rate, audio_array_from_audio)


 # Supported target languages for speech
@@ -215,7 +206,8 @@ with gr.Blocks(
         ReplyOnPause(translate_audio),
         inputs=[audio, target_lang],
         outputs=[audio],
-        concurrency_limit=1,  # Important: Set to 1 to prevent overlapping executions
+        concurrency_limit=5,
+        time_limit=60,
     )

     # Sticky footer (will stay at bottom on all screen sizes)
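For context, the de-indented `translate_audio` body now runs without any lock: each turn normalizes the incoming int16 PCM to [-1, 1], resamples it to the 16 kHz the model expects, and lets SeamlessM4Tv2 generate the translated speech. Below is a minimal standalone sketch of that path, reusing the same model objects the app loads at module level; the `speech_to_speech_translate` helper name and the hard-coded `"fra"` default target are illustrative, not part of the commit.

```python
import numpy as np
import torch
import torchaudio
from transformers import AutoProcessor, SeamlessM4Tv2Model

# Same objects the app loads at module level.
processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
default_sampling_rate = 16_000


def speech_to_speech_translate(audio: tuple[int, np.ndarray], tgt_language: str = "fra"):
    """Illustrative helper mirroring the lock-free translate_audio body."""
    orig_freq, np_array = audio
    # Browser audio arrives as int16 PCM; scale it to [-1, 1] floats.
    waveform = torch.from_numpy(np_array).to(torch.float32) / 32768.0
    # SeamlessM4Tv2 expects a 16 kHz waveform.
    waveform = torchaudio.functional.resample(
        waveform, orig_freq=orig_freq, new_freq=default_sampling_rate
    )
    audio_inputs = processor(
        audios=waveform, return_tensors="pt", sampling_rate=default_sampling_rate
    )
    translated = (
        model.generate(**audio_inputs, tgt_lang=tgt_language)[0].cpu().numpy().squeeze()
    )
    return default_sampling_rate, translated
```

A call such as `speech_to_speech_translate((48_000, chunk), "spa")` would then return a 16 kHz numpy waveform; the 48 kHz input rate and `"spa"` target here are only examples.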
 
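With the process-wide `Lock` and the `concurrency_limit=1` guard gone, overlap control moves into the stream configuration itself: up to five handler invocations may run at once, and each stream is cut off after 60 seconds. The sketch below shows that wiring with fastrtc's `WebRTC` component inside a Gradio Blocks app; the `echo` handler and the bare component setup are placeholders standing in for the app's actual UI and `translate_audio`.

```python
import gradio as gr
from fastrtc import WebRTC, ReplyOnPause


def echo(audio):
    """Placeholder handler: yields the caller's audio straight back."""
    yield audio


with gr.Blocks() as demo:
    audio = WebRTC(label="Audio", mode="send-receive", modality="audio")
    audio.stream(
        ReplyOnPause(echo),   # run the handler each time the speaker pauses
        inputs=[audio],
        outputs=[audio],
        concurrency_limit=5,  # at most five streams are served concurrently
        time_limit=60,        # each stream is limited to 60 seconds
    )

if __name__ == "__main__":
    demo.launch()
```

The intent, per the commit message, is to bound load through these queue settings rather than by serializing every call behind one lock.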