badrex committed on
Commit
e2dd32d
·
verified ·
1 Parent(s): 2fb4363

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -3,6 +3,7 @@ from transformers import pipeline
3
  import numpy as np
4
  import os
5
  from huggingface_hub import login
 
6
  import spaces
7
 
8
  HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -12,17 +13,24 @@ if HF_TOKEN:
12
  MODEL_ID = "badrex/ASRwanda"
13
  transcriber = pipeline("automatic-speech-recognition", model=MODEL_ID)
14
 
 
@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip with the module-level ASR pipeline.

    Args:
        audio: ``(sample_rate, samples)`` pair. ``samples`` is a NumPy array,
            either 1-D (mono) or 2-D with channels on axis 1. ``None`` is
            accepted and treated as "no audio".
            NOTE(review): presumably supplied by the UI audio widget;
            confirm against the caller.

    Returns:
        The ``"text"`` field of the pipeline result, or ``""`` when there is
        no audio to transcribe.
    """
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y)
    if y.size == 0:
        return ""

    # convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise, but leave an all-zero (silent) clip untouched:
    # dividing by a zero peak would fill the signal with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # Report the clip's real sampling rate instead of a hard-coded 16000.
    # The pipeline accepts raw audio at an arbitrary rate in this dict form
    # and resamples it itself; labelling e.g. 48 kHz audio as 16 kHz makes
    # the model hear slowed-down speech.
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
24
 
25
-
26
  examples = []
27
  examples_dir = "examples"
28
  if os.path.exists(examples_dir):
 
3
  import numpy as np
4
  import os
5
  from huggingface_hub import login
6
+ import librosa
7
  import spaces
8
 
9
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
13
  MODEL_ID = "badrex/ASRwanda"
14
  transcriber = pipeline("automatic-speech-recognition", model=MODEL_ID)
15
 
16
+
@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip with the module-level ASR pipeline.

    The clip is mixed down to mono, converted to float32, resampled to
    16 kHz when its rate differs, peak-normalised, and passed to
    ``transcriber``.

    Args:
        audio: ``(sample_rate, samples)`` pair. ``samples`` is a NumPy array,
            either 1-D (mono) or 2-D with channels on axis 1. ``None`` is
            accepted and treated as "no audio".
            NOTE(review): presumably supplied by the UI audio widget;
            confirm against the caller.

    Returns:
        The ``"text"`` field of the pipeline result, or ``""`` when there is
        no audio to transcribe.
    """
    target_sr = 16000

    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y)
    if y.size == 0:
        return ""

    # convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Cast to float *before* resampling: librosa.resample validates its
    # input and rejects integer PCM buffers, so mono integer audio at a
    # rate other than 16 kHz would otherwise raise.
    y = y.astype(np.float32)

    # resample to 16kHz if needed
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)

    # Peak-normalise, but leave an all-zero (silent) clip untouched:
    # dividing by a zero peak would fill the signal with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": target_sr, "raw": y})["text"]
33
 
 
34
  examples = []
35
  examples_dir = "examples"
36
  if os.path.exists(examples_dir):