FarmerlineML committed on
Commit
0c33dd3
·
verified ·
1 Parent(s): 7f8e389

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -20
app.py CHANGED
@@ -2,28 +2,28 @@
2
 
3
  import gradio as gr
4
  from transformers import pipeline
5
- import soundfile as sf
6
  import numpy as np
 
7
 
8
  # --- EDIT THIS: map display names to your HF Hub model IDs ---
9
  language_models = {
10
- "Akan (Asanti Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1",
11
- "Ewe": "FarmerlineML/w2v-bert-2.0_ewe_2",
12
- "Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha",
13
- "Luganda": "FarmerlineML/w2v-bert-2.0_luganda",
14
- "Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha",
15
- "Fante Kissi":"misterkissi/w2v2-lg-xls-r-300m-fante",
16
- "Runyankore Kissi":"misterkissi/w2v2-lg-xls-r-300m-runyankore",
17
  # add more as needed
18
  }
19
 
20
- # Pre-load pipelines for each language
21
  asr_pipelines = {
22
  lang: pipeline(
23
  task="automatic-speech-recognition",
24
  model=model_id,
25
- # device=0, # uncomment if you have GPU
26
- chunk_length_s=30 # adjust if your audio can be longer
27
  )
28
  for lang, model_id in language_models.items()
29
  }
@@ -31,18 +31,16 @@ asr_pipelines = {
31
 
32
  def transcribe(audio_path: str, language: str) -> str:
33
  """
34
- Load the audio file, convert to mono if needed,
35
- and run it through the selected ASR pipeline.
36
  """
37
- if audio_path is None:
38
  return "⚠️ Please upload or record an audio clip."
39
 
40
- # Read the file
41
- speech, sr = sf.read(audio_path)
42
- # Stereo → mono
43
- if speech.ndim > 1:
44
- speech = np.mean(speech, axis=1)
45
 
 
46
  result = asr_pipelines[language]({
47
  "sampling_rate": sr,
48
  "raw": speech
@@ -54,7 +52,8 @@ with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
54
  gr.Markdown(
55
  """
56
  ## 🎙️ Multilingual Speech-to-Text
57
- Upload an audio file or record via your microphone, then choose the language/model and hit **Transcribe**.
 
58
  """
59
  )
60
 
 
2
 
3
  import gradio as gr
4
  from transformers import pipeline
 
5
  import numpy as np
6
+ import librosa # pip install librosa
7
 
8
  # --- EDIT THIS: map display names to your HF Hub model IDs ---
9
  language_models = {
10
+ "Akan (Asanti Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1",
11
+ "Ewe": "FarmerlineML/w2v-bert-2.0_ewe_2",
12
+ "Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha",
13
+ "Luganda": "FarmerlineML/w2v-bert-2.0_luganda",
14
+ "Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha",
15
+ "Fante Kissi": "misterkissi/w2v2-lg-xls-r-300m-fante",
16
+ "Runyankore Kissi": "misterkissi/w2v2-lg-xls-r-300m-runyankore",
17
  # add more as needed
18
  }
19
 
20
+ # Pre-load pipelines for each language on CPU (device=-1)
21
  asr_pipelines = {
22
  lang: pipeline(
23
  task="automatic-speech-recognition",
24
  model=model_id,
25
+ device=-1, # force CPU usage
26
+ chunk_length_s=30
27
  )
28
  for lang, model_id in language_models.items()
29
  }
 
31
 
32
  def transcribe(audio_path: str, language: str) -> str:
33
  """
34
+ Load the audio via librosa (supports mp3, wav, flac, m4a, ogg, etc.),
35
+ convert to mono, then run it through the chosen ASR pipeline.
36
  """
37
+ if not audio_path:
38
  return "⚠️ Please upload or record an audio clip."
39
 
40
+ # librosa.load returns a 1D np.ndarray (mono) and the sample rate
41
+ speech, sr = librosa.load(audio_path, sr=None, mono=True)
 
 
 
42
 
43
+ # Call the Hugging Face ASR pipeline
44
  result = asr_pipelines[language]({
45
  "sampling_rate": sr,
46
  "raw": speech
 
52
  gr.Markdown(
53
  """
54
  ## 🎙️ Multilingual Speech-to-Text
55
+ Upload an audio file (MP3, WAV, FLAC, M4A, OGG,…) or record via your microphone.
56
+ Then choose the language/model and hit **Transcribe**.
57
  """
58
  )
59