Spaces:
Sleeping
Sleeping
File size: 3,894 Bytes
c0e8dca 0c33dd3 c0e8dca 5928b3c 0c33dd3 71aa7fe c9f09ce 7314930 414f828 64dc005 87a6add 0663960 6ded37f 87a6add 6ded37f c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 1f8f25c c0e8dca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# app.py
import gradio as gr
from transformers import pipeline
import numpy as np
import librosa # pip install librosa
# --- EDIT THIS: map dropdown display names to your HF Hub model IDs ---
# Every key must be unique: a repeated key in a dict literal silently replaces
# the earlier entry, which makes that model unreachable from the UI.
language_models = {
    "Akan (Asante Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1",
    "Ewe": "FarmerlineML/w2v-bert-2.0_ewe_2",
    "Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha",
    # This entry used to be keyed "Luganda" as well and was therefore
    # overwritten by the "FarmerlineML/luganda_fkd" entry further down.
    # It now has its own label so both checkpoints can be selected.
    "Luganda (w2v-bert-2.0)": "FarmerlineML/w2v-bert-2.0_luganda",
    "Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha",
    "FANTE": "misterkissi/w2v2-lg-xls-r-300m-fante",
    "BEMBA": "DarliAI/kissi-w2v2-lg-xls-r-300m-bemba",
    "BAMBARA": "DarliAI/kissi-w2v2-lg-xls-r-300m-bambara",
    "DAGAARE": "DarliAI/kissi-w2v2-lg-xls-r-300m-dagaare",
    "KINYARWANDA": "DarliAI/kissi-w2v2-lg-xls-r-300m-kinyarwanda",
    "FULA": "DarliAI/kissi-wav2vec2-fula-fleurs-full",
    "OROMO": "DarliAI/kissi-w2v-bert-2.0-oromo",
    "RUNYANKORE": "misterkissi/w2v2-lg-xls-r-300m-runyankore",
    "GA": "misterkissi/w2v2-lg-xls-r-300m-ga",
    "VAI": "misterkissi/whisper-small-vai",
    "KASEM": "misterkissi/w2v2-lg-xls-r-300m-kasem",
    "LINGALA": "misterkissi/w2v2-lg-xls-r-300m-lingala",
    "FONGBE": "misterkissi/whisper-small-fongbe",
    "AMHARIC": "misterkissi/w2v2-lg-xls-r-1b-amharic",
    "Yoruba": "FarmerlineML/w2v-bert-2.0_yoruba_v1",
    # Keeps the plain "Luganda" label, which is the model that label
    # effectively resolved to before the duplicate key was removed.
    "Luganda": "FarmerlineML/luganda_fkd",
    "Luo": "FarmerlineML/w2v-bert-2.0_luo_v2",
    "Somali": "FarmerlineML/w2v-bert-2.0_somali_alpha",
    "Pidgin": "FarmerlineML/pidgin_nigerian",
    "Kikuyu": "FarmerlineML/w2v-bert-2.0_kikuyu",
    "Igbo": "FarmerlineML/w2v-bert-2.0_igbo_v1",
    # add more as needed
}
# Eagerly build one speech-recognition pipeline per configured language at
# import time, all pinned to the CPU.
def _make_cpu_asr(hub_id):
    """Return an ASR pipeline for the Hub model ``hub_id`` running on the CPU."""
    return pipeline(
        task="automatic-speech-recognition",
        model=hub_id,
        device=-1,  # force CPU usage
        chunk_length_s=30,
    )


# Display name -> ready-to-call pipeline, in the same order as language_models.
asr_pipelines = {
    name: _make_cpu_asr(hub_id) for name, hub_id in language_models.items()
}
def transcribe(audio_path: str, language: str) -> str:
    """Transcribe an audio file with the ASR pipeline chosen by ``language``.

    The file is decoded with librosa (mp3, wav, flac, m4a, ogg, etc.),
    down-mixed to mono, and passed to the pipeline as raw samples together
    with their sampling rate.

    Args:
        audio_path: Path of the uploaded or recorded clip. May be empty or
            ``None`` when no audio was provided.
        language: Key into ``asr_pipelines`` selecting the model to run.

    Returns:
        The recognised text ("" when the pipeline result has no "text"
        entry), or a warning message when the audio or the language
        selection is missing.
    """
    if not audio_path:
        # U+26A0 WARNING SIGN + U+FE0F, written as escapes so the message
        # cannot be mangled by an encoding round trip of this file.
        return "\u26a0\ufe0f Please upload or record an audio clip."

    asr = asr_pipelines.get(language)
    if asr is None:
        # Unknown or missing selection: report it instead of raising KeyError.
        return "\u26a0\ufe0f Please select a language."

    # Decode directly at the rate the model's feature extractor advertises so
    # the pipeline has no resampling left to do. When the pipeline exposes no
    # such attribute, sr=None keeps the file's native rate.
    extractor = getattr(asr, "feature_extractor", None)
    target_sr = getattr(extractor, "sampling_rate", None)

    # librosa.load returns a 1-D np.ndarray (mono) and the sample rate used.
    speech, sr = librosa.load(audio_path, sr=target_sr, mono=True)

    # Call the Hugging Face ASR pipeline on the raw samples.
    result = asr({"sampling_rate": sr, "raw": speech})
    return result.get("text", "")
# Build the Gradio UI: a language selector, an audio input (upload or
# microphone), a button, and a textbox that shows the transcription.
# NOTE(review): the non-ASCII characters in this section were garbled. The
# ellipsis is certain; the two emoji (globe, studio microphone) are a best
# guess - confirm against the intended design.
# NOTE(review): original indentation was lost; the button and textbox are
# assumed to sit directly under the Blocks container - confirm the layout.
with gr.Blocks(title="\U0001F310 Multilingual ASR Demo") as demo:
    gr.Markdown(
        "\n"
        "## \U0001F399\uFE0F Multilingual Speech-to-Text\n"
        "Upload an audio file (MP3, WAV, FLAC, M4A, OGG,\u2026) or record via your microphone.\n"
        "Then choose the language/model and hit **Transcribe**.\n"
    )

    # Materialise the display names once; the first one is the default choice.
    language_names = list(language_models)

    with gr.Row():
        lang = gr.Dropdown(
            choices=language_names,
            # Avoid an IndexError if the model table is ever left empty.
            value=language_names[0] if language_names else None,
            label="Select Language / Model",
        )

    with gr.Row():
        audio = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",  # transcribe() receives a path on disk
            label="Upload or Record Audio",
        )

    btn = gr.Button("Transcribe")
    output = gr.Textbox(label="Transcription")

    # Clicking the button runs transcribe(audio, lang) and shows the result.
    btn.click(fn=transcribe, inputs=[audio, lang], outputs=output)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|