Spaces:
Sleeping
Sleeping
File size: 4,262 Bytes
c0e8dca 0c33dd3 c0e8dca 5928b3c 0c33dd3 71aa7fe c9f09ce 7314930 54f3c62 1ea09d9 7f6cedb 61375c6 7f6cedb 414f828 64dc005 87a6add 0663960 6ded37f 87a6add 6ded37f c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 0c33dd3 c0e8dca 1f8f25c c0e8dca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# app.py
import gradio as gr
from transformers import pipeline
import numpy as np
import librosa # pip install librosa
# --- EDIT THIS: map display names to your HF Hub model IDs ---
# Keys are the display names offered for selection; values are Hugging Face Hub
# model IDs. Keys must be unique: a repeated key in a dict literal silently
# replaces the earlier value instead of raising an error.
language_models = {
    "Akan (Asante Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1",
    "Ewe": "FarmerlineML/w2v-bert-2.0_ewe_2",
    "Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha",
    # "Luganda" used to be listed twice; the later entry won, so only that
    # effective mapping is kept here. The shadowed (never used) model ID was
    # "FarmerlineML/w2v-bert-2.0_luganda". To offer both, give each its own
    # distinct display name.
    "Luganda": "FarmerlineML/luganda_fkd",
    "Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha",
    "FANTE": "misterkissi/w2v2-lg-xls-r-300m-fante",
    "BEMBA": "DarliAI/kissi-w2v2-lg-xls-r-300m-bemba",
    "BAMBARA": "DarliAI/kissi-w2v2-lg-xls-r-300m-bambara",
    "DAGAARE": "DarliAI/kissi-w2v2-lg-xls-r-300m-dagaare",
    "KINYARWANDA": "DarliAI/kissi-w2v2-lg-xls-r-300m-kinyarwanda",
    "FULA": "DarliAI/kissi-wav2vec2-fula-fleurs-full",
    "OROMO": "DarliAI/kissi-w2v-bert-2.0-oromo",
    "RUNYANKORE": "misterkissi/w2v2-lg-xls-r-300m-runyankore",
    "GA": "misterkissi/w2v2-lg-xls-r-300m-ga",
    "VAI": "misterkissi/whisper-small-vai",
    "KASEM": "misterkissi/w2v2-lg-xls-r-300m-kasem",
    "LINGALA": "misterkissi/w2v2-lg-xls-r-300m-lingala",
    "FONGBE": "misterkissi/whisper-small-fongbe",
    "AMHARIC": "misterkissi/w2v2-lg-xls-r-1b-amharic",
    "XHOSA": "misterkissi/w2v2-lg-xls-r-300m-xhosa",
    "TSONGA": "misterkissi/w2v2-lg-xls-r-300m-tsonga",
    # "WOLOF": "misterkissi/w2v2-lg-xls-r-1b-wolof",
    # "HAITIAN CREOLE": "misterkissi/whisper-small-haitian-creole",
    # "KABYLE": "misterkissi/w2v2-lg-xls-r-1b-kabyle",
    "Yoruba": "FarmerlineML/w2v-bert-2.0_yoruba_v1",
    "Luo": "FarmerlineML/w2v-bert-2.0_luo_v2",
    "Somali": "FarmerlineML/w2v-bert-2.0_somali_alpha",
    "Pidgin": "FarmerlineML/pidgin_nigerian",
    "Kikuyu": "FarmerlineML/w2v-bert-2.0_kikuyu",
    "Igbo": "FarmerlineML/w2v-bert-2.0_igbo_v1",
    # add more as needed
}
# Every language's ASR pipeline is constructed once, up front, on the CPU, so
# that later lookups by display name are plain dictionary accesses.
def _make_asr_pipeline(hub_model_id):
    """Return a CPU speech-recognition pipeline for the given Hub model ID."""
    return pipeline(
        task="automatic-speech-recognition",
        model=hub_model_id,
        device=-1,  # force CPU usage
        chunk_length_s=30,
    )


asr_pipelines = {
    display_name: _make_asr_pipeline(hub_model_id)
    for display_name, hub_model_id in language_models.items()
}
def transcribe(audio_path: str, language: str) -> str:
    """Transcribe an audio clip with the ASR pipeline chosen by *language*.

    The clip is decoded with librosa (supports mp3, wav, flac, m4a, ogg,
    etc.), down-mixed to mono and, when the pipeline exposes the sampling
    rate its feature extractor expects, resampled to that rate before being
    passed to the pipeline.

    Args:
        audio_path: Path of the uploaded or recorded clip; empty or ``None``
            when no audio was provided.
        language: Display name used as the key into ``asr_pipelines``.

    Returns:
        The recognised text (an empty string if the pipeline result has no
        ``"text"`` entry), or a warning message when the audio clip or the
        language selection is missing.
    """
    if not audio_path:
        return "⚠️ Please upload or record an audio clip."

    asr = asr_pipelines.get(language)
    if asr is None:
        # A cleared selection or an unknown name would otherwise surface as a
        # bare KeyError; report it in the same style as the missing-audio case.
        return "⚠️ Please select a supported language / model."

    # Decode directly at the rate the model expects so the pipeline does not
    # have to resample the clip itself (the transformers ASR pipeline needs
    # torchaudio for that step). If the rate cannot be determined, keep the
    # file's native rate (sr=None) as before.
    extractor = getattr(asr, "feature_extractor", None)
    target_sr = getattr(extractor, "sampling_rate", None)

    # librosa.load returns a 1D np.ndarray (mono) and the effective sample rate
    speech, sr = librosa.load(audio_path, sr=target_sr, mono=True)

    result = asr({"sampling_rate": sr, "raw": speech})
    return result.get("text", "")
# Build the Gradio UI: a language/model picker, an audio input (upload or
# microphone), a button that runs `transcribe`, and a textbox for the result.
with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
    # The Markdown text is kept flush-left so it carries no leading indentation.
    gr.Markdown(
        """
## 🎙️ Multilingual Speech-to-Text
Upload an audio file (MP3, WAV, FLAC, M4A, OGG,…) or record via your microphone.
Then choose the language/model and hit **Transcribe**.
"""
    )

    # Compute the display names once; the first one is the default selection
    # (no default when the mapping is empty, instead of an IndexError).
    language_names = list(language_models)

    with gr.Row():
        lang = gr.Dropdown(
            choices=language_names,
            value=language_names[0] if language_names else None,
            label="Select Language / Model",
        )

    with gr.Row():
        audio = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",  # transcribe() receives a path on disk
            label="Upload or Record Audio",
        )

    btn = gr.Button("Transcribe")
    output = gr.Textbox(label="Transcription")

    # Wire the button: (audio path, selected language) -> transcription text.
    btn.click(fn=transcribe, inputs=[audio, lang], outputs=output)

if __name__ == "__main__":
    demo.launch()
|