# NOTE(review): removed Hugging Face Spaces status-banner residue ("Spaces: Sleeping")
# left over from scraping the Space's web page — it is not part of the program.
# app.py | |
import gradio as gr | |
from transformers import pipeline | |
import numpy as np | |
import librosa # pip install librosa | |
# --- EDIT THIS: map display names to your HF Hub model IDs --- | |
language_models = { | |
"Akan (Asante Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1", | |
"Ewe": "FarmerlineML/w2v-bert-2.0_ewe_2", | |
"Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha", | |
"Luganda": "FarmerlineML/w2v-bert-2.0_luganda", | |
"Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha", | |
"FANTE": "misterkissi/w2v2-lg-xls-r-300m-fante", | |
"BEMBA": "DarliAI/kissi-w2v2-lg-xls-r-300m-bemba", | |
"BAMBARA": "DarliAI/kissi-w2v2-lg-xls-r-300m-bambara", | |
"DAGAARE": "DarliAI/kissi-w2v2-lg-xls-r-300m-dagaare", | |
"KINYARWANDA": "DarliAI/kissi-w2v2-lg-xls-r-300m-kinyarwanda", | |
"FULA": "DarliAI/kissi-wav2vec2-fula-fleurs-full", | |
"OROMO": "DarliAI/kissi-w2v-bert-2.0-oromo", | |
"RUNYANKORE": "misterkissi/w2v2-lg-xls-r-300m-runyankore", | |
"GA": "misterkissi/w2v2-lg-xls-r-300m-ga", | |
"VAI": "misterkissi/whisper-small-vai", | |
"KASEM": "misterkissi/w2v2-lg-xls-r-300m-kasem", | |
"LINGALA": "misterkissi/w2v2-lg-xls-r-300m-lingala", | |
"FONGBE": "misterkissi/whisper-small-fongbe", | |
"AMHARIC": "misterkissi/w2v2-lg-xls-r-1b-amharic", | |
"XHOSA": "misterkissi/w2v2-lg-xls-r-300m-xhosa", | |
"TSONGA": "misterkissi/w2v2-lg-xls-r-300m-tsonga", | |
# "WOLOF": "misterkissi/w2v2-lg-xls-r-1b-wolof", | |
# "HAITIAN CREOLE": "misterkissi/whisper-small-haitian-creole", | |
# "KABYLE": "misterkissi/w2v2-lg-xls-r-1b-kabyle", | |
"Yoruba": "FarmerlineML/w2v-bert-2.0_yoruba_v1", | |
"Luganda": "FarmerlineML/luganda_fkd", | |
"Luo": "FarmerlineML/w2v-bert-2.0_luo_v2", | |
"Somali": "FarmerlineML/w2v-bert-2.0_somali_alpha", | |
"Pidgin": "FarmerlineML/pidgin_nigerian", | |
"Kikuyu": "FarmerlineML/w2v-bert-2.0_kikuyu", | |
"Igbo": "FarmerlineML/w2v-bert-2.0_igbo_v1" | |
# add more as needed | |
} | |
# Pre-load pipelines for each language on CPU (device=-1) | |
asr_pipelines = { | |
lang: pipeline( | |
task="automatic-speech-recognition", | |
model=model_id, | |
device=-1, # force CPU usage | |
chunk_length_s=30 | |
) | |
for lang, model_id in language_models.items() | |
} | |
def transcribe(audio_path: str, language: str) -> str: | |
""" | |
Load the audio via librosa (supports mp3, wav, flac, m4a, ogg, etc.), | |
convert to mono, then run it through the chosen ASR pipeline. | |
""" | |
if not audio_path: | |
return "β οΈ Please upload or record an audio clip." | |
# librosa.load returns a 1D np.ndarray (mono) and the sample rate | |
speech, sr = librosa.load(audio_path, sr=None, mono=True) | |
# Call the Hugging Face ASR pipeline | |
result = asr_pipelines[language]({ | |
"sampling_rate": sr, | |
"raw": speech | |
}) | |
return result.get("text", "") | |
with gr.Blocks(title="π Multilingual ASR Demo") as demo: | |
gr.Markdown( | |
""" | |
## ποΈ Multilingual Speech-to-Text | |
Upload an audio file (MP3, WAV, FLAC, M4A, OGG,β¦) or record via your microphone. | |
Then choose the language/model and hit **Transcribe**. | |
""" | |
) | |
with gr.Row(): | |
lang = gr.Dropdown( | |
choices=list(language_models.keys()), | |
value=list(language_models.keys())[0], | |
label="Select Language / Model" | |
) | |
with gr.Row(): | |
audio = gr.Audio( | |
sources=["upload", "microphone"], | |
type="filepath", | |
label="Upload or Record Audio" | |
) | |
btn = gr.Button("Transcribe") | |
output = gr.Textbox(label="Transcription") | |
btn.click(fn=transcribe, inputs=[audio, lang], outputs=output) | |
if __name__ == "__main__": | |
demo.launch() | |