# app.py
"""Gradio demo: multilingual speech-to-text using Hugging Face ASR pipelines."""
import gradio as gr
from transformers import pipeline
import numpy as np
import librosa  # pip install librosa

# --- EDIT THIS: map display names to your HF Hub model IDs ---
language_models = {
    "Akan (Asanti Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1",
    "Ewe": "FarmerlineML/w2v-bert-2.0_ewe_2",
    "Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha",
    "Luganda": "FarmerlineML/w2v-bert-2.0_luganda",
    "Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha",
    "Fante Kissi": "misterkissi/w2v2-lg-xls-r-300m-fante",
    "Runyankore Kissi": "misterkissi/w2v2-lg-xls-r-300m-runyankore",
    # add more as needed
}

# Sample rate (Hz) used only when a pipeline does not expose its feature
# extractor's rate. NOTE(review): 16 kHz is assumed to match the models listed
# above -- confirm if a model with a different rate is added.
DEFAULT_SAMPLE_RATE = 16_000

# Pre-load pipelines for each language on CPU (device=-1).
# Every model is loaded at import time, so start-up cost and memory grow with
# the number of entries in `language_models`.
asr_pipelines = {
    lang: pipeline(
        task="automatic-speech-recognition",
        model=model_id,
        device=-1,  # force CPU usage
        chunk_length_s=30,
    )
    for lang, model_id in language_models.items()
}


def transcribe(audio_path: str, language: str) -> str:
    """Transcribe an audio file with the ASR pipeline selected by *language*.

    The audio is decoded with librosa (so any format librosa can read is
    accepted), down-mixed to mono and resampled to the sample rate the chosen
    model's feature extractor expects before being passed to the pipeline.

    Args:
        audio_path: Filesystem path of the uploaded/recorded clip. Falsy
            values (``None``, ``""``) mean no audio was provided.
        language: Display name of the language; must be a key of
            ``asr_pipelines``.

    Returns:
        The transcription text, or a short warning message when the audio is
        missing or empty, or the language is not recognised.
    """
    if not audio_path:
        return "⚠️ Please upload or record an audio clip."

    # A cleared dropdown yields None; avoid surfacing a bare KeyError.
    asr = asr_pipelines.get(language)
    if asr is None:
        return "⚠️ Please select a supported language."

    # Resample while loading. If the clip were passed at its native rate the
    # pipeline would have to resample internally, which depends on an optional
    # extra package (torchaudio) and fails when that package is not installed.
    extractor = getattr(asr, "feature_extractor", None)
    target_sr = getattr(extractor, "sampling_rate", None) or DEFAULT_SAMPLE_RATE

    # librosa.load returns a 1D np.ndarray (mono) and the sample rate
    speech, sr = librosa.load(audio_path, sr=target_sr, mono=True)
    if speech.size == 0:
        return "⚠️ The audio clip appears to be empty."

    # Call the Hugging Face ASR pipeline
    result = asr({"sampling_rate": sr, "raw": speech})
    return result.get("text", "")


with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
    gr.Markdown(
        """
        ## 🎙️ Multilingual Speech-to-Text
        Upload an audio file (MP3, WAV, FLAC, M4A, OGG,…) or record via your microphone.
        Then choose the language/model and hit **Transcribe**.
        """
    )

    with gr.Row():
        lang = gr.Dropdown(
            choices=list(language_models),
            value=next(iter(language_models)),  # default to the first entry
            label="Select Language / Model",
        )

    with gr.Row():
        audio = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",  # transcribe() expects a path, not raw samples
            label="Upload or Record Audio",
        )

    btn = gr.Button("Transcribe")
    output = gr.Textbox(label="Transcription")

    btn.click(fn=transcribe, inputs=[audio, lang], outputs=output)

if __name__ == "__main__":
    demo.launch()