# app.py
"""Multilingual speech-to-text demo.

Gradio app that lets a user upload or record audio, pick a language,
and transcribe it with a per-language wav2vec2-BERT ASR model hosted
on the Hugging Face Hub.
"""

import gradio as gr
import numpy as np
import soundfile as sf
from transformers import pipeline

# --- EDIT THIS: map display names to your HF Hub model IDs ---
language_models = {
    "Akan (Asanti Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1",
    "Ewe": "FarmerlineML/w2v-bert-2.0_ewe_2",
    "Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha",
    "Luganda": "FarmerlineML/w2v-bert-2.0_luganda",
    "Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha",
    # add more as needed
}

# Pre-load pipelines for each language at import time so the first
# request per language doesn't pay the model-download/load cost.
asr_pipelines = {
    lang: pipeline(
        task="automatic-speech-recognition",
        model=model_id,
        # device=0,  # uncomment if you have GPU
        chunk_length_s=30,  # adjust if your audio can be longer
    )
    for lang, model_id in language_models.items()
}


def transcribe(audio_path: str, language: str) -> str:
    """Transcribe an audio file with the pipeline selected by *language*.

    Args:
        audio_path: Filesystem path to the uploaded/recorded clip
            (Gradio ``type="filepath"``), or ``None`` if nothing was given.
        language: Display name; must be a key of ``language_models``.

    Returns:
        The transcription text, or a user-facing warning/error message
        (the function never raises into the Gradio UI).
    """
    if audio_path is None:
        return "⚠️ Please upload or record an audio clip."

    # Guard against a stale/unknown dropdown value instead of a KeyError.
    asr = asr_pipelines.get(language)
    if asr is None:
        return f"⚠️ Unknown language selection: {language!r}"

    # Read the file; surface decode failures as a friendly message
    # rather than a stack trace in the UI.
    try:
        speech, sr = sf.read(audio_path)
    except (RuntimeError, sf.LibsndfileError, OSError) as err:
        return f"⚠️ Could not read the audio file: {err}"

    if speech.size == 0:
        return "⚠️ The audio clip is empty."

    # Stereo → mono: average across channels.
    if speech.ndim > 1:
        speech = np.mean(speech, axis=1)

    # soundfile returns float64 by default; feature extractors expect float32.
    speech = np.ascontiguousarray(speech, dtype=np.float32)

    result = asr({"sampling_rate": sr, "raw": speech})
    return result.get("text", "")


with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
    gr.Markdown(
        """
        ## 🎙️ Multilingual Speech-to-Text
        Upload an audio file or record via your microphone, then choose the language/model and hit **Transcribe**.
        """
    )
    # Compute the choices once instead of rebuilding the list twice.
    language_choices = list(language_models)
    with gr.Row():
        lang = gr.Dropdown(
            choices=language_choices,
            value=language_choices[0],
            label="Select Language / Model",
        )
    with gr.Row():
        audio = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Upload or Record Audio",
        )
    btn = gr.Button("Transcribe")
    output = gr.Textbox(label="Transcription")
    btn.click(fn=transcribe, inputs=[audio, lang], outputs=output)

if __name__ == "__main__":
    demo.launch()