# Hugging Face Space: multilingual speech-to-text demo.
# app.py
from typing import Optional

import gradio as gr
import numpy as np
import soundfile as sf
from transformers import pipeline
# Maps each dropdown display name to its Hugging Face Hub model ID.
# Append a (name, model_id) pair below to expose another language.
language_models = dict(
    (
        ("Akan (Asanti Twi)", "FarmerlineML/w2v-bert-2.0_twi_alpha_v1"),
        ("Ewe", "FarmerlineML/w2v-bert-2.0_ewe_2"),
        ("Kiswahili", "FarmerlineML/w2v-bert-2.0_swahili_alpha"),
        ("Luganda", "FarmerlineML/w2v-bert-2.0_luganda"),
        ("Brazilian Portuguese", "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha"),
    )
)
# Load pipelines lazily: instantiating all models at import time is slow and
# memory-hungry (each w2v-bert checkpoint is large). A dict subclass keeps the
# existing `asr_pipelines[language]` lookup interface while building each
# pipeline only on first use and caching it for subsequent requests.
class LazyASRPipelines(dict):
    """dict keyed by display language; constructs the ASR pipeline on first access."""

    def __missing__(self, language):
        # Built once per language, then cached in the dict itself so the
        # next lookup is a plain dict hit.
        pipe = pipeline(
            task="automatic-speech-recognition",
            model=language_models[language],
            # device=0,  # uncomment if you have GPU
            chunk_length_s=30,  # adjust if your audio can be longer
        )
        self[language] = pipe
        return pipe


asr_pipelines = LazyASRPipelines()
def transcribe(audio_path: Optional[str], language: str) -> str:
    """Transcribe an audio clip with the ASR model selected for *language*.

    Parameters
    ----------
    audio_path:
        Filesystem path of the uploaded/recorded clip, or ``None`` when the
        gradio Audio component is empty (gradio passes None in that case,
        hence the Optional annotation).
    language:
        Display name; must be a key of ``language_models``.

    Returns
    -------
    str
        The transcribed text, or a human-readable warning for bad input.
    """
    if audio_path is None:
        # Fixed mojibake in the original message ("β οΈ" was a garbled ⚠️).
        return "⚠️ Please upload or record an audio clip."
    # Read samples as a float array plus the file's native sample rate;
    # the pipeline resamples internally based on "sampling_rate".
    speech, sr = sf.read(audio_path)
    # Stereo (or multi-channel) -> mono by averaging channels.
    if speech.ndim > 1:
        speech = np.mean(speech, axis=1)
    # Guard against zero-length recordings, which would crash the model.
    if len(speech) == 0:
        return "⚠️ The audio clip is empty."
    result = asr_pipelines[language]({
        "sampling_rate": sr,
        "raw": speech,
    })
    # ASR pipelines return {"text": ...}; stay defensive if the key is absent.
    return result.get("text", "")
# Assemble the Gradio UI. NOTE(review): the title and heading emoji below look
# mojibake-garbled in the source ("π", "ποΈ") — preserved verbatim here;
# confirm the intended characters (likely 🎙️) with the original author.
with gr.Blocks(title="π Multilingual ASR Demo") as demo:
    gr.Markdown(
        """
        ## ποΈ Multilingual Speech-to-Text
        Upload an audio file or record via your microphone,
        then choose the language/model and hit **Transcribe**.
        """
    )
    # Language picker, defaulting to the first configured model.
    with gr.Row():
        language_choice = gr.Dropdown(
            label="Select Language / Model",
            choices=list(language_models),
            value=next(iter(language_models)),
        )
    # Audio source: file upload or live microphone recording, as a file path.
    with gr.Row():
        audio_input = gr.Audio(
            label="Upload or Record Audio",
            sources=["upload", "microphone"],
            type="filepath",
        )
    transcribe_btn = gr.Button("Transcribe")
    transcript_box = gr.Textbox(label="Transcription")
    # Wire the button: (audio, language) -> transcription text.
    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio_input, language_choice],
        outputs=transcript_box,
    )
# Start the Gradio server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()