# app.py

import gradio as gr
from transformers import pipeline
import soundfile as sf
import numpy as np

# --- EDIT THIS: map display names to your HF Hub model IDs ---
# NOTE: insertion order matters — the FIRST key is used as the default
# selection of the language dropdown in the UI below.
language_models = {
    "Akan (Asanti Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1",
    "Ewe":  "FarmerlineML/w2v-bert-2.0_ewe_2",
    "Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha",
    "Luganda": "FarmerlineML/w2v-bert-2.0_luganda",
    "Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha",
    # add more as needed
}

# Eagerly build one ASR pipeline per configured language at import time,
# so the first transcription request does not pay the model-loading cost.
asr_pipelines = {}
for _display_name, _model_id in language_models.items():
    asr_pipelines[_display_name] = pipeline(
        task="automatic-speech-recognition",
        model=_model_id,
        # device=0,           # uncomment if you have GPU
        chunk_length_s=30,     # adjust if your audio can be longer
    )


def transcribe(audio_path: str, language: str) -> str:
    """
    Transcribe an audio clip with the ASR pipeline selected by *language*.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path to the uploaded/recorded clip (the Audio component
        uses ``type="filepath"``). ``None`` when the user submitted nothing.
    language : str
        Display name chosen in the dropdown; expected to be a key of
        ``asr_pipelines``.

    Returns
    -------
    str
        The transcribed text, or a user-facing warning string when the
        audio is missing or the language selection is unknown.
    """
    if audio_path is None:
        return "⚠️ Please upload or record an audio clip."

    # Guard against a stale/unknown dropdown value: return a warning string
    # (consistent with the missing-audio path) instead of raising KeyError.
    pipe = asr_pipelines.get(language)
    if pipe is None:
        return f"⚠️ Unknown language selection: {language}"

    # Read the file; soundfile returns the samples and the native sample rate.
    speech, sr = sf.read(audio_path)

    # Stereo (or any multi-channel) β†’ mono by averaging across channels.
    if speech.ndim > 1:
        speech = np.mean(speech, axis=1)

    # soundfile yields float64 by default; HF feature extractors expect
    # float32, so cast explicitly before handing the array to the pipeline.
    speech = np.asarray(speech, dtype=np.float32)

    result = pipe({
        "sampling_rate": sr,
        "raw": speech
    })
    # The pipeline returns a dict with a "text" key; fall back to ""
    # defensively rather than crashing the UI.
    return result.get("text", "")


# --- UI layout: dropdown + audio input feeding one transcription button ---
with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
    gr.Markdown(
        """
        ## πŸŽ™οΈ Multilingual Speech-to-Text   
        Upload an audio file or record via your microphone,  
        then choose the language/model and hit **Transcribe**.
        """
    )

    # The first configured language is the default dropdown selection.
    available_languages = list(language_models.keys())

    with gr.Row():
        language_choice = gr.Dropdown(
            choices=available_languages,
            value=available_languages[0],
            label="Select Language / Model"
        )

    with gr.Row():
        audio_input = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Upload or Record Audio"
        )

    run_button = gr.Button("Transcribe")
    transcript_box = gr.Textbox(label="Transcription")

    run_button.click(
        fn=transcribe,
        inputs=[audio_input, language_choice],
        outputs=transcript_box
    )

if __name__ == "__main__":
    demo.launch()