# imports 
import gradio as gr
import os
import requests
from transformers import pipeline
import torch

# Set your FastAPI backend endpoint
BACKEND_URL = "https://asr-evaluation-backend.emergentai.ug/submit-feedback"

# Map each supported language to its asr-africa ASR model checkpoint on the Hugging Face Hub
model_map = {
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
}
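
# To support another language, add a "<language>": "<hub-model-id>" entry to model_map above;
# the language dropdown and the pipeline dictionary below are both built from this mapping.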

# Create storage directory
os.makedirs("responses", exist_ok=True)

# Load an ASR pipeline for each supported language up front (uses the GPU if one is available)
device = 0 if torch.cuda.is_available() else -1
asr_pipelines = {lang: pipeline("automatic-speech-recognition", model=model_name, device=device) for lang, model_name in model_map.items()}

# Transcription function: run the selected language's pipeline on the uploaded or recorded audio
def transcribe(audio, language):
    if audio is None or not language:
        return "Please upload or record audio and select a language.", audio
    text = asr_pipelines[language](audio)["text"]
    return text, audio

# Save feedback by sending it to the FastAPI backend
def save_feedback(audio_file, transcription, user_id, lang, env, device, domain, accuracy,
                  transcript_edit, orthography, orthography_issues,
                  meaning, meaning_loss, errors, error_examples, performance):

    try:
        with open(audio_file, "rb") as f:
            audio_content = f.read()

        metadata = {
            "transcription": transcription,
            "user_id": user_id,
            "transcript_edit": transcript_edit,
            "evaluated_language": lang,
            "environment": env,
            "device": device,
            "domain": domain,
            "accuracy": accuracy,
            "orthography": orthography,
            "orthography_issues": orthography_issues,
            "meaning": meaning,
            "meaning_loss": meaning_loss,
            "errors": ",".join(errors) if errors else "",
            "error_examples": error_examples,
            "performance": performance
        }

        files = {
            "audio_file": ("audio.wav", audio_content, "audio/wav")
        }

        response = requests.post(BACKEND_URL, data=metadata, files=files, timeout=20)

        if response.status_code == 201:
            return "✅ Feedback submitted successfully. Thank you!"
        else:
            return f"⚠️ Submission failed: {response.status_code} - {response.text}"

    except Exception as e:
        return f"❌ Could not connect to the backend: {str(e)}"

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## African ASR Evaluation Platform")
    
    user_id = gr.Textbox(label="Please enter user ID. *")
    proceed_btn = gr.Button("Proceed")

    with gr.Group(visible=False) as main_ui:
        with gr.Row():
            audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
            lang = gr.Dropdown(list(model_map.keys()), label="Select Language", value=None)

        transcribed_text = gr.Textbox(label="Transcribed Text")
        submit_btn = gr.Button("Transcribe")
        submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])

        gr.Markdown("---\n## Feedback Form")
        env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room (minimal noise)", "Noisy Background (e.g., street, cafe, market)","Other"], label="What was the type of recording environment for the speech you evaluated? *",value=None)
        device = gr.Dropdown(["Mobile Phone/Tablet", "Laptop/Computer Microphone", "Dedicated Microphone (e.g., headset, studio mic)", "Other"], label="What type of recording device  was used? *",value=None)
        domain = gr.Textbox(label="Was the speech related to a specific topic? If yes, please specify the topic (e.g., news, education, medical, law, religious, sports, science).")
        accuracy = gr.Slider(1, 10, step=1, label="Overall, how accurate was the model's transcription for the audio you reviewed? *")
        transcript_edit = gr.Textbox(label="If the transcription provided by the model was incorrect, please enter your corrected version.")
        orthography = gr.Radio(["Yes, mostly correct", "No, major issues", "Partially (some correct, some incorrect)", "Not Applicable"], label="Did the transcription correctly use the standard orthography (including accents, diacritics, special characters) for the language?",value=None)
        orthography_issues = gr.Textbox(label="If you selected \"No\" or \"Partially\", please describe any significant orthography issues you noticed.")
        meaning = gr.Slider(1, 5, step=1, label="Did the model's transcription preserve the original meaning of the speech? *")
        meaning_loss = gr.Textbox(label="If the meaning was not fully preserved (i.e., you rated 1-4 above), please briefly explain how it was changed or lost.")
        errors = gr.CheckboxGroup([
            "Substitutions (wrong words used)",
            "Omissions (words missing)",
            "Insertions (extra words added)",
            "Pronunciation-related errors (phonetically plausible but wrong word/spelling)",
            "Diacritic/Tone/Special Character errors",
            "Code-switching errors (mixing languages incorrectly)",
            "Named Entity errors (names of people/places wrong)",
            "Punctuation errors",
            "No significant errors observed"
        ] , label="Which types of errors were most prominent or impactful in the transcriptions? *", value=[])
        error_examples = gr.Textbox(label="(Optional) Can you provide 1-2 examples of significant errors and how you would correct them?")
        performance = gr.Textbox(label="Please describe the model's performance in your own words. What did it do well? What did it struggle with? *")

        save_btn = gr.Button("Submit Feedback")
        output_msg = gr.Textbox(interactive=False)
    
        save_btn.click(
            fn=save_feedback,
            inputs=[
                audio_input, transcribed_text, user_id, lang, env, device, domain, accuracy,
                transcript_edit, orthography, orthography_issues,
                meaning, meaning_loss, errors, error_examples, performance
            ],
            outputs=[output_msg]
        )
    # Reveal the main UI only after a non-empty user ID has been entered
    def reveal_ui(user_input):
        if user_input.strip():
            return gr.update(visible=True)
        else:
            return gr.update(visible=False)
    
    proceed_btn.click(fn=reveal_ui, inputs=[user_id], outputs=[main_ui])
# Launch the interface
demo.launch()
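
# If running outside a managed host such as Hugging Face Spaces, you may want to bind the server
# explicitly, e.g. demo.launch(server_name="0.0.0.0", server_port=7860) (assumed deployment detail).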