import functools
import os

import cloudscraper
import gradio as gr
import requests
import torch
from transformers import pipeline


# Hugging Face access token read from the environment (None when unset);
# it is handed to `pipeline(..., token=HF_TOKEN)` when a model is loaded.
HF_TOKEN = os.getenv("HF_TOKEN")

# Endpoint that receives the feedback form as a multipart POST.
BACKEND_URL = "https://asr-evaluation-backend.emergentai.ug/submit-feedback"

# Language name (as shown in the language dropdown) -> Hugging Face model id
# used to transcribe audio in that language.
model_map = {
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1-nolm",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
}

# Make sure a local "responses" directory exists at import time.
# NOTE(review): nothing visible in this file writes to it -- confirm whether
# it is still needed.
os.makedirs("responses", exist_ok=True)

# `device` argument for the transformers pipeline: GPU index 0 when CUDA is
# available, otherwise -1 (CPU in the pipeline's device convention).
inference_device = 0 if torch.cuda.is_available() else -1


@functools.lru_cache(maxsize=1)
def _load_asr_pipeline(language):
    """Build the speech-recognition pipeline for ``language`` and memoise it.

    Only the most recently used pipeline is retained (``maxsize=1``): repeated
    transcriptions in the same language reuse the already-loaded model instead
    of reloading it on every click, while at most one model is kept alive by
    the cache.
    """
    return pipeline(
        "automatic-speech-recognition",
        model=model_map[language],
        device=inference_device,
        token=HF_TOKEN,
    )


def transcribe(audio, language):
    """Transcribe an audio file with the model registered for ``language``.

    Args:
        audio: Path to the audio file (the audio component is created with
            ``type="filepath"``); falsy when nothing was uploaded or recorded.
        language: A key of ``model_map``; ``None`` when the dropdown is empty.

    Returns:
        A ``(text, audio)`` tuple: the transcription and the unchanged audio
        path, matching the two outputs wired to the "Transcribe" button.

    Raises:
        gr.Error: If no audio was provided or ``language`` is not a key of
            ``model_map``, so the user sees an actionable message instead of
            a bare ``KeyError``.
    """
    if not audio:
        raise gr.Error("Please upload or record audio before transcribing.")
    if language not in model_map:
        raise gr.Error("Please select a language before transcribing.")

    asr = _load_asr_pipeline(language)
    text = asr(audio)["text"]
    return text, audio


def save_feedback(audio_file, transcription, user_id, lang, env, device, domain, accuracy,
                  transcript_edit, orthography, orthography_issues,
                  meaning, meaning_loss, errors, error_examples, performance):
    """Send the evaluated audio and the feedback form to the backend.

    The audio file is read from disk and posted, together with the form
    fields, as a multipart request to ``BACKEND_URL``.

    Args:
        audio_file: Path of the audio that was transcribed; falsy when no
            audio is present.
        transcription: Text produced by the model.
        user_id, lang, env, device, domain, accuracy, transcript_edit,
        orthography, orthography_issues, meaning, meaning_loss,
        error_examples, performance: Form values, forwarded unchanged.
        errors: Selected error categories; joined with "," before sending
            (an empty/missing selection is sent as "").

    Returns:
        A human-readable status string for the "Submission status" box.
        Read and network failures are reported in that string rather than
        raised.
    """
    # Without this guard a missing recording surfaced as a misleading
    # "Could not connect to the backend" message.
    if not audio_file:
        return "⚠️ No audio found. Please upload or record audio before submitting feedback."

    try:
        with open(audio_file, "rb") as f:
            audio_content = f.read()
    except OSError as e:
        return f"❌ Could not read the audio file: {e}"

    metadata = {
        "transcription": transcription,
        "user_id": user_id,
        "transcript_edit": transcript_edit,
        "evaluated_language": lang,
        "environment": env,
        "device": device,
        "domain": domain,
        "accuracy": accuracy,
        "orthography": orthography,
        "orthography_issues": orthography_issues,
        "meaning": meaning,
        "meaning_loss": meaning_loss,
        "errors": ",".join(errors) if errors else "",
        "error_examples": error_examples,
        "performance": performance,
    }

    # The upload is always labelled audio.wav / audio/wav, whatever the
    # source file's actual name or format.
    files = {
        "audio_file": ("audio.wav", audio_content, "audio/wav"),
    }

    # Deliberately broad: this is the UI boundary, and any failure while
    # creating the scraper or posting is reported to the user, not raised.
    try:
        scraper = cloudscraper.create_scraper()
        response = scraper.post(BACKEND_URL, data=metadata, files=files, timeout=20)
    except Exception as e:
        return f"❌ Could not connect to the backend: {str(e)}"

    # Only 201 is treated as success; any other status is shown with its body.
    if response.status_code == 201:
        return "✅ Feedback submitted successfully. Thank you!"
    return f"⚠️ Submission failed: {response.status_code} — {response.text}"


# UI layout: a transcription section followed by the feedback form.
# Components are rendered in the order they are created below.
with gr.Blocks() as demo:
    gr.Markdown("## African ASR Evaluation Platform")

    # --- Transcription -----------------------------------------------------
    gr.Markdown("**Select Language**")
    lang = gr.Dropdown(list(model_map.keys()), label="", value=None)
    gr.Markdown("**Upload or Record Audio**")
    # type="filepath": handlers receive the audio as a path on disk.
    audio_input = gr.Audio(
        sources=["upload", "microphone"],
        type="filepath",
        label="Upload or record audio",
    )

    submit_btn = gr.Button("Transcribe")
    gr.Markdown("**Transcription**")
    transcribed_text = gr.Textbox(label="", interactive=False)

    # transcribe returns (text, audio); the audio path is written back to the
    # same audio component.
    submit_btn.click(
        fn=transcribe,
        inputs=[audio_input, lang],
        outputs=[transcribed_text, audio_input],
    )

    # --- Feedback form -----------------------------------------------------
    gr.Markdown("---\n## Feedback Form")
    user_id = gr.Textbox(label="Please enter user ID.*")
    env = gr.Dropdown(
        [
            "Studio/Professional Recording",
            "Quiet Room (minimal noise)",
            "Noisy Background (e.g., street, cafe, market)",
        ],
        label="What was the type of recording environment for the speech you evaluated? *",
        value=None,
    )
    device = gr.Dropdown(
        [
            "Mobile Phone/Tablet",
            "Laptop/Computer Microphone",
            "Dedicated Microphone (e.g., headset, studio mic)",
        ],
        label="What type of recording device was used? *",
        value=None,
    )
    domain = gr.Textbox(label="Was the speech related to a specific topic? If yes, please specify the topic (e.g., news, education, medical, law, religious, sports, science).")
    accuracy = gr.Slider(1, 5, step=1, label="Overall, how accurate was the model's transcription for the audio you reviewed? *")
    transcript_edit = gr.Textbox(label="If the transcription provided by the model was incorrect, please enter your corrected version.")
    orthography = gr.Radio(
        [
            "Yes, mostly correct",
            "No, major issues",
            "Partially (some correct, some incorrect)",
            "Not Applicable",
        ],
        label="Did the transcription correctly use the standard orthography (including accents, diacritics, special characters) for the language?",
        value=None,
    )
    orthography_issues = gr.Textbox(label="If you selected \"No\" or \"Partially\", please describe any significant orthography issues you noticed.")
    meaning = gr.Slider(1, 5, step=1, label="Did the model's transcription preserve the original meaning of the speech? *")
    meaning_loss = gr.Textbox(label="If the meaning was not fully preserved (i.e., you rated 1-4 above), please briefly explain how it was changed or lost.")
    errors = gr.CheckboxGroup(
        [
            "Substitutions (wrong words used)",
            "Omissions (words missing)",
            "Insertions (extra words added)",
            "Pronunciation-related errors (phonetically plausible but wrong word/spelling)",
            "Diacritic/Tone/Special Character errors",
            "Code-switching errors (mixing languages incorrectly)",
            "Named Entity errors (names of people/places wrong)",
            "Punctuation errors",
            "No significant errors observed",
        ],
        label="Which types of errors were most prominent or impactful in the transcriptions? *",
        value=[],
    )
    error_examples = gr.Textbox(label="(Optional) Can you provide 1-2 examples of significant errors and how you would correct them?")
    performance = gr.Textbox(label="Please describe the model's performance in your own words. What did it do well? What did it struggle with? *")

    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(label="Submission status", interactive=False)

    # The input order must match save_feedback's positional parameters.
    save_btn.click(
        fn=save_feedback,
        inputs=[
            audio_input, transcribed_text, user_id, lang, env, device, domain, accuracy,
            transcript_edit, orthography, orthography_issues,
            meaning, meaning_loss, errors, error_examples, performance,
        ],
        outputs=[output_msg],
    )

demo.launch()