# imports
import os

import cloudscraper
import gradio as gr
import torch
from transformers import pipeline

HF_TOKEN = os.getenv("HF_TOKEN")

# FastAPI backend endpoint that receives submitted feedback
BACKEND_URL = "https://asr-evaluation-backend.emergentai.ug/submit-feedback"

# Map each supported language to its fine-tuned ASR checkpoint on the Hugging Face Hub
model_map = {
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1-nolm",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
}
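
# Note: the checkpoints above span several architectures (wav2vec2-XLS-R, MMS,
# Whisper, W2V-BERT); all of them load through the same
# "automatic-speech-recognition" pipeline task used in transcribe() below.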
# Create storage directory for saved responses
os.makedirs("responses", exist_ok=True)

# Run inference on GPU (device 0) if available, otherwise on CPU
inference_device = 0 if torch.cuda.is_available() else -1

# Transcription function
def transcribe(audio, language):
    if audio is None or not language:
        return "Please select a language and provide audio first.", audio
    asr = pipeline("automatic-speech-recognition", model=model_map[language],
                   device=inference_device, token=HF_TOKEN)
    text = asr(audio)["text"]  # the ASR pipeline returns a dict with a "text" key
    return text, audio
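
# Performance note: transcribe() rebuilds the pipeline (and reloads the model)
# on every request. A minimal caching sketch, assuming the host has enough
# memory to keep a few loaded models resident, would memoize one pipeline per
# language with functools.lru_cache:
#
#     from functools import lru_cache
#
#     @lru_cache(maxsize=4)  # tune maxsize to the available RAM/VRAM
#     def get_asr(language):
#         return pipeline("automatic-speech-recognition", model=model_map[language],
#                         device=inference_device, token=HF_TOKEN)
#
# transcribe() could then call get_asr(language) instead of pipeline(...).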
# Save feedback by sending it to the FastAPI backend
def save_feedback(audio_file, transcription, user_id, lang, env, device, domain, accuracy,
                  transcript_edit, orthography, orthography_issues,
                  meaning, meaning_loss, errors, error_examples, performance):
    try:
        with open(audio_file, "rb") as f:
            audio_content = f.read()
        metadata = {
            "transcription": transcription,
            "user_id": user_id,
            "transcript_edit": transcript_edit,
            "evaluated_language": lang,
            "environment": env,
            "device": device,
            "domain": domain,
            "accuracy": accuracy,
            "orthography": orthography,
            "orthography_issues": orthography_issues,
            "meaning": meaning,
            "meaning_loss": meaning_loss,
            "errors": ",".join(errors) if errors else "",
            "error_examples": error_examples,
            "performance": performance,
        }
        files = {
            "audio_file": ("audio.wav", audio_content, "audio/wav")
        }
        # cloudscraper handles Cloudflare challenges that a plain requests session would fail
        scraper = cloudscraper.create_scraper()
        response = scraper.post(BACKEND_URL, data=metadata, files=files, timeout=20)
        if response.status_code == 201:
            return "✅ Feedback submitted successfully. Thank you!"
        else:
            return f"⚠️ Submission failed: {response.status_code} - {response.text}"
    except Exception as e:
        return f"❌ Could not connect to the backend: {str(e)}"
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## African ASR Evaluation Platform")

    gr.Markdown("**Select Language**")
    lang = gr.Dropdown(list(model_map.keys()), label="", value=None)

    gr.Markdown("**Upload or Record Audio**")
    audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
submit_btn = gr.Button("Transcribe")
gr.Markdown("**Transcription**")
transcribed_text = gr.Textbox(label="", interactive=False)
submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])
gr.Markdown("---\n## Feedback Form")
    user_id = gr.Textbox(label="Please enter your user ID. *")
    env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room (minimal noise)", "Noisy Background (e.g., street, cafe, market)"], label="What was the type of recording environment for the speech you evaluated? *", value=None)
    device = gr.Dropdown(["Mobile Phone/Tablet", "Laptop/Computer Microphone", "Dedicated Microphone (e.g., headset, studio mic)"], label="What type of recording device was used? *", value=None)
    domain = gr.Textbox(label="Was the speech related to a specific topic? If yes, please specify the topic (e.g., news, education, medical, law, religious, sports, science).")
    accuracy = gr.Slider(1, 5, step=1, label="Overall, how accurate was the model's transcription for the audio you reviewed? *")
    transcript_edit = gr.Textbox(label="If the transcription provided by the model was incorrect, please enter your corrected version.")
    orthography = gr.Radio(["Yes, mostly correct", "No, major issues", "Partially (some correct, some incorrect)", "Not Applicable"], label="Did the transcription correctly use the standard orthography (including accents, diacritics, special characters) for the language?", value=None)
    orthography_issues = gr.Textbox(label="If you selected \"No\" or \"Partially\", please describe any significant orthography issues you noticed.")
    meaning = gr.Slider(1, 5, step=1, label="Did the model's transcription preserve the original meaning of the speech? *")
    meaning_loss = gr.Textbox(label="If the meaning was not fully preserved (i.e., you rated 1-4 above), please briefly explain how it was changed or lost.")
    errors = gr.CheckboxGroup([
        "Substitutions (wrong words used)",
        "Omissions (words missing)",
        "Insertions (extra words added)",
        "Pronunciation-related errors (phonetically plausible but wrong word/spelling)",
        "Diacritic/Tone/Special Character errors",
        "Code-switching errors (mixing languages incorrectly)",
        "Named Entity errors (names of people/places wrong)",
        "Punctuation errors",
        "No significant errors observed"
    ], label="Which types of errors were most prominent or impactful in the transcriptions? *", value=[])
    error_examples = gr.Textbox(label="(Optional) Can you provide 1-2 examples of significant errors and how you would correct them?")
    performance = gr.Textbox(label="Please describe the model's performance in your own words. What did it do well? What did it struggle with? *")

    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(label="Submission status", interactive=False)
    save_btn.click(
        fn=save_feedback,
        inputs=[
            audio_input, transcribed_text, user_id, lang, env, device, domain, accuracy,
            transcript_edit, orthography, orthography_issues,
            meaning, meaning_loss, errors, error_examples, performance
        ],
        outputs=[output_msg]
    )
# Launch the interface
demo.launch()
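
# If deploying outside Hugging Face Spaces, you may need to bind explicitly,
# e.g. demo.launch(server_name="0.0.0.0", server_port=7860); both are standard
# gradio launch() parameters.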