Create app.py
app.py
ADDED
@@ -0,0 +1,125 @@
# imports
import gradio as gr
import os
import requests
from transformers import pipeline

# Set your FastAPI backend endpoint
BACKEND_URL = "https://asr-evaluation-backend.emergentai.ug/submit-feedback"

model_map = {
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
}

# Create storage directory
os.makedirs("responses", exist_ok=True)

# Transcription function
def transcribe(audio, language):
    # Note: this builds a fresh pipeline on every call, reloading the checkpoint
    # each time, and device=0 assumes a GPU is available
    asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0)
    text = asr(audio)["text"]
    return text, audio

# Save feedback by sending it to the FastAPI backend
def save_feedback(audio_file, transcription, lang, env, device, domain, accuracy, orthography, orthography_issues,
                  meaning, meaning_loss, errors, error_examples, performance):
    try:
        with open(audio_file, "rb") as f:
            audio_content = f.read()

        metadata = {
            "transcription": transcription,
            "evaluated_language": lang,
            "environment": env,
            "device": device,
            "domain": domain,
            "accuracy": accuracy,
            "orthography": orthography,
            "orthography_issues": orthography_issues,
            "meaning": meaning,
            "meaning_loss": meaning_loss,
            "errors": ",".join(errors) if errors else "",
            "error_examples": error_examples,
            "performance": performance
        }

        files = {
            "audio_file": ("audio.wav", audio_content, "audio/wav")
        }

        response = requests.post(BACKEND_URL, data=metadata, files=files, timeout=20)

        if response.status_code == 201:
            return "✅ Feedback submitted successfully. Thank you!"
        else:
            return f"⚠️ Submission failed: {response.status_code} — {response.text}"

    except Exception as e:
        return f"❌ Could not connect to the backend: {str(e)}"

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## African ASR Evaluation Platform")

    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
        lang = gr.Dropdown(list(model_map.keys()), label="Select Language")

    transcribed_text = gr.Textbox(label="Transcribed Text")
    submit_btn = gr.Button("Transcribe")
    submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])

    gr.Markdown("---\n## Feedback Form")
    user_id = gr.Textbox(label="Please enter user ID.")
    env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room (minimal noise)", "Noisy Background (e.g., street, cafe, market)", "Other"], label="What was the type of recording environment for the speech you evaluated? *")
    device = gr.Dropdown(["Mobile Phone/Tablet", "Laptop/Computer Microphone", "Dedicated Microphone (e.g., headset, studio mic)", "Other"], label="What type of recording device was used? *")
    domain = gr.Textbox(label="Was the speech related to a specific topic? If yes, please specify the topic (e.g., news, education, medical, law, religious, sports, science).")
    accuracy = gr.Slider(1, 10, step=1, label="Overall, how accurate was the model's transcription for the audio you reviewed? *")
    transcript_edit = gr.Textbox(label="If the transcription provided by the model was incorrect, please enter your corrected version.")
    orthography = gr.Radio(["Yes, mostly correct", "No, major issues", "Partially (some correct, some incorrect)", "Not Applicable"], label="Did the transcription correctly use the standard orthography (including accents, diacritics, special characters) for the language?")
    orthography_issues = gr.Textbox(label="If you selected \"No\" or \"Partially\", please describe any significant orthography issues you noticed.")
    meaning = gr.Slider(1, 5, step=1, label="Did the model's transcription preserve the original meaning of the speech? *")
    meaning_loss = gr.Textbox(label="If the meaning was not fully preserved (i.e., you rated 1-4 above), please briefly explain how it was changed or lost.")
    errors = gr.CheckboxGroup([
        "Substitutions (wrong words used)",
        "Omissions (words missing)",
        "Insertions (extra words added)",
        "Pronunciation-related errors (phonetically plausible but wrong word/spelling)",
        "Diacritic/Tone/Special Character errors",
        "Code-switching errors (mixing languages incorrectly)",
        "Named Entity errors (names of people/places wrong)",
        "Punctuation errors",
        "No significant errors observed"
    ], label="Which types of errors were most prominent or impactful in the transcriptions? *")
    error_examples = gr.Textbox(label="(Optional) Can you provide 1-2 examples of significant errors and how you would correct them?")
    performance = gr.Textbox(label="Please describe the model's performance in your own words. What did it do well? What did it struggle with? *")

    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(interactive=False)
    # The first two inputs supply the audio file and transcription that save_feedback
    # expects; user_id and transcript_edit are collected above but not yet submitted
    save_btn.click(
        fn=save_feedback,
        inputs=[audio_input, transcribed_text, env, device, domain, accuracy, orthography, orthography_issues,
                meaning, meaning_loss, errors, error_examples, performance],
        outputs=[output_msg]
    )

# Launch the interface
demo.launch()
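
The transcribe function above rebuilds the Hugging Face pipeline on every click, so each request reloads a large checkpoint from scratch, and device=0 will fail on a CPU-only Space. A minimal sketch of one way to cache loaded models, assuming enough memory to keep a pipeline resident between calls; the get_asr helper, the cache size, and the CPU fallback are illustrative assumptions, not part of the app above:

# Sketch: cache loaded ASR pipelines instead of rebuilding them per request
from functools import lru_cache

import torch
from transformers import pipeline

@lru_cache(maxsize=2)  # keep at most two loaded models resident (tune to available memory)
def get_asr(language):
    # Fall back to CPU (-1) when no GPU is present, so the call does not crash
    dev = 0 if torch.cuda.is_available() else -1
    return pipeline("automatic-speech-recognition", model=model_map[language], device=dev)

def transcribe(audio, language):
    asr = get_asr(language)  # cache hit after the first request per language
    return asr(audio)["text"], audio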
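
For reference, save_feedback sends a multipart/form-data POST: the metadata dict travels as form fields (data=) and the WAV bytes as a file part (files=). The backend route itself is not part of this Space, but a FastAPI endpoint matching that request shape might look roughly like the sketch below; the field defaults and everything past the signature are assumptions, not the actual /submit-feedback implementation:

# Hypothetical receiving endpoint, inferred from the client request shape only
from fastapi import FastAPI, File, Form, UploadFile

app = FastAPI()

@app.post("/submit-feedback", status_code=201)  # the Gradio client checks for 201
async def submit_feedback(
    audio_file: UploadFile = File(...),   # matches files={"audio_file": (...)}
    transcription: str = Form(...),       # the remaining fields arrive as form data
    evaluated_language: str = Form(...),
    environment: str = Form(...),
    device: str = Form(...),
    domain: str = Form(""),
    accuracy: float = Form(...),
    orthography: str = Form(...),
    orthography_issues: str = Form(""),
    meaning: float = Form(...),
    meaning_loss: str = Form(""),
    errors: str = Form(""),               # comma-joined checkbox selections
    error_examples: str = Form(""),
    performance: str = Form(...),
):
    audio_bytes = await audio_file.read()
    # persist audio_bytes and the form fields here (implementation not shown)
    return {"message": "received"}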