Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# app.py
|
2 |
|
3 |
import os
|
4 |
import json
|
@@ -12,14 +12,6 @@ import librosa # pip install librosa
|
|
12 |
# --- External logging: push to a HF Dataset repo on each submit (no local storage) ---
|
13 |
from datasets import Dataset, Features, Value, Audio, load_dataset
|
14 |
|
15 |
-
# Optional but recommended for better jiwer performance
|
16 |
-
# pip install python-Levenshtein
|
17 |
-
try:
|
18 |
-
from jiwer import compute_measures, wer as jiwer_wer, cer as jiwer_cer
|
19 |
-
HAS_JIWER = True
|
20 |
-
except Exception:
|
21 |
-
HAS_JIWER = False
|
22 |
-
|
23 |
# -------- CONFIG: Hub dataset target (no persistent storage needed) --------
|
24 |
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "DarliAI/asr-feedback-logs")
|
25 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
@@ -39,21 +31,12 @@ HF_FEATURES = Features({
|
|
39 |
"decode_params": Value("string"),
|
40 |
|
41 |
"transcript_hyp": Value("string"),
|
42 |
-
"reference_text": Value("string"),
|
43 |
"corrected_text": Value("string"),
|
44 |
|
45 |
"latency_ms": Value("int32"),
|
46 |
"rtf": Value("float32"),
|
47 |
|
48 |
-
"wer": Value("float32"),
|
49 |
-
"cer": Value("float32"),
|
50 |
-
"subs": Value("int32"),
|
51 |
-
"ins": Value("int32"),
|
52 |
-
"dels": Value("int32"),
|
53 |
-
|
54 |
"score_out_of_10": Value("int32"),
|
55 |
-
"feedback_text": Value("string"),
|
56 |
-
"tags": Value("string"),
|
57 |
"share_publicly": Value("bool"),
|
58 |
})
|
59 |
|
@@ -82,9 +65,9 @@ def _push_row_to_hf_dataset(row, audio_file_path):
|
|
82 |
except Exception:
|
83 |
return None
|
84 |
|
85 |
-
for k in ["
|
86 |
example[k] = _to_int(example.get(k))
|
87 |
-
for k in ["
|
88 |
example[k] = _to_float(example.get(k))
|
89 |
|
90 |
ds = Dataset.from_list([example], features=HF_FEATURES)
|
@@ -138,7 +121,7 @@ language_models = {
|
|
138 |
"Pidgin": "FarmerlineML/pidgin_nigerian",
|
139 |
"Kikuyu": "FarmerlineML/w2v-bert-2.0_kikuyu",
|
140 |
"Igbo": "FarmerlineML/w2v-bert-2.0_igbo_v1",
|
141 |
-
|
142 |
}
|
143 |
|
144 |
# -------- Lazy-load pipeline cache (Space-safe) --------
|
@@ -187,21 +170,6 @@ def _model_revision_from_pipeline(pipe) -> str:
|
|
187 |
except Exception:
|
188 |
return "unknown"
|
189 |
|
190 |
-
def _compute_metrics(hyp: str, ref_or_corrected: str):
|
191 |
-
if not HAS_JIWER or not ref_or_corrected or not hyp:
|
192 |
-
return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
|
193 |
-
try:
|
194 |
-
measures = compute_measures(ref_or_corrected, hyp)
|
195 |
-
return {
|
196 |
-
"wer": measures.get("wer"),
|
197 |
-
"cer": jiwer_cer(ref_or_corrected, hyp),
|
198 |
-
"subs": measures.get("substitutions"),
|
199 |
-
"ins": measures.get("insertions"),
|
200 |
-
"dels": measures.get("deletions"),
|
201 |
-
}
|
202 |
-
except Exception:
|
203 |
-
return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
|
204 |
-
|
205 |
# -------- Inference --------
|
206 |
def transcribe(audio_path: str, language: str):
|
207 |
"""
|
@@ -241,40 +209,23 @@ def transcribe(audio_path: str, language: str):
|
|
241 |
}
|
242 |
return hyp_text, meta
|
243 |
|
244 |
-
# -------- Feedback submit --------
|
245 |
-
def submit_feedback(meta,
|
246 |
-
tags, store_audio, share_publicly, audio_file_path):
|
247 |
"""
|
248 |
-
|
249 |
-
No
|
250 |
"""
|
251 |
if not meta:
|
252 |
return {"status": "No transcription metadata available. Please transcribe first."}
|
253 |
|
254 |
-
ref_for_metrics = (reference_text or "").strip()
|
255 |
-
corrected_text = (corrected_text or "").strip()
|
256 |
-
if not ref_for_metrics and corrected_text:
|
257 |
-
ref_for_metrics = corrected_text
|
258 |
-
|
259 |
-
metrics = _compute_metrics(meta.get("transcript_hyp", ""), ref_for_metrics)
|
260 |
-
|
261 |
row = dict(meta)
|
262 |
row.update({
|
263 |
-
"
|
264 |
-
"corrected_text": corrected_text or "",
|
265 |
-
"wer": metrics["wer"],
|
266 |
-
"cer": metrics["cer"],
|
267 |
-
"subs": metrics["subs"],
|
268 |
-
"ins": metrics["ins"],
|
269 |
-
"dels": metrics["dels"],
|
270 |
"score_out_of_10": int(score) if score is not None else None,
|
271 |
-
"feedback_text": feedback_text or "",
|
272 |
-
"tags": json.dumps({"labels": tags or []}),
|
273 |
"share_publicly": bool(share_publicly),
|
274 |
})
|
275 |
|
276 |
try:
|
277 |
-
# Use the temporary upload path from Gradio iff the user consented
|
278 |
audio_to_push = audio_file_path if store_audio else None
|
279 |
hf_status = _push_row_to_hf_dataset(row, audio_to_push)
|
280 |
status = f"Feedback saved. {hf_status}"
|
@@ -283,15 +234,11 @@ def submit_feedback(meta, reference_text, corrected_text, score, feedback_text,
|
|
283 |
|
284 |
return {
|
285 |
"status": status,
|
286 |
-
"wer": row["wer"],
|
287 |
-
"cer": row["cer"],
|
288 |
-
"subs": row["subs"],
|
289 |
-
"ins": row["ins"],
|
290 |
-
"dels": row["dels"],
|
291 |
"latency_ms": row["latency_ms"],
|
292 |
"rtf": row["rtf"],
|
293 |
"model_id": row["model_id"],
|
294 |
-
"model_revision": row["model_revision"]
|
|
|
295 |
}
|
296 |
|
297 |
# -------- UI (original preserved; additions appended) --------
|
@@ -331,27 +278,18 @@ with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
|
|
331 |
# Pre-fill corrected with hypothesis for easy edits
|
332 |
return hyp, meta, hyp
|
333 |
|
334 |
-
# --- Evaluation
|
335 |
-
with gr.Accordion("Evaluation
|
336 |
-
with gr.Row():
|
337 |
-
reference_tb = gr.Textbox(label="Reference text (optional)", lines=4, value="")
|
338 |
with gr.Row():
|
339 |
corrected_tb = gr.Textbox(label="Corrected transcript (optional)", lines=4, value="")
|
340 |
with gr.Row():
|
341 |
score_slider = gr.Slider(minimum=0, maximum=10, step=1, label="Score out of 10", value=7)
|
342 |
-
with gr.Row():
|
343 |
-
feedback_tb = gr.Textbox(label="Feedback (what went right/wrong?)", lines=3, value="")
|
344 |
-
with gr.Row():
|
345 |
-
tags_cb = gr.CheckboxGroup(
|
346 |
-
["noisy", "far-field", "code-switching", "numbers-heavy", "named-entities", "read-speech", "spontaneous", "call-center", "voicenote"],
|
347 |
-
label="Slice tags (select any that apply)"
|
348 |
-
)
|
349 |
with gr.Row():
|
350 |
store_audio_cb = gr.Checkbox(label="Allow storing my audio for research/eval", value=False)
|
351 |
share_cb = gr.Checkbox(label="Allow sharing this example publicly", value=False)
|
352 |
|
353 |
-
submit_btn = gr.Button("Submit
|
354 |
-
results_json = gr.JSON(label="
|
355 |
|
356 |
# Wire events
|
357 |
btn.click(
|
@@ -364,11 +302,8 @@ with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
|
|
364 |
fn=submit_feedback,
|
365 |
inputs=[
|
366 |
meta_state,
|
367 |
-
reference_tb,
|
368 |
corrected_tb,
|
369 |
score_slider,
|
370 |
-
feedback_tb,
|
371 |
-
tags_cb,
|
372 |
store_audio_cb,
|
373 |
share_cb,
|
374 |
audio # raw file path from gr.Audio
|
|
|
1 |
+
# app.py (simplified: no WER/CER, minimal feedback)
|
2 |
|
3 |
import os
|
4 |
import json
|
|
|
12 |
# --- External logging: push to a HF Dataset repo on each submit (no local storage) ---
|
13 |
from datasets import Dataset, Features, Value, Audio, load_dataset
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# -------- CONFIG: Hub dataset target (no persistent storage needed) --------
|
16 |
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "DarliAI/asr-feedback-logs")
|
17 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
31 |
"decode_params": Value("string"),
|
32 |
|
33 |
"transcript_hyp": Value("string"),
|
|
|
34 |
"corrected_text": Value("string"),
|
35 |
|
36 |
"latency_ms": Value("int32"),
|
37 |
"rtf": Value("float32"),
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
"score_out_of_10": Value("int32"),
|
|
|
|
|
40 |
"share_publicly": Value("bool"),
|
41 |
})
|
42 |
|
|
|
65 |
except Exception:
|
66 |
return None
|
67 |
|
68 |
+
for k in ["latency_ms", "score_out_of_10", "sample_rate"]:
|
69 |
example[k] = _to_int(example.get(k))
|
70 |
+
for k in ["rtf", "audio_duration_s"]:
|
71 |
example[k] = _to_float(example.get(k))
|
72 |
|
73 |
ds = Dataset.from_list([example], features=HF_FEATURES)
|
|
|
121 |
"Pidgin": "FarmerlineML/pidgin_nigerian",
|
122 |
"Kikuyu": "FarmerlineML/w2v-bert-2.0_kikuyu",
|
123 |
"Igbo": "FarmerlineML/w2v-bert-2.0_igbo_v1",
|
124 |
+
"Krio": "FarmerlineML/w2v-bert-2.0_krio_v3"
|
125 |
}
|
126 |
|
127 |
# -------- Lazy-load pipeline cache (Space-safe) --------
|
|
|
170 |
except Exception:
|
171 |
return "unknown"
|
172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
# -------- Inference --------
|
174 |
def transcribe(audio_path: str, language: str):
|
175 |
"""
|
|
|
209 |
}
|
210 |
return hyp_text, meta
|
211 |
|
212 |
+
# -------- Feedback submit (minimal) --------
|
213 |
+
def submit_feedback(meta, corrected_text, score, store_audio, share_publicly, audio_file_path):
|
|
|
214 |
"""
|
215 |
+
Push a minimal row to HF Dataset: model info, language, transcript, optional corrected text, score.
|
216 |
+
No WER/CER computations.
|
217 |
"""
|
218 |
if not meta:
|
219 |
return {"status": "No transcription metadata available. Please transcribe first."}
|
220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
row = dict(meta)
|
222 |
row.update({
|
223 |
+
"corrected_text": (corrected_text or "").strip(),
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
"score_out_of_10": int(score) if score is not None else None,
|
|
|
|
|
225 |
"share_publicly": bool(share_publicly),
|
226 |
})
|
227 |
|
228 |
try:
|
|
|
229 |
audio_to_push = audio_file_path if store_audio else None
|
230 |
hf_status = _push_row_to_hf_dataset(row, audio_to_push)
|
231 |
status = f"Feedback saved. {hf_status}"
|
|
|
234 |
|
235 |
return {
|
236 |
"status": status,
|
|
|
|
|
|
|
|
|
|
|
237 |
"latency_ms": row["latency_ms"],
|
238 |
"rtf": row["rtf"],
|
239 |
"model_id": row["model_id"],
|
240 |
+
"model_revision": row["model_revision"],
|
241 |
+
"language": row["language_display"],
|
242 |
}
|
243 |
|
244 |
# -------- UI (original preserved; additions appended) --------
|
|
|
278 |
# Pre-fill corrected with hypothesis for easy edits
|
279 |
return hyp, meta, hyp
|
280 |
|
281 |
+
# --- Minimal Evaluation (score + optional corrected text) ---
|
282 |
+
with gr.Accordion("Evaluation", open=False):
|
|
|
|
|
283 |
with gr.Row():
|
284 |
corrected_tb = gr.Textbox(label="Corrected transcript (optional)", lines=4, value="")
|
285 |
with gr.Row():
|
286 |
score_slider = gr.Slider(minimum=0, maximum=10, step=1, label="Score out of 10", value=7)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
with gr.Row():
|
288 |
store_audio_cb = gr.Checkbox(label="Allow storing my audio for research/eval", value=False)
|
289 |
share_cb = gr.Checkbox(label="Allow sharing this example publicly", value=False)
|
290 |
|
291 |
+
submit_btn = gr.Button("Submit")
|
292 |
+
results_json = gr.JSON(label="Status")
|
293 |
|
294 |
# Wire events
|
295 |
btn.click(
|
|
|
302 |
fn=submit_feedback,
|
303 |
inputs=[
|
304 |
meta_state,
|
|
|
305 |
corrected_tb,
|
306 |
score_slider,
|
|
|
|
|
307 |
store_audio_cb,
|
308 |
share_cb,
|
309 |
audio # raw file path from gr.Audio
|