Evaluation-2

Sleeping

App Files Community

FarmerlineML committed 16 days ago

Commit

47da0c9

verified ·

1 Parent(s): b6c35e7

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -80

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py
 import os
 import json
@@ -12,14 +12,6 @@ import librosa  # pip install librosa
 # --- External logging: push to a HF Dataset repo on each submit (no local storage) ---
 from datasets import Dataset, Features, Value, Audio, load_dataset
-# Optional but recommended for better jiwer performance
-# pip install python-Levenshtein
-try:
-    from jiwer import compute_measures, wer as jiwer_wer, cer as jiwer_cer
-    HAS_JIWER = True
-except Exception:
-    HAS_JIWER = False
 # -------- CONFIG: Hub dataset target (no persistent storage needed) --------
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "DarliAI/asr-feedback-logs")
 HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -39,21 +31,12 @@ HF_FEATURES = Features({
     "decode_params":    Value("string"),
     "transcript_hyp":   Value("string"),
-    "reference_text":   Value("string"),
     "corrected_text":   Value("string"),
     "latency_ms":       Value("int32"),
     "rtf":              Value("float32"),
-    "wer":              Value("float32"),
-    "cer":              Value("float32"),
-    "subs":             Value("int32"),
-    "ins":              Value("int32"),
-    "dels":             Value("int32"),
     "score_out_of_10":  Value("int32"),
-    "feedback_text":    Value("string"),
-    "tags":             Value("string"),
     "share_publicly":   Value("bool"),
 })
@@ -82,9 +65,9 @@ def _push_row_to_hf_dataset(row, audio_file_path):
         except Exception:
             return None
-    for k in ["subs", "ins", "dels", "latency_ms", "score_out_of_10", "sample_rate"]:
         example[k] = _to_int(example.get(k))
-    for k in ["wer", "cer", "rtf", "audio_duration_s"]:
         example[k] = _to_float(example.get(k))
     ds = Dataset.from_list([example], features=HF_FEATURES)
@@ -138,7 +121,7 @@ language_models = {
     "Pidgin":                   "FarmerlineML/pidgin_nigerian",
     "Kikuyu":                   "FarmerlineML/w2v-bert-2.0_kikuyu",
     "Igbo":                     "FarmerlineML/w2v-bert-2.0_igbo_v1",
-    #"Krio":                     "FarmerlineML/w2v-bert-2.0_krio_v3"
 }
 # -------- Lazy-load pipeline cache (Space-safe) --------
@@ -187,21 +170,6 @@ def _model_revision_from_pipeline(pipe) -> str:
     except Exception:
         return "unknown"
-def _compute_metrics(hyp: str, ref_or_corrected: str):
-    if not HAS_JIWER or not ref_or_corrected or not hyp:
-        return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
-    try:
-        measures = compute_measures(ref_or_corrected, hyp)
-        return {
-            "wer": measures.get("wer"),
-            "cer": jiwer_cer(ref_or_corrected, hyp),
-            "subs": measures.get("substitutions"),
-            "ins": measures.get("insertions"),
-            "dels": measures.get("deletions"),
-        }
-    except Exception:
-        return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
 # -------- Inference --------
 def transcribe(audio_path: str, language: str):
     """
@@ -241,40 +209,23 @@ def transcribe(audio_path: str, language: str):
     }
     return hyp_text, meta
-# -------- Feedback submit --------
-def submit_feedback(meta, reference_text, corrected_text, score, feedback_text,
-                    tags, store_audio, share_publicly, audio_file_path):
     """
-    Compute metrics (if possible) and push a row to HF Dataset immediately.
-    No local CSV/audio writes.
     """
     if not meta:
         return {"status": "No transcription metadata available. Please transcribe first."}
-    ref_for_metrics = (reference_text or "").strip()
-    corrected_text = (corrected_text or "").strip()
-    if not ref_for_metrics and corrected_text:
-        ref_for_metrics = corrected_text
-    metrics = _compute_metrics(meta.get("transcript_hyp", ""), ref_for_metrics)
     row = dict(meta)
     row.update({
-        "reference_text": reference_text or "",
-        "corrected_text": corrected_text or "",
-        "wer": metrics["wer"],
-        "cer": metrics["cer"],
-        "subs": metrics["subs"],
-        "ins": metrics["ins"],
-        "dels": metrics["dels"],
         "score_out_of_10": int(score) if score is not None else None,
-        "feedback_text": feedback_text or "",
-        "tags": json.dumps({"labels": tags or []}),
         "share_publicly": bool(share_publicly),
     })
     try:
-        # Use the temporary upload path from Gradio iff the user consented
         audio_to_push = audio_file_path if store_audio else None
         hf_status = _push_row_to_hf_dataset(row, audio_to_push)
         status = f"Feedback saved. {hf_status}"
@@ -283,15 +234,11 @@ def submit_feedback(meta, reference_text, corrected_text, score, feedback_text,
     return {
         "status": status,
-        "wer": row["wer"],
-        "cer": row["cer"],
-        "subs": row["subs"],
-        "ins": row["ins"],
-        "dels": row["dels"],
         "latency_ms": row["latency_ms"],
         "rtf": row["rtf"],
         "model_id": row["model_id"],
-        "model_revision": row["model_revision"]
     }
 # -------- UI (original preserved; additions appended) --------
@@ -331,27 +278,18 @@ with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
         # Pre-fill corrected with hypothesis for easy edits
         return hyp, meta, hyp
-    # --- Evaluation & Feedback (no style changes) ---
-    with gr.Accordion("Evaluation & Feedback", open=False):
-        with gr.Row():
-            reference_tb = gr.Textbox(label="Reference text (optional)", lines=4, value="")
         with gr.Row():
             corrected_tb = gr.Textbox(label="Corrected transcript (optional)", lines=4, value="")
         with gr.Row():
             score_slider = gr.Slider(minimum=0, maximum=10, step=1, label="Score out of 10", value=7)
-        with gr.Row():
-            feedback_tb = gr.Textbox(label="Feedback (what went right/wrong?)", lines=3, value="")
-        with gr.Row():
-            tags_cb = gr.CheckboxGroup(
-                ["noisy", "far-field", "code-switching", "numbers-heavy", "named-entities", "read-speech", "spontaneous", "call-center", "voicenote"],
-                label="Slice tags (select any that apply)"
-            )
         with gr.Row():
             store_audio_cb = gr.Checkbox(label="Allow storing my audio for research/eval", value=False)
             share_cb = gr.Checkbox(label="Allow sharing this example publicly", value=False)
-        submit_btn = gr.Button("Submit Feedback / Compute Metrics")
-        results_json = gr.JSON(label="Metrics & Status")
     # Wire events
     btn.click(
@@ -364,11 +302,8 @@ with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
         fn=submit_feedback,
         inputs=[
             meta_state,
-            reference_tb,
             corrected_tb,
             score_slider,
-            feedback_tb,
-            tags_cb,
             store_audio_cb,
             share_cb,
             audio  # raw file path from gr.Audio

+# app.py (simplified: no WER/CER, minimal feedback)
 import os
 import json
 # --- External logging: push to a HF Dataset repo on each submit (no local storage) ---
 from datasets import Dataset, Features, Value, Audio, load_dataset
 # -------- CONFIG: Hub dataset target (no persistent storage needed) --------
 HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "DarliAI/asr-feedback-logs")
 HF_TOKEN = os.environ.get("HF_TOKEN")
     "decode_params":    Value("string"),
     "transcript_hyp":   Value("string"),
     "corrected_text":   Value("string"),
     "latency_ms":       Value("int32"),
     "rtf":              Value("float32"),
     "score_out_of_10":  Value("int32"),
     "share_publicly":   Value("bool"),
 })
         except Exception:
             return None
+    for k in ["latency_ms", "score_out_of_10", "sample_rate"]:
         example[k] = _to_int(example.get(k))
+    for k in ["rtf", "audio_duration_s"]:
         example[k] = _to_float(example.get(k))
     ds = Dataset.from_list([example], features=HF_FEATURES)
     "Pidgin":                   "FarmerlineML/pidgin_nigerian",
     "Kikuyu":                   "FarmerlineML/w2v-bert-2.0_kikuyu",
     "Igbo":                     "FarmerlineML/w2v-bert-2.0_igbo_v1",
+    "Krio":                     "FarmerlineML/w2v-bert-2.0_krio_v3"
 }
 # -------- Lazy-load pipeline cache (Space-safe) --------
     except Exception:
         return "unknown"
 # -------- Inference --------
 def transcribe(audio_path: str, language: str):
     """
     }
     return hyp_text, meta
+# -------- Feedback submit (minimal) --------
+def submit_feedback(meta, corrected_text, score, store_audio, share_publicly, audio_file_path):
     """
+    Push a minimal row to HF Dataset: model info, language, transcript, optional corrected text, score.
+    No WER/CER computations.
     """
     if not meta:
         return {"status": "No transcription metadata available. Please transcribe first."}
     row = dict(meta)
     row.update({
+        "corrected_text": (corrected_text or "").strip(),
         "score_out_of_10": int(score) if score is not None else None,
         "share_publicly": bool(share_publicly),
     })
     try:
         audio_to_push = audio_file_path if store_audio else None
         hf_status = _push_row_to_hf_dataset(row, audio_to_push)
         status = f"Feedback saved. {hf_status}"
     return {
         "status": status,
         "latency_ms": row["latency_ms"],
         "rtf": row["rtf"],
         "model_id": row["model_id"],
+        "model_revision": row["model_revision"],
+        "language": row["language_display"],
     }
 # -------- UI (original preserved; additions appended) --------
         # Pre-fill corrected with hypothesis for easy edits
         return hyp, meta, hyp
+    # --- Minimal Evaluation (score + optional corrected text) ---
+    with gr.Accordion("Evaluation", open=False):
         with gr.Row():
             corrected_tb = gr.Textbox(label="Corrected transcript (optional)", lines=4, value="")
         with gr.Row():
             score_slider = gr.Slider(minimum=0, maximum=10, step=1, label="Score out of 10", value=7)
         with gr.Row():
             store_audio_cb = gr.Checkbox(label="Allow storing my audio for research/eval", value=False)
             share_cb = gr.Checkbox(label="Allow sharing this example publicly", value=False)
+        submit_btn = gr.Button("Submit")
+        results_json = gr.JSON(label="Status")
     # Wire events
     btn.click(
         fn=submit_feedback,
         inputs=[
             meta_state,
             corrected_tb,
             score_slider,
             store_audio_cb,
             share_cb,
             audio  # raw file path from gr.Audio