Evaluation-2

Sleeping

App Files Files Community

FarmerlineML committed 16 days ago

Commit

b6c35e7

verified ·

1 Parent(s): dc9e10c

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -109

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 # app.py
 import os
-import csv
 import json
 import time
 import uuid
@@ -10,6 +9,9 @@ from transformers import pipeline
 import numpy as np
 import librosa  # pip install librosa
 # Optional but recommended for better jiwer performance
 # pip install python-Levenshtein
 try:
@@ -18,12 +20,90 @@ try:
 except Exception:
     HAS_JIWER = False
-# -------- CONFIG: storage paths (Space-friendly) --------
-DATA_DIR = "/home/user/data"
-AUDIO_DIR = os.path.join(DATA_DIR, "audio")
-LOG_CSV = os.path.join(DATA_DIR, "logs.csv")
-os.makedirs(DATA_DIR, exist_ok=True)
-os.makedirs(AUDIO_DIR, exist_ok=True)
 # --- EDIT THIS: map display names to your HF Hub model IDs ---
 language_models = {
@@ -58,14 +138,13 @@ language_models = {
     "Pidgin":                   "FarmerlineML/pidgin_nigerian",
     "Kikuyu":                   "FarmerlineML/w2v-bert-2.0_kikuyu",
     "Igbo":                     "FarmerlineML/w2v-bert-2.0_igbo_v1",
-    "Krio":                     "FarmerlineML/w2v-bert-2.0_krio_v3"
 }
 # -------- Lazy-load pipeline cache (Space-safe) --------
-# Small LRU-style cache to avoid loading all models into RAM
 _PIPELINE_CACHE = {}
-_CACHE_ORDER = []  # keeps track of usage order
-_CACHE_MAX_SIZE = 3  # adjust if you have more RAM
 def _touch_cache(key):
     if key in _CACHE_ORDER:
@@ -74,7 +153,7 @@ def _touch_cache(key):
 def _evict_if_needed():
     while len(_PIPELINE_CACHE) > _CACHE_MAX_SIZE:
-        oldest = _CACHE_ORDER.pop()  # least-recently used
         try:
             del _PIPELINE_CACHE[oldest]
         except KeyError:
@@ -88,7 +167,7 @@ def get_asr_pipeline(language_display: str):
     pipe = pipeline(
         task="automatic-speech-recognition",
         model=model_id,
-        device=-1,          # force CPU usage on Spaces CPU
         chunk_length_s=30
     )
     _PIPELINE_CACHE[language_display] = pipe
@@ -103,43 +182,14 @@ def _model_revision_from_pipeline(pipe) -> str:
         val = getattr(getattr(pipe, "model", None), attr, None)
         if val:
             return str(val)
-    # Fallback to config name_or_path or unknown
     try:
         return str(getattr(pipe.model.config, "_name_or_path", "unknown"))
     except Exception:
         return "unknown"
-def _append_log_row(row: dict):
-    field_order = [
-        "timestamp", "session_id",
-        "language_display", "model_id", "model_revision",
-        "audio_duration_s", "sample_rate", "source",
-        "decode_params",
-        "transcript_hyp",
-        "reference_text", "corrected_text",
-        "latency_ms", "rtf",
-        "wer", "cer",
-        "subs", "ins", "dels",
-        "score_out_of_10", "feedback_text",
-        "tags",
-        "store_audio", "audio_path"
-    ]
-    file_exists = os.path.isfile(LOG_CSV)
-    with open(LOG_CSV, "a", newline="", encoding="utf-8") as f:
-        writer = csv.DictWriter(f, fieldnames=field_order)
-        if not file_exists:
-            writer.writeheader()
-        # Ensure all fields exist
-        for k in field_order:
-            row.setdefault(k, "")
-        writer.writerow(row)
 def _compute_metrics(hyp: str, ref_or_corrected: str):
     if not HAS_JIWER or not ref_or_corrected or not hyp:
-        return {
-            "wer": None, "cer": None,
-            "subs": None, "ins": None, "dels": None
-        }
     try:
         measures = compute_measures(ref_or_corrected, hyp)
         return {
@@ -150,24 +200,18 @@ def _compute_metrics(hyp: str, ref_or_corrected: str):
             "dels": measures.get("deletions"),
         }
     except Exception:
-        # Be resilient if jiwer errors on edge cases
-        return {
-            "wer": None, "cer": None,
-            "subs": None, "ins": None, "dels": None
-        }
 # -------- Inference --------
 def transcribe(audio_path: str, language: str):
     """
     Load the audio via librosa (supports mp3, wav, flac, m4a, ogg, etc.),
     convert to mono, then run it through the chosen ASR pipeline.
-    Returns only the transcript (to keep existing behavior),
-    while metadata is stored in a hidden state for the feedback step.
     """
     if not audio_path:
         return "⚠️ Please upload or record an audio clip.", None
-    # librosa.load returns a 1D np.ndarray (mono) and the sample rate
     speech, sr = librosa.load(audio_path, sr=None, mono=True)
     duration_s = float(librosa.get_duration(y=speech, sr=sr))
@@ -181,7 +225,6 @@ def transcribe(audio_path: str, language: str):
     rtf = (latency_ms / 1000.0) / max(duration_s, 1e-9)
-    # Prepare metadata for the feedback logger
     meta = {
         "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
         "session_id": f"anon-{uuid.uuid4()}",
@@ -190,90 +233,61 @@ def transcribe(audio_path: str, language: str):
         "model_revision": _model_revision_from_pipeline(pipe),
         "audio_duration_s": duration_s,
         "sample_rate": sr,
-        "source": "upload",  # gr.Audio combines both; we don't distinguish here
         "decode_params": json.dumps(decode_params),
         "transcript_hyp": hyp_text,
         "latency_ms": latency_ms,
         "rtf": rtf,
-        # Placeholders to be filled on feedback submit
-        "reference_text": "",
-        "corrected_text": "",
-        "wer": "",
-        "cer": "",
-        "subs": "",
-        "ins": "",
-        "dels": "",
-        "score_out_of_10": "",
-        "feedback_text": "",
-        "tags": "",
-        "store_audio": False,
-        "audio_path": ""
     }
     return hyp_text, meta
 # -------- Feedback submit --------
 def submit_feedback(meta, reference_text, corrected_text, score, feedback_text,
                     tags, store_audio, share_publicly, audio_file_path):
     """
-    Compute metrics (if possible), optionally store audio (consented),
-    and append a row to CSV. Returns a compact dict for display.
     """
     if not meta:
         return {"status": "No transcription metadata available. Please transcribe first."}
-    # Choose text to compare against hyp: prefer explicit reference, else corrected
-    ref_for_metrics = reference_text.strip() if reference_text else ""
-    corrected_text = corrected_text.strip() if corrected_text else ""
     if not ref_for_metrics and corrected_text:
         ref_for_metrics = corrected_text
     metrics = _compute_metrics(meta.get("transcript_hyp", ""), ref_for_metrics)
-    # Handle audio storage (optional, consented)
-    stored_path = ""
-    if store_audio and audio_file_path:
-        try:
-            # Copy the original file to AUDIO_DIR with a random name
-            ext = os.path.splitext(audio_file_path)[1] or ".wav"
-            stored_path = os.path.join(AUDIO_DIR, f"{uuid.uuid4()}{ext}")
-            # Simple byte copy
-            with open(audio_file_path, "rb") as src, open(stored_path, "wb") as dst:
-                dst.write(src.read())
-        except Exception:
-            stored_path = ""
-    # Build log row
-    row = dict(meta)  # start from recorded meta
     row.update({
         "reference_text": reference_text or "",
         "corrected_text": corrected_text or "",
-        "wer": metrics["wer"] if metrics["wer"] is not None else "",
-        "cer": metrics["cer"] if metrics["cer"] is not None else "",
-        "subs": metrics["subs"] if metrics["subs"] is not None else "",
-        "ins": metrics["ins"] if metrics["ins"] is not None else "",
-        "dels": metrics["dels"] if metrics["dels"] is not None else "",
-        "score_out_of_10": score if score is not None else "",
         "feedback_text": feedback_text or "",
-        "tags": json.dumps({"labels": tags or [], "share_publicly": bool(share_publicly)}),
-        "store_audio": bool(store_audio),
-        "audio_path": stored_path
     })
     try:
-        _append_log_row(row)
-        status = "Feedback saved."
     except Exception as e:
-        status = f"Failed to save feedback: {e}"
-    # Compact result to show back to user
     return {
         "status": status,
-        "wer": row["wer"] if row["wer"] != "" else None,
-        "cer": row["cer"] if row["cer"] != "" else None,
-        "subs": row["subs"] if row["subs"] != "" else None,
-        "ins": row["ins"] if row["ins"] != "" else None,
-        "dels": row["dels"] if row["dels"] != "" else None,
         "latency_ms": row["latency_ms"],
         "rtf": row["rtf"],
         "model_id": row["model_id"],
@@ -314,10 +328,10 @@ with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
     # Also capture meta into the hidden state
     def _transcribe_and_store(audio_path, language):
         hyp, meta = transcribe(audio_path, language)
-        # For convenience, populate corrected_text with the hyp by default
         return hyp, meta, hyp
-    # --- Evaluation & Feedback (appended UI, no style/font changes) ---
     with gr.Accordion("Evaluation & Feedback", open=False):
         with gr.Row():
             reference_tb = gr.Textbox(label="Reference text (optional)", lines=4, value="")
@@ -362,7 +376,7 @@ with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
         outputs=results_json
     )
-# Use a queue to keep Spaces stable under load
 if __name__ == "__main__":
-    demo.queue()  # enable_queue=True by default in recent Gradio
     demo.launch()

 # app.py
 import os
 import json
 import time
 import uuid
 import numpy as np
 import librosa  # pip install librosa
+# --- External logging: push to a HF Dataset repo on each submit (no local storage) ---
+from datasets import Dataset, Features, Value, Audio, load_dataset
 # Optional but recommended for better jiwer performance
 # pip install python-Levenshtein
 try:
 except Exception:
     HAS_JIWER = False
+# -------- CONFIG: Hub dataset target (no persistent storage needed) --------
+HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "DarliAI/asr-feedback-logs")
+HF_TOKEN = os.environ.get("HF_TOKEN")
+PUSH_TO_HF = bool(HF_TOKEN and HF_DATASET_REPO)
+HF_FEATURES = Features({
+    "timestamp":        Value("string"),
+    "session_id":       Value("string"),
+    "language_display": Value("string"),
+    "model_id":         Value("string"),
+    "model_revision":   Value("string"),
+    "audio":            Audio(sampling_rate=None),   # uploaded only if user consents
+    "audio_duration_s": Value("float32"),
+    "sample_rate":      Value("int32"),
+    "source":           Value("string"),
+    "decode_params":    Value("string"),
+    "transcript_hyp":   Value("string"),
+    "reference_text":   Value("string"),
+    "corrected_text":   Value("string"),
+    "latency_ms":       Value("int32"),
+    "rtf":              Value("float32"),
+    "wer":              Value("float32"),
+    "cer":              Value("float32"),
+    "subs":             Value("int32"),
+    "ins":              Value("int32"),
+    "dels":             Value("int32"),
+    "score_out_of_10":  Value("int32"),
+    "feedback_text":    Value("string"),
+    "tags":             Value("string"),
+    "share_publicly":   Value("bool"),
+})
def _push_row_to_hf_dataset(row, audio_file_path):
    """
    Append a single feedback example to the HF dataset repo (train split).

    Args:
        row: Mapping of column name -> value for one feedback record. It is
            copied first, so the caller's dict is not modified.
        audio_file_path: Path of the audio file to attach, or None. The
            'audio' column is set to this path only when it names an existing
            file; otherwise it is None.

    Returns:
        A short human-readable status string.

    Raises:
        Whatever the `datasets` calls raise while building, loading or pushing
        the dataset, except for a missing/empty repo (FileNotFoundError), which
        is treated as "no rows yet". Errors are deliberately NOT swallowed
        here: pushing after a failed load would replace the whole split with
        just the new row.
    """
    if not PUSH_TO_HF:
        return "HF push disabled (missing HF_TOKEN or repo)."

    # Imported locally because the file-level `from datasets import ...` line
    # does not bring this name into scope.
    from datasets import concatenate_datasets

    example = dict(row)
    # Audio: only attach when a path was passed in and the file exists.
    has_audio = bool(audio_file_path) and os.path.isfile(audio_file_path)
    example["audio"] = audio_file_path if has_audio else None

    # Normalize numeric columns; anything unparseable (None, "", NaN/inf for
    # ints) becomes None so it is stored as a null instead of failing the cast.
    def _to_int(v):
        try:
            return int(v)
        except (TypeError, ValueError, OverflowError):
            return None

    def _to_float(v):
        try:
            return float(v)
        except (TypeError, ValueError, OverflowError):
            return None

    for k in ["subs", "ins", "dels", "latency_ms", "score_out_of_10", "sample_rate"]:
        example[k] = _to_int(example.get(k))
    for k in ["wer", "cer", "rtf", "audio_duration_s"]:
        example[k] = _to_float(example.get(k))

    ds = Dataset.from_list([example], features=HF_FEATURES)

    # Load the existing split if present, then append.
    # NOTE(review): assumes a missing or empty dataset repo surfaces from
    # load_dataset as a FileNotFoundError (or subclass) — confirm against the
    # installed `datasets` version.
    try:
        existing = load_dataset(HF_DATASET_REPO, split="train", token=HF_TOKEN)
    except FileNotFoundError:
        # Nothing stored yet: the new row becomes the first row of the split.
        merged = ds
    else:
        # Dataset objects have no `.concatenate()` method; the module-level
        # helper is the supported way to stack rows.
        merged = concatenate_datasets([existing, ds])

    # NOTE(review): load -> append -> push is not atomic; two submits that
    # overlap can each push a copy that lacks the other's row.
    merged.push_to_hub(
        HF_DATASET_REPO,
        split="train",
        private=True,
        token=HF_TOKEN,
        commit_message="append feedback row"
    )
    return "Pushed to HF Dataset."
 # --- EDIT THIS: map display names to your HF Hub model IDs ---
 language_models = {
     "Pidgin":                   "FarmerlineML/pidgin_nigerian",
     "Kikuyu":                   "FarmerlineML/w2v-bert-2.0_kikuyu",
     "Igbo":                     "FarmerlineML/w2v-bert-2.0_igbo_v1",
+    #"Krio":                     "FarmerlineML/w2v-bert-2.0_krio_v3"
 }
 # -------- Lazy-load pipeline cache (Space-safe) --------
 _PIPELINE_CACHE = {}
+_CACHE_ORDER = []  # usage order
+_CACHE_MAX_SIZE = 3  # tune for RAM
 def _touch_cache(key):
     if key in _CACHE_ORDER:
 def _evict_if_needed():
     while len(_PIPELINE_CACHE) > _CACHE_MAX_SIZE:
+        oldest = _CACHE_ORDER.pop()
         try:
             del _PIPELINE_CACHE[oldest]
         except KeyError:
     pipe = pipeline(
         task="automatic-speech-recognition",
         model=model_id,
+        device=-1,          # CPU on Spaces (explicit)
         chunk_length_s=30
     )
     _PIPELINE_CACHE[language_display] = pipe
         val = getattr(getattr(pipe, "model", None), attr, None)
         if val:
             return str(val)
     try:
         return str(getattr(pipe.model.config, "_name_or_path", "unknown"))
     except Exception:
         return "unknown"
 def _compute_metrics(hyp: str, ref_or_corrected: str):
     if not HAS_JIWER or not ref_or_corrected or not hyp:
+        return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
     try:
         measures = compute_measures(ref_or_corrected, hyp)
         return {
             "dels": measures.get("deletions"),
         }
     except Exception:
+        return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
 # -------- Inference --------
 def transcribe(audio_path: str, language: str):
     """
     Load the audio via librosa (supports mp3, wav, flac, m4a, ogg, etc.),
     convert to mono, then run it through the chosen ASR pipeline.
+    Returns transcript (unchanged behavior) and a meta dict for feedback.
     """
     if not audio_path:
         return "⚠️ Please upload or record an audio clip.", None
     speech, sr = librosa.load(audio_path, sr=None, mono=True)
     duration_s = float(librosa.get_duration(y=speech, sr=sr))
     rtf = (latency_ms / 1000.0) / max(duration_s, 1e-9)
     meta = {
         "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
         "session_id": f"anon-{uuid.uuid4()}",
         "model_revision": _model_revision_from_pipeline(pipe),
         "audio_duration_s": duration_s,
         "sample_rate": sr,
+        "source": "upload",
         "decode_params": json.dumps(decode_params),
         "transcript_hyp": hyp_text,
         "latency_ms": latency_ms,
         "rtf": rtf,
     }
     return hyp_text, meta
 # -------- Feedback submit --------
 def submit_feedback(meta, reference_text, corrected_text, score, feedback_text,
                     tags, store_audio, share_publicly, audio_file_path):
     """
+    Compute metrics (if possible) and push a row to HF Dataset immediately.
+    No local CSV/audio writes.
     """
     if not meta:
         return {"status": "No transcription metadata available. Please transcribe first."}
+    ref_for_metrics = (reference_text or "").strip()
+    corrected_text = (corrected_text or "").strip()
     if not ref_for_metrics and corrected_text:
         ref_for_metrics = corrected_text
     metrics = _compute_metrics(meta.get("transcript_hyp", ""), ref_for_metrics)
+    row = dict(meta)
     row.update({
         "reference_text": reference_text or "",
         "corrected_text": corrected_text or "",
+        "wer": metrics["wer"],
+        "cer": metrics["cer"],
+        "subs": metrics["subs"],
+        "ins": metrics["ins"],
+        "dels": metrics["dels"],
+        "score_out_of_10": int(score) if score is not None else None,
         "feedback_text": feedback_text or "",
+        "tags": json.dumps({"labels": tags or []}),
+        "share_publicly": bool(share_publicly),
     })
     try:
+        # Use the temporary upload path from Gradio iff the user consented
+        audio_to_push = audio_file_path if store_audio else None
+        hf_status = _push_row_to_hf_dataset(row, audio_to_push)
+        status = f"Feedback saved. {hf_status}"
     except Exception as e:
+        status = f"Failed to push to HF Dataset: {e}"
     return {
         "status": status,
+        "wer": row["wer"],
+        "cer": row["cer"],
+        "subs": row["subs"],
+        "ins": row["ins"],
+        "dels": row["dels"],
         "latency_ms": row["latency_ms"],
         "rtf": row["rtf"],
         "model_id": row["model_id"],
     # Also capture meta into the hidden state
     def _transcribe_and_store(audio_path, language):
         hyp, meta = transcribe(audio_path, language)
+        # Pre-fill corrected with hypothesis for easy edits
         return hyp, meta, hyp
+    # --- Evaluation & Feedback (no style changes) ---
     with gr.Accordion("Evaluation & Feedback", open=False):
         with gr.Row():
             reference_tb = gr.Textbox(label="Reference text (optional)", lines=4, value="")
         outputs=results_json
     )
+# Keep Spaces stable under load
 if __name__ == "__main__":
+    demo.queue()
     demo.launch()