# app.py
import os
import json
import time
import uuid

import gradio as gr
from transformers import pipeline
import numpy as np
import librosa  # pip install librosa
# --- External logging: push to a HF Dataset repo on each submit (no local storage) ---
from datasets import Dataset, Features, Value, Audio, load_dataset, concatenate_datasets

# jiwer is optional but required for WER/CER metrics; python-Levenshtein is
# recommended for better jiwer performance:
# pip install jiwer python-Levenshtein
try:
    from jiwer import compute_measures, cer as jiwer_cer
    HAS_JIWER = True
except Exception:
    HAS_JIWER = False
# -------- CONFIG: Hub dataset target (no persistent storage needed) --------
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "DarliAI/asr-feedback-logs")
HF_TOKEN = os.environ.get("HF_TOKEN")
PUSH_TO_HF = bool(HF_TOKEN and HF_DATASET_REPO)

HF_FEATURES = Features({
    "timestamp": Value("string"),
    "session_id": Value("string"),
    "language_display": Value("string"),
    "model_id": Value("string"),
    "model_revision": Value("string"),
    "audio": Audio(sampling_rate=None),  # uploaded only if the user consents
    "audio_duration_s": Value("float32"),
    "sample_rate": Value("int32"),
    "source": Value("string"),
    "decode_params": Value("string"),
    "transcript_hyp": Value("string"),
    "reference_text": Value("string"),
    "corrected_text": Value("string"),
    "latency_ms": Value("int32"),
    "rtf": Value("float32"),
    "wer": Value("float32"),
    "cer": Value("float32"),
    "subs": Value("int32"),
    "ins": Value("int32"),
    "dels": Value("int32"),
    "score_out_of_10": Value("int32"),
    "feedback_text": Value("string"),
    "tags": Value("string"),
    "share_publicly": Value("bool"),
})
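
# Illustrative sketch of one row under this schema (field values are made up,
# not taken from real logs):
#   {
#       "timestamp": "2025-01-01T00:00:00Z",
#       "session_id": "anon-<uuid4>",
#       "language_display": "Ewe",
#       "audio": "/tmp/gradio/clip.wav",  # None when the user withholds consent
#       "wer": 0.25, "cer": 0.11, "subs": 2, "ins": 0, "dels": 1,
#       ...
#   }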
def _push_row_to_hf_dataset(row, audio_file_path):
    """
    Append a single example to the HF dataset repo (train split).
    If the user didn't consent or there is no audio path, the 'audio' field is None.
    """
    if not PUSH_TO_HF:
        return "HF push disabled (missing HF_TOKEN or repo)."
    example = dict(row)
    # Audio: only include it if the user consented and the file exists
    example["audio"] = audio_file_path if (audio_file_path and os.path.isfile(audio_file_path)) else None

    # Normalize types so they match HF_FEATURES
    def _to_int(v):
        try:
            return int(v)
        except Exception:
            return None

    def _to_float(v):
        try:
            return float(v)
        except Exception:
            return None

    for k in ["subs", "ins", "dels", "latency_ms", "score_out_of_10", "sample_rate"]:
        example[k] = _to_int(example.get(k))
    for k in ["wer", "cer", "rtf", "audio_duration_s"]:
        example[k] = _to_float(example.get(k))

    ds = Dataset.from_list([example], features=HF_FEATURES)
    # Load the existing split if present, then append
    try:
        existing = load_dataset(HF_DATASET_REPO, split="train", token=HF_TOKEN)
        merged = concatenate_datasets([existing, ds])
    except Exception:
        merged = ds
    merged.push_to_hub(
        HF_DATASET_REPO,
        split="train",
        private=True,
        token=HF_TOKEN,
        commit_message="append feedback row",
    )
    return "Pushed to HF Dataset."
# --- EDIT THIS: map display names to your HF Hub model IDs ---
language_models = {
    "Akan (Asante Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1",
    "Ewe": "FarmerlineML/w2v-bert-2.0_ewe_2",
    "Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha",
    # "Luganda": "FarmerlineML/w2v-bert-2.0_luganda",  # superseded by FarmerlineML/luganda_fkd below; a duplicate key here would be silently overwritten
    "Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha",
    "Fante": "misterkissi/w2v2-lg-xls-r-300m-fante",
    "Bemba": "DarliAI/kissi-w2v2-lg-xls-r-300m-bemba",
    "Bambara": "DarliAI/kissi-w2v2-lg-xls-r-300m-bambara",
    "Dagaare": "DarliAI/kissi-w2v2-lg-xls-r-300m-dagaare",
    "Kinyarwanda": "DarliAI/kissi-w2v2-lg-xls-r-300m-kinyarwanda",
    "Fula": "DarliAI/kissi-wav2vec2-fula-fleurs-full",
    "Oromo": "DarliAI/kissi-w2v-bert-2.0-oromo",
    "Runyankore": "misterkissi/w2v2-lg-xls-r-300m-runyankore",
    "Ga": "misterkissi/w2v2-lg-xls-r-300m-ga",
    "Vai": "misterkissi/whisper-small-vai",
    "Kasem": "misterkissi/w2v2-lg-xls-r-300m-kasem",
    "Lingala": "misterkissi/w2v2-lg-xls-r-300m-lingala",
    "Fongbe": "misterkissi/whisper-small-fongbe",
    "Amharic": "misterkissi/w2v2-lg-xls-r-1b-amharic",
    "Xhosa": "misterkissi/w2v2-lg-xls-r-300m-xhosa",
    "Tsonga": "misterkissi/w2v2-lg-xls-r-300m-tsonga",
    # "WOLOF": "misterkissi/w2v2-lg-xls-r-1b-wolof",
    # "HAITIAN CREOLE": "misterkissi/whisper-small-haitian-creole",
    # "KABYLE": "misterkissi/w2v2-lg-xls-r-1b-kabyle",
    "Yoruba": "FarmerlineML/w2v-bert-2.0_yoruba_v1",
    "Luganda": "FarmerlineML/luganda_fkd",
    "Luo": "FarmerlineML/w2v-bert-2.0_luo_v2",
    "Somali": "FarmerlineML/w2v-bert-2.0_somali_alpha",
    "Pidgin": "FarmerlineML/pidgin_nigerian",
    "Kikuyu": "FarmerlineML/w2v-bert-2.0_kikuyu",
    "Igbo": "FarmerlineML/w2v-bert-2.0_igbo_v1",
    # "Krio": "FarmerlineML/w2v-bert-2.0_krio_v3",
}
# -------- Lazy-load pipeline cache (Space-safe) --------
_PIPELINE_CACHE = {}
_CACHE_ORDER = []  # most recently used first
_CACHE_MAX_SIZE = 3  # tune for available RAM


def _touch_cache(key):
    if key in _CACHE_ORDER:
        _CACHE_ORDER.remove(key)
    _CACHE_ORDER.insert(0, key)


def _evict_if_needed():
    while len(_PIPELINE_CACHE) > _CACHE_MAX_SIZE:
        oldest = _CACHE_ORDER.pop()  # least recently used
        try:
            del _PIPELINE_CACHE[oldest]
        except KeyError:
            pass
def get_asr_pipeline(language_display: str):
    if language_display in _PIPELINE_CACHE:
        _touch_cache(language_display)
        return _PIPELINE_CACHE[language_display]
    model_id = language_models[language_display]
    pipe = pipeline(
        task="automatic-speech-recognition",
        model=model_id,
        device=-1,  # CPU on Spaces (explicit)
        chunk_length_s=30,
    )
    _PIPELINE_CACHE[language_display] = pipe
    _touch_cache(language_display)
    _evict_if_needed()
    return pipe
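
# Usage sketch: repeated calls for the same language hit the in-process cache, so
# model weights are downloaded and loaded once per language until evicted.
#   pipe = get_asr_pipeline("Ewe")  # first call loads from the Hub
#   pipe = get_asr_pipeline("Ewe")  # second call is served from _PIPELINE_CACHE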
# -------- Helpers --------
def _model_revision_from_pipeline(pipe) -> str:
    # Best-effort capture of the revision/commit hash for reproducibility
    for attr in ("hub_revision", "revision", "_commit_hash"):
        val = getattr(getattr(pipe, "model", None), attr, None)
        if val:
            return str(val)
    try:
        return str(getattr(pipe.model.config, "_name_or_path", "unknown"))
    except Exception:
        return "unknown"


def _compute_metrics(hyp: str, ref_or_corrected: str):
    if not HAS_JIWER or not ref_or_corrected or not hyp:
        return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
    try:
        measures = compute_measures(ref_or_corrected, hyp)
        return {
            "wer": measures.get("wer"),
            "cer": jiwer_cer(ref_or_corrected, hyp),
            "subs": measures.get("substitutions"),
            "ins": measures.get("insertions"),
            "dels": measures.get("deletions"),
        }
    except Exception:
        return {"wer": None, "cer": None, "subs": None, "ins": None, "dels": None}
# -------- Inference --------
def transcribe(audio_path: str, language: str):
    """
    Load the audio via librosa (supports mp3, wav, flac, m4a, ogg, etc.),
    convert it to mono, then run it through the chosen ASR pipeline.
    Returns the transcript (unchanged behavior) and a meta dict for feedback.
    """
    if not audio_path:
        return "⚠️ Please upload or record an audio clip.", None
    speech, sr = librosa.load(audio_path, sr=None, mono=True)
    duration_s = float(librosa.get_duration(y=speech, sr=sr))
    pipe = get_asr_pipeline(language)
    decode_params = {"chunk_length_s": getattr(pipe, "chunk_length_s", 30)}

    t0 = time.time()
    result = pipe({"sampling_rate": sr, "raw": speech})
    latency_ms = int((time.time() - t0) * 1000.0)

    hyp_text = result.get("text", "")
    rtf = (latency_ms / 1000.0) / max(duration_s, 1e-9)
    meta = {
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "session_id": f"anon-{uuid.uuid4()}",
        "language_display": language,
        "model_id": language_models.get(language, "unknown"),
        "model_revision": _model_revision_from_pipeline(pipe),
        "audio_duration_s": duration_s,
        "sample_rate": sr,
        "source": "upload",
        "decode_params": json.dumps(decode_params),
        "transcript_hyp": hyp_text,
        "latency_ms": latency_ms,
        "rtf": rtf,
    }
    return hyp_text, meta
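
# RTF (real-time factor) = processing time / audio duration, so RTF < 1 means the
# model transcribes faster than real time. Minimal local sanity check, assuming a
# hypothetical clip sample.wav sits next to app.py:
#   text, meta = transcribe("sample.wav", "Ewe")
#   print(text, meta["latency_ms"], meta["rtf"])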
# -------- Feedback submit --------
def submit_feedback(meta, reference_text, corrected_text, score, feedback_text,
                    tags, store_audio, share_publicly, audio_file_path):
    """
    Compute metrics (if possible) and push a row to the HF Dataset immediately.
    No local CSV/audio writes.
    """
    if not meta:
        return {"status": "No transcription metadata available. Please transcribe first."}
    ref_for_metrics = (reference_text or "").strip()
    corrected_text = (corrected_text or "").strip()
    # Fall back to the corrected transcript as the reference when none is given
    if not ref_for_metrics and corrected_text:
        ref_for_metrics = corrected_text
    metrics = _compute_metrics(meta.get("transcript_hyp", ""), ref_for_metrics)

    row = dict(meta)
    row.update({
        "reference_text": reference_text or "",
        "corrected_text": corrected_text or "",
        "wer": metrics["wer"],
        "cer": metrics["cer"],
        "subs": metrics["subs"],
        "ins": metrics["ins"],
        "dels": metrics["dels"],
        "score_out_of_10": int(score) if score is not None else None,
        "feedback_text": feedback_text or "",
        "tags": json.dumps({"labels": tags or []}),
        "share_publicly": bool(share_publicly),
    })
    try:
        # Use the temporary upload path from Gradio only if the user consented
        audio_to_push = audio_file_path if store_audio else None
        hf_status = _push_row_to_hf_dataset(row, audio_to_push)
        status = f"Feedback saved. {hf_status}"
    except Exception as e:
        status = f"Failed to push to HF Dataset: {e}"
    return {
        "status": status,
        "wer": row["wer"],
        "cer": row["cer"],
        "subs": row["subs"],
        "ins": row["ins"],
        "dels": row["dels"],
        "latency_ms": row["latency_ms"],
        "rtf": row["rtf"],
        "model_id": row["model_id"],
        "model_revision": row["model_revision"],
    }
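
# Note: if jiwer is not installed, or neither a reference nor a correction is
# provided, the metric fields stay None and render as null in the results JSON.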
# -------- UI (original preserved; additions appended) --------
with gr.Blocks(title="🌐 Multilingual ASR Demo") as demo:
    gr.Markdown(
        """
        ## 🎙️ Multilingual Speech-to-Text

        Upload an audio file (MP3, WAV, FLAC, M4A, OGG, …) or record via your microphone.
        Then choose the language/model and hit **Transcribe**.
        """
    )
    with gr.Row():
        lang = gr.Dropdown(
            choices=list(language_models.keys()),
            value=list(language_models.keys())[0],
            label="Select Language / Model",
        )
    with gr.Row():
        audio = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Upload or Record Audio",
        )
    btn = gr.Button("Transcribe")
    output = gr.Textbox(label="Transcription")

    # Hidden state to carry metadata from transcribe -> feedback
    meta_state = gr.State(value=None)

    # Keep original behavior: `output` shows the transcript.
    # Also capture meta into the hidden state.
    def _transcribe_and_store(audio_path, language):
        hyp, meta = transcribe(audio_path, language)
        # Pre-fill the corrected-text box with the hypothesis for easy edits
        return hyp, meta, hyp

    # --- Evaluation & Feedback (no style changes) ---
    with gr.Accordion("Evaluation & Feedback", open=False):
        with gr.Row():
            reference_tb = gr.Textbox(label="Reference text (optional)", lines=4, value="")
        with gr.Row():
            corrected_tb = gr.Textbox(label="Corrected transcript (optional)", lines=4, value="")
        with gr.Row():
            score_slider = gr.Slider(minimum=0, maximum=10, step=1, label="Score out of 10", value=7)
        with gr.Row():
            feedback_tb = gr.Textbox(label="Feedback (what went right/wrong?)", lines=3, value="")
        with gr.Row():
            tags_cb = gr.CheckboxGroup(
                ["noisy", "far-field", "code-switching", "numbers-heavy", "named-entities",
                 "read-speech", "spontaneous", "call-center", "voicenote"],
                label="Slice tags (select any that apply)",
            )
        with gr.Row():
            store_audio_cb = gr.Checkbox(label="Allow storing my audio for research/eval", value=False)
            share_cb = gr.Checkbox(label="Allow sharing this example publicly", value=False)
        submit_btn = gr.Button("Submit Feedback / Compute Metrics")
        results_json = gr.JSON(label="Metrics & Status")

    # Wire events
    btn.click(
        fn=_transcribe_and_store,
        inputs=[audio, lang],
        outputs=[output, meta_state, corrected_tb],
    )
    submit_btn.click(
        fn=submit_feedback,
        inputs=[
            meta_state,
            reference_tb,
            corrected_tb,
            score_slider,
            feedback_tb,
            tags_cb,
            store_audio_cb,
            share_cb,
            audio,  # raw file path from gr.Audio
        ],
        outputs=results_json,
    )
# Keep Spaces stable under load
if __name__ == "__main__":
    demo.queue()
    demo.launch()