Spaces:

meldynamics
/

lt-whisper-leaderboard

Running

App Files Files Community

lt-whisper-leaderboard / app.py

aleksasp

Add Lithuanian Whisper evaluation data

4ccccc6 17 days ago

raw

history blame contribute delete

2.81 kB

	import gradio as gr
	import pandas as pd
	from pathlib import Path

	from src.asr.build_dataframe import per_dataset_tables, overall_table, list_corpora

	# Directory of evaluation results: "asr_data/results" next to this file.
	# Passed (as a string) to the table builders imported from src.asr.build_dataframe.
	RESULTS_DIR = Path(__file__).parent / "asr_data" / "results"

	# Custom stylesheet passed to gr.Blocks(css=...). The "#df" selector targets
	# the components created with elem_id="df".
	CSS = """
	#df * { white-space: nowrap; } /* keep single-line model names */
	.gradio-container { max-width: 1200px !important; }
	th, td { font-size: 14px; }
	"""

	def get_overall_df() -> pd.DataFrame:
	    """Build the overall leaderboard table for display.

	    Calls ``overall_table`` on ``RESULTS_DIR`` and replaces missing values
	    with empty strings.

	    Returns:
	        The resulting frame, or -- if anything raises along the way -- a
	        one-row frame with a single ``"Error"`` column holding
	        ``"<ExceptionType>: <message>"`` so the failure is shown in the
	        table instead of propagating to the caller.
	    """
	    try:
	        return overall_table(str(RESULTS_DIR)).fillna("")
	    except Exception as exc:
	        # Deliberate catch-all: report the problem in the table itself.
	        message = f"{type(exc).__name__}: {exc}"
	        return pd.DataFrame({"Error": [message]})

	def get_per_corpus_df(corpus: str) -> pd.DataFrame:
	    """Build the table for a single corpus for display.

	    Args:
	        corpus: Key used to look up the table in the mapping returned by
	            ``per_dataset_tables``.

	    Returns:
	        The matching frame with missing values replaced by empty strings;
	        an empty frame when ``corpus`` is not a key of the mapping; or, if
	        anything raises, a one-row frame with a single ``"Error"`` column
	        holding ``"<ExceptionType>: <message>"``.
	    """
	    try:
	        tables_by_corpus = per_dataset_tables(str(RESULTS_DIR))
	        # Unknown corpus -> empty frame rather than a KeyError.
	        return tables_by_corpus.get(corpus, pd.DataFrame()).fillna("")
	    except Exception as exc:
	        # Deliberate catch-all: report the problem in the table itself.
	        message = f"{type(exc).__name__}: {exc}"
	        return pd.DataFrame({"Error": [message]})

	# Build the leaderboard page: a title, an "Overall" tab, one tab per corpus
	# reported by list_corpora(), and a static Markdown footer. Each table has a
	# reload button that re-runs the matching loader.
	with gr.Blocks(css=CSS) as demo:
	    gr.Markdown("# 🇱🇹 Lithuanian ASR Leaderboard (Local Results)")

	    with gr.Tab("Overall"):
	        # Every table in this file uses elem_id="df", which is what the
	        # "#df *" rule in CSS selects.
	        df_overall = gr.Dataframe(
	            value=get_overall_df(),
	            interactive=False,
	            elem_id="df",
	            label="Overall (averages)",
	            wrap=False,
	        )
	        gr.Button("Reload overall").click(lambda: get_overall_df(), outputs=df_overall)

	    # One tab per dataset.
	    # The table loaders already turn failures into an in-table error row;
	    # guard corpus discovery the same way so a missing or unreadable results
	    # directory does not abort the whole app while the page is being built.
	    try:
	        corpora = list_corpora(str(RESULTS_DIR))
	    except Exception as e:
	        corpora = []
	        gr.Markdown(f"**Could not list corpora:** `{type(e).__name__}: {e}`")

	    for corpus in corpora:
	        with gr.Tab(corpus.upper()):
	            df_c = gr.Dataframe(
	                value=get_per_corpus_df(corpus),
	                interactive=False,
	                elem_id="df",
	                label=f"{corpus.upper()} (WER, CER)",
	                wrap=False,
	            )
	            # Bind the current corpus as a default argument: a plain closure
	            # would see only the loop variable's final value on click.
	            gr.Button(f"Reload {corpus}").click(lambda c=corpus: get_per_corpus_df(c), outputs=df_c)

	    gr.Markdown(
	        """
	### Evaluation
	- GPU: 1 × NVIDIA GeForce RTX 4090

	### 📚 Models
	- [Whisper Large V2 Lithuanian (GGML)](https://huggingface.co/meldynamics/whisper-large-v2-lithuanian-ggml)
	- [Whisper Large V3 Turbo Lithuanian 0.06 CER Filtered (GGML)](https://huggingface.co/meldynamics/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered-ggml)
	- [Whisper Base Lithuanian (GGML)](https://huggingface.co/meldynamics/whisper-base-lithuanian-ggml)

	### 🗂 Datasets
	- [Lithuania 0.06 CER Filtered](https://huggingface.co/datasets/sam8000/lithuania)
	- [Liepa-2](https://huggingface.co/datasets/isLucid/liepa-2)
	- [Common Voice 13.0 (Lithuanian subset)](https://huggingface.co/datasets/mozilla-foundation/common_voice_13_0)
	- [Common Voice 17.0 (Lithuanian subset)](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0)

	---
	Tip: add new JSON files to `asr_data/results/` and click Reload to refresh the tables.
	"""
	    )

	# Launch the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()