|
import gradio as gr |
|
import pandas as pd |
|
from pathlib import Path |
|
|
|
from src.asr.build_dataframe import per_dataset_tables, overall_table, list_corpora |
|
|
|
# Directory holding the result files, resolved relative to this script so the
# app works regardless of the current working directory.
RESULTS_DIR = Path(__file__).parent / "asr_data" / "results"

# Stylesheet passed to gr.Blocks(css=...). The "#df" selector targets the
# components created with elem_id="df" further down in this file.
CSS = """
#df * { white-space: nowrap; } /* keep single-line model names */
.gradio-container { max-width: 1200px !important; }
th, td { font-size: 14px; }
"""
|
|
|
def get_overall_df() -> pd.DataFrame:
    """Return the overall table for display.

    Calls ``overall_table`` on ``RESULTS_DIR`` and replaces missing values
    with empty strings. Any exception is caught and reported as a one-row
    frame with a single ``"Error"`` column, so the UI shows the failure
    instead of crashing.
    """
    try:
        return overall_table(str(RESULTS_DIR)).fillna("")
    except Exception as exc:
        message = f"{type(exc).__name__}: {exc}"
        return pd.DataFrame({"Error": [message]})
|
|
|
def get_per_corpus_df(corpus: str) -> pd.DataFrame:
    """Return the table for one corpus, ready for display.

    Args:
        corpus: Key used to look up the table in the mapping returned by
            ``per_dataset_tables``.

    Returns:
        The matching table with missing values replaced by empty strings,
        an empty frame when ``corpus`` has no entry, or a one-row frame with
        a single ``"Error"`` column if anything raised along the way.
    """
    try:
        frames = per_dataset_tables(str(RESULTS_DIR))
        fallback = pd.DataFrame()
        return frames.get(corpus, fallback).fillna("")
    except Exception as exc:
        message = f"{type(exc).__name__}: {exc}"
        return pd.DataFrame({"Error": [message]})
|
|
|
# Build the UI at import time: one "Overall" tab plus one tab per corpus,
# each with a read-only table and a button that recomputes it.
with gr.Blocks(css=CSS) as demo:
    gr.Markdown("# 🇱🇹 Lithuanian ASR Leaderboard (Local Results)")

    with gr.Tab("Overall"):
        # NOTE(review): elem_id="df" is reused by every table below so the
        # "#df" CSS rule applies to all of them; HTML ids are meant to be
        # unique, so a shared class would be cleaner if CSS is updated too.
        df_overall = gr.Dataframe(
            value=get_overall_df(),
            interactive=False,
            elem_id="df",
            label="Overall (averages)",
            wrap=False,
        )
        gr.Button("Reload overall").click(get_overall_df, outputs=df_overall)

    corpora = list_corpora(str(RESULTS_DIR))
    for corpus in corpora:
        with gr.Tab(corpus.upper()):
            df_c = gr.Dataframe(
                value=get_per_corpus_df(corpus),
                interactive=False,
                elem_id="df",
                label=f"{corpus.upper()} (WER, CER)",
                wrap=False,
            )
            # Bind the current corpus as a default argument; a plain closure
            # would see only the last value of the loop variable.
            gr.Button(f"Reload {corpus}").click(
                lambda c=corpus: get_per_corpus_df(c),
                outputs=df_c,
            )

    # NOTE(review): the emoji that preceded the "Models" and "Datasets"
    # headings were unrecoverable mojibake and have been dropped; restore
    # them if the originals are known.
    # The body is indented with the code on the assumption that gr.Markdown
    # dedents its value before rendering — confirm for the pinned Gradio version.
    gr.Markdown(
        """
        ### Evaluation
        - GPU: 1 × NVIDIA GeForce RTX 4090

        ### Models
        - [Whisper Large V2 Lithuanian (GGML)](https://huggingface.co/meldynamics/whisper-large-v2-lithuanian-ggml)
        - [Whisper Large V3 Turbo Lithuanian 0.06 CER Filtered (GGML)](https://huggingface.co/meldynamics/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered-ggml)
        - [Whisper Base Lithuanian (GGML)](https://huggingface.co/meldynamics/whisper-base-lithuanian-ggml)

        ### Datasets
        - [Lithuania 0.06 CER Filtered](https://huggingface.co/datasets/sam8000/lithuania)
        - [Liepa-2](https://huggingface.co/datasets/isLucid/liepa-2)
        - [Common Voice 13.0 (Lithuanian subset)](https://huggingface.co/datasets/mozilla-foundation/common_voice_13_0)
        - [Common Voice 17.0 (Lithuanian subset)](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0)

        ---
        *Tip:* add new JSON files to `asr_data/results/` and click **Reload** to refresh the tables.
        """
    )


# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|