import asyncio
import logging
import sys
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Dict

import gradio as gr
import pandas as pd

# Make the sibling main module importable when this app is run directly.
sys.path.append(str(Path(__file__).parent))

from main import (
    StorageManager,
    EvaluationRequest,
    evaluate_model,
    PATHS,
)

logging.basicConfig(level=logging.INFO)

storage_manager = StorageManager(PATHS)
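# Note: StorageManager.load/save are assumed (from their call sites below) to
# read and write JSON-serializable lists keyed by name ('leaderboard',
# 'tasks', 'results'); see main.py for the actual contract.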


def load_leaderboard_data():
    """Load leaderboard entries from storage as a DataFrame (empty on failure)."""
    try:
        return pd.DataFrame(storage_manager.load('leaderboard'))
    except Exception as e:
        logging.error(f"Error loading leaderboard: {e}")
        return pd.DataFrame()


def format_leaderboard_df(df):
    """Format raw leaderboard records for display, best (lowest) PER first."""
    if df.empty:
        return df

    # Sort on the numeric column before formatting; sorting the formatted
    # strings would order lexicographically rather than numerically.
    df = df.sort_values("average_per")

    display_df = pd.DataFrame({
        "Model": df["model"],
        "Average PER ⬇️": df["average_per"].apply(lambda x: f"{x:.4f}"),
        "Average PWED ⬇️": df["average_pwed"].apply(lambda x: f"{x:.4f}"),
        "GitHub": df["github_url"].apply(lambda x: f'<a href="{x}" target="_blank">Repository</a>' if x else "N/A"),
        "Submission Date": pd.to_datetime(df["submission_date"]).dt.strftime("%Y-%m-%d"),
    })

    return display_df


def create_html_table(df):
    return df.to_html(escape=False, index=False, classes="styled-table")
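

# For orientation, a minimal sketch of the PER metric described in the UI
# below: Levenshtein distance between predicted and reference phoneme
# sequences, normalized by reference length. This is illustrative only; the
# real scoring lives in main.evaluate_model. PWED could similarly be computed
# with panphon (e.g. panphon.distance.Distance().weighted_feature_edit_distance),
# assuming that is the weighting the evaluation uses.
def per_sketch(predicted: list, reference: list) -> float:
    """Phoneme Error Rate: edit distance / reference length (illustrative)."""
    m, n = len(predicted), len(reference)
    # Classic dynamic-programming edit distance.
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        dp[i][0] = i
    for j in range(n + 1):
        dp[0][j] = j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if predicted[i - 1] == reference[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,          # deletion
                           dp[i][j - 1] + 1,          # insertion
                           dp[i - 1][j - 1] + cost)   # substitution
    return dp[m][n] / max(n, 1)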


def submit_evaluation(model_name: str, submission_name: str, github_url: str) -> str:
    """Queue an evaluation task, run it, and return a status message."""
    if not model_name or not submission_name:
        return "⚠️ Please provide both model name and submission name."

    try:
        task_id = str(uuid.uuid4())

        request = EvaluationRequest(
            transcription_model=model_name,
            submission_name=submission_name,
            github_url=github_url if github_url else None,
            subset="test"
        )

        task = {
            "id": task_id,
            "model": model_name,
            "subset": "test",
            "submission_name": submission_name,
            "github_url": github_url,
            "status": "queued",
            "submitted_at": datetime.now().isoformat()
        }

        # Persist the queued task before starting the evaluation.
        tasks = storage_manager.load('tasks')
        tasks.append(task)
        storage_manager.save('tasks', tasks)

        # evaluate_model is a coroutine; run it to completion here.
        asyncio.run(evaluate_model(task_id, request))

        return f"✅ Evaluation submitted successfully! Task ID: {task_id}"
    except Exception as e:
        return f"❌ Error: {str(e)}"


def check_status(query: str) -> Dict[str, Any]:
    """Look up a result or queued task by task ID first, then by model name."""
    if not query:
        return {"error": "Please enter a model name or task ID"}

    try:
        results = storage_manager.load('results')
        tasks = storage_manager.load('tasks')

        # Treat the query as a task ID first.
        result = next((r for r in results if r["task_id"] == query), None)
        task = next((t for t in tasks if t["id"] == query), None)

        # Fall back to matching on model name.
        if not result:
            result = next((r for r in results if r["model"] == query), None)
        if not task:
            task = next((t for t in tasks if t["model"] == query), None)

        if result:
            return {
                "status": "completed",
                "model": result["model"],
                "subset": result["subset"],
                "num_files": result["num_files"],
                "average_per": result["average_per"],
                "average_pwed": result["average_pwed"],
                "detailed_results": result["detailed_results"],
                "timestamp": result["timestamp"]
            }
        elif task:
            return task
        else:
            return {"error": f"No results found for '{query}'"}

    except Exception as e:
        logging.error(f"Error checking status: {e}")
        return {"error": f"Error checking status: {str(e)}"}


with gr.Blocks(css="""
    .styled-table {
        width: 100%;
        border-collapse: collapse;
        margin: 25px 0;
        font-size: 0.9em;
        font-family: sans-serif;
        box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
    }
    .styled-table thead tr {
        background-color: #96b9D0;
        color: #ffffff;
        text-align: left;
    }
    .styled-table th,
    .styled-table td {
        padding: 12px 15px;
    }
    .styled-table tbody tr {
        border-bottom: 1px solid #dddddd;
    }
""") as demo:
    gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
    gr.Markdown("""
    ## Explanation of Metrics
    - **PER (Phoneme Error Rate)**: The Levenshtein distance between the phoneme sequences of the predicted and reference transcriptions.
    - **PWED (Phoneme Weighted Edit Distance)**: The edit distance between the predicted and reference phoneme sequences, weighted by phonemic feature distance. Feature vectors are provided by the panphon library.
    """)

    with gr.Tabs():
        with gr.TabItem("🏆 Leaderboard"):
            leaderboard_html = gr.HTML(create_html_table(format_leaderboard_df(load_leaderboard_data())))
            refresh_btn = gr.Button("🔄 Refresh")
            # Return the new HTML value directly; component .update() methods
            # such as gr.HTML.update were removed in Gradio 4.x.
            refresh_btn.click(
                lambda: create_html_table(format_leaderboard_df(load_leaderboard_data())),
                outputs=leaderboard_html
            )

        with gr.TabItem("📝 Submit Model"):
            model_name = gr.Textbox(label="Model Name", placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft")
            submission_name = gr.Textbox(label="Submission Name", placeholder="My Model v1.0")
            github_url = gr.Textbox(label="GitHub URL (optional)", placeholder="https://github.com/username/repo")
            submit_btn = gr.Button("Submit")
            result = gr.Textbox(label="Submission Status")

            submit_btn.click(
                fn=submit_evaluation,
                inputs=[model_name, submission_name, github_url],
                outputs=result
            )

        with gr.TabItem("🔍 Model Status"):
            query = gr.Textbox(label="Model Name or Task ID", placeholder="Enter model name (e.g., facebook/wav2vec2-lv-60-espeak-cv-ft)")
            status_btn = gr.Button("Check Status")
            status_output = gr.JSON(label="Status")

            status_btn.click(
                fn=check_status,
                inputs=query,
                outputs=status_output
            )


if __name__ == "__main__":
    demo.launch()
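    # Note: evaluate_model runs synchronously inside the submit handler, so a
    # long evaluation blocks that request. Enabling Gradio's request queue,
    # e.g. demo.queue().launch(), is one option (assuming a Gradio version
    # where Blocks.queue is available).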