Spaces:

MLRS
/

MELABench

Running

App Files Files Community

MELABench / app.py

KurtMica

ACL 2025 paper citation.

d088b76 about 1 month ago

raw

history blame contribute delete

9.29 kB

	import gradio as gr
	from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
	import pandas as pd
	from apscheduler.schedulers.background import BackgroundScheduler
	from huggingface_hub import snapshot_download

	from src.about import (
	CITATION_BUTTON_LABEL,
	CITATION_BUTTON_TEXT,
	EVALUATION_QUEUE_TEXT,
	INTRODUCTION_TEXT,
	LLM_BENCHMARKS_TEXT,
	TITLE,
	)
	from src.display.css_html_js import custom_css
	from src.display.utils import (
	BENCHMARK_COLS,
	COLS,
	EVAL_COLS,
	EVAL_TYPES,
	AutoEvalColumn,
	ModelTraining,
	fields,
	MalteseTraining
	)
	from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
	from src.populate import get_evaluation_queue_df, get_leaderboard_df
	from src.submission.submit import add_new_eval, read_configuration


	def restart_space() -> None:
	    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
	    API.restart_space(repo_id=REPO_ID)

	### Space initialisation
	def _sync_dataset_snapshot(repo_id, local_dir):
	    """Download the dataset repo ``repo_id`` into ``local_dir``.

	    The target path is printed first. If anything in the attempt raises, the
	    failure is printed and ``restart_space()`` is called; the exception is not
	    re-raised, so module initialisation continues afterwards.
	    """
	    try:
	        print(local_dir)
	        snapshot_download(
	            repo_id=repo_id,
	            local_dir=local_dir,
	            repo_type="dataset",
	            tqdm_class=None,
	            etag_timeout=30,
	            token=TOKEN,
	        )
	    except Exception as exc:
	        # Report the cause before restarting; otherwise a persistent failure
	        # shows up only as an unexplained restart.
	        print(f"Could not download dataset {repo_id!r} into {local_dir!r}: {exc!r}")
	        restart_space()


	# Fetch the evaluation requests first, then the evaluation results.
	_sync_dataset_snapshot(QUEUE_REPO, EVAL_REQUESTS_PATH)
	_sync_dataset_snapshot(RESULTS_REPO, EVAL_RESULTS_PATH)


	# Leaderboard table built from the downloaded results and requests folders.
	LEADERBOARD_DF = get_leaderboard_df(
	    EVAL_RESULTS_PATH,
	    EVAL_REQUESTS_PATH,
	    COLS,
	    BENCHMARK_COLS,
	)

	# The queue helper returns three tables, unpacked in this order:
	# finished, running, pending.
	_queue_tables = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
	finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = _queue_tables

	def init_leaderboard(dataframe):
	    """Build the ``Leaderboard`` component that displays ``dataframe``.

	    Column types, default selection, hidden columns and filters are all
	    derived from the fields of ``AutoEvalColumn``.

	    Raises:
	        ValueError: if ``dataframe`` is ``None`` or empty.
	    """
	    if dataframe is None or dataframe.empty:
	        raise ValueError("Leaderboard DataFrame is empty or None.")

	    # Materialise the column descriptors once and reuse them below.
	    columns = list(fields(AutoEvalColumn))

	    column_types = [col.type for col in columns]
	    column_selector = SelectColumns(
	        default_selection=[col.name for col in columns if col.displayed_by_default],
	        cant_deselect=[col.name for col in columns if col.never_hidden],
	        label="Select Columns to Display:",
	    )
	    hidden_columns = [col.name for col in columns if col.hidden]

	    filters = [
	        ColumnFilter(AutoEvalColumn.model_training.name, type="checkboxgroup", label="Model types"),
	        ColumnFilter(AutoEvalColumn.maltese_training.name, type="checkboxgroup", label="Maltese training"),
	        ColumnFilter(
	            AutoEvalColumn.language_count.name,
	            type="slider",
	            min=1,
	            max=1000,
	            label="Number of languages during training",
	        ),
	        ColumnFilter(
	            AutoEvalColumn.params.name,
	            type="slider",
	            min=0.01,
	            max=150,
	            label="Select the number of parameters (B)",
	        ),
	        ColumnFilter(AutoEvalColumn.prompt_version.name, type="checkboxgroup", label="Prompt Version"),
	        ColumnFilter(AutoEvalColumn.n_shot.name, type="slider", min=0, max=100, label="Number of Shots"),
	        ColumnFilter(
	            AutoEvalColumn.still_on_hub.name,
	            type="boolean",
	            label="Deleted/incomplete",
	            default=True,
	        ),
	    ]

	    return Leaderboard(
	        value=dataframe,
	        datatype=column_types,
	        select_columns=column_selector,
	        search_columns=[AutoEvalColumn.model.name],
	        hide_columns=hidden_columns,
	        filter_columns=filters,
	        bool_checkboxgroup_label="Hide models",
	        interactive=False,
	    )


	# Top-level UI: title and introduction, three tabs (leaderboard, about,
	# submission), and a collapsible citation box below the tabs.
	demo = gr.Blocks(css=custom_css)
	with demo:
	    gr.HTML(TITLE)
	    gr.HTML(INTRODUCTION_TEXT, elem_classes="markdown-text")

	    with gr.Tabs(elem_classes="tab-buttons") as tabs:
	        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
	            leaderboard = init_leaderboard(LEADERBOARD_DF)

	        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
	            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

	        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
	            with gr.Column():
	                with gr.Row():
	                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

	                # One collapsed accordion per queue state; each title shows
	                # the number of rows in the corresponding table.
	                with gr.Column():
	                    with gr.Accordion(
	                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
	                        open=False,
	                    ):
	                        with gr.Row():
	                            finished_eval_table = gr.components.Dataframe(
	                                value=finished_eval_queue_df,
	                                headers=EVAL_COLS,
	                                datatype=EVAL_TYPES,
	                                row_count=5,
	                            )
	                    with gr.Accordion(
	                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
	                        open=False,
	                    ):
	                        with gr.Row():
	                            running_eval_table = gr.components.Dataframe(
	                                value=running_eval_queue_df,
	                                headers=EVAL_COLS,
	                                datatype=EVAL_TYPES,
	                                row_count=5,
	                            )

	                    with gr.Accordion(
	                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
	                        open=False,
	                    ):
	                        with gr.Row():
	                            pending_eval_table = gr.components.Dataframe(
	                                value=pending_eval_queue_df,
	                                headers=EVAL_COLS,
	                                datatype=EVAL_TYPES,
	                                row_count=5,
	                            )
	            with gr.Row():
	                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")

	            with gr.Row():
	                files = gr.File(
	                    label="Files (Configuration File & Prediction Outputs)",
	                    file_count="directory",
	                    type="filepath",
	                )

	            with gr.Row(equal_height=True):
	                # Read-only fields, filled in by the files.change handler below.
	                with gr.Column():
	                    model_name = gr.Textbox(
	                        label="Model name",
	                        info="Read automatically from the results file.",
	                        interactive=False,
	                    )
	                    version = gr.Textbox(
	                        label="Prompt Version",
	                        info="Read automatically from the results file.",
	                        interactive=False,
	                    )
	                    n_shots = gr.Number(
	                        label="Number of Shots",
	                        info="Read automatically from the results file.",
	                        interactive=False,
	                    )

	                # Fields the submitter fills in by hand.
	                with gr.Column():
	                    model_training = gr.Dropdown(
	                        choices=[t.to_str(": ") for t in ModelTraining if t != ModelTraining.NK],
	                        label="Model Training",
	                        info="How the model is trained.",
	                        multiselect=False,
	                        value=None,
	                        interactive=True,
	                    )
	                    # Compare against MalteseTraining's own NK member: members
	                    # of different enums never compare equal, so filtering on
	                    # ModelTraining.NK excluded nothing. getattr keeps this a
	                    # no-op if MalteseTraining defines no NK member.
	                    maltese_unknown = getattr(MalteseTraining, "NK", None)
	                    maltese_training = gr.Dropdown(
	                        choices=[t.to_str(": ") for t in MalteseTraining if t != maltese_unknown],
	                        label="Maltese Training",
	                        info="The last stage of training in which Maltese was included.",
	                        multiselect=False,
	                        value=None,
	                        interactive=True,
	                    )
	                    language_count = gr.Number(
	                        label="Number of languages",
	                        info="Include languages for all training stages. Set to 0 if unknown.",
	                        minimum=0,
	                        interactive=True,
	                    )

	            submit_button = gr.Button("Submit Eval")
	            submission_result = gr.Markdown()

	            # Hidden state carrying read_configuration's outputs over to
	            # add_new_eval when the submit button is clicked.
	            configuration = gr.State()
	            file_paths = gr.State()
	            files.change(
	                read_configuration,
	                files,
	                [configuration, file_paths, model_name, version, n_shots, submission_result],
	            )

	            submit_button.click(
	                add_new_eval,
	                [
	                    model_training,
	                    maltese_training,
	                    language_count,
	                    configuration,
	                    file_paths,
	                ],
	                submission_result,
	            )

	    with gr.Row():
	        with gr.Accordion("📙 Citation", open=False):
	            citation_button = gr.Textbox(
	                value=CITATION_BUTTON_TEXT,
	                label=CITATION_BUTTON_LABEL,
	                lines=20,
	                elem_id="citation-button",
	                show_copy_button=True,
	            )

	# Run restart_space in the background every 1800 seconds (30 minutes).
	scheduler = BackgroundScheduler()
	scheduler.add_job(restart_space, trigger="interval", seconds=30 * 60)
	scheduler.start()

	# Enable the request queue, then start serving the app.
	demo.queue(default_concurrency_limit=40).launch()