import hashlib
import pandas as pd
import gradio as gr
from gradio.themes.utils import sizes
from gradio_leaderboard import Leaderboard
from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS as SUBMIT_INSTRUCTIONS
from constants import (
    ASSAY_RENAME,  # noqa: F401 -- used inside the query string in format_leaderboard_table
EXAMPLE_FILE_DICT,
LEADERBOARD_DISPLAY_COLUMNS,
ABOUT_TAB_NAME,
FAQ_TAB_NAME,
TERMS_URL,
LEADERBOARD_COLUMNS_RENAME,
LEADERBOARD_COLUMNS_RENAME_LIST,
SUBMIT_TAB_NAME,
)
from submit import make_submission
from utils import fetch_hf_results, show_output_box, get_time


def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
"""
Format the dataframe for display on the leaderboard. The dataframe comes from utils.fetch_hf_results().
"""
df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
if assay is not None:
df = df[df["assay"] == assay]
df = df[LEADERBOARD_DISPLAY_COLUMNS]
df = df.sort_values(by="spearman", ascending=False)
    # After sorting, annotate heldout rows whose results are withheld until judging.
    # Note: this could instead be a text box below the leaderboard, e.g.
    # "Results for the Heldout Test Set are only evaluated at competition close"
# Convert spearman column to string to avoid dtype incompatibility when assigning text
df["spearman"] = df["spearman"].astype(str)
df.loc[
(df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"), "spearman"
] = "N/A, evaluated at competition close"
# Finally, rename columns for readability
df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)
return df
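
# A minimal usage sketch for the formatter above (assumption: fetch_hf_results()
# returns one row per model/assay/dataset with "assay", "dataset", "spearman",
# and the other LEADERBOARD_DISPLAY_COLUMNS):
#     display_df = format_leaderboard_table(fetch_hf_results())  # all assays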


def get_leaderboard_object(assay: str | None = None):
filter_columns = ["dataset"]
if assay is None:
filter_columns.append("property")
# TODO how to sort filter columns alphabetically?
# Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
# Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
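    # One possible answer to the TODO above (assumption: plain alphabetical
    # order on the raw column keys is acceptable):
    #     filter_columns = sorted(filter_columns)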
lb = Leaderboard(
value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
datatype=["str", "str", "str", "number", "str"],
select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
["model", "property", "spearman", "dataset", "user"]
),
search_columns=["Model Name"],
filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
every=15,
render=True,
)
return lb


# Initialize the global results dataframe
current_dataframe = fetch_hf_results()

# Note(Lood): Known limitation: the global current_dataframe is shared across all
# sessions. To keep the per-session data_version state updating correctly, it is
# passed to timer.tick below as both input and output.

# Make font size bigger using gradio theme
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
timer = gr.Timer(3) # Run every 3 seconds when page is focused
data_version = gr.State(value=0) # Track data changes

    def update_current_dataframe(current_version):
        """Re-fetch results and return an updated data-version hash if they changed."""
        global current_dataframe
        new_dataframe = fetch_hf_results()
        new_hash = hashlib.sha256(
            pd.util.hash_pandas_object(new_dataframe).values
        ).hexdigest()
        # Only propagate a new version when the data has actually changed
        if new_hash != current_version:
            print(
                f"Dataframe changed at {get_time()}. "
                f"Old hash: {str(current_version)[:8]}, new hash: {new_hash[:8]}"
            )
            current_dataframe = new_dataframe
            return new_hash  # New state value triggers dependent listeners
        return current_version

    timer.tick(fn=update_current_dataframe, inputs=data_version, outputs=data_version)
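
    # Design note: pd.util.hash_pandas_object gives stable per-row hashes, and
    # sha256 over those values yields a single order-sensitive digest, so the
    # timer only triggers downstream refreshes when the results actually change.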
## Header
with gr.Row():
with gr.Column(scale=6): # bigger text area
gr.Markdown(
f"""
## Welcome to the Ginkgo Antibody Developability Benchmark!
            Participants can submit models to the leaderboard simply by uploading a CSV file (see the "✉️ Submit" tab).
You can **predict any or all of the 5 properties**, and you can filter the main leaderboard by property.
See more details in the "{ABOUT_TAB_NAME}" tab.
Submissions close on 1 November 2025.
"""
)
with gr.Column(scale=2): # smaller side column for logo
gr.Image(
value="./assets/competition_logo.jpg",
show_label=False,
show_download_button=False,
show_share_button=False,
width="25vw", # Take up the width of the column (2/8 = 1/4)
)
with gr.Tabs(elem_classes="tab-buttons"):
with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
gr.Markdown(ABOUT_INTRO)
gr.Image(
value="./assets/prediction_explainer.png",
show_label=False,
show_download_button=False,
show_share_button=False,
width="50vw",
)
gr.Markdown(ABOUT_TEXT)
# Procedurally make these 5 tabs
# for i, assay in enumerate(ASSAY_LIST):
# with gr.TabItem(
# f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
# elem_id="abdev-benchmark-tab-table",
# ) as tab_item:
# gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
# lb = get_leaderboard_object(assay=assay)
# def refresh_leaderboard(assay=assay):
# return format_leaderboard_table(df_results=current_dataframe, assay=assay)
# # Refresh when data version changes
# data_version.change(fn=refresh_leaderboard, outputs=lb)
# Note(Lood): Trying out just one leaderboard. We could also have a dropdown here that shows different leaderboards for each property, but that's just the same as the filters
with gr.TabItem(
"🏆 Leaderboard", elem_id="abdev-benchmark-tab-table"
) as leaderboard_tab:
gr.Markdown(
"""
# Overall Leaderboard (filter below by property)
Each property has its own prize, and participants can submit models for any combination of properties.
            **Note**: It is trivial to overfit the public GDPa1 dataset, which produces deceptively high Spearman correlations.
            We suggest evaluating with cross-validation, run only a limited number of times, to get a more realistic picture of how your model will perform on the eventual private test set.
"""
)
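
            # A hedged sketch of the cross-validation advice above (assumptions:
            # features X and labels y as arrays, plus any scikit-learn-style model):
            #     from scipy.stats import spearmanr
            #     from sklearn.model_selection import KFold
            #     rhos = []
            #     for train_idx, test_idx in KFold(n_splits=5, shuffle=True, random_state=0).split(X):
            #         model.fit(X[train_idx], y[train_idx])
            #         rhos.append(spearmanr(y[test_idx], model.predict(X[test_idx])).statistic)
            #     print(sum(rhos) / len(rhos))  # report the mean per-fold Spearman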
lb = get_leaderboard_object()

            def refresh_overall_leaderboard(version):
                print(f"Refreshing overall leaderboard at {get_time()}. Data version: {str(version)[:8]}")
                return format_leaderboard_table(df_results=current_dataframe)

            # Refresh whenever the data version changes
            data_version.change(fn=refresh_overall_leaderboard, inputs=data_version, outputs=lb)
# At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
# gr.Markdown(
# "_ℹ️ Results for the private test set will not be shown here and will be used for final judging at the close of the competition._"
# )
with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
            gr.Markdown(SUBMIT_INSTRUCTIONS)
submission_type_state = gr.State(value="GDPa1_cross_validation")
download_file_state = gr.State(value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"])
with gr.Row():
with gr.Column():
username_input = gr.Textbox(
label="Username",
placeholder="Enter your Hugging Face username",
info="This will be used to identify valid submissions, and to update your results if you submit again.",
)
# gr.LoginButton()
anonymous_checkbox = gr.Checkbox(
label="Anonymous",
value=False,
info="If checked, your username will be replaced with an anonymous hash on the leaderboard.",
)
model_name_input = gr.Textbox(
label="Model Name",
placeholder="Enter your model name (e.g., 'MyProteinLM-v1')",
info="This will be displayed on the leaderboard.",
)
model_description_input = gr.Textbox(
label="Model Description (optional)",
placeholder="Brief description of your model and approach",
info="Describe your model, training data, or methodology.",
lines=3,
)
registration_code = gr.Textbox(
label="Registration Code",
placeholder="Enter your registration code",
info="If you did not receive a registration code, please sign up on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>.",
)
# Extra validation / warning
# Add the conditional warning checkbox
high_corr_warning = gr.Markdown(
value="",
visible=False,
elem_classes=["warning-box"]
)
high_corr_checkbox = gr.Checkbox(
label="I understand this may be overfitting",
value=False,
visible=False,
info="This checkbox will appear if your submission shows suspiciously high correlations (>0.9).",
)
with gr.Column():
submission_type_dropdown = gr.Dropdown(
choices=["GDPa1", "GDPa1_cross_validation", "Heldout Test Set"],
value="GDPa1_cross_validation",
label="Submission Type",
info=f"Choose the dataset corresponding to the track you're participating in. See the '{ABOUT_TAB_NAME}' tab for details.",
)
download_button = gr.DownloadButton(
label="📥 Download example submission CSV for GDPa1",
value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"],
variant="secondary",
)
submission_file = gr.File(label="Submission CSV")

            def update_submission_type_and_file(submission_type):
                """
                Based on the submission type selected in the dropdown, update the
                submission-type state and the example file offered for download.
                """
                # Fall back to the cross-validation example for unknown types
                download_file = EXAMPLE_FILE_DICT.get(
                    submission_type, EXAMPLE_FILE_DICT["GDPa1_cross_validation"]
                )
download_label = (
f"📥 Download example submission CSV for {submission_type}"
)
return (
submission_type,
download_file,
gr.DownloadButton(
label=download_label,
value=download_file,
variant="secondary",
),
)
# Update submission type state and download button when dropdown changes
submission_type_dropdown.change(
fn=update_submission_type_and_file,
inputs=submission_type_dropdown,
outputs=[submission_type_state, download_file_state, download_button],
)
submit_btn = gr.Button("Evaluate")
message = gr.Textbox(label="Status", lines=1, visible=False)
# help message
gr.Markdown(
"If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space."
)
submit_btn.click(
make_submission,
inputs=[
submission_file,
username_input,
submission_type_state,
model_name_input,
model_description_input,
anonymous_checkbox,
registration_code,
],
outputs=[message],
).then(
fn=show_output_box,
inputs=[message],
outputs=[message],
)
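
            # Hedged sketch (not yet wired up): one way the high-correlation
            # warning components above could be toggled when a CSV is uploaded.
            # `estimate_submission_spearman` is a hypothetical helper that would
            # compare predictions against the public GDPa1 labels.
            #     def warn_if_high_correlation(file):
            #         rho = estimate_submission_spearman(file)  # hypothetical
            #         show = rho is not None and rho > 0.9
            #         return (
            #             gr.Markdown(
            #                 value="⚠️ Spearman > 0.9 against public data often indicates overfitting.",
            #                 visible=show,
            #             ),
            #             gr.Checkbox(visible=show),
            #         )
            #     submission_file.change(
            #         fn=warn_if_high_correlation,
            #         inputs=submission_file,
            #         outputs=[high_corr_warning, high_corr_checkbox],
            #     )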
with gr.Tab(FAQ_TAB_NAME):
gr.Markdown("# Frequently Asked Questions")
for i, (question, answer) in enumerate(FAQS.items()):
# Would love to make questions bold but accordion doesn't support it
question = f"{i+1}. {question}"
with gr.Accordion(question, open=False):
gr.Markdown(f"*{answer}*") # Italics for answers
# Footnote
gr.Markdown(
f"""
<div style="text-align: center; font-size: 14px; color: gray; margin-top: 2em;">
📬 For questions or feedback, contact <a href="mailto:[email protected]">[email protected]</a> or visit the Community tab at the top of this page.<br>
Visit the <a href="https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition">Competition Registration page</a> to sign up for updates and to register a team, and see Terms <a href="{TERMS_URL}">here</a>.
</div>
""",
elem_id="contact-footer",
)


if __name__ == "__main__":
demo.launch(ssr_mode=False, share=True)