galb-dai's picture
Remove some unused code/imports.
416ebf1
raw
history blame
2.82 kB
import pandas as pd
from datasets import get_dataset_config_names, load_dataset
from datasets.exceptions import DatasetNotFoundError
from tqdm.auto import tqdm
from src.display.utils import AutoEvalColumn
from src.envs import TOKEN
from src.logger import get_logger
# Module-level logger, named after this module via __name__.
logger = get_logger(__name__)
# Column layout of the leaderboard frame, in display order. Shared by the
# populated and the empty result so both always expose the same schema.
_LEADERBOARD_COLUMNS = [
    "System Name",
    "System Type",
    "Organization",
    "Success Rate (%)",
    "Problems Solved",
    "Submitted On",
]


def _empty_leaderboard_df() -> pd.DataFrame:
    """
    @brief Builds a leaderboard dataframe with the expected columns and no rows.
    """
    return pd.DataFrame(columns=_LEADERBOARD_COLUMNS)


def get_leaderboard_df(results_dataset_name: str) -> pd.DataFrame:
    """
    @brief Creates a dataframe from all the individual experiment results.

    Each config of the results dataset is treated as one submission. For every
    submission the "train" split is loaded, the share and count of rows with a
    truthy ``did_pass`` are computed, and the metadata is read from the first row.

    @param results_dataset_name Name of the dataset holding one config per submission.
    @return One row per unique (System Name, System Type, Organization) triplet,
            keeping the most recently submitted entry, sorted by success rate in
            descending order. An empty dataframe with the expected columns is
            returned when the dataset cannot be found or no submission is usable.
    """
    try:
        configs = get_dataset_config_names(
            results_dataset_name,
            token=TOKEN,
        )
    except (DatasetNotFoundError, FileNotFoundError):
        # Return an empty DataFrame with expected columns
        logger.warning("Failed to load configuration", exc_info=True)
        return _empty_leaderboard_df()

    rows = []
    for submission_id in tqdm(
        configs,
        total=len(configs),
        desc="Processing Submission Results",
    ):
        submission_ds = load_dataset(
            results_dataset_name,
            submission_id,
            split="train",
            token=TOKEN,
        )
        submission_df = pd.DataFrame(submission_ds)

        # A submission is only usable when every row carries a did_pass value.
        if (
            submission_df.empty
            or "did_pass" not in submission_df.columns
            or submission_df["did_pass"].isna().any()
        ):
            logger.warning(f"Skipping {submission_id} due to invalid did_pass values")
            continue

        did_pass = submission_df["did_pass"]
        # Submission metadata is read from the first row only.
        first_row = submission_df.iloc[0]

        rows.append(
            {
                "System Name": first_row["system_name"],
                "System Type": first_row["system_type"],
                "Organization": first_row["organization"],
                "Success Rate (%)": 100 * did_pass.mean(),
                "Problems Solved": did_pass.sum(),
                "Submitted On": pd.to_datetime(first_row["submission_ts"]).strftime("%Y-%m-%d %H:%M"),
            }
        )

    if not rows:
        # Without rows the frame would have no columns and the sort below would
        # raise KeyError; hand back the same empty schema as the not-found case.
        return _empty_leaderboard_df()

    full_df = pd.DataFrame(rows, columns=_LEADERBOARD_COLUMNS)

    # TODO: Forbid multiple submissions under the same name?
    # Keep only the latest entry per unique (System Name, System Type, Organization) triplet.
    # "Submitted On" is a zero-padded "%Y-%m-%d %H:%M" string, so lexicographic order
    # matches chronological order. Stable sorts keep ties in a deterministic order.
    final_df = (
        full_df.sort_values("Submitted On", ascending=False, kind="stable")
        .drop_duplicates(subset=["System Name", "System Type", "Organization"], keep="first")
        .sort_values(by=[AutoEvalColumn.success_rate.name], ascending=False, kind="stable")
        .reset_index(drop=True)
    )

    cols_to_round = ["Success Rate (%)"]
    final_df[cols_to_round] = final_df[cols_to_round].round(decimals=2)

    return final_df