|
from datetime import datetime, timezone, timedelta |
|
import pandas as pd |
|
from datasets import load_dataset |
|
import gradio as gr |
|
|
|
from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS |
|
|
|
# Never truncate columns when a DataFrame is printed (e.g. in debug logs).
pd.options.display.max_columns = None
|
|
|
|
|
def show_output_box(message):
    """Build a Gradio update that shows ``message`` in a visible component.

    Args:
        message: Value to place in the target component.

    Returns:
        The result of ``gr.update`` with ``value`` set to ``message`` and
        ``visible`` set to ``True``.
    """
    update_kwargs = {"value": message, "visible": True}
    return gr.update(**update_kwargs)
|
|
|
|
|
def fetch_hf_results():
    """Load the latest submission metrics from the Hugging Face results repo.

    Reads ``auto_submissions/metrics_all.csv`` from ``RESULTS_REPO``, checks
    that every column listed in ``LEADERBOARD_RESULTS_COLUMNS`` is present,
    keeps only the most recent row (by ``submission_time``) for each
    ``(model, assay)`` pair, and adds a ``property`` column obtained by
    mapping ``assay`` through ``ASSAY_RENAME``.

    Returns:
        pandas.DataFrame: The de-duplicated results, ordered by
        ``submission_time`` descending, with the extra ``property`` column.

    Raises:
        AssertionError: If any expected leaderboard column is missing from
            the loaded data.
    """
    # Fixed UTC-4 offset, used only to timestamp the log line below; it does
    # not follow daylight-saving changes.
    EST = timezone(timedelta(hours=-4))
    print(f"tmp: Fetching results from HF at {datetime.now(EST)}")

    df = load_dataset(
        RESULTS_REPO,
        data_files="auto_submissions/metrics_all.csv",
    )["train"].to_pandas()

    # Compute the missing set once and reuse it in the message. The original
    # message referenced an undefined name, so a failed check surfaced as a
    # NameError instead of this diagnostic.
    missing_columns = set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)
    assert not missing_columns, (
        f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. "
        f"Missing columns: {missing_columns}"
    )

    # Sort newest-first so that keep="first" retains the latest submission
    # for each (model, assay) pair.
    df = df.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["model", "assay"], keep="first"
    )
    df["property"] = df["assay"].map(ASSAY_RENAME)
    return df
|
|