from datetime import datetime, timezone, timedelta
import pandas as pd
from datasets import load_dataset
import gradio as gr
import hashlib
from typing import Iterable, Union
from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS, BASELINE_USERNAMES

pd.set_option("display.max_columns", None)


def get_time(tz_name="EST") -> str:
    # Fixed UTC offsets (no daylight-saving handling): EST is always treated as UTC-5
    offsets = {"EST": -5, "UTC": 0}
    if tz_name not in offsets:
        print("Invalid timezone, using EST")
        tz_name = "EST"
    offset = offsets[tz_name]
    return datetime.now(timezone(timedelta(hours=offset))).strftime("%Y-%m-%d %H:%M:%S") + f" ({tz_name})"

def show_output_box(message):
    return gr.update(value=message, visible=True)


def anonymize_user(username: str) -> str:
    # Anonymize using a hash of the username
    return hashlib.sha256(username.encode()).hexdigest()[:8]


def fetch_hf_results():
    # load_dataset should cache by default if not using force_redownload
    df = load_dataset(
        RESULTS_REPO,
        data_files="auto_submissions/metrics_all.csv",
    )["train"].to_pandas()
    assert all(
        col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
    ), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
    
    df_baseline = df[df["user"].isin(BASELINE_USERNAMES)]
    df_non_baseline = df[~df["user"].isin(BASELINE_USERNAMES)]
    # Show latest submission only
    # For baselines: Keep unique model names
    df_baseline = df_baseline.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["model", "assay", "dataset", "user"], keep="first"
    )
    # For users: Just show latest submission
    df_non_baseline = df_non_baseline.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["assay", "dataset", "user"], keep="first"
    )
    df = pd.concat([df_baseline, df_non_baseline], ignore_index=True)
    df["property"] = df["assay"].map(ASSAY_RENAME)
    
    # Rename baseline username to just "Baseline"
    df.loc[df["user"].isin(BASELINE_USERNAMES), "user"] = "Baseline"
    # Note: an "is_baseline" column could be added later to flag baseline models if the table gets crowded.
    # Anonymize the user column at this point. Users may submit anonymously or not; either way their latest
    # submission is shown. The `!= False` check also treats missing "anonymous" values as anonymous.
    df.loc[df["anonymous"] != False, "user"] = "anon-" + df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
    
    return df
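

# Illustrative sketch, not used by the app: the "latest submission only" rule above is just a
# sort on submission_time followed by drop_duplicates. The rows below are made-up toy data;
# only the column names are assumed to match metrics_all.csv.
def _latest_submission_example() -> pd.DataFrame:
    toy = pd.DataFrame(
        {
            "user": ["alice", "alice", "bob"],
            "assay": ["logP", "logP", "logP"],
            "dataset": ["test", "test", "test"],
            "submission_time": ["2025-01-02 10:00:00", "2025-01-01 09:00:00", "2025-01-01 08:00:00"],
        }
    )
    # Newest row first, then keep="first" retains one row per (assay, dataset, user);
    # alice's older submission is dropped.
    return toy.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["assay", "dataset", "user"], keep="first"
    )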


# Readable hashing function similar to coolname or codenamize
ADJECTIVES = [
    "ancient","brave","calm","clever","crimson","curious","dapper","eager",
    "fuzzy","gentle","glowing","golden","happy","icy","jolly","lucky",
    "magical","mellow","nimble","peachy","quick","royal","shiny","silent",
    "sly","sparkly","spicy","spry","sturdy","sunny","swift","tiny","vivid",
    "witty"
]

ANIMALS = [
    "ant","bat","bear","bee","bison","boar","bug","cat","crab","crow",
    "deer","dog","duck","eel","elk","fox","frog","goat","gull","hare",
    "hawk","hen","horse","ibis","kid","kiwi","koala","lamb","lark","lemur",
    "lion","llama","loon","lynx","mole","moose","mouse","newt","otter","owl",
    "ox","panda","pig","prawn","puma","quail","quokka","rabbit","rat","ray",
    "robin","seal","shark","sheep","shrew","skunk","slug","snail","snake",
    "swan","toad","trout","turtle","vole","walrus","wasp","whale","wolf",
    "worm","yak","zebra"
]
NOUNS = [
    "rock","sand","star","tree","leaf","seed","stone","cloud","rain","snow",
    "wind","fire","ash","dirt","mud","ice","wave","shell","dust","sun",
    "moon","hill","lake","pond","reef","root","twig","wood"
]


def readable_hash(
    data: Union[str, bytes, Iterable[int]],
    *,
    salt: Union[str, bytes, None] = None,
    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS + NOUNS),
    sep: str = "-",
    checksum_len: int = 2,  # 0 to disable; 2–3 is plenty
    case: str = "lower",  # "lower" | "title" | "upper"
) -> str:
    """
    Deterministically map input data to 'adjective-animal[-checksum]'. Generated using ChatGPT.

    Examples
    --------
    Outputs below are illustrative; exact phrases depend on the word lists.

    >>> readable_hash("hello world")
    'magical-panda-6h'

    >>> readable_hash("hello world", salt="my-app-v1", checksum_len=3)
    'royal-otter-1pz'

    >>> readable_hash(b"\x00\x01\x02\x03", case="title", checksum_len=0)
    'Fuzzy-Tiger'

    Vocabulary
    ----------
    ADJECTIVES: 34 safe, descriptive words (e.g. "ancient", "brave", "silent", "swift")
    ANIMALS: 71 short, common animals (e.g. "dog", "owl", "whale", "fox")
    NOUNS: 28 neutral nouns (e.g. "rock", "star", "tree", "cloud")

    Combinations
    ------------
    - adjective + animal: 34 × 71 = 2,414 unique names
    - adjective + noun: 34 × 28 = 952 unique names
    - adjective + (animal or noun), the default here: 34 × 99 = 3,366 unique names

    Checksum
    --------
    An optional short base-36 suffix (e.g. "-6h" or "-1pz"). The checksum
    acts as a disambiguator in case two different inputs map to the same
    word combination; 2-3 extra characters make accidental collisions much
    less likely, though not impossible.
    If you only need fun, human-readable names, you can disable it by setting
    ``checksum_len=0``. If you need unique, stable identifiers, keep it enabled.
    """
    if isinstance(data, str):
        data = data.encode()
    elif isinstance(data, Iterable) and not isinstance(data, (bytes, bytearray)):
        data = bytes(data)

    h = hashlib.blake2b(digest_size=8)  # fast, stable, short digest
    if salt:
        h.update(salt.encode() if isinstance(salt, str) else salt)
        h.update(b"\x00")  # domain-separate salt from data
    h.update(data)
    digest = h.digest()

    # Use the first 6 bytes to index words; last bytes for checksum
    n1 = int.from_bytes(digest[0:3], "big")
    n2 = int.from_bytes(digest[3:6], "big")

    adj = words[0][n1 % len(words[0])]
    noun = words[1][n2 % len(words[1])]
    phrase = f"{adj}{sep}{noun}"

    if checksum_len > 0:
        # Short base36 checksum for collision visibility
        cs = int.from_bytes(digest[6:], "big")
        base36 = ""
        alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
        while cs:
            cs, r = divmod(cs, 36)
            base36 = alphabet[r] + base36
        base36 = (base36 or "0")[:checksum_len]
        phrase = f"{phrase}{sep}{base36}"

    if case == "title":
        phrase = sep.join(p.capitalize() for p in phrase.split(sep))
    elif case == "upper":
        phrase = phrase.upper()

    return phrase
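

# Minimal manual smoke test (assumption: run as a script with `constants.py` importable);
# illustrates the anonymization helpers. Exact phrases depend on the word lists above.
if __name__ == "__main__":
    print(get_time("UTC"))
    print(readable_hash("hello world"))                    # deterministic: same input, same phrase
    print(readable_hash("hello world", salt="my-app-v1"))  # salting yields a different phrase
    print(readable_hash("hello world", checksum_len=0))    # no base-36 suffix
    print(readable_hash("hello world", case="title"))      # Title-Case variant
    print(anonymize_user("hello world"))                   # plain sha256 prefix, for comparison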