import pandas as pd
from datasets import load_dataset
import gradio as gr
import hashlib
from typing import Iterable, Union
from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS

pd.set_option('display.max_columns', None)


def show_output_box(message):
    # Show the output component with the given message.
    return gr.update(value=message, visible=True)

def anonymize_user(username: str) -> str:
    # Anonymize a username via a truncated SHA-256 hex digest (8 hex chars).
    return hashlib.sha256(username.encode()).hexdigest()[:8]
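
# Usage note: anonymize_user("alice") returns a stable 8-hex-character tag;
# the same username always maps to the same tag across runs and machines.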


def fetch_hf_results():
    # For debugging: print the current time in EST when fetching.
    # EST = timezone(timedelta(hours=-4))
    # print(f"tmp: Fetching results from HF at {datetime.now(EST)}")
    # load_dataset caches by default unless download_mode="force_redownload".
    df = load_dataset(
        RESULTS_REPO, data_files="auto_submissions/metrics_all.csv",
    )["train"].to_pandas()
    missing = set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)
    assert not missing, (
        f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in "
        f"{df.columns}. Missing columns: {missing}"
    )
    # Keep only the latest submission per (model, assay, user) triple.
    df = df.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["model", "assay", "user"], keep="first"
    )
    df["property"] = df["assay"].map(ASSAY_RENAME)

    # Anonymize the user column; anything not explicitly False (including
    # missing values) is treated as a request for anonymity.
    anonymous = df["anonymous"] != False  # noqa: E712
    df.loc[anonymous, "user"] = "anon-" + df.loc[anonymous, "user"].apply(readable_hash)

    return df
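
# Illustrative sketch (hypothetical rows) of the keep-latest dedup above:
# sorting by submission_time descending and dropping duplicates with
# keep="first" retains each (model, assay, user) triple's newest row.
#
#     toy = pd.DataFrame({
#         "model": ["m1", "m1"], "assay": ["a1", "a1"], "user": ["u1", "u1"],
#         "submission_time": ["2024-05-02", "2024-05-01"],
#     })
#     toy.sort_values("submission_time", ascending=False).drop_duplicates(
#         subset=["model", "assay", "user"], keep="first"
#     )  # -> a single row: the 2024-05-02 submission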


# Readable hashing function similar to coolname or codenamize
ADJECTIVES = [
    "ancient","brave","calm","clever","crimson","curious","dapper","eager",
    "fuzzy","gentle","glowing","golden","happy","icy","jolly","lucky",
    "magical","mellow","nimble","peachy","quick","royal","shiny","silent",
    "sly","sparkly","spicy","spry","sturdy","sunny","swift","tiny","vivid",
    "witty"
]

ANIMALS = [
    "ant","bat","bear","bee","bison","boar","bug","cat","crab","crow",
    "deer","dog","duck","eel","elk","fox","frog","goat","gull","hare",
    "hawk","hen","horse","ibis","kid","kiwi","koala","lamb","lark","lemur",
    "lion","llama","loon","lynx","mole","moose","mouse","newt","otter","owl",
    "ox","panda","pig","prawn","puma","quail","quokka","rabbit","rat","ray",
    "robin","seal","shark","sheep","shrew","skunk","slug","snail","snake",
    "swan","toad","trout","turtle","vole","walrus","wasp","whale","wolf",
    "worm","yak","zebra"
]

NOUNS = [
    "rock","sand","star","tree","leaf","seed","stone","cloud","rain","snow",
    "wind","fire","ash","dirt","mud","ice","wave","shell","dust","sun",
    "moon","hill","lake","pond","reef","root","twig","wood"
]


def readable_hash(
    data: Union[str, bytes, Iterable[int]],
    *,
    salt: Union[str, bytes, None] = None,
    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS+NOUNS),
    sep: str = "-",
    checksum_len: int = 2,  # 0 to disable; 2–3 is plenty
    case: str = "lower"     # "lower" | "title" | "upper"
) -> str:
    """
    Deterministically map input data to 'adjective-animal[-checksum]'. Generated using ChatGPT.
    
    Examples
    --------
    >>> readable_hash("hello world")
    'magical-panda-6h'

    >>> readable_hash("hello world", salt="my-app-v1", checksum_len=3)
    'royal-otter-1pz'

    >>> readable_hash(b"\x00\x01\x02\x03", case="title", checksum_len=0)
    'Fuzzy-Tiger'

    Vocabulary
    ----------
    ADJECTIVES: ~160 safe, descriptive words (e.g. "ancient", "brave", "silent", "swift")
    ANIMALS: ~80 short, common animals (e.g. "dog", "owl", "whale", "tiger")
    NOUNS: optional set of ~30 neutral nouns (e.g. "rock", "star", "tree", "cloud")

    Combinations
    ------------
    - adjective + animal: ~13,000 unique names
    - adjective + noun: ~5,000 unique names
    - adjective + animal + noun: ~390,000 unique names

    Checksum
    --------
    An optional short base-36 suffix (e.g. "-6h" or "-1pz"). The checksum
    acts as a disambiguator in case two different inputs map to the same
    word combination. With 2-3 characters, collisions become vanishingly rare.
    If you only need fun, human-readable names, you can disable it by setting
    ``checksum_len=0``. If you need unique, stable identifiers, keep it enabled.
    """
    if isinstance(data, str):
        data = data.encode()
    elif isinstance(data, Iterable) and not isinstance(data, (bytes, bytearray)):
        data = bytes(data)

    h = hashlib.blake2b(digest_size=8)  # fast, stable, short digest
    if salt:
        h.update(salt.encode() if isinstance(salt, str) else salt)
        h.update(b"\x00")  # domain-separate salt from data
    h.update(data)
    digest = h.digest()

    # Use the first 6 bytes to index words; last bytes for checksum
    n1 = int.from_bytes(digest[0:3], "big")
    n2 = int.from_bytes(digest[3:6], "big")

    adj = words[0][n1 % len(words[0])]
    noun = words[1][n2 % len(words[1])]
    phrase = f"{adj}{sep}{noun}"

    if checksum_len > 0:
        # Short base36 checksum for collision visibility
        cs = int.from_bytes(digest[6:], "big")
        base36 = ""
        alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
        while cs:
            cs, r = divmod(cs, 36)
            base36 = alphabet[r] + base36
        base36 = (base36 or "0")[:checksum_len]
        phrase = f"{phrase}{sep}{base36}"

    if case == "title":
        phrase = sep.join(p.capitalize() for p in phrase.split(sep))
    elif case == "upper":
        phrase = phrase.upper()

    return phrase
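

# Minimal usage sketch (assumed script entry point, not part of the Gradio
# app): checks that readable_hash is deterministic and shows that a salt
# changes the resulting phrase.
if __name__ == "__main__":
    a = readable_hash("alice@example.com")
    b = readable_hash("alice@example.com")
    assert a == b, "same input must map to the same phrase"
    salted = readable_hash("alice@example.com", salt="leaderboard-v1")
    print(a, salted)  # the salted phrase will (almost surely) differ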