# Source: Hugging Face Space "nbroad/asdf" (status: Paused)
# File size: 11,027 Bytes

from pathlib import Path
import gradio as gr
# import polars as pl
import pandas as pd
import torch
import json
from gradio import ChatMessage
import os
import matplotlib.pyplot as plt

# True when the SPACE_AUTHOR_NAME environment variable is set to a non-empty
# value (presumably provided by the Hugging Face Spaces runtime -- confirm).
IN_SPACE = bool(os.environ.get("SPACE_AUTHOR_NAME", False))

# Out-of-fold (OOF) prediction files, one per experiment. The experiment id is
# later parsed out of each filename, so the naming scheme matters.
files = [
    "./lmsys-ex38-model_oof_df.parquet",
    "./lmsys-ex41-model_oof_df.parquet",
    "./lmsys-ex43-model_oof_df.parquet",
    "./lmsys-exp-llm-049-weight_preds.parquet",
    "./lmsys-exp-llm-053-weight_preds.parquet",
    "./lmsys-exp-llm-063-weight_preds.parquet",
    "./lmsys-exp-llm-065-weight_preds.parquet",
    "./lmsys-exp-llm-073-weight_preds.parquet",
    "./lmsys-exp-llm-078-weight_preds.parquet",
    "./lmsys-exp-llm-081-weight_preds.parquet",
    "./lmsys-exp-llm-085-weight_preds.parquet",
    "./lmsys-oof-exp2_preds.parquet",
    "./lmsys-oof-exp29_preds.parquet",
]
# Training data; merged with the predictions on the "id" column.
train_filepath = "./train.parquet"

if not IN_SPACE:
    # Outside a Space: point at the repository's data directory instead of the
    # working directory, and load environment variables from the repo's .env.
    files = [x.replace("./", "../../data/oofs/") for x in files]
    train_filepath = "../../data/train.parquet"
    from dotenv import load_dotenv
    loaded = load_dotenv("../../.env")
    print("Loaded .env file:", loaded)

# Token used to log in to the Hugging Face Hub before downloading the dataset.
HF_TOKEN = os.getenv("HF_READ_OOFS_TOKEN")

if not HF_TOKEN:
    # Only a warning: execution continues and login() below receives a falsy token.
    # NOTE(review): confirm how login() behaves without a token in a
    # non-interactive environment.
    print("be sure to set HF_READ_OOFS_TOKEN in .env file")

# Download the dataset snapshot only when the first OOF file is missing.
if not Path(files[0]).exists():
    from huggingface_hub import snapshot_download, login

    login(token=HF_TOKEN)

    # NOTE(review): the snapshot is always written to "./", but outside a Space
    # `files` / `train_filepath` point under "../../data/" -- confirm that the
    # download actually makes the expected paths exist in that case.
    snapshot_download("nbroad/lmsys-cahpp-oofs", repo_type="dataset", local_dir="./", local_dir_use_symlinks=False)


# Map every OOF file to its experiment id. Each file is registered twice: under
# the path stored in `files` and under its bare filename, so lookups work both
# with full paths and with basenames.
exps = {}

for f in files:
    # Parse the basename only, so directory components can never be mistaken
    # for one of the filename markers.
    name = f.split("/")[-1]

    # Order matters: "lmsys-exp-llm-" also contains "lmsys-ex", so the more
    # specific marker has to be tested first.
    if "lmsys-exp-llm-" in name:
        exp = name.split("lmsys-exp-llm-")[1].split("-")[0]
    elif "lmsys-ex" in name:
        exp = name.split("lmsys-ex")[1].split("-")[0]
    elif "lmsys-oof-exp" in name:
        exp = name.split("lmsys-oof-exp")[1].split("_")[0]
    else:
        # Without this branch `exp` would be unbound for the first file, or
        # silently keep the previous file's id for any later one.
        raise ValueError(f"Cannot determine experiment id from filename: {f!r}")

    exps[f] = exp
    exps[name] = exp


def make_df():
    """Build the analysis table from all OOF prediction files and the train set.

    Each file listed in ``files`` is read, its three prediction columns are
    renamed to ``winner_model_a_prob_<exp>``, ``winner_model_b_prob_<exp>`` and
    ``winner_tie_prob_<exp>``, and the frames are placed side by side after
    being sorted by ``id``. The result is left-merged with the training data
    on ``id`` and extended with:

    * ``winner``: "A", "B" or "Tie" derived from the one-hot label columns,
    * ``loss_<exp>``: per-sample loss of the probability given to the true class,
    * ``prompt_length``, ``response_a_length``, ``response_b_length``,
      ``total_length``: string lengths of the text columns,
    * ``avg_loss``, ``avg_winner_model_a``, ``avg_winner_model_b``,
      ``avg_winner_tie``: means across experiments.

    Returns:
        tuple: ``(joined, id2texts)``. ``joined`` is the DataFrame described
        above without the text columns; ``id2texts`` maps each ``id`` to its
        ``(prompt, response_a, response_b)`` tuple.
    """
    data = {f: pd.read_parquet(f) for f in files}

    for k in data:
        exp = exps[k]
        pred_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]

        # The files use one of three column schemas for the predictions.
        columns = data[k].columns
        if "0" in columns:
            source_cols = ["0", "1", "2"]
        elif "winner_tie_prob" not in columns:
            source_cols = ["winner_model_a", "winner_model_b", "winner_tie"]
        else:
            source_cols = [
                "winner_model_a_prob",
                "winner_model_b_prob",
                "winner_tie_prob",
            ]

        frame = data[k].rename(columns=dict(zip(source_cols, pred_cols)))

        # Reset the index after sorting: pd.concat(axis=1) and column
        # assignment align on index labels, so without the reset the rows
        # would be matched by their pre-sort labels rather than by sorted id.
        # NOTE(review): this assumes every file covers the same set of ids.
        data[k] = (
            frame.sort_values("id")[["id"] + pred_cols].reset_index(drop=True)
        )

    id_col = data[files[0]]["id"]

    joined = pd.concat([x.drop(columns="id") for x in data.values()], axis=1)
    joined["id"] = id_col

    tdf = pd.read_parquet(train_filepath)
    joined = joined.merge(tdf, on="id", how="left")

    label_cols = ["winner_model_a", "winner_model_b", "winner_tie"]

    joined["winner"] = ""
    joined.loc[joined["winner_model_a"] == 1, "winner"] = "A"
    joined.loc[joined["winner_model_b"] == 1, "winner"] = "B"
    joined.loc[joined["winner_tie"] == 1, "winner"] = "Tie"

    # Index of the ground-truth class per row; identical for every experiment,
    # so it is computed once outside the loop.
    gt_idxs = joined[label_cols].values.argsort()[:, -1]

    # `exps` holds two keys (path and basename) per file, so iterating its
    # values would process every experiment twice. De-duplicate, keeping order.
    exp_ids = list(dict.fromkeys(exps[f] for f in files))

    for exp in exp_ids:
        pred_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]

        raw_scores = joined[pred_cols].values

        # Cast to float64 so the loss below works regardless of the dtype the
        # predictions were stored with.
        scores = torch.tensor(raw_scores, dtype=torch.float64)

        # Heuristic: rows that sum to clearly more than 1 are treated as
        # logits and normalised with a softmax; otherwise the values are used
        # as probabilities unchanged.
        if raw_scores.sum(axis=-1).max() > 1.1:
            scores = scores.softmax(-1)

        joined[pred_cols] = scores.numpy()

        # Probability assigned to the true class of each row.
        true_class_prob = scores[torch.arange(scores.shape[0]), gt_idxs]

        # BCE against an all-ones target is the negative log of the
        # true-class probability.
        joined[f"loss_{exp}"] = torch.nn.functional.binary_cross_entropy(
            true_class_prob, torch.ones_like(true_class_prob), reduction="none"
        ).numpy()

    for text_col in ("prompt", "response_a", "response_b"):
        joined[f"{text_col}_length"] = joined[text_col].map(len)
    joined["total_length"] = (
        joined["prompt_length"]
        + joined["response_a_length"]
        + joined["response_b_length"]
    )

    # Averages across experiments. "avg_loss" must be computed before the
    # float16 cast below so that it is picked up by the "loss" name match.
    loss_cols = [x for x in joined.columns if "loss" in x]
    joined["avg_loss"] = joined[loss_cols].mean(axis=1)
    for target in ("winner_model_a", "winner_model_b", "winner_tie"):
        member_cols = [x for x in joined.columns if f"{target}_prob" in x]
        joined[f"avg_{target}"] = joined[member_cols].mean(axis=1)

    # Store per-experiment probabilities and all loss columns (including
    # "avg_loss") as float16.
    prob_cols = [x for x in joined.columns if "prob" in x]
    loss_cols = [x for x in joined.columns if "loss" in x]
    joined[prob_cols + loss_cols] = joined[prob_cols + loss_cols].astype("float16")

    # Keep the texts in a side lookup and drop them from the table.
    text_cols = ["prompt", "response_a", "response_b"]
    id2texts = {i: (p, a, b) for i, p, a, b in joined[["id"] + text_cols].values}

    joined = joined.drop(columns=text_cols)

    return joined, id2texts


# Built once at import time. `filter_df` reads MAIN_DF and `show_chats` reads
# id2texts as module-level globals.
MAIN_DF, id2texts = make_df()


def filter_df(lower_limit, upper_limit, file, all_check):
    """Select the rows of ``MAIN_DF`` whose loss lies strictly between two bounds.

    Args:
        lower_limit: Exclusive lower bound on the loss.
        upper_limit: Exclusive upper bound on the loss.
        file: Key into ``exps`` naming the experiment whose loss column is
            used; ignored when ``all_check`` is truthy or ``file`` is
            ``None`` or empty.
        all_check: When truthy, filter on ``avg_loss`` instead of a single
            experiment's loss.

    Returns:
        tuple: ``(0, frame)`` where ``frame`` holds the matching rows sorted
        by the chosen loss in descending order with a fresh 0..n-1 index.
    """
    use_average = all_check or file is None or file == ""
    loss_col = "avg_loss" if use_average else f"loss_{exps[file]}"

    losses = MAIN_DF[loss_col]
    in_range = (losses > lower_limit) & (losses < upper_limit)

    selected = (
        MAIN_DF[in_range]
        .sort_values(loss_col, ascending=False)
        .reset_index(drop=True)
    )

    return 0, selected


def make_chat(prompt, response, side, label):
    """Turn one side of a conversation into a list of chat messages.

    Args:
        prompt: JSON-encoded list of user turns.
        response: JSON-encoded list of model replies; an entry may be null.
        side: Identifier of the side being rendered (compared with ``label``).
        label: Ground-truth outcome; equal to ``side`` when this side won,
            ``2`` or ``"Tie"`` for a tie.

    Returns:
        list: Alternating user/assistant ``ChatMessage`` objects, every one
        prefixed with a winner/tie/loser banner line. Turns are paired with
        ``zip``, so surplus entries of the longer list are ignored.
    """
    user_turns = json.loads(prompt)
    model_turns = json.loads(response)

    # Pick the banner shown on top of every message of this side.
    if side == label:
        banner = "✅ Winner ✅"
    elif label == 2 or label == "Tie":
        banner = "🟨 Tie 🟨"
    else:
        banner = "❌ Loser ❌"

    messages = []
    for user_text, reply in zip(user_turns, model_turns):
        messages.append(
            ChatMessage(role="user", content=banner + "\n" + user_text)
        )

        # A null reply is rendered as an empty assistant message.
        reply_text = "" if reply is None else reply
        messages.append(
            ChatMessage(role="assistant", content=banner + "\n" + reply_text)
        )

    return messages


def show_chats(idx, df, file, all_check):
    """Render the sample at position ``idx`` of the filtered table.

    Args:
        idx: Row position within ``df``. Values outside the valid range are
            clamped to the first/last row; non-integer numbers are truncated.
        df: Filtered DataFrame; must provide the ``id`` and ``winner`` columns
            plus the score and loss columns selected below.
        file: Key into ``exps`` naming the experiment whose scores are shown;
            ignored when ``all_check`` is truthy or ``file`` is ``None``/empty.
        all_check: When truthy, show the scores and loss averaged over all
            experiments.

    Returns:
        tuple: Seven values -- chat for side A, chat for side B, the winner
        label, the three scores (A, B, tie) and the loss. All seven are
        ``None`` when there is no row to show.
    """
    # Always return one value per output component. The previous two-value
    # early return did not match the seven outputs this callback is wired to.
    nothing_to_show = (None,) * 7

    if idx is None or df is None or df.shape[0] == 0:
        return nothing_to_show
    if "winner" not in df.columns:
        # The table has not been populated by a filter run yet.
        return nothing_to_show

    # `iloc` needs an int; clamp so the navigation buttons cannot run past
    # either end of the table.
    idx = min(max(int(idx), 0), df.shape[0] - 1)

    row = df.iloc[idx]
    label = row["winner"]

    p, a, b = id2texts[row["id"]]

    chat_a = make_chat(p, a, "A", label)
    chat_b = make_chat(p, b, "B", label)

    if all_check or file is None or file == "":
        score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
        loss_col = "avg_loss"
    else:
        exp = exps[file]
        score_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]
        loss_col = f"loss_{exp}"

    scores = row[score_cols].to_list()
    loss = row[loss_col]

    return chat_a, chat_b, label, *scores, loss

def update_plot(df, file, all_check):
    """Draw a 50-bin histogram of the selected loss column.

    Args:
        df: DataFrame containing the loss column to plot.
        file: Key into ``exps`` naming the experiment whose loss is plotted;
            ignored when ``all_check`` is truthy or ``file`` is ``None``/empty.
        all_check: When truthy, plot ``avg_loss`` instead.

    Returns:
        matplotlib.figure.Figure: The figure holding the histogram. The
        previous implementation returned the ``(counts, bins, patches)`` tuple
        of ``plt.hist``, which is not something a plot component can display.
    """
    # Build the figure without pyplot so repeated calls neither draw on top of
    # each other nor accumulate figures in pyplot's global registry.
    from matplotlib.figure import Figure

    if all_check or file is None or file == "":
        loss_col = "avg_loss"
    else:
        loss_col = f"loss_{exps[file]}"

    fig = Figure()
    ax = fig.subplots()
    ax.hist(df[loss_col], bins=50)

    return fig



# UI layout and event wiring for the OOF browser.
with gr.Blocks() as demo:

    gr.Markdown(
        """
        # OOF Visualization

        This is a demo for visualizing the out-of-fold predictions of a model. 
        It currently shows the predictions for the outputs of [this notebook](https://www.kaggle.com/code/kcotton21/lmsys-preds/notebook).
        """
    )
    with gr.Row():
        with gr.Column():
            file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
        with gr.Column():
            all_check = gr.Checkbox(label="Use average loss of all files", value=True)
    with gr.Row():
        lower_limit = gr.Slider(
            label="Show samples with loss > this value", minimum=0, maximum=5, value=1
        )
        upper_limit = gr.Slider(
            label="Show samples with loss < this value", minimum=0, maximum=5, value=5
        )

        # id_ = gr.Number(label="ID")
        idx = gr.Number(visible=True)
        # Holds the current filter result so the callbacks can read it back.
        hidden_df = gr.Dataframe(visible=False)
    with gr.Row():
        correct_label = gr.Textbox(label="Correct Label", interactive=False)
        score_a = gr.Textbox(label="Model A Score", interactive=False)
        score_b = gr.Textbox(label="Model B Score", interactive=False)
        score_tie = gr.Textbox(label="Tie Score", interactive=False)
        loss = gr.Textbox(label="Loss", interactive=False)
    with gr.Row():
        with gr.Column():
            prev_btn = gr.Button(value="Previous")
        with gr.Column():
            next_btn = gr.Button(value="Next")

    with gr.Row():
        with gr.Column():
            chat_a = gr.Chatbot(label="Model A", type="messages", height=1000)
        with gr.Column():
            chat_b = gr.Chatbot(label="Model B", type="messages", height=1000)

    # with gr.Row():
    #     plot = gr.Plot()

    filter_inputs = [lower_limit, upper_limit, file, all_check]
    chat_inputs = [idx, hidden_df, file, all_check]
    chat_outputs = [chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss]

    # Re-run the filter whenever ANY of its inputs changes. Previously only the
    # two sliders were wired, so picking another file or toggling the checkbox
    # had no effect until a slider was moved.
    # The chained `then` refreshes the chats even when the filter leaves `idx`
    # at 0 (no change event fires in that case and the panes would go stale).
    # When `idx` does change, the chats are simply rendered twice.
    gr.on(
        triggers=[
            lower_limit.change,
            upper_limit.change,
            file.change,
            all_check.change,
        ],
        fn=filter_df,
        inputs=filter_inputs,
        outputs=[idx, hidden_df],
    ).then(show_chats, inputs=chat_inputs, outputs=chat_outputs)

    # hidden_df.change(update_plot, [hidden_df, file, all_check], plot)

    idx.change(show_chats, inputs=chat_inputs, outputs=chat_outputs)

    # An emptied number field arrives as None; treat it as position 0 instead
    # of raising a TypeError.
    prev_btn.click(lambda x: max(0, (x or 0) - 1), inputs=idx, outputs=idx)
    next_btn.click(lambda x: (x or 0) + 1, inputs=idx, outputs=idx)

    


# Launch the app (with debug=True) only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    demo.launch(debug=True)