import json
import os
from pathlib import Path

import gradio as gr
import pandas as pd
import torch
from gradio import ChatMessage

# True when running inside a Hugging Face Space (the platform sets SPACE_AUTHOR_NAME).
IN_SPACE = bool(os.environ.get("SPACE_AUTHOR_NAME", False))

files = [
    "./lmsys-ex38-model_oof_df.parquet",
    "./lmsys-ex41-model_oof_df.parquet",
    "./lmsys-ex43-model_oof_df.parquet",
    "./lmsys-exp-llm-049-weight_preds.parquet",
    "./lmsys-exp-llm-053-weight_preds.parquet",
    "./lmsys-exp-llm-063-weight_preds.parquet",
    "./lmsys-exp-llm-065-weight_preds.parquet",
    "./lmsys-exp-llm-073-weight_preds.parquet",
    "./lmsys-exp-llm-078-weight_preds.parquet",
    "./lmsys-exp-llm-081-weight_preds.parquet",
    "./lmsys-exp-llm-085-weight_preds.parquet",
    "./lmsys-oof-exp2_preds.parquet",
    "./lmsys-oof-exp29_preds.parquet",
]
train_filepath = "./train.parquet"

if not IN_SPACE:
    # Running locally: read the OOF files and train set from the repo's data directory
    # and load the HF token from a local .env file.
    files = [x.replace("./", "../../data/oofs/") for x in files]
    train_filepath = "../../data/train.parquet"

    from dotenv import load_dotenv

    loaded = load_dotenv("../../.env")
    print("Loaded .env file:", loaded)

HF_TOKEN = os.getenv("HF_READ_OOFS_TOKEN")
if not HF_TOKEN:
    print("be sure to set HF_READ_OOFS_TOKEN in .env file")

# Download the OOF dataset if the files are not already present.
if not Path(files[0]).exists():
    from huggingface_hub import login, snapshot_download

    login(token=HF_TOKEN)
    snapshot_download(
        "nbroad/lmsys-cahpp-oofs",
        repo_type="dataset",
        local_dir="./",
        local_dir_use_symlinks=False,
    )
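# For local runs, the token is read from ../../.env. A minimal example of that file
# (illustrative; only the variable name above is taken from this script) might be:
#
#     HF_READ_OOFS_TOKEN=hf_xxxxxxxxxxxxxxxx
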
# Map each OOF file (both the full path and the bare filename) to its experiment id,
# which is parsed out of the filename.
exps = {}
for f in files:
    if "lmsys-exp-llm-" in f:
        exp = f.split("lmsys-exp-llm-")[1].split("-")[0]
    elif "lmsys-ex" in f:
        exp = f.split("lmsys-ex")[1].split("-")[0]
    elif "lmsys-oof-exp" in f:
        exp = f.split("lmsys-oof-exp")[1].split("_")[0]
    exps[f] = exp
    exps[f.split("/")[-1]] = exp
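# For example, "lmsys-exp-llm-049-weight_preds.parquet" maps to experiment id "049"
# and "lmsys-ex38-model_oof_df.parquet" maps to "38", so the per-experiment columns
# built below become winner_model_a_prob_049, loss_38, and so on.
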
def make_df():
    """Load every OOF file, normalize its columns, and join with the train data."""
    data = {f: pd.read_parquet(f) for f in files}

    for k in data.keys():
        exp = exps[k]

        # The OOF files use three different column-name schemes; normalize all of them
        # to winner_model_{a,b}_prob_{exp} / winner_tie_prob_{exp}.
        if "0" in data[k].columns:
            data[k] = data[k].rename(
                columns={
                    "0": f"winner_model_a_prob_{exp}",
                    "1": f"winner_model_b_prob_{exp}",
                    "2": f"winner_tie_prob_{exp}",
                },
            )
        elif "winner_tie_prob" not in data[k].columns:
            data[k] = data[k].rename(
                columns={
                    "winner_model_a": f"winner_model_a_prob_{exp}",
                    "winner_model_b": f"winner_model_b_prob_{exp}",
                    "winner_tie": f"winner_tie_prob_{exp}",
                }
            )
        else:
            data[k] = data[k].rename(
                columns={
                    "winner_model_a_prob": f"winner_model_a_prob_{exp}",
                    "winner_model_b_prob": f"winner_model_b_prob_{exp}",
                    "winner_tie_prob": f"winner_tie_prob_{exp}",
                }
            )

        pred_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]

        data[k] = data[k].sort_values("id")

        final_columns = ["id"] + pred_cols
        data[k] = data[k][final_columns]

    # Concatenate the per-experiment prediction columns side by side and re-attach ids.
    id_col = data[files[0]].iloc[:, 0]
    joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
    joined["id"] = id_col

    tdf = pd.read_parquet(train_filepath)
    joined = joined.merge(tdf, on="id", how="left")

    joined["winner"] = ""
    joined.loc[joined["winner_model_a"] == 1, "winner"] = "A"
    joined.loc[joined["winner_model_b"] == 1, "winner"] = "B"
    joined.loc[joined["winner_tie"] == 1, "winner"] = "Tie"

    # Each experiment id appears twice in exps (full path and bare filename), so this
    # loop revisits each experiment; the second pass recomputes identical values.
    for exp in exps.values():
        pred_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]
        temp_scores = joined[pred_cols].values
        # Some files store raw logits rather than probabilities; if the rows do not
        # sum to roughly 1, run them through a softmax first.
        if temp_scores.sum(axis=-1).max() > 1.1:
            temp_scores = torch.tensor(temp_scores).softmax(-1)
        else:
            temp_scores = torch.tensor(temp_scores)
        joined[pred_cols] = temp_scores.numpy()

        # Per-row loss: cross-entropy of the probability assigned to the true class.
        gt_idxs = joined[
            ["winner_model_a", "winner_model_b", "winner_tie"]
        ].values.argsort()[:, -1]
        temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
        joined[f"loss_{exp}"] = torch.nn.functional.binary_cross_entropy(
            temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
        )

    joined["prompt_length"] = [len(x) for x in joined["prompt"]]
    joined["response_a_length"] = [len(x) for x in joined["response_a"]]
    joined["response_b_length"] = [len(x) for x in joined["response_b"]]
    joined["total_length"] = (
        joined["prompt_length"]
        + joined["response_a_length"]
        + joined["response_b_length"]
    )

    loss_cols = [x for x in joined.columns if "loss" in x]
    joined["avg_loss"] = joined[loss_cols].mean(axis=1)
    joined["avg_winner_model_a"] = joined[
        [x for x in joined.columns if "winner_model_a_prob" in x]
    ].mean(axis=1)
    joined["avg_winner_model_b"] = joined[
        [x for x in joined.columns if "winner_model_b_prob" in x]
    ].mean(axis=1)
    joined["avg_winner_tie"] = joined[
        [x for x in joined.columns if "winner_tie_prob" in x]
    ].mean(axis=1)

    # Downcast to save memory, and keep the full texts in a separate dict keyed by id
    # so the dataframe passed around the Gradio app stays small.
    prob_cols = [x for x in joined.columns if "prob" in x]
    loss_cols = [x for x in joined.columns if "loss" in x]
    joined[prob_cols + loss_cols] = joined[prob_cols + loss_cols].astype("float16")

    id2texts = {
        i: (p, a, b)
        for i, p, a, b in joined[["id", "prompt", "response_a", "response_b"]].values
    }
    joined = joined.drop(columns=["prompt", "response_a", "response_b"])

    return joined, id2texts

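# Note on the loss above: binary cross-entropy of the ground-truth class probability
# against a target of 1 reduces to -log(p_true), i.e. the usual multi-class log loss.
# A tiny illustrative check (not executed by the app):
#
#     probs = torch.tensor([0.2, 0.5, 0.3], dtype=torch.float64)
#     gt = 1  # index of the true class
#     bce = torch.nn.functional.binary_cross_entropy(
#         probs[gt], torch.tensor(1.0, dtype=torch.float64)
#     )
#     assert torch.isclose(bce, -probs[gt].log())
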
MAIN_DF, id2texts = make_df()
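
# MAIN_DF holds the float16 summary columns (probabilities, losses, lengths, winner)
# used by the callbacks below; id2texts maps each conversation id to its
# (prompt, response_a, response_b) JSON strings so the texts stay out of Gradio state.
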
def filter_df(lower_limit, upper_limit, file, all_check):
    """Keep rows whose loss lies strictly between the two limits, hardest first."""
    if all_check or file is None or file == "":
        loss_col = "avg_loss"
    else:
        loss_col = f"loss_{exps[file]}"
    temp = MAIN_DF[
        (MAIN_DF[loss_col] > lower_limit) & (MAIN_DF[loss_col] < upper_limit)
    ]
    temp = temp.sort_values(loss_col, ascending=False).reset_index(drop=True)
    # Reset the displayed sample index to 0 whenever the filter changes.
    return 0, temp

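# Illustrative usage (not wired into the UI): rows with average loss between 1 and 5,
# hardest examples first.
#
#     start_idx, view = filter_df(1.0, 5.0, file=None, all_check=True)
#     view[["id", "avg_loss"]].head()
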
def make_chat(prompt, response, side, label):
    """Build the chat transcript for one side ("A" or "B") of a conversation."""
    prompts = json.loads(prompt)
    responses = json.loads(response)

    if side == label:
        header = "✅ Winner ✅"
    elif label == "Tie":
        header = "🟨 Tie 🟨"
    else:
        header = "❌ Loser ❌"

    chat = []
    for p, r in zip(prompts, responses):
        chat.append(
            ChatMessage(
                role="user",
                content=header + "\n" + p,
            )
        )
        if r is None:
            r = ""
        chat.append(ChatMessage(role="assistant", content=header + "\n" + r))
    return chat

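# The prompt/response fields are JSON-encoded lists of turns, so a single-turn example
# (illustrative) looks like:
#
#     make_chat('["Hi"]', '["Hello!"]', side="A", label="A")
#
# which yields one user and one assistant ChatMessage, each prefixed with "✅ Winner ✅".
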
def show_chats(idx, df, file, all_check):
    """Render the conversations and scores for the row at position idx of df."""
    if idx is None or df is None or len(df) == 0 or "winner" not in df.columns:
        # Nothing selected yet; clear every output wired up in idx.change below.
        return None, None, None, None, None, None, None

    # The index comes from a gr.Number, which may deliver a float, and the
    # Previous/Next buttons can push it out of range.
    idx = int(idx)
    if idx >= df.shape[0]:
        idx = df.shape[0] - 1
    if idx < 0:
        idx = 0

    row = df.iloc[idx]
    label = row["winner"]
    id_ = row["id"]
    p, a, b = id2texts[id_]

    chat_a = make_chat(p, a, "A", label)
    chat_b = make_chat(p, b, "B", label)

    if all_check or file is None or file == "":
        score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
    else:
        score_cols = [
            f"winner_model_a_prob_{exps[file]}",
            f"winner_model_b_prob_{exps[file]}",
            f"winner_tie_prob_{exps[file]}",
        ]
    scores = row[score_cols].to_list()

    if all_check or file is None or file == "":
        loss_col = "avg_loss"
    else:
        loss_col = f"loss_{exps[file]}"
    loss = row[loss_col]

    return chat_a, chat_b, label, *scores, loss

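# The returned tuple matches the output list of idx.change below:
# (chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss).
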
with gr.Blocks() as demo:
    gr.LoginButton()

    gr.Markdown(
        """
# OOF Visualization

This is a demo for visualizing a model's out-of-fold (OOF) predictions.
It currently shows the predictions produced by [this notebook](https://www.kaggle.com/code/kcotton21/lmsys-preds/notebook).
"""
    )

    with gr.Row():
        with gr.Column():
            file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
        with gr.Column():
            all_check = gr.Checkbox(label="Use average loss of all files")

    with gr.Row():
        lower_limit = gr.Slider(
            label="Show samples with loss > this value", minimum=0, maximum=5, value=1
        )
        upper_limit = gr.Slider(
            label="Show samples with loss < this value", minimum=0, maximum=5, value=5
        )

    # Index into the filtered dataframe; the filtered dataframe itself is hidden state.
    idx = gr.Number(visible=True)
    hidden_df = gr.Dataframe(visible=False)

    with gr.Row():
        correct_label = gr.Textbox(label="Correct Label", interactive=False)
        score_a = gr.Textbox(label="Model A Score", interactive=False)
        score_b = gr.Textbox(label="Model B Score", interactive=False)
        score_tie = gr.Textbox(label="Tie Score", interactive=False)
        loss = gr.Textbox(label="Loss", interactive=False)

    with gr.Row():
        with gr.Column():
            prev_btn = gr.Button(value="Previous")
        with gr.Column():
            next_btn = gr.Button(value="Next")

    with gr.Row():
        with gr.Column():
            chat_a = gr.Chatbot(label="Model A", type="messages", height=1000)
        with gr.Column():
            chat_b = gr.Chatbot(label="Model B", type="messages", height=1000)

    # Re-filter whenever either loss bound changes; show_chats re-renders when idx changes.
    lower_limit.change(
        filter_df,
        inputs=[lower_limit, upper_limit, file, all_check],
        outputs=[idx, hidden_df],
    )
    upper_limit.change(
        filter_df,
        inputs=[lower_limit, upper_limit, file, all_check],
        outputs=[idx, hidden_df],
    )
    idx.change(
        show_chats,
        inputs=[idx, hidden_df, file, all_check],
        outputs=[chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss],
    )
    prev_btn.click(lambda x: max(0, x - 1), inputs=idx, outputs=idx)
    next_btn.click(lambda x: x + 1, inputs=idx, outputs=idx)

demo.launch(debug=True)
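
# To run locally (assuming the directory layout implied by the paths above): put the
# OOF parquet files under ../../data/oofs/, train.parquet under ../../data/, and
# HF_READ_OOFS_TOKEN in ../../.env, then start the app with:
#
#     python app.py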