from pathlib import Path
import gradio as gr
# import polars as pl
import pandas as pd
import torch
import json
from gradio import ChatMessage
import os
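# SPACE_AUTHOR_NAME is one of the environment variables Hugging Face sets inside a
# running Space, so its presence is used here to detect Space vs. local execution.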
IN_SPACE = bool(os.environ.get("SPACE_AUTHOR_NAME", False))
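# Out-of-fold prediction files from several experiments; each parquet holds
# per-sample class scores for winner_model_a / winner_model_b / tie, keyed by "id".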
files = [
"./lmsys-ex38-model_oof_df.parquet",
"./lmsys-ex41-model_oof_df.parquet",
"./lmsys-ex43-model_oof_df.parquet",
"./lmsys-exp-llm-049-weight_preds.parquet",
"./lmsys-exp-llm-053-weight_preds.parquet",
"./lmsys-exp-llm-063-weight_preds.parquet",
"./lmsys-exp-llm-065-weight_preds.parquet",
"./lmsys-exp-llm-073-weight_preds.parquet",
"./lmsys-exp-llm-078-weight_preds.parquet",
"./lmsys-exp-llm-081-weight_preds.parquet",
"./lmsys-exp-llm-085-weight_preds.parquet",
"./lmsys-oof-exp2_preds.parquet",
"./lmsys-oof-exp29_preds.parquet",
]
train_filepath = "./train.parquet"
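# When running outside the Space, read the OOF and train parquets from the local
# data directory and load the Hugging Face token from a .env file.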
if not IN_SPACE:
files = [x.replace("./", "../../data/oofs/") for x in files]
train_filepath = "../../data/train.parquet"
from dotenv import load_dotenv
loaded = load_dotenv("../../.env")
print("Loaded .env file:", loaded)
HF_TOKEN = os.getenv("HF_READ_OOFS_TOKEN")
if not HF_TOKEN:
print("be sure to set HF_READ_OOFS_TOKEN in .env file")
if not Path(files[0]).exists():
from huggingface_hub import snapshot_download, login
login(token=HF_TOKEN)
snapshot_download("nbroad/lmsys-cahpp-oofs", repo_type="dataset", local_dir="./", local_dir_use_symlinks=False)
exps = {}
for f in files:
if "lmsys-exp-llm-" in f:
exp = f.split("lmsys-exp-llm-")[1].split("-")[0]
elif "lmsys-ex" in f:
exp = f.split("lmsys-ex")[1].split("-")[0]
elif "lmsys-oof-exp" in f:
exp = f.split("lmsys-oof-exp")[1].split("_")[0]
exps[f] = exp
exps[f.split("/")[-1]] = exp
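# Load every OOF file, normalize its prediction columns to
# winner_model_{a,b}_prob_<exp> / winner_tie_prob_<exp>, join all experiments on "id"
# together with the training labels, then add per-experiment log loss, text lengths,
# and averaged columns. Returns the merged dataframe plus an
# id -> (prompt, response_a, response_b) lookup so the large text columns can be
# dropped from the frame itself.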
def make_df():
data = {f: pd.read_parquet(f) for f in files}
for k in data.keys():
exp = exps[k]
if "0" in data[k].columns:
data[k] = data[k].rename(
columns={
"0": f"winner_model_a_prob_{exp}",
"1": f"winner_model_b_prob_{exp}",
"2": f"winner_tie_prob_{exp}",
},
)
elif "winner_tie_prob" not in data[k].columns:
data[k] = data[k].rename(
columns={
"winner_model_a": f"winner_model_a_prob_{exp}",
"winner_model_b": f"winner_model_b_prob_{exp}",
"winner_tie": f"winner_tie_prob_{exp}",
}
)
else:
data[k] = data[k].rename(
columns={
"winner_model_a_prob": f"winner_model_a_prob_{exp}",
"winner_model_b_prob": f"winner_model_b_prob_{exp}",
"winner_tie_prob": f"winner_tie_prob_{exp}",
}
)
pred_cols = [
f"winner_model_a_prob_{exp}",
f"winner_model_b_prob_{exp}",
f"winner_tie_prob_{exp}",
]
data[k] = data[k].sort_values("id")
final_columns = ["id"] + pred_cols
data[k] = data[k][final_columns]
id_col = data[files[0]].iloc[:, 0]
joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
# joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
# id_col = joined.iloc[:, 0]
# joined = joined.drop("id")
# joined = joined.insert_column(0, id_col)
joined["id"] = id_col
tdf = pd.read_parquet(train_filepath)
joined = joined.merge(tdf, on="id", how="left")
joined["winner"] = ""
joined.loc[joined["winner_model_a"] == 1, "winner"] = "A"
joined.loc[joined["winner_model_b"] == 1, "winner"] = "B"
joined.loc[joined["winner_tie"] == 1, "winner"] = "Tie"
for exp in exps.values():
pred_cols = [
f"winner_model_a_prob_{exp}",
f"winner_model_b_prob_{exp}",
f"winner_tie_prob_{exp}",
]
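        # Some experiments stored unnormalized scores: if any row sums to more than ~1,
        # treat the values as logits and apply a softmax; otherwise keep them as probabilities.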
temp_scores = joined[pred_cols].values
if temp_scores.sum(axis=-1).max() > 1.1:
temp_scores = torch.tensor(temp_scores).softmax(-1)
else:
temp_scores = torch.tensor(temp_scores)
joined[pred_cols] = temp_scores.numpy()
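        # binary_cross_entropy against a target of 1 reduces to -log(p), so picking the
        # ground-truth class gives the per-sample multiclass log loss for this experiment.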
gt_idxs = joined[
["winner_model_a", "winner_model_b", "winner_tie"]
].values.argsort()[:, -1]
temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
joined[f"loss_{exp}"] = torch.nn.functional.binary_cross_entropy(
temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
)
joined["prompt_length"] = [len(x) for x in joined["prompt"]]
joined["response_a_length"] = [len(x) for x in joined["response_a"]]
joined["response_b_length"] = [len(x) for x in joined["response_b"]]
joined["total_length"] = (
joined["prompt_length"]
+ joined["response_a_length"]
+ joined["response_b_length"]
)
loss_cols = [x for x in joined.columns if "loss" in x]
joined["avg_loss"] = joined[loss_cols].mean(axis=1)
joined["avg_winner_model_a"] = joined[
[x for x in joined.columns if "winner_model_a_prob" in x]
].mean(axis=1)
joined["avg_winner_model_b"] = joined[
[x for x in joined.columns if "winner_model_b_prob" in x]
].mean(axis=1)
joined["avg_winner_tie"] = joined[
[x for x in joined.columns if "winner_tie_prob" in x]
].mean(axis=1)
prob_cols = [x for x in joined.columns if "prob" in x]
loss_cols = [x for x in joined.columns if "loss" in x]
joined[prob_cols + loss_cols] = joined[prob_cols + loss_cols].astype("float16")
id2texts = {i: (p, a, b) for i, p, a, b in joined[["id", "prompt", "response_a", "response_b"]].values}
joined = joined.drop(columns=["prompt", "response_a", "response_b"])
return joined, id2texts
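# The commented-out block below is a polars version of make_df, kept for reference.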
# def make_df():
# data = {f: pl.read_csv(f) for f in files}
# for k in data.keys():
# exp = exps[k]
# if "0" in data[k].columns:
# data[k] = data[k].rename({
# "0": f"winner_model_a_prob_{exp}",
# "1": f"winner_model_b_prob_{exp}",
# "2": f"winner_tie_prob_{exp}",
# })
# elif "winner_tie_prob" not in data[k].columns:
# data[k] = data[k].rename({
# "winner_model_a": f"winner_model_a_prob_{exp}",
# "winner_model_b": f"winner_model_b_prob_{exp}",
# "winner_tie": f"winner_tie_prob_{exp}",
# })
# else:
# data[k] = data[k].rename({
# "winner_model_a_prob": f"winner_model_a_prob_{exp}",
# "winner_model_b_prob": f"winner_model_b_prob_{exp}",
# "winner_tie_prob": f"winner_tie_prob_{exp}",
# })
# pred_cols = [
# f"winner_model_a_prob_{exp}",
# f"winner_model_b_prob_{exp}",
# f"winner_tie_prob_{exp}",
# ]
# data[k] = data[k].sort("id")
# final_columns = ["id"] + pred_cols
# data[k] = data[k].select(final_columns)
# id_col = data[files[0]].select("id")
# joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
# joined = pl.concat([id_col, joined], how="horizontal")
# tdf = pl.read_csv(train_csv_path)
# joined = joined.join(tdf, on="id", how="left")
# joined = joined.with_columns([
# pl.when(pl.col("winner_model_a") == 1).then(0).otherwise(
# pl.when(pl.col("winner_model_b") == 1).then(1).otherwise(
# pl.when(pl.col("winner_tie") == 1).then(2).otherwise(3)
# )).alias("winner")
# ])
# for exp in exps.values():
# pred_cols = [
# f"winner_model_a_prob_{exp}",
# f"winner_model_b_prob_{exp}",
# f"winner_tie_prob_{exp}",
# ]
# temp_scores = joined.select(pred_cols).to_numpy()
# if temp_scores.sum(axis=-1).max() > 1.1:
# temp_scores = torch.tensor(temp_scores).softmax(-1)
# else:
# temp_scores = torch.tensor(temp_scores)
# joined = joined.with_columns([
# pl.Series(name=col, values=temp_scores[:, i].numpy())
# for i, col in enumerate(pred_cols)
# ])
# gt_idxs = joined.select(["winner_model_a", "winner_model_b", "winner_tie"]).to_numpy().argsort()[:, -1]
# temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
# loss = torch.nn.functional.binary_cross_entropy(
# temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
# )
# joined = joined.with_columns([
# pl.Series(name=f"loss_{exp}", values=loss.numpy())
# ])
# joined = joined.with_columns([
# pl.col("prompt").str.len_chars().alias("prompt_length"),
# pl.col("response_a").str.len_chars().alias("response_a_length"),
# pl.col("response_b").str.len_chars().alias("response_b_length"),
# ])
# joined = joined.with_columns([
# (pl.col("prompt_length") + pl.col("response_a_length") + pl.col("response_b_length")).alias("total_length")
# ])
# loss_cols = [x for x in joined.columns if "loss" in x]
# joined = joined.with_columns([
# pl.mean_horizontal(loss_cols).alias("avg_loss"),
# pl.mean_horizontal([x for x in joined.columns if "winner_model_a_prob" in x]).alias("avg_winner_model_a"),
# pl.mean_horizontal([x for x in joined.columns if "winner_model_b_prob" in x]).alias("avg_winner_model_b"),
# pl.mean_horizontal([x for x in joined.columns if "winner_tie_prob" in x]).alias("avg_winner_tie"),
# ])
# prob_cols = [x for x in joined.columns if "prob" in x]
# loss_cols = [x for x in joined.columns if "loss" in x]
# joined = joined.with_columns([
# pl.col(prob_cols + loss_cols).cast(pl.Float32)
# ])
# return joined
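# Build the merged OOF dataframe and the id -> (prompt, response_a, response_b)
# lookup once at startup.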
MAIN_DF, id2texts = make_df()
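# Keep rows whose loss (for the selected file's experiment, or the average over all
# files) lies between the two sliders, sorted hardest-first; the leading 0 resets
# the sample index.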
def filter_df(lower_limit, upper_limit, file, all_check):
if all_check or file is None or file == "":
loss_col = "avg_loss"
else:
loss_col = f"loss_{exps[file]}"
temp = MAIN_DF[
(MAIN_DF[loss_col] > lower_limit) & (MAIN_DF[loss_col] < upper_limit)
]
temp = temp.sort_values(loss_col, ascending=False).reset_index(drop=True)
return 0, temp
# def filter_df(lower_limit, upper_limit, file, all_check):
# if all_check or file is None or file == "":
# loss_col = "avg_loss"
# else:
# loss_col = f"loss_{exps[file]}"
# temp = MAIN_DF.filter(
# (pl.col(loss_col) > lower_limit) & (pl.col(loss_col) < upper_limit)
# ).sort(loss_col, descending=True)
# return 0, temp
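# prompt and response are JSON-encoded lists of turns; each turn becomes a
# user/assistant ChatMessage pair with a Winner / Tie / Loser banner prepended.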
def make_chat(prompt, response, side, label):
prompts = json.loads(prompt)
responses = json.loads(response)
header = None
if side == label:
header = "✅ Winner ✅"
elif label == 2 or label == "Tie":
header = "🟨 Tie 🟨"
else:
header = "❌ Loser ❌"
chat = []
for p, r in zip(prompts, responses):
chat.append(
ChatMessage(
role="user",
content=header + "\n" + p,
)
)
if r is None:
r = ""
chat.append(ChatMessage(role="assistant", content=header + "\n" + r))
return chat
# def show_chats(idx, df, file, all_check):
# if idx is None:
# return None, None
# if idx > len(df):
# idx = len(df) - 1
# if idx < 0:
# idx = 0
# label = df["winner"].iloc[idx]
# chat_a = make_chat(df["prompt"].iloc[idx], df["response_a"].iloc[idx], "A", label)
# chat_b = make_chat(df["prompt"].iloc[idx], df["response_b"].iloc[idx], "B", label)
# if all_check or file is None or file == "":
# score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
# else:
# score_cols = [
# f"winner_model_a_prob_{exps[file]}",
# f"winner_model_b_prob_{exps[file]}",
# f"winner_tie_prob_{exps[file]}",
# ]
# scores = df[score_cols].iloc[idx].tolist()
# if all_check or file is None or file == "":
# loss_col = "avg_loss"
# else:
# loss_col = f"loss_{exps[file]}"
# loss = df[loss_col].iloc[idx]
# return chat_a, chat_b, label, *scores, loss
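# Look up the selected row, rebuild both conversations from id2texts, and return the
# ground-truth label, the three class scores, and the loss for the chosen experiment
# (or the averages when "use all files" is checked).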
def show_chats(idx, df, file, all_check):
    if idx is None:
        # No sample selected yet: return one placeholder per output component.
        return None, None, None, None, None, None, None
if idx >= df.shape[0]:
idx = df.shape[0] - 1
if idx < 0:
idx = 0
row = df.iloc[idx]
label = row["winner"]
id_ = row["id"]
p, a, b = id2texts[id_]
chat_a = make_chat(p, a, "A", label)
chat_b = make_chat(p, b, "B", label)
# chat_a = make_chat(row["prompt"], row["response_a"], 0, label_idx)
# chat_b = make_chat(row["prompt"], row["response_b"], 1, label_idx)
if all_check or file is None or file == "":
score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
else:
score_cols = [
f"winner_model_a_prob_{exps[file]}",
f"winner_model_b_prob_{exps[file]}",
f"winner_tie_prob_{exps[file]}",
]
scores = row[score_cols].to_list()
if all_check or file is None or file == "":
loss_col = "avg_loss"
else:
loss_col = f"loss_{exps[file]}"
loss = row[loss_col]
# labels = ["A", "B", "Tie"]
return chat_a, chat_b, label, *scores, loss
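# This helper appears to be leftover example code; it is not wired into the interface below.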
def show_split(text):
if len(text) == 0:
gr.Markdown("## No Input Provided")
else:
for letter in text:
with gr.Row():
text = gr.Textbox(letter)
btn = gr.Button("Clear")
btn.click(lambda: gr.Textbox(value=""), None, text)
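# UI: pick a single OOF file (or average across all of them), bound the loss range
# with the sliders, then step through the filtered samples while both models'
# responses are shown side by side.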
with gr.Blocks() as demo:
gr.Markdown(
"""
# OOF Visualization
This is a demo for visualizing the out-of-fold predictions of a model.
It currently shows the predictions from the outputs of [this notebook](https://www.kaggle.com/code/kcotton21/lmsys-preds/notebook).
"""
)
with gr.Row():
with gr.Column():
file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
with gr.Column():
all_check = gr.Checkbox(label="Use average loss of all files")
with gr.Row():
lower_limit = gr.Slider(
label="Show samples with loss > this value", minimum=0, maximum=5, value=1
)
upper_limit = gr.Slider(
label="Show samples with loss < this value", minimum=0, maximum=5, value=5
)
# id_ = gr.Number(label="ID")
idx = gr.Number(visible=True)
hidden_df = gr.Dataframe(visible=False)
with gr.Row():
correct_label = gr.Textbox(label="Correct Label", interactive=False)
score_a = gr.Textbox(label="Model A Score", interactive=False)
score_b = gr.Textbox(label="Model B Score", interactive=False)
score_tie = gr.Textbox(label="Tie Score", interactive=False)
loss = gr.Textbox(label="Loss", interactive=False)
with gr.Row():
with gr.Column():
prev_btn = gr.Button(value="Previous")
with gr.Column():
next_btn = gr.Button(value="Next")
with gr.Row():
with gr.Column():
chat_a = gr.Chatbot(label="Model A", type="messages", height=1000)
with gr.Column():
chat_b = gr.Chatbot(label="Model B", type="messages", height=1000)
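    # Changing either loss bound re-filters the hidden dataframe and resets idx to 0;
    # changing idx re-renders both chats and the score/loss boxes.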
lower_limit.change(
filter_df,
inputs=[lower_limit, upper_limit, file, all_check],
outputs=[idx, hidden_df],
)
upper_limit.change(
filter_df,
inputs=[lower_limit, upper_limit, file, all_check],
outputs=[idx, hidden_df],
)
idx.change(
show_chats,
inputs=[idx, hidden_df, file, all_check],
outputs=[chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss],
)
prev_btn.click(lambda x: max(0, x - 1), inputs=idx, outputs=idx)
next_btn.click(lambda x: x + 1, inputs=idx, outputs=idx)
if __name__ == "__main__":
demo.launch(debug=True)