import json
import os
from pathlib import Path

import gradio as gr
import pandas as pd
import torch
from gradio import ChatMessage

# True when running inside a Hugging Face Space (the platform sets SPACE_AUTHOR_NAME).
IN_SPACE = bool(os.environ.get("SPACE_AUTHOR_NAME", False))

files = [
    "./lmsys-ex38-model_oof_df.parquet",
    "./lmsys-ex41-model_oof_df.parquet",
    "./lmsys-ex43-model_oof_df.parquet",
    "./lmsys-exp-llm-049-weight_preds.parquet",
    "./lmsys-exp-llm-053-weight_preds.parquet",
    "./lmsys-exp-llm-063-weight_preds.parquet",
    "./lmsys-exp-llm-065-weight_preds.parquet",
    "./lmsys-exp-llm-073-weight_preds.parquet",
    "./lmsys-exp-llm-078-weight_preds.parquet",
    "./lmsys-exp-llm-081-weight_preds.parquet",
    "./lmsys-exp-llm-085-weight_preds.parquet",
    "./lmsys-oof-exp2_preds.parquet",
    "./lmsys-oof-exp29_preds.parquet",
]
train_filepath = "./train.parquet"

if not IN_SPACE:
    # Running locally: read the OOF files and train set from the repo's data directory
    # and load the HF token from a local .env file.
    files = [x.replace("./", "../../data/oofs/") for x in files]
    train_filepath = "../../data/train.parquet"

    from dotenv import load_dotenv

    loaded = load_dotenv("../../.env")
    print("Loaded .env file:", loaded)

HF_TOKEN = os.getenv("HF_READ_OOFS_TOKEN")
if not HF_TOKEN:
    print("be sure to set HF_READ_OOFS_TOKEN in .env file")

# Download the OOF dataset if the files are not already present.
if not Path(files[0]).exists():
    from huggingface_hub import login, snapshot_download

    login(token=HF_TOKEN)
    snapshot_download(
        "nbroad/lmsys-cahpp-oofs",
        repo_type="dataset",
        local_dir="./",
        local_dir_use_symlinks=False,
    )
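# For local runs, the token is read from ../../.env. A minimal example of that file
# (illustrative; only the variable name above is taken from this script) might be:
#
#     HF_READ_OOFS_TOKEN=hf_xxxxxxxxxxxxxxxx
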
# Map each OOF file (both the full path and the bare filename) to its experiment id,
# which is parsed out of the filename.
exps = {}
for f in files:
    if "lmsys-exp-llm-" in f:
        exp = f.split("lmsys-exp-llm-")[1].split("-")[0]
    elif "lmsys-ex" in f:
        exp = f.split("lmsys-ex")[1].split("-")[0]
    elif "lmsys-oof-exp" in f:
        exp = f.split("lmsys-oof-exp")[1].split("_")[0]
    exps[f] = exp
    exps[f.split("/")[-1]] = exp
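# For example, "lmsys-exp-llm-049-weight_preds.parquet" maps to experiment id "049"
# and "lmsys-ex38-model_oof_df.parquet" maps to "38", so the per-experiment columns
# built below become winner_model_a_prob_049, loss_38, and so on.
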
def make_df():
    """Load every OOF file, normalize its columns, and join with the train data."""
    data = {f: pd.read_parquet(f) for f in files}

    for k in data.keys():
        exp = exps[k]

        # The OOF files use three different column-name schemes; normalize all of them
        # to winner_model_{a,b}_prob_{exp} / winner_tie_prob_{exp}.
        if "0" in data[k].columns:
            data[k] = data[k].rename(
                columns={
                    "0": f"winner_model_a_prob_{exp}",
                    "1": f"winner_model_b_prob_{exp}",
                    "2": f"winner_tie_prob_{exp}",
                },
            )
        elif "winner_tie_prob" not in data[k].columns:
            data[k] = data[k].rename(
                columns={
                    "winner_model_a": f"winner_model_a_prob_{exp}",
                    "winner_model_b": f"winner_model_b_prob_{exp}",
                    "winner_tie": f"winner_tie_prob_{exp}",
                }
            )
        else:
            data[k] = data[k].rename(
                columns={
                    "winner_model_a_prob": f"winner_model_a_prob_{exp}",
                    "winner_model_b_prob": f"winner_model_b_prob_{exp}",
                    "winner_tie_prob": f"winner_tie_prob_{exp}",
                }
            )

        pred_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]

        data[k] = data[k].sort_values("id")

        final_columns = ["id"] + pred_cols
        data[k] = data[k][final_columns]

    # Concatenate the per-experiment prediction columns side by side and re-attach ids.
    id_col = data[files[0]].iloc[:, 0]
    joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
    joined["id"] = id_col

    tdf = pd.read_parquet(train_filepath)
    joined = joined.merge(tdf, on="id", how="left")

    joined["winner"] = ""
    joined.loc[joined["winner_model_a"] == 1, "winner"] = "A"
    joined.loc[joined["winner_model_b"] == 1, "winner"] = "B"
    joined.loc[joined["winner_tie"] == 1, "winner"] = "Tie"

    # Each experiment id appears twice in exps (full path and bare filename), so this
    # loop revisits each experiment; the second pass recomputes identical values.
    for exp in exps.values():
        pred_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]
        temp_scores = joined[pred_cols].values
        # Some files store raw logits rather than probabilities; if the rows do not
        # sum to roughly 1, run them through a softmax first.
        if temp_scores.sum(axis=-1).max() > 1.1:
            temp_scores = torch.tensor(temp_scores).softmax(-1)
        else:
            temp_scores = torch.tensor(temp_scores)
        joined[pred_cols] = temp_scores.numpy()

        # Per-row loss: cross-entropy of the probability assigned to the true class.
        gt_idxs = joined[
            ["winner_model_a", "winner_model_b", "winner_tie"]
        ].values.argsort()[:, -1]
        temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
        joined[f"loss_{exp}"] = torch.nn.functional.binary_cross_entropy(
            temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
        )

    joined["prompt_length"] = [len(x) for x in joined["prompt"]]
    joined["response_a_length"] = [len(x) for x in joined["response_a"]]
    joined["response_b_length"] = [len(x) for x in joined["response_b"]]
    joined["total_length"] = (
        joined["prompt_length"]
        + joined["response_a_length"]
        + joined["response_b_length"]
    )

    loss_cols = [x for x in joined.columns if "loss" in x]
    joined["avg_loss"] = joined[loss_cols].mean(axis=1)
    joined["avg_winner_model_a"] = joined[
        [x for x in joined.columns if "winner_model_a_prob" in x]
    ].mean(axis=1)
    joined["avg_winner_model_b"] = joined[
        [x for x in joined.columns if "winner_model_b_prob" in x]
    ].mean(axis=1)
    joined["avg_winner_tie"] = joined[
        [x for x in joined.columns if "winner_tie_prob" in x]
    ].mean(axis=1)

    # Downcast to save memory, and keep the full texts in a separate dict keyed by id
    # so the dataframe passed around the Gradio app stays small.
    prob_cols = [x for x in joined.columns if "prob" in x]
    loss_cols = [x for x in joined.columns if "loss" in x]
    joined[prob_cols + loss_cols] = joined[prob_cols + loss_cols].astype("float16")

    id2texts = {
        i: (p, a, b)
        for i, p, a, b in joined[["id", "prompt", "response_a", "response_b"]].values
    }
    joined = joined.drop(columns=["prompt", "response_a", "response_b"])

    return joined, id2texts

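# Note on the loss above: binary cross-entropy of the ground-truth class probability
# against a target of 1 reduces to -log(p_true), i.e. the usual multi-class log loss.
# A tiny illustrative check (not executed by the app):
#
#     probs = torch.tensor([0.2, 0.5, 0.3], dtype=torch.float64)
#     gt = 1  # index of the true class
#     bce = torch.nn.functional.binary_cross_entropy(
#         probs[gt], torch.tensor(1.0, dtype=torch.float64)
#     )
#     assert torch.isclose(bce, -probs[gt].log())
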
MAIN_DF, id2texts = make_df()
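
# MAIN_DF holds the float16 summary columns (probabilities, losses, lengths, winner)
# used by the callbacks below; id2texts maps each conversation id to its
# (prompt, response_a, response_b) JSON strings so the texts stay out of Gradio state.
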
def filter_df(lower_limit, upper_limit, file, all_check):
    """Keep rows whose loss lies strictly between the two limits, hardest first."""
    if all_check or file is None or file == "":
        loss_col = "avg_loss"
    else:
        loss_col = f"loss_{exps[file]}"
    temp = MAIN_DF[
        (MAIN_DF[loss_col] > lower_limit) & (MAIN_DF[loss_col] < upper_limit)
    ]
    temp = temp.sort_values(loss_col, ascending=False).reset_index(drop=True)
    # Reset the displayed sample index to 0 whenever the filter changes.
    return 0, temp

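# Illustrative usage (not wired into the UI): rows with average loss between 1 and 5,
# hardest examples first.
#
#     start_idx, view = filter_df(1.0, 5.0, file=None, all_check=True)
#     view[["id", "avg_loss"]].head()
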
def make_chat(prompt, response, side, label):
    """Build the chat transcript for one side ("A" or "B") of a conversation."""
    prompts = json.loads(prompt)
    responses = json.loads(response)

    if side == label:
        header = "✅ Winner ✅"
    elif label == "Tie":
        header = "🟨 Tie 🟨"
    else:
        header = "❌ Loser ❌"

    chat = []
    for p, r in zip(prompts, responses):
        chat.append(
            ChatMessage(
                role="user",
                content=header + "\n" + p,
            )
        )
        if r is None:
            r = ""
        chat.append(ChatMessage(role="assistant", content=header + "\n" + r))
    return chat

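# The prompt/response fields are JSON-encoded lists of turns, so a single-turn example
# (illustrative) looks like:
#
#     make_chat('["Hi"]', '["Hello!"]', side="A", label="A")
#
# which yields one user and one assistant ChatMessage, each prefixed with "✅ Winner ✅".
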
def show_chats(idx, df, file, all_check):
    """Render the conversations and scores for the row at position idx of df."""
    if idx is None or df is None or len(df) == 0 or "winner" not in df.columns:
        # Nothing selected yet; clear every output wired up in idx.change below.
        return None, None, None, None, None, None, None

    # The index comes from a gr.Number, which may deliver a float, and the
    # Previous/Next buttons can push it out of range.
    idx = int(idx)
    if idx >= df.shape[0]:
        idx = df.shape[0] - 1
    if idx < 0:
        idx = 0

    row = df.iloc[idx]
    label = row["winner"]
    id_ = row["id"]
    p, a, b = id2texts[id_]

    chat_a = make_chat(p, a, "A", label)
    chat_b = make_chat(p, b, "B", label)

    if all_check or file is None or file == "":
        score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
    else:
        score_cols = [
            f"winner_model_a_prob_{exps[file]}",
            f"winner_model_b_prob_{exps[file]}",
            f"winner_tie_prob_{exps[file]}",
        ]
    scores = row[score_cols].to_list()

    if all_check or file is None or file == "":
        loss_col = "avg_loss"
    else:
        loss_col = f"loss_{exps[file]}"
    loss = row[loss_col]

    return chat_a, chat_b, label, *scores, loss

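# The returned tuple matches the output list of idx.change below:
# (chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss).
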
with gr.Blocks() as demo:
    gr.LoginButton()

    gr.Markdown(
        """
# OOF Visualization

This is a demo for visualizing a model's out-of-fold (OOF) predictions.
It currently shows the predictions produced by [this notebook](https://www.kaggle.com/code/kcotton21/lmsys-preds/notebook).
"""
    )

    with gr.Row():
        with gr.Column():
            file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
        with gr.Column():
            all_check = gr.Checkbox(label="Use average loss of all files")

    with gr.Row():
        lower_limit = gr.Slider(
            label="Show samples with loss > this value", minimum=0, maximum=5, value=1
        )
        upper_limit = gr.Slider(
            label="Show samples with loss < this value", minimum=0, maximum=5, value=5
        )

    # Index into the filtered dataframe; the filtered dataframe itself is hidden state.
    idx = gr.Number(visible=True)
    hidden_df = gr.Dataframe(visible=False)

    with gr.Row():
        correct_label = gr.Textbox(label="Correct Label", interactive=False)
        score_a = gr.Textbox(label="Model A Score", interactive=False)
        score_b = gr.Textbox(label="Model B Score", interactive=False)
        score_tie = gr.Textbox(label="Tie Score", interactive=False)
        loss = gr.Textbox(label="Loss", interactive=False)

    with gr.Row():
        with gr.Column():
            prev_btn = gr.Button(value="Previous")
        with gr.Column():
            next_btn = gr.Button(value="Next")

    with gr.Row():
        with gr.Column():
            chat_a = gr.Chatbot(label="Model A", type="messages", height=1000)
        with gr.Column():
            chat_b = gr.Chatbot(label="Model B", type="messages", height=1000)

    # Re-filter whenever either loss bound changes; show_chats re-renders when idx changes.
    lower_limit.change(
        filter_df,
        inputs=[lower_limit, upper_limit, file, all_check],
        outputs=[idx, hidden_df],
    )
    upper_limit.change(
        filter_df,
        inputs=[lower_limit, upper_limit, file, all_check],
        outputs=[idx, hidden_df],
    )
    idx.change(
        show_chats,
        inputs=[idx, hidden_df, file, all_check],
        outputs=[chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss],
    )
    prev_btn.click(lambda x: max(0, x - 1), inputs=idx, outputs=idx)
    next_btn.click(lambda x: x + 1, inputs=idx, outputs=idx)

demo.launch(debug=True)
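
# To run locally (assuming the directory layout implied by the paths above): put the
# OOF parquet files under ../../data/oofs/, train.parquet under ../../data/, and
# HF_READ_OOFS_TOKEN in ../../.env, then start the app with:
#
#     python app.py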