from pathlib import Path
import gradio as gr
# import polars as pl
import pandas as pd
import torch
import json
from gradio import ChatMessage
import os
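# SPACE_AUTHOR_NAME is one of the environment variables Hugging Face sets inside a
# running Space, so its presence is used here to detect Space vs. local execution.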
IN_SPACE = bool(os.environ.get("SPACE_AUTHOR_NAME", False))
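# Out-of-fold prediction files from several experiments; each parquet holds
# per-sample class scores for winner_model_a / winner_model_b / tie, keyed by "id".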
files = [
"./lmsys-ex38-model_oof_df.parquet",
"./lmsys-ex41-model_oof_df.parquet",
"./lmsys-ex43-model_oof_df.parquet",
"./lmsys-exp-llm-049-weight_preds.parquet",
"./lmsys-exp-llm-053-weight_preds.parquet",
"./lmsys-exp-llm-063-weight_preds.parquet",
"./lmsys-exp-llm-065-weight_preds.parquet",
"./lmsys-exp-llm-073-weight_preds.parquet",
"./lmsys-exp-llm-078-weight_preds.parquet",
"./lmsys-exp-llm-081-weight_preds.parquet",
"./lmsys-exp-llm-085-weight_preds.parquet",
"./lmsys-oof-exp2_preds.parquet",
"./lmsys-oof-exp29_preds.parquet",
]
train_filepath = "./train.parquet"
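# When running outside the Space, read the OOF and train parquets from the local
# data directory and load the Hugging Face token from a .env file.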
if not IN_SPACE:
files = [x.replace("./", "../../data/oofs/") for x in files]
train_filepath = "../../data/train.parquet"
from dotenv import load_dotenv
loaded = load_dotenv("../../.env")
print("Loaded .env file:", loaded)
HF_TOKEN = os.getenv("HF_READ_OOFS_TOKEN")
if not HF_TOKEN:
print("be sure to set HF_READ_OOFS_TOKEN in .env file")
if not Path(files[0]).exists():
from huggingface_hub import snapshot_download, login
login(token=HF_TOKEN)
snapshot_download("nbroad/lmsys-cahpp-oofs", repo_type="dataset", local_dir="./", local_dir_use_symlinks=False)
exps = {}
for f in files:
if "lmsys-exp-llm-" in f:
exp = f.split("lmsys-exp-llm-")[1].split("-")[0]
elif "lmsys-ex" in f:
exp = f.split("lmsys-ex")[1].split("-")[0]
elif "lmsys-oof-exp" in f:
exp = f.split("lmsys-oof-exp")[1].split("_")[0]
exps[f] = exp
exps[f.split("/")[-1]] = exp
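# Load every OOF file, normalize its prediction columns to
# winner_model_{a,b}_prob_<exp> / winner_tie_prob_<exp>, join all experiments on "id"
# together with the training labels, then add per-experiment log loss, text lengths,
# and averaged columns. Returns the merged dataframe plus an
# id -> (prompt, response_a, response_b) lookup so the large text columns can be
# dropped from the frame itself.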
def make_df():
data = {f: pd.read_parquet(f) for f in files}
for k in data.keys():
exp = exps[k]
if "0" in data[k].columns:
data[k] = data[k].rename(
columns={
"0": f"winner_model_a_prob_{exp}",
"1": f"winner_model_b_prob_{exp}",
"2": f"winner_tie_prob_{exp}",
},
)
elif "winner_tie_prob" not in data[k].columns:
data[k] = data[k].rename(
columns={
"winner_model_a": f"winner_model_a_prob_{exp}",
"winner_model_b": f"winner_model_b_prob_{exp}",
"winner_tie": f"winner_tie_prob_{exp}",
}
)
else:
data[k] = data[k].rename(
columns={
"winner_model_a_prob": f"winner_model_a_prob_{exp}",
"winner_model_b_prob": f"winner_model_b_prob_{exp}",
"winner_tie_prob": f"winner_tie_prob_{exp}",
}
)
pred_cols = [
f"winner_model_a_prob_{exp}",
f"winner_model_b_prob_{exp}",
f"winner_tie_prob_{exp}",
]
data[k] = data[k].sort_values("id")
final_columns = ["id"] + pred_cols
data[k] = data[k][final_columns]
id_col = data[files[0]].iloc[:, 0]
joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
# joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
# id_col = joined.iloc[:, 0]
# joined = joined.drop("id")
# joined = joined.insert_column(0, id_col)
joined["id"] = id_col
tdf = pd.read_parquet(train_filepath)
joined = joined.merge(tdf, on="id", how="left")
joined["winner"] = ""
joined.loc[joined["winner_model_a"] == 1, "winner"] = "A"
joined.loc[joined["winner_model_b"] == 1, "winner"] = "B"
joined.loc[joined["winner_tie"] == 1, "winner"] = "Tie"
for exp in exps.values():
pred_cols = [
f"winner_model_a_prob_{exp}",
f"winner_model_b_prob_{exp}",
f"winner_tie_prob_{exp}",
]
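        # Some experiments stored unnormalized scores: if any row sums to more than ~1,
        # treat the values as logits and apply a softmax; otherwise keep them as probabilities.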
temp_scores = joined[pred_cols].values
if temp_scores.sum(axis=-1).max() > 1.1:
temp_scores = torch.tensor(temp_scores).softmax(-1)
else:
temp_scores = torch.tensor(temp_scores)
joined[pred_cols] = temp_scores.numpy()
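        # binary_cross_entropy against a target of 1 reduces to -log(p), so picking the
        # ground-truth class gives the per-sample multiclass log loss for this experiment.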
gt_idxs = joined[
["winner_model_a", "winner_model_b", "winner_tie"]
].values.argsort()[:, -1]
temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
joined[f"loss_{exp}"] = torch.nn.functional.binary_cross_entropy(
temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
)
joined["prompt_length"] = [len(x) for x in joined["prompt"]]
joined["response_a_length"] = [len(x) for x in joined["response_a"]]
joined["response_b_length"] = [len(x) for x in joined["response_b"]]
joined["total_length"] = (
joined["prompt_length"]
+ joined["response_a_length"]
+ joined["response_b_length"]
)
loss_cols = [x for x in joined.columns if "loss" in x]
joined["avg_loss"] = joined[loss_cols].mean(axis=1)
joined["avg_winner_model_a"] = joined[
[x for x in joined.columns if "winner_model_a_prob" in x]
].mean(axis=1)
joined["avg_winner_model_b"] = joined[
[x for x in joined.columns if "winner_model_b_prob" in x]
].mean(axis=1)
joined["avg_winner_tie"] = joined[
[x for x in joined.columns if "winner_tie_prob" in x]
].mean(axis=1)
prob_cols = [x for x in joined.columns if "prob" in x]
loss_cols = [x for x in joined.columns if "loss" in x]
joined[prob_cols + loss_cols] = joined[prob_cols + loss_cols].astype("float16")
id2texts = {i: (p, a, b) for i, p, a, b in joined[["id", "prompt", "response_a", "response_b"]].values}
joined = joined.drop(columns=["prompt", "response_a", "response_b"])
return joined, id2texts
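# The commented-out block below is a polars version of make_df, kept for reference.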
# def make_df():
# data = {f: pl.read_csv(f) for f in files}
# for k in data.keys():
# exp = exps[k]
# if "0" in data[k].columns:
# data[k] = data[k].rename({
# "0": f"winner_model_a_prob_{exp}",
# "1": f"winner_model_b_prob_{exp}",
# "2": f"winner_tie_prob_{exp}",
# })
# elif "winner_tie_prob" not in data[k].columns:
# data[k] = data[k].rename({
# "winner_model_a": f"winner_model_a_prob_{exp}",
# "winner_model_b": f"winner_model_b_prob_{exp}",
# "winner_tie": f"winner_tie_prob_{exp}",
# })
# else:
# data[k] = data[k].rename({
# "winner_model_a_prob": f"winner_model_a_prob_{exp}",
# "winner_model_b_prob": f"winner_model_b_prob_{exp}",
# "winner_tie_prob": f"winner_tie_prob_{exp}",
# })
# pred_cols = [
# f"winner_model_a_prob_{exp}",
# f"winner_model_b_prob_{exp}",
# f"winner_tie_prob_{exp}",
# ]
# data[k] = data[k].sort("id")
# final_columns = ["id"] + pred_cols
# data[k] = data[k].select(final_columns)
# id_col = data[files[0]].select("id")
# joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
# joined = pl.concat([id_col, joined], how="horizontal")
# tdf = pl.read_csv(train_csv_path)
# joined = joined.join(tdf, on="id", how="left")
# joined = joined.with_columns([
# pl.when(pl.col("winner_model_a") == 1).then(0).otherwise(
# pl.when(pl.col("winner_model_b") == 1).then(1).otherwise(
# pl.when(pl.col("winner_tie") == 1).then(2).otherwise(3)
# )).alias("winner")
# ])
# for exp in exps.values():
# pred_cols = [
# f"winner_model_a_prob_{exp}",
# f"winner_model_b_prob_{exp}",
# f"winner_tie_prob_{exp}",
# ]
# temp_scores = joined.select(pred_cols).to_numpy()
# if temp_scores.sum(axis=-1).max() > 1.1:
# temp_scores = torch.tensor(temp_scores).softmax(-1)
# else:
# temp_scores = torch.tensor(temp_scores)
# joined = joined.with_columns([
# pl.Series(name=col, values=temp_scores[:, i].numpy())
# for i, col in enumerate(pred_cols)
# ])
# gt_idxs = joined.select(["winner_model_a", "winner_model_b", "winner_tie"]).to_numpy().argsort()[:, -1]
# temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
# loss = torch.nn.functional.binary_cross_entropy(
# temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
# )
# joined = joined.with_columns([
# pl.Series(name=f"loss_{exp}", values=loss.numpy())
# ])
# joined = joined.with_columns([
# pl.col("prompt").str.len_chars().alias("prompt_length"),
# pl.col("response_a").str.len_chars().alias("response_a_length"),
# pl.col("response_b").str.len_chars().alias("response_b_length"),
# ])
# joined = joined.with_columns([
# (pl.col("prompt_length") + pl.col("response_a_length") + pl.col("response_b_length")).alias("total_length")
# ])
# loss_cols = [x for x in joined.columns if "loss" in x]
# joined = joined.with_columns([
# pl.mean_horizontal(loss_cols).alias("avg_loss"),
# pl.mean_horizontal([x for x in joined.columns if "winner_model_a_prob" in x]).alias("avg_winner_model_a"),
# pl.mean_horizontal([x for x in joined.columns if "winner_model_b_prob" in x]).alias("avg_winner_model_b"),
# pl.mean_horizontal([x for x in joined.columns if "winner_tie_prob" in x]).alias("avg_winner_tie"),
# ])
# prob_cols = [x for x in joined.columns if "prob" in x]
# loss_cols = [x for x in joined.columns if "loss" in x]
# joined = joined.with_columns([
# pl.col(prob_cols + loss_cols).cast(pl.Float32)
# ])
# return joined
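# Build the merged OOF dataframe and the id -> (prompt, response_a, response_b)
# lookup once at startup.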
MAIN_DF, id2texts = make_df()
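# Keep rows whose loss (for the selected file's experiment, or the average over all
# files) lies between the two sliders, sorted hardest-first; the leading 0 resets
# the sample index.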
def filter_df(lower_limit, upper_limit, file, all_check):
if all_check or file is None or file == "":
loss_col = "avg_loss"
else:
loss_col = f"loss_{exps[file]}"
temp = MAIN_DF[
(MAIN_DF[loss_col] > lower_limit) & (MAIN_DF[loss_col] < upper_limit)
]
temp = temp.sort_values(loss_col, ascending=False).reset_index(drop=True)
return 0, temp
# def filter_df(lower_limit, upper_limit, file, all_check):
# if all_check or file is None or file == "":
# loss_col = "avg_loss"
# else:
# loss_col = f"loss_{exps[file]}"
# temp = MAIN_DF.filter(
# (pl.col(loss_col) > lower_limit) & (pl.col(loss_col) < upper_limit)
# ).sort(loss_col, descending=True)
# return 0, temp
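# prompt and response are JSON-encoded lists of turns; each turn becomes a
# user/assistant ChatMessage pair with a Winner / Tie / Loser banner prepended.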
def make_chat(prompt, response, side, label):
prompts = json.loads(prompt)
responses = json.loads(response)
header = None
if side == label:
header = "✅ Winner ✅"
elif label == 2 or label == "Tie":
header = "🟨 Tie 🟨"
else:
header = "❌ Loser ❌"
chat = []
for p, r in zip(prompts, responses):
chat.append(
ChatMessage(
role="user",
content=header + "\n" + p,
)
)
if r is None:
r = ""
chat.append(ChatMessage(role="assistant", content=header + "\n" + r))
return chat
# def show_chats(idx, df, file, all_check):
# if idx is None:
# return None, None
# if idx > len(df):
# idx = len(df) - 1
# if idx < 0:
# idx = 0
# label = df["winner"].iloc[idx]
# chat_a = make_chat(df["prompt"].iloc[idx], df["response_a"].iloc[idx], "A", label)
# chat_b = make_chat(df["prompt"].iloc[idx], df["response_b"].iloc[idx], "B", label)
# if all_check or file is None or file == "":
# score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
# else:
# score_cols = [
# f"winner_model_a_prob_{exps[file]}",
# f"winner_model_b_prob_{exps[file]}",
# f"winner_tie_prob_{exps[file]}",
# ]
# scores = df[score_cols].iloc[idx].tolist()
# if all_check or file is None or file == "":
# loss_col = "avg_loss"
# else:
# loss_col = f"loss_{exps[file]}"
# loss = df[loss_col].iloc[idx]
# return chat_a, chat_b, label, *scores, loss
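# Look up the selected row, rebuild both conversations from id2texts, and return the
# ground-truth label, the three class scores, and the loss for the chosen experiment
# (or the averages when "use all files" is checked).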
def show_chats(idx, df, file, all_check):
    if idx is None:
        # No sample selected yet: return one placeholder per output component.
        return None, None, None, None, None, None, None
if idx >= df.shape[0]:
idx = df.shape[0] - 1
if idx < 0:
idx = 0
row = df.iloc[idx]
label = row["winner"]
id_ = row["id"]
p, a, b = id2texts[id_]
chat_a = make_chat(p, a, "A", label)
chat_b = make_chat(p, b, "B", label)
# chat_a = make_chat(row["prompt"], row["response_a"], 0, label_idx)
# chat_b = make_chat(row["prompt"], row["response_b"], 1, label_idx)
if all_check or file is None or file == "":
score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
else:
score_cols = [
f"winner_model_a_prob_{exps[file]}",
f"winner_model_b_prob_{exps[file]}",
f"winner_tie_prob_{exps[file]}",
]
scores = row[score_cols].to_list()
if all_check or file is None or file == "":
loss_col = "avg_loss"
else:
loss_col = f"loss_{exps[file]}"
loss = row[loss_col]
# labels = ["A", "B", "Tie"]
return chat_a, chat_b, label, *scores, loss
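# This helper appears to be leftover example code; it is not wired into the interface below.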
def show_split(text):
if len(text) == 0:
gr.Markdown("## No Input Provided")
else:
for letter in text:
with gr.Row():
text = gr.Textbox(letter)
btn = gr.Button("Clear")
btn.click(lambda: gr.Textbox(value=""), None, text)
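# UI: pick a single OOF file (or average across all of them), bound the loss range
# with the sliders, then step through the filtered samples while both models'
# responses are shown side by side.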
with gr.Blocks() as demo:
gr.Markdown(
"""
# OOF Visualization
This is a demo for visualizing the out-of-fold predictions of a model.
It currently shows the predictions from the outputs of [this notebook](https://www.kaggle.com/code/kcotton21/lmsys-preds/notebook).
"""
)
with gr.Row():
with gr.Column():
file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
with gr.Column():
all_check = gr.Checkbox(label="Use average loss of all files")
with gr.Row():
lower_limit = gr.Slider(
label="Show samples with loss > this value", minimum=0, maximum=5, value=1
)
upper_limit = gr.Slider(
label="Show samples with loss < this value", minimum=0, maximum=5, value=5
)
# id_ = gr.Number(label="ID")
idx = gr.Number(visible=True)
hidden_df = gr.Dataframe(visible=False)
with gr.Row():
correct_label = gr.Textbox(label="Correct Label", interactive=False)
score_a = gr.Textbox(label="Model A Score", interactive=False)
score_b = gr.Textbox(label="Model B Score", interactive=False)
score_tie = gr.Textbox(label="Tie Score", interactive=False)
loss = gr.Textbox(label="Loss", interactive=False)
with gr.Row():
with gr.Column():
prev_btn = gr.Button(value="Previous")
with gr.Column():
next_btn = gr.Button(value="Next")
with gr.Row():
with gr.Column():
chat_a = gr.Chatbot(label="Model A", type="messages", height=1000)
with gr.Column():
chat_b = gr.Chatbot(label="Model B", type="messages", height=1000)
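    # Changing either loss bound re-filters the hidden dataframe and resets idx to 0;
    # changing idx re-renders both chats and the score/loss boxes.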
lower_limit.change(
filter_df,
inputs=[lower_limit, upper_limit, file, all_check],
outputs=[idx, hidden_df],
)
upper_limit.change(
filter_df,
inputs=[lower_limit, upper_limit, file, all_check],
outputs=[idx, hidden_df],
)
idx.change(
show_chats,
inputs=[idx, hidden_df, file, all_check],
outputs=[chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss],
)
prev_btn.click(lambda x: max(0, x - 1), inputs=idx, outputs=idx)
next_btn.click(lambda x: x + 1, inputs=idx, outputs=idx)
if __name__ == "__main__":
demo.launch(debug=True)