nbroad committed on
Commit a591d0d · 1 Parent(s): 638ad87
Files changed (2):
  1. app.py +497 -4
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,7 +1,500 @@
 
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"

- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ from pathlib import Path
  import gradio as gr
+ import polars as pl
+ import pandas as pd
+ import torch
+ import json
+ from gradio import ChatMessage
+ import os

+ IN_SPACE = bool(os.environ.get("SPACE_AUTHOR_NAME", False))

+ files = [
+     "./lmsys-ex38-model_oof_df.parquet",
+     "./lmsys-ex41-model_oof_df.parquet",
+     "./lmsys-ex43-model_oof_df.parquet",
+     "./lmsys-exp-llm-049-weight_preds.parquet",
+     "./lmsys-exp-llm-053-weight_preds.parquet",
+     "./lmsys-exp-llm-063-weight_preds.parquet",
+     "./lmsys-exp-llm-065-weight_preds.parquet",
+     "./lmsys-exp-llm-073-weight_preds.parquet",
+     "./lmsys-exp-llm-078-weight_preds.parquet",
+     "./lmsys-exp-llm-081-weight_preds.parquet",
+     "./lmsys-exp-llm-085-weight_preds.parquet",
+     "./lmsys-oof-exp2_preds.parquet",
+     "./lmsys-oof-exp29_preds.parquet",
+ ]
+ train_filepath = "./train.parquet"
+
+ if not IN_SPACE:
+     files = [x.replace("./", "../../data/oofs/") for x in files]
+     train_filepath = "../../data/train.parquet"
+     from dotenv import load_dotenv
+     loaded = load_dotenv("../../.env")
+     print("Loaded .env file:", loaded)
+
+ HF_TOKEN = os.getenv("HF_READ_OOFS_TOKEN")
+
+ if not HF_TOKEN:
+     print("be sure to set HF_READ_OOFS_TOKEN in .env file")
+
+ if not Path(files[0]).exists():
+     from huggingface_hub import snapshot_download, login
+
+     login(token=HF_TOKEN)
+
+     snapshot_download("nbroad/lmsys-cahpp-oofs", repo_type="dataset", local_dir="./", local_dir_use_symlinks=False)
+
+
+ exps = {}
+
+ for f in files:
+     if "lmsys-exp-llm-" in f:
+         exp = f.split("lmsys-exp-llm-")[1].split("-")[0]
+     elif "lmsys-ex" in f:
+         exp = f.split("lmsys-ex")[1].split("-")[0]
+     elif "lmsys-oof-exp" in f:
+         exp = f.split("lmsys-oof-exp")[1].split("_")[0]
+     exps[f] = exp
+     exps[f.split("/")[-1]] = exp
+
+
+ def make_df():
+     data = {f: pd.read_parquet(f) for f in files}
+
+     for k in data.keys():
+         exp = exps[k]
+
+         if "0" in data[k].columns:
+             data[k] = data[k].rename(
+                 columns={
+                     "0": f"winner_model_a_prob_{exp}",
+                     "1": f"winner_model_b_prob_{exp}",
+                     "2": f"winner_tie_prob_{exp}",
+                 },
+             )
+
+         elif "winner_tie_prob" not in data[k].columns:
+
+             data[k] = data[k].rename(
+                 columns={
+                     "winner_model_a": f"winner_model_a_prob_{exp}",
+                     "winner_model_b": f"winner_model_b_prob_{exp}",
+                     "winner_tie": f"winner_tie_prob_{exp}",
+                 }
+             )
+         else:
+             data[k] = data[k].rename(
+                 columns={
+                     "winner_model_a_prob": f"winner_model_a_prob_{exp}",
+                     "winner_model_b_prob": f"winner_model_b_prob_{exp}",
+                     "winner_tie_prob": f"winner_tie_prob_{exp}",
+                 }
+             )
+
+         pred_cols = [
+             f"winner_model_a_prob_{exp}",
+             f"winner_model_b_prob_{exp}",
+             f"winner_tie_prob_{exp}",
+         ]
+
+         data[k] = data[k].sort_values("id")
+
+         final_columns = ["id"] + pred_cols
+
+         data[k] = data[k][final_columns]
+
+     id_col = data[files[0]].iloc[:, 0]
+
+     joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
+
+     # joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
+     # id_col = joined.iloc[:, 0]
+     # joined = joined.drop("id")
+     # joined = joined.insert_column(0, id_col)
+
+     joined["id"] = id_col
+
+     tdf = pd.read_parquet(train_filepath)
+
+     joined = joined.merge(tdf, on="id", how="left")
+
+     joined["winner"] = ""
+     joined.loc[joined["winner_model_a"] == 1, "winner"] = "A"
+     joined.loc[joined["winner_model_b"] == 1, "winner"] = "B"
+     joined.loc[joined["winner_tie"] == 1, "winner"] = "Tie"
+
+     for exp in exps.values():
+         pred_cols = [
+             f"winner_model_a_prob_{exp}",
+             f"winner_model_b_prob_{exp}",
+             f"winner_tie_prob_{exp}",
+         ]
+
+         temp_scores = joined[pred_cols].values
+
+         # if rows don't already sum to ~1, treat them as raw logits and softmax
+         if temp_scores.sum(axis=-1).max() > 1.1:
+             temp_scores = torch.tensor(temp_scores).softmax(-1)
+         else:
+             temp_scores = torch.tensor(temp_scores)
+
+         joined[pred_cols] = temp_scores.numpy()
+
+         gt_idxs = joined[
+             ["winner_model_a", "winner_model_b", "winner_tie"]
+         ].values.argsort()[:, -1]
+         temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
+
+         # BCE against a target of 1 on the ground-truth probability == -log(p_true)
+         joined[f"loss_{exp}"] = torch.nn.functional.binary_cross_entropy(
+             temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
+         )
+
+     joined["prompt_length"] = [len(x) for x in joined["prompt"]]
+     joined["response_a_length"] = [len(x) for x in joined["response_a"]]
+     joined["response_b_length"] = [len(x) for x in joined["response_b"]]
+     joined["total_length"] = (
+         joined["prompt_length"]
+         + joined["response_a_length"]
+         + joined["response_b_length"]
+     )
+
+     loss_cols = [x for x in joined.columns if "loss" in x]
+     joined["avg_loss"] = joined[loss_cols].mean(axis=1)
+     joined["avg_winner_model_a"] = joined[
+         [x for x in joined.columns if "winner_model_a_prob" in x]
+     ].mean(axis=1)
+     joined["avg_winner_model_b"] = joined[
+         [x for x in joined.columns if "winner_model_b_prob" in x]
+     ].mean(axis=1)
+     joined["avg_winner_tie"] = joined[
+         [x for x in joined.columns if "winner_tie_prob" in x]
+     ].mean(axis=1)
+
+     prob_cols = [x for x in joined.columns if "prob" in x]
+     loss_cols = [x for x in joined.columns if "loss" in x]
+
+     joined[prob_cols + loss_cols] = joined[prob_cols + loss_cols].astype("float16")
+
+     id2texts = {i: (p, a, b) for i, p, a, b in joined[["id", "prompt", "response_a", "response_b"]].values}
+
+     joined = joined.drop(columns=["prompt", "response_a", "response_b"])
+
+     return joined, id2texts
+
+ # def make_df():
+ #     data = {f: pl.read_csv(f) for f in files}
+
+ #     for k in data.keys():
+ #         exp = exps[k]
+
+ #         if "0" in data[k].columns:
+ #             data[k] = data[k].rename({
+ #                 "0": f"winner_model_a_prob_{exp}",
+ #                 "1": f"winner_model_b_prob_{exp}",
+ #                 "2": f"winner_tie_prob_{exp}",
+ #             })
+ #         elif "winner_tie_prob" not in data[k].columns:
+ #             data[k] = data[k].rename({
+ #                 "winner_model_a": f"winner_model_a_prob_{exp}",
+ #                 "winner_model_b": f"winner_model_b_prob_{exp}",
+ #                 "winner_tie": f"winner_tie_prob_{exp}",
+ #             })
+ #         else:
+ #             data[k] = data[k].rename({
+ #                 "winner_model_a_prob": f"winner_model_a_prob_{exp}",
+ #                 "winner_model_b_prob": f"winner_model_b_prob_{exp}",
+ #                 "winner_tie_prob": f"winner_tie_prob_{exp}",
+ #             })
+
+ #         pred_cols = [
+ #             f"winner_model_a_prob_{exp}",
+ #             f"winner_model_b_prob_{exp}",
+ #             f"winner_tie_prob_{exp}",
+ #         ]
+
+ #         data[k] = data[k].sort("id")
+
+ #         final_columns = ["id"] + pred_cols
+ #         data[k] = data[k].select(final_columns)
+
+ #     id_col = data[files[0]].select("id")
+
+ #     joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
+ #     joined = pl.concat([id_col, joined], how="horizontal")
+
+ #     tdf = pl.read_csv(train_csv_path)
+
+ #     joined = joined.join(tdf, on="id", how="left")
+
+ #     joined = joined.with_columns([
+ #         pl.when(pl.col("winner_model_a") == 1).then(0).otherwise(
+ #             pl.when(pl.col("winner_model_b") == 1).then(1).otherwise(
+ #                 pl.when(pl.col("winner_tie") == 1).then(2).otherwise(3)
+ #         )).alias("winner")
+ #     ])
+
+ #     for exp in exps.values():
+ #         pred_cols = [
+ #             f"winner_model_a_prob_{exp}",
+ #             f"winner_model_b_prob_{exp}",
+ #             f"winner_tie_prob_{exp}",
+ #         ]
+
+ #         temp_scores = joined.select(pred_cols).to_numpy()
+
+ #         if temp_scores.sum(axis=-1).max() > 1.1:
+ #             temp_scores = torch.tensor(temp_scores).softmax(-1)
+ #         else:
+ #             temp_scores = torch.tensor(temp_scores)
+
+ #         joined = joined.with_columns([
+ #             pl.Series(name=col, values=temp_scores[:, i].numpy())
+ #             for i, col in enumerate(pred_cols)
+ #         ])
+
+ #         gt_idxs = joined.select(["winner_model_a", "winner_model_b", "winner_tie"]).to_numpy().argsort()[:, -1]
+ #         temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
+
+ #         loss = torch.nn.functional.binary_cross_entropy(
+ #             temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
+ #         )
+
+ #         joined = joined.with_columns([
+ #             pl.Series(name=f"loss_{exp}", values=loss.numpy())
+ #         ])
+
+ #     joined = joined.with_columns([
+ #         pl.col("prompt").str.len_chars().alias("prompt_length"),
+ #         pl.col("response_a").str.len_chars().alias("response_a_length"),
+ #         pl.col("response_b").str.len_chars().alias("response_b_length"),
+ #     ])
+
+ #     joined = joined.with_columns([
+ #         (pl.col("prompt_length") + pl.col("response_a_length") + pl.col("response_b_length")).alias("total_length")
+ #     ])
+
+ #     loss_cols = [x for x in joined.columns if "loss" in x]
+
+ #     joined = joined.with_columns([
+ #         pl.mean_horizontal(loss_cols).alias("avg_loss"),
+ #         pl.mean_horizontal([x for x in joined.columns if "winner_model_a_prob" in x]).alias("avg_winner_model_a"),
+ #         pl.mean_horizontal([x for x in joined.columns if "winner_model_b_prob" in x]).alias("avg_winner_model_b"),
+ #         pl.mean_horizontal([x for x in joined.columns if "winner_tie_prob" in x]).alias("avg_winner_tie"),
+ #     ])
+
+ #     prob_cols = [x for x in joined.columns if "prob" in x]
+ #     loss_cols = [x for x in joined.columns if "loss" in x]
+
+ #     joined = joined.with_columns([
+ #         pl.col(prob_cols + loss_cols).cast(pl.Float32)
+ #     ])
+
+
+
+ #     return joined
+
+ MAIN_DF, id2texts = make_df()
+
+
+ def filter_df(lower_limit, upper_limit, file, all_check):
+     if all_check or file is None or file == "":
+         loss_col = "avg_loss"
+     else:
+         loss_col = f"loss_{exps[file]}"
+
+     temp = MAIN_DF[
+         (MAIN_DF[loss_col] > lower_limit) & (MAIN_DF[loss_col] < upper_limit)
+     ]
+     temp = temp.sort_values(loss_col, ascending=False).reset_index(drop=True)
+
+     return 0, temp
+
+ # def filter_df(lower_limit, upper_limit, file, all_check):
+ #     if all_check or file is None or file == "":
+ #         loss_col = "avg_loss"
+ #     else:
+ #         loss_col = f"loss_{exps[file]}"
+
+ #     temp = MAIN_DF.filter(
+ #         (pl.col(loss_col) > lower_limit) & (pl.col(loss_col) < upper_limit)
+ #     ).sort(loss_col, descending=True)
+
+ #     return 0, temp
+
+
+ def make_chat(prompt, response, side, label):
+     prompts = json.loads(prompt)
+     responses = json.loads(response)
+
+     header = None
+     if side == label:
+         header = "✅ Winner ✅"
+     elif label == 2 or label == "Tie":
+         header = "🟨 Tie 🟨"
+     else:
+         header = "❌ Loser ❌"
+
+     chat = []
+     for p, r in zip(prompts, responses):
+         chat.append(
+             ChatMessage(
+                 role="user",
+                 content=header + "\n" + p,
+             )
+         )
+
+         if r is None:
+             r = ""
+
+         chat.append(ChatMessage(role="assistant", content=header + "\n" + r))
+
+     return chat
+
+
+ # def show_chats(idx, df, file, all_check):
+
+ #     if idx is None:
+ #         return None, None
+
+ #     if idx > len(df):
+ #         idx = len(df) - 1
+ #     if idx < 0:
+ #         idx = 0
+
+ #     label = df["winner"].iloc[idx]
+
+ #     chat_a = make_chat(df["prompt"].iloc[idx], df["response_a"].iloc[idx], "A", label)
+ #     chat_b = make_chat(df["prompt"].iloc[idx], df["response_b"].iloc[idx], "B", label)
+
+ #     if all_check or file is None or file == "":
+ #         score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
+ #     else:
+ #         score_cols = [
+ #             f"winner_model_a_prob_{exps[file]}",
+ #             f"winner_model_b_prob_{exps[file]}",
+ #             f"winner_tie_prob_{exps[file]}",
+ #         ]
+
+ #     scores = df[score_cols].iloc[idx].tolist()
+
+ #     if all_check or file is None or file == "":
+ #         loss_col = "avg_loss"
+ #     else:
+ #         loss_col = f"loss_{exps[file]}"
+
+ #     loss = df[loss_col].iloc[idx]
+
+ #     return chat_a, chat_b, label, *scores, loss
+
+ def show_chats(idx, df, file, all_check):
+     if idx is None:
+         return None, None
+
+     if idx >= df.shape[0]:
+         idx = df.shape[0] - 1
+     if idx < 0:
+         idx = 0
+
+     row = df.iloc[idx]
+     label = row["winner"]
+
+     id_ = row["id"]
+
+     p, a, b = id2texts[id_]
+
+     chat_a = make_chat(p, a, "A", label)
+     chat_b = make_chat(p, b, "B", label)
+
+     # chat_a = make_chat(row["prompt"], row["response_a"], 0, label_idx)
+     # chat_b = make_chat(row["prompt"], row["response_b"], 1, label_idx)
+
+     if all_check or file is None or file == "":
+         score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
+     else:
+         score_cols = [
+             f"winner_model_a_prob_{exps[file]}",
+             f"winner_model_b_prob_{exps[file]}",
+             f"winner_tie_prob_{exps[file]}",
+         ]
+
+     scores = row[score_cols].to_list()
+
+     if all_check or file is None or file == "":
+         loss_col = "avg_loss"
+     else:
+         loss_col = f"loss_{exps[file]}"
+
+     loss = row[loss_col]
+
+     # labels = ["A", "B", "Tie"]
+
+     return chat_a, chat_b, label, *scores, loss
+
+
+ with gr.Blocks() as demo:
+
+     gr.LoginButton()
+
+     gr.Markdown(
+         """
+         # OOF Visualization
+
+         This is a demo for visualizing the out-of-fold predictions of a model.
+         It currently shows the predictions for the outputs of [this notebook](https://www.kaggle.com/code/kcotton21/lmsys-preds/notebook).
+         """
+     )
+     with gr.Row():
+         with gr.Column():
+             file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
+         with gr.Column():
+             all_check = gr.Checkbox(label="Use average loss of all files")
+     with gr.Row():
+         lower_limit = gr.Slider(
+             label="Show samples with loss > this value", minimum=0, maximum=5, value=1
+         )
+         upper_limit = gr.Slider(
+             label="Show samples with loss < this value", minimum=0, maximum=5, value=5
+         )
+
+     # id_ = gr.Number(label="ID")
+     idx = gr.Number(visible=True)
+     hidden_df = gr.Dataframe(visible=False)
+     with gr.Row():
+         correct_label = gr.Textbox(label="Correct Label", interactive=False)
+         score_a = gr.Textbox(label="Model A Score", interactive=False)
+         score_b = gr.Textbox(label="Model B Score", interactive=False)
+         score_tie = gr.Textbox(label="Tie Score", interactive=False)
+         loss = gr.Textbox(label="Loss", interactive=False)
+     with gr.Row():
+         with gr.Column():
+             prev_btn = gr.Button(value="Previous")
+         with gr.Column():
+             next_btn = gr.Button(value="Next")
+
+     with gr.Row():
+         with gr.Column():
+             chat_a = gr.Chatbot(label="Model A", type="messages", height=1000)
+         with gr.Column():
+             chat_b = gr.Chatbot(label="Model B", type="messages", height=1000)
+
+     lower_limit.change(
+         filter_df,
+         inputs=[lower_limit, upper_limit, file, all_check],
+         outputs=[idx, hidden_df],
+     )
+     upper_limit.change(
+         filter_df,
+         inputs=[lower_limit, upper_limit, file, all_check],
+         outputs=[idx, hidden_df],
+     )
+
+     idx.change(
+         show_chats,
+         inputs=[idx, hidden_df, file, all_check],
+         outputs=[chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss],
+     )
+     prev_btn.click(lambda x: max(0, x - 1), inputs=idx, outputs=idx)
+     next_btn.click(lambda x: x + 1, inputs=idx, outputs=idx)
+
+
+ demo.launch(debug=True)
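
Note on the loss computed in make_df above: it feeds the probability assigned to the ground-truth class into binary_cross_entropy with a target of 1, which reduces to -log(p_true), i.e. the usual multi-class log loss. A minimal sketch of that equivalence (not part of the commit; the probabilities and labels below are made up):

    import torch
    import torch.nn.functional as F

    probs = torch.tensor([[0.7, 0.2, 0.1],
                          [0.1, 0.3, 0.6]], dtype=torch.float64)   # normalized predictions
    gt = torch.tensor([0, 2])                                      # ground-truth class per row

    p_true = probs[torch.arange(len(gt)), gt]                      # probability of the true class
    bce = F.binary_cross_entropy(p_true, torch.ones_like(p_true))  # what make_df does (mean-reduced here)
    nll = F.nll_loss(probs.log(), gt)                              # standard multi-class log loss

    assert torch.allclose(bce, nll)                                # both equal -mean(log p_true)
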
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch
+ numpy
+ pandas
+ gradio
+ polars
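
One more detail from app.py's make_df: an experiment's scores are softmaxed only when its rows do not already sum to roughly 1 (the > 1.1 check), so files that store raw logits and files that store probabilities are both handled. A small illustration of that heuristic (values made up, not part of the commit):

    import torch

    probs  = torch.tensor([[0.70, 0.20, 0.10]])   # already normalized, left as-is
    logits = torch.tensor([[2.10, 0.40, -1.30]])  # raw scores, row sum 1.2 -> softmaxed

    for scores in (probs, logits):
        if scores.sum(dim=-1).max() > 1.1:
            scores = scores.softmax(dim=-1)
        print(scores.sum(dim=-1))  # ~1.0 in both cases
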