nbroad committed
Commit d6c1103 · verified · 1 Parent(s): fc978de

Update app.py

Files changed (1)
  1. app.py +1 -163
app.py CHANGED
@@ -108,10 +108,6 @@ def make_df():
 
     joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
 
-    # joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
-    # id_col = joined.iloc[:, 0]
-    # joined = joined.drop("id")
-    # joined = joined.insert_column(0, id_col)
 
     joined["id"] = id_col
 
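Note: the surviving pandas line above is the horizontal join that the deleted polars comments sketched. A minimal, self-contained illustration of the pattern, using hypothetical two-row frames rather than the app's real prediction files:

import pandas as pd

# Hypothetical stand-ins for the app's per-file prediction frames.
data = {
    "exp1.csv": pd.DataFrame({"id": [10, 11], "prob_a_exp1": [0.6, 0.2]}),
    "exp2.csv": pd.DataFrame({"id": [10, 11], "prob_a_exp2": [0.5, 0.3]}),
}
id_col = next(iter(data.values()))["id"]

# Drop the duplicated "id" from each frame, stack the remaining columns
# side by side, then re-attach a single "id" column, the pandas analogue
# of the removed pl.concat(..., how="horizontal") chain.
joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
joined["id"] = id_col
print(joined)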
@@ -181,117 +177,6 @@ def make_df():
 
     return joined, id2texts
 
-# def make_df():
-#     data = {f: pl.read_csv(f) for f in files}
-
-#     for k in data.keys():
-#         exp = exps[k]
-
-#         if "0" in data[k].columns:
-#             data[k] = data[k].rename({
-#                 "0": f"winner_model_a_prob_{exp}",
-#                 "1": f"winner_model_b_prob_{exp}",
-#                 "2": f"winner_tie_prob_{exp}",
-#             })
-#         elif "winner_tie_prob" not in data[k].columns:
-#             data[k] = data[k].rename({
-#                 "winner_model_a": f"winner_model_a_prob_{exp}",
-#                 "winner_model_b": f"winner_model_b_prob_{exp}",
-#                 "winner_tie": f"winner_tie_prob_{exp}",
-#             })
-#         else:
-#             data[k] = data[k].rename({
-#                 "winner_model_a_prob": f"winner_model_a_prob_{exp}",
-#                 "winner_model_b_prob": f"winner_model_b_prob_{exp}",
-#                 "winner_tie_prob": f"winner_tie_prob_{exp}",
-#             })
-
-#         pred_cols = [
-#             f"winner_model_a_prob_{exp}",
-#             f"winner_model_b_prob_{exp}",
-#             f"winner_tie_prob_{exp}",
-#         ]
-
-#         data[k] = data[k].sort("id")
-
-#         final_columns = ["id"] + pred_cols
-#         data[k] = data[k].select(final_columns)
-
-#     id_col = data[files[0]].select("id")
-
-#     joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
-#     joined = pl.concat([id_col, joined], how="horizontal")
-
-#     tdf = pl.read_csv(train_csv_path)
-
-#     joined = joined.join(tdf, on="id", how="left")
-
-#     joined = joined.with_columns([
-#         pl.when(pl.col("winner_model_a") == 1).then(0).otherwise(
-#             pl.when(pl.col("winner_model_b") == 1).then(1).otherwise(
-#                 pl.when(pl.col("winner_tie") == 1).then(2).otherwise(3)
-#             )).alias("winner")
-#     ])
-
-#     for exp in exps.values():
-#         pred_cols = [
-#             f"winner_model_a_prob_{exp}",
-#             f"winner_model_b_prob_{exp}",
-#             f"winner_tie_prob_{exp}",
-#         ]
-
-#         temp_scores = joined.select(pred_cols).to_numpy()
-
-#         if temp_scores.sum(axis=-1).max() > 1.1:
-#             temp_scores = torch.tensor(temp_scores).softmax(-1)
-#         else:
-#             temp_scores = torch.tensor(temp_scores)
-
-#         joined = joined.with_columns([
-#             pl.Series(name=col, values=temp_scores[:, i].numpy())
-#             for i, col in enumerate(pred_cols)
-#         ])
-
-#         gt_idxs = joined.select(["winner_model_a", "winner_model_b", "winner_tie"]).to_numpy().argsort()[:, -1]
-#         temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
-
-#         loss = torch.nn.functional.binary_cross_entropy(
-#             temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
-#         )
-
-#         joined = joined.with_columns([
-#             pl.Series(name=f"loss_{exp}", values=loss.numpy())
-#         ])
-
-#     joined = joined.with_columns([
-#         pl.col("prompt").str.len_chars().alias("prompt_length"),
-#         pl.col("response_a").str.len_chars().alias("response_a_length"),
-#         pl.col("response_b").str.len_chars().alias("response_b_length"),
-#     ])
-
-#     joined = joined.with_columns([
-#         (pl.col("prompt_length") + pl.col("response_a_length") + pl.col("response_b_length")).alias("total_length")
-#     ])
-
-#     loss_cols = [x for x in joined.columns if "loss" in x]
-
-#     joined = joined.with_columns([
-#         pl.mean_horizontal(loss_cols).alias("avg_loss"),
-#         pl.mean_horizontal([x for x in joined.columns if "winner_model_a_prob" in x]).alias("avg_winner_model_a"),
-#         pl.mean_horizontal([x for x in joined.columns if "winner_model_b_prob" in x]).alias("avg_winner_model_b"),
-#         pl.mean_horizontal([x for x in joined.columns if "winner_tie_prob" in x]).alias("avg_winner_tie"),
-#     ])
-
-#     prob_cols = [x for x in joined.columns if "prob" in x]
-#     loss_cols = [x for x in joined.columns if "loss" in x]
-
-#     joined = joined.with_columns([
-#         pl.col(prob_cols + loss_cols).cast(pl.Float32)
-#     ])
-
-
-
-#     return joined
 
 MAIN_DF, id2texts = make_df()
 
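Note: the deleted polars make_df also computed the per-experiment log loss. The idea: softmax the raw scores only when a row sums well above one (i.e. they are logits), then take the binary cross-entropy of the probability assigned to the true class against a target of 1, which is just -log p(true class). A small torch sketch of that step, with made-up scores and labels:

import torch

# Made-up raw scores (logits) for three samples, plus one-hot ground truth.
temp_scores = torch.tensor([[2.0, 0.5, 0.1],
                            [0.2, 1.5, 0.3],
                            [0.4, 0.4, 1.2]])
gt = torch.tensor([[1, 0, 0], [0, 0, 1], [0, 1, 0]])

# Normalize only if rows don't already sum to ~1.
if temp_scores.sum(-1).max() > 1.1:
    probs = temp_scores.softmax(-1).double()
else:
    probs = temp_scores.double()

# Per-sample loss, kept unreduced so each row gets its own value.
gt_idxs = gt.argmax(-1)
picked = probs[torch.arange(len(probs)), gt_idxs]
loss = torch.nn.functional.binary_cross_entropy(
    picked, torch.ones(len(picked), dtype=torch.float64), reduction="none"
)
print(loss)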
@@ -309,18 +194,6 @@ def filter_df(lower_limit, upper_limit, file, all_check):
 
     return 0, temp
 
-# def filter_df(lower_limit, upper_limit, file, all_check):
-#     if all_check or file is None or file == "":
-#         loss_col = "avg_loss"
-#     else:
-#         loss_col = f"loss_{exps[file]}"
-
-#     temp = MAIN_DF.filter(
-#         (pl.col(loss_col) > lower_limit) & (pl.col(loss_col) < upper_limit)
-#     ).sort(loss_col, descending=True)
-
-#     return 0, temp
-
 
 def make_chat(prompt, response, side, label):
     prompts = json.loads(prompt)
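Note: the deleted polars filter_df mirrors what the surviving pandas version does: keep rows whose loss falls between the two slider values and show the hardest samples first. A toy pandas sketch of that filter; the frame and bounds here are hypothetical:

import pandas as pd

# Toy stand-in for MAIN_DF with the app's "avg_loss" column.
df = pd.DataFrame({"id": [1, 2, 3, 4], "avg_loss": [0.4, 2.1, 1.3, 4.8]})
lower_limit, upper_limit = 1.0, 5.0
loss_col = "avg_loss"

# Keep samples inside the slider bounds, highest loss first.
temp = df[(df[loss_col] > lower_limit) & (df[loss_col] < upper_limit)]
temp = temp.sort_values(loss_col, ascending=False)
print(temp)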
@@ -351,41 +224,6 @@ def make_chat(prompt, response, side, label):
     return chat
 
 
-# def show_chats(idx, df, file, all_check):
-
-#     if idx is None:
-#         return None, None
-
-#     if idx > len(df):
-#         idx = len(df) - 1
-#     if idx < 0:
-#         idx = 0
-
-#     label = df["winner"].iloc[idx]
-
-#     chat_a = make_chat(df["prompt"].iloc[idx], df["response_a"].iloc[idx], "A", label)
-#     chat_b = make_chat(df["prompt"].iloc[idx], df["response_b"].iloc[idx], "B", label)
-
-#     if all_check or file is None or file == "":
-#         score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
-#     else:
-#         score_cols = [
-#             f"winner_model_a_prob_{exps[file]}",
-#             f"winner_model_b_prob_{exps[file]}",
-#             f"winner_tie_prob_{exps[file]}",
-#         ]
-
-#     scores = df[score_cols].iloc[idx].tolist()
-
-#     if all_check or file is None or file == "":
-#         loss_col = "avg_loss"
-#     else:
-#         loss_col = f"loss_{exps[file]}"
-
-#     loss = df[loss_col].iloc[idx]
-
-#     return chat_a, chat_b, label, *scores, loss
-
 def show_chats(idx, df, file, all_check):
     if idx is None:
         return None, None
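Note: the deleted and surviving show_chats share one branching rule: use the averaged columns whenever no single file is selected or the all-files box is checked, otherwise use that experiment's own probability columns. An isolated sketch of that branch; the exps mapping here is hypothetical:

# Hypothetical file-to-experiment mapping, standing in for the app's exps dict.
exps = {"preds_exp1.csv": "exp1"}

def pick_score_cols(file, all_check):
    # Averaged view across every file, or one experiment's own columns.
    if all_check or file is None or file == "":
        return ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
    exp = exps[file]
    return [
        f"winner_model_a_prob_{exp}",
        f"winner_model_b_prob_{exp}",
        f"winner_tie_prob_{exp}",
    ]

print(pick_score_cols(None, False))              # averaged view
print(pick_score_cols("preds_exp1.csv", False))  # single-file view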
@@ -458,7 +296,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
         with gr.Column():
-            all_check = gr.Checkbox(label="Use average loss of all files")
+            all_check = gr.Checkbox(label="Use average loss of all files", value=True)
     with gr.Row():
         lower_limit = gr.Slider(
             label="Show samples with loss > this value", minimum=0, maximum=5, value=1
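Note: this hunk is the commit's one functional change: the checkbox now defaults to checked, so the app opens on the averaged-loss view instead of requiring a click. A minimal runnable sketch of the changed widget, showing just the checkbox rather than the full layout:

import gradio as gr

with gr.Blocks() as demo:
    # value=True makes the averaged-loss view the initial state.
    all_check = gr.Checkbox(label="Use average loss of all files", value=True)

if __name__ == "__main__":
    demo.launch()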