Update app.py
app.py CHANGED
@@ -108,10 +108,6 @@ def make_df():
 
     joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
 
-    # joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
-    # id_col = joined.iloc[:, 0]
-    # joined = joined.drop("id")
-    # joined = joined.insert_column(0, id_col)
 
     joined["id"] = id_col
 
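
For reference, the kept pandas lines above do the same horizontal join that the deleted polars comments described: drop the shared "id" column from each per-file frame, concatenate the prediction columns side by side, then re-attach a single id column. A minimal sketch of that pattern with hypothetical toy frames and column names (not the app's real files):

import pandas as pd

# Hypothetical per-file prediction frames; the real app reads these from CSVs.
data = {
    "exp1.csv": pd.DataFrame({"id": [7, 8], "winner_model_a_prob_exp1": [0.6, 0.2]}),
    "exp2.csv": pd.DataFrame({"id": [7, 8], "winner_model_a_prob_exp2": [0.5, 0.3]}),
}
id_col = data["exp1.csv"]["id"]

# Drop the shared id from each frame, join the prediction columns column-wise,
# then re-attach one id column (the pattern kept in the hunk above).
joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
joined["id"] = id_col
print(joined)
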
@@ -181,117 +177,6 @@ def make_df():
 
     return joined, id2texts
 
-# def make_df():
-#     data = {f: pl.read_csv(f) for f in files}
-
-#     for k in data.keys():
-#         exp = exps[k]
-
-#         if "0" in data[k].columns:
-#             data[k] = data[k].rename({
-#                 "0": f"winner_model_a_prob_{exp}",
-#                 "1": f"winner_model_b_prob_{exp}",
-#                 "2": f"winner_tie_prob_{exp}",
-#             })
-#         elif "winner_tie_prob" not in data[k].columns:
-#             data[k] = data[k].rename({
-#                 "winner_model_a": f"winner_model_a_prob_{exp}",
-#                 "winner_model_b": f"winner_model_b_prob_{exp}",
-#                 "winner_tie": f"winner_tie_prob_{exp}",
-#             })
-#         else:
-#             data[k] = data[k].rename({
-#                 "winner_model_a_prob": f"winner_model_a_prob_{exp}",
-#                 "winner_model_b_prob": f"winner_model_b_prob_{exp}",
-#                 "winner_tie_prob": f"winner_tie_prob_{exp}",
-#             })
-
-#         pred_cols = [
-#             f"winner_model_a_prob_{exp}",
-#             f"winner_model_b_prob_{exp}",
-#             f"winner_tie_prob_{exp}",
-#         ]
-
-#         data[k] = data[k].sort("id")
-
-#         final_columns = ["id"] + pred_cols
-#         data[k] = data[k].select(final_columns)
-
-#     id_col = data[files[0]].select("id")
-
-#     joined = pl.concat([x.drop("id") for x in data.values()], how="horizontal")
-#     joined = pl.concat([id_col, joined], how="horizontal")
-
-#     tdf = pl.read_csv(train_csv_path)
-
-#     joined = joined.join(tdf, on="id", how="left")
-
-#     joined = joined.with_columns([
-#         pl.when(pl.col("winner_model_a") == 1).then(0).otherwise(
-#             pl.when(pl.col("winner_model_b") == 1).then(1).otherwise(
-#                 pl.when(pl.col("winner_tie") == 1).then(2).otherwise(3)
-#             )).alias("winner")
-#     ])
-
-#     for exp in exps.values():
-#         pred_cols = [
-#             f"winner_model_a_prob_{exp}",
-#             f"winner_model_b_prob_{exp}",
-#             f"winner_tie_prob_{exp}",
-#         ]
-
-#         temp_scores = joined.select(pred_cols).to_numpy()
-
-#         if temp_scores.sum(axis=-1).max() > 1.1:
-#             temp_scores = torch.tensor(temp_scores).softmax(-1)
-#         else:
-#             temp_scores = torch.tensor(temp_scores)
-
-#         joined = joined.with_columns([
-#             pl.Series(name=col, values=temp_scores[:, i].numpy())
-#             for i, col in enumerate(pred_cols)
-#         ])
-
-#         gt_idxs = joined.select(["winner_model_a", "winner_model_b", "winner_tie"]).to_numpy().argsort()[:, -1]
-#         temp = temp_scores[torch.arange(temp_scores.shape[0]), gt_idxs]
-
-#         loss = torch.nn.functional.binary_cross_entropy(
-#             temp, torch.ones(len(temp), dtype=torch.float64), reduction="none"
-#         )
-
-#         joined = joined.with_columns([
-#             pl.Series(name=f"loss_{exp}", values=loss.numpy())
-#         ])
-
-#     joined = joined.with_columns([
-#         pl.col("prompt").str.len_chars().alias("prompt_length"),
-#         pl.col("response_a").str.len_chars().alias("response_a_length"),
-#         pl.col("response_b").str.len_chars().alias("response_b_length"),
-#     ])
-
-#     joined = joined.with_columns([
-#         (pl.col("prompt_length") + pl.col("response_a_length") + pl.col("response_b_length")).alias("total_length")
-#     ])
-
-#     loss_cols = [x for x in joined.columns if "loss" in x]
-
-#     joined = joined.with_columns([
-#         pl.mean_horizontal(loss_cols).alias("avg_loss"),
-#         pl.mean_horizontal([x for x in joined.columns if "winner_model_a_prob" in x]).alias("avg_winner_model_a"),
-#         pl.mean_horizontal([x for x in joined.columns if "winner_model_b_prob" in x]).alias("avg_winner_model_b"),
-#         pl.mean_horizontal([x for x in joined.columns if "winner_tie_prob" in x]).alias("avg_winner_tie"),
-#     ])
-
-#     prob_cols = [x for x in joined.columns if "prob" in x]
-#     loss_cols = [x for x in joined.columns if "loss" in x]
-
-#     joined = joined.with_columns([
-#         pl.col(prob_cols + loss_cols).cast(pl.Float32)
-#     ])
-
-
-
-#     return joined
 
 MAIN_DF, id2texts = make_df()
 
@@ -309,18 +194,6 @@ def filter_df(lower_limit, upper_limit, file, all_check):
 
     return 0, temp
 
-# def filter_df(lower_limit, upper_limit, file, all_check):
-#     if all_check or file is None or file == "":
-#         loss_col = "avg_loss"
-#     else:
-#         loss_col = f"loss_{exps[file]}"
-
-#     temp = MAIN_DF.filter(
-#         (pl.col(loss_col) > lower_limit) & (pl.col(loss_col) < upper_limit)
-#     ).sort(loss_col, descending=True)
-
-#     return 0, temp
-
 
 def make_chat(prompt, response, side, label):
     prompts = json.loads(prompt)
@@ -351,41 +224,6 @@ def make_chat(prompt, response, side, label):
     return chat
 
 
-# def show_chats(idx, df, file, all_check):
-
-#     if idx is None:
-#         return None, None
-
-#     if idx > len(df):
-#         idx = len(df) - 1
-#     if idx < 0:
-#         idx = 0
-
-#     label = df["winner"].iloc[idx]
-
-#     chat_a = make_chat(df["prompt"].iloc[idx], df["response_a"].iloc[idx], "A", label)
-#     chat_b = make_chat(df["prompt"].iloc[idx], df["response_b"].iloc[idx], "B", label)
-
-#     if all_check or file is None or file == "":
-#         score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
-#     else:
-#         score_cols = [
-#             f"winner_model_a_prob_{exps[file]}",
-#             f"winner_model_b_prob_{exps[file]}",
-#             f"winner_tie_prob_{exps[file]}",
-#         ]
-
-#     scores = df[score_cols].iloc[idx].tolist()
-
-#     if all_check or file is None or file == "":
-#         loss_col = "avg_loss"
-#     else:
-#         loss_col = f"loss_{exps[file]}"
-
-#     loss = df[loss_col].iloc[idx]
-
-#     return chat_a, chat_b, label, *scores, loss
-
 def show_chats(idx, df, file, all_check):
     if idx is None:
         return None, None
@@ -458,7 +296,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
         with gr.Column():
-            all_check = gr.Checkbox(label="Use average loss of all files")
+            all_check = gr.Checkbox(label="Use average loss of all files", value=True)
     with gr.Row():
         lower_limit = gr.Slider(
             label="Show samples with loss > this value", minimum=0, maximum=5, value=1
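
For reference, a gr.Checkbox created with value=True starts ticked, so callbacks see the averaged-loss branch by default. A minimal, self-contained sketch of that behavior; the component names and the placeholder loss-column string are hypothetical, not taken from app.py:

import gradio as gr

with gr.Blocks() as demo:
    # value=True makes the checkbox start checked, so the first interaction
    # already uses the "average loss of all files" branch.
    all_check = gr.Checkbox(label="Use average loss of all files", value=True)
    loss_col = gr.Textbox(label="Loss column in use")

    def pick_loss_col(use_avg):
        # mirrors the filter_df branching: average loss vs. a per-file loss column
        return "avg_loss" if use_avg else "loss_<file>"

    all_check.change(pick_loss_col, inputs=all_check, outputs=loss_col)

if __name__ == "__main__":
    demo.launch()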