Spaces:
Runtime error
Runtime error
Update app
Browse files
app.py
CHANGED
@@ -23,14 +23,16 @@ with st.sidebar.expander("⚙️ Dataset Options", expanded=False):
|
|
23 |
st.markdown("This allows you to switch between the MRPC train and test sets, as well as choose to display only the original paraphrase pairs (MRPC) and/or the corrected pairs (MRPC-R1).")
|
24 |
split = st.radio("Dataset Split", ["train", "test"])
|
25 |
display = st.radio("Display only pairs from", [
|
26 |
-
|
27 |
|
28 |
-
ptype = st.sidebar.radio("Display Types", ["All",
|
29 |
-
"Only Paraphrases
|
30 |
-
"Only Paraphrases (MRPC)",
|
31 |
"Rejected Paraphrases from MRPC",
|
32 |
"Corrected Paraphrases from MRPC"])
|
33 |
|
|
|
|
|
|
|
34 |
st.sidebar.markdown("**WPD/LD Score Filter Options**")
|
35 |
display_range_wpd = st.sidebar.slider(
|
36 |
"Filter by WPD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.7))
|
@@ -49,7 +51,7 @@ with st.sidebar.expander("📍 WPD/LD Score Explanation", expanded=False):
|
|
49 |
When LD is set to a high range (>0.5), we can find many pairs labelled as paraphrases in MRPC are not in fact paraphrases.
|
50 |
""")
|
51 |
|
52 |
-
st.markdown("**Additional Options**")
|
53 |
|
54 |
filter_by = st.radio(
|
55 |
"Filter By Scores From", ["MRPC", "MRPC-R1"])
|
@@ -73,10 +75,10 @@ def filter_df(df, display, ptype, filter_by, display_scores):
|
|
73 |
elif display == "Only MRPC-R1":
|
74 |
df = df.drop(["og_s1", "og_s2"], axis=1)
|
75 |
# filter paraphrase type
|
76 |
-
if ptype == "
|
77 |
condition = df.og_label == 1
|
78 |
df_sel = df[condition]
|
79 |
-
elif ptype == "Only Paraphrases
|
80 |
condition = df.new_label == 1
|
81 |
df_sel = df[condition]
|
82 |
elif ptype == "Rejected Paraphrases from MRPC":
|
@@ -117,10 +119,13 @@ def filter_df(df, display, ptype, filter_by, display_scores):
|
|
117 |
if not display_scores:
|
118 |
df_sel.drop(["og_ld", "og_wpd", "new_ld", "new_wpd"],
|
119 |
axis=1, inplace=True)
|
|
|
|
|
|
|
120 |
label_col = df_sel["og_label"].astype(
|
121 |
str)+"->"+df_sel["new_label"].astype(str)
|
122 |
df_sel["og/new label"] = label_col
|
123 |
-
df_sel.drop(["
|
124 |
return df_sel
|
125 |
|
126 |
|
|
|
23 |
st.markdown("This allows you to switch between the MRPC train and test sets, as well as choose to display only the original paraphrase pairs (MRPC) and/or the corrected pairs (MRPC-R1).")
|
24 |
split = st.radio("Dataset Split", ["train", "test"])
|
25 |
display = st.radio("Display only pairs from", [
|
26 |
+
"Both MRPC and MRPC-R1", "Only MRPC", "Only MRPC-R1"])
|
27 |
|
28 |
+
ptype = st.sidebar.radio("Display Types", ["All Paraphrases",
|
29 |
+
"Only Paraphrases in MRPC-R1",
|
|
|
30 |
"Rejected Paraphrases from MRPC",
|
31 |
"Corrected Paraphrases from MRPC"])
|
32 |
|
33 |
+
display_reason = st.sidebar.checkbox(
|
34 |
+
"Display reason for label change", value=False)
|
35 |
+
|
36 |
st.sidebar.markdown("**WPD/LD Score Filter Options**")
|
37 |
display_range_wpd = st.sidebar.slider(
|
38 |
"Filter by WPD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.7))
|
|
|
51 |
When LD is set to a high range (>0.5), we can find many pairs labelled as paraphrases in MRPC are not in fact paraphrases.
|
52 |
""")
|
53 |
|
54 |
+
st.markdown("**Additional Filter Options**")
|
55 |
|
56 |
filter_by = st.radio(
|
57 |
"Filter By Scores From", ["MRPC", "MRPC-R1"])
|
|
|
75 |
elif display == "Only MRPC-R1":
|
76 |
df = df.drop(["og_s1", "og_s2"], axis=1)
|
77 |
# filter paraphrase type
|
78 |
+
if ptype == "All Paraphrases":
|
79 |
condition = df.og_label == 1
|
80 |
df_sel = df[condition]
|
81 |
+
elif ptype == "Only Paraphrases in MRPC-R1":
|
82 |
condition = df.new_label == 1
|
83 |
df_sel = df[condition]
|
84 |
elif ptype == "Rejected Paraphrases from MRPC":
|
|
|
119 |
if not display_scores:
|
120 |
df_sel.drop(["og_ld", "og_wpd", "new_ld", "new_wpd"],
|
121 |
axis=1, inplace=True)
|
122 |
+
if not display_reason:
|
123 |
+
df_sel.drop(["remarks", ],
|
124 |
+
axis=1, inplace=True)
|
125 |
label_col = df_sel["og_label"].astype(
|
126 |
str)+"->"+df_sel["new_label"].astype(str)
|
127 |
df_sel["og/new label"] = label_col
|
128 |
+
df_sel.drop(["og_label", "new_label"], axis=1, inplace=True)
|
129 |
return df_sel
|
130 |
|
131 |
|