Spaces:

tlkh
/

paraphrase-metrics-mrpc

Runtime error

App Files Community

tlkh committed on Mar 13, 2022

Commit

0c4f0e2

1 Parent(s): c1a06e1

Update app

Browse files

Files changed (1) hide show

app.py +13 -8

app.py CHANGED Viewed

@@ -23,14 +23,16 @@ with st.sidebar.expander("⚙️ Dataset Options", expanded=False):
     st.markdown("This allows you to switch between the MRPC train and test sets, as well as choose to display only the original paraphrase pairs (MRPC) and/or the corrected pairs (MRPC-R1).")
     split = st.radio("Dataset Split", ["train", "test"])
     display = st.radio("Display only pairs from", [
-                           "All", "Only MRPC", "Only MRPC-R1"])
-ptype = st.sidebar.radio("Display Types", ["All",
-                                           "Only Paraphrases (MRPC-R1)",
-                                           "Only Paraphrases (MRPC)",
                                            "Rejected Paraphrases from MRPC",
                                            "Corrected Paraphrases from MRPC"])
 st.sidebar.markdown("**WPD/LD Score Filter Options**")
 display_range_wpd = st.sidebar.slider(
     "Filter by WPD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.7))
@@ -49,7 +51,7 @@ with st.sidebar.expander("📍 WPD/LD Score Explanation", expanded=False):
     When LD is set to a high range (>0.5), we can find many pairs labelled as paraphrases in MRPC are not in fact paraphrases.
     """)
-    st.markdown("**Additional Options**")
     filter_by = st.radio(
         "Filter By Scores From", ["MRPC", "MRPC-R1"])
@@ -73,10 +75,10 @@ def filter_df(df, display, ptype, filter_by, display_scores):
     elif display == "Only MRPC-R1":
         df = df.drop(["og_s1", "og_s2"], axis=1)
     # filter paraphrase type
-    if ptype == "Only Paraphrases (MRPC)":
         condition = df.og_label == 1
         df_sel = df[condition]
-    elif ptype == "Only Paraphrases (MRPC-R1)":
         condition = df.new_label == 1
         df_sel = df[condition]
     elif ptype == "Rejected Paraphrases from MRPC":
@@ -117,10 +119,13 @@ def filter_df(df, display, ptype, filter_by, display_scores):
     if not display_scores:
         df_sel.drop(["og_ld", "og_wpd", "new_ld", "new_wpd"],
                     axis=1, inplace=True)
     label_col = df_sel["og_label"].astype(
         str)+"->"+df_sel["new_label"].astype(str)
     df_sel["og/new label"] = label_col
-    df_sel.drop(["remarks", "og_label", "new_label"], axis=1, inplace=True)
     return df_sel

     st.markdown("This allows you to switch between the MRPC train and test sets, as well as choose to display only the original paraphrase pairs (MRPC) and/or the corrected pairs (MRPC-R1).")
     split = st.radio("Dataset Split", ["train", "test"])
     display = st.radio("Display only pairs from", [
+        "Both MRPC and MRPC-R1", "Only MRPC", "Only MRPC-R1"])
+ptype = st.sidebar.radio("Display Types", ["All Paraphrases",
+                                           "Only Paraphrases in MRPC-R1",
                                            "Rejected Paraphrases from MRPC",
                                            "Corrected Paraphrases from MRPC"])
+display_reason = st.sidebar.checkbox(
+    "Display reason for label change", value=False)
 st.sidebar.markdown("**WPD/LD Score Filter Options**")
 display_range_wpd = st.sidebar.slider(
     "Filter by WPD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.7))
     When LD is set to a high range (>0.5), we can find many pairs labelled as paraphrases in MRPC are not in fact paraphrases.
     """)
+    st.markdown("**Additional Filter Options**")
     filter_by = st.radio(
         "Filter By Scores From", ["MRPC", "MRPC-R1"])
     elif display == "Only MRPC-R1":
         df = df.drop(["og_s1", "og_s2"], axis=1)
     # filter paraphrase type
+    if ptype == "All Paraphrases":
         condition = df.og_label == 1
         df_sel = df[condition]
+    elif ptype == "Only Paraphrases in MRPC-R1":
         condition = df.new_label == 1
         df_sel = df[condition]
     elif ptype == "Rejected Paraphrases from MRPC":
     if not display_scores:
         df_sel.drop(["og_ld", "og_wpd", "new_ld", "new_wpd"],
                     axis=1, inplace=True)
+    if not display_reason:
+        df_sel.drop(["remarks", ],
+                    axis=1, inplace=True)
     label_col = df_sel["og_label"].astype(
         str)+"->"+df_sel["new_label"].astype(str)
     df_sel["og/new label"] = label_col
+    df_sel.drop(["og_label", "new_label"], axis=1, inplace=True)
     return df_sel