Spaces:

tlkh
/

paraphrase-metrics-mrpc

Runtime error

App Files Files Community

tlkh committed on Mar 11, 2022

Commit

42528b7

1 Parent(s): f8f27d8

Update app

Browse files

Files changed (1) hide show

app.py +20 -14

app.py CHANGED Viewed

@@ -3,15 +3,22 @@ import pandas as pd
 st.set_page_config(layout="wide")
-st.sidebar.markdown("**Data Filter Options**")
-split = st.sidebar.selectbox("Dataset Split", ["train", "test"])
-display = st.sidebar.selectbox("Source", ["All", "Only MRPC", "Only MRPC-R1"])
-ptype = st.sidebar.radio("Paraphrase Pair Types", ["All",
-                                                   "Only Paraphrases (MRPC-R1)",
-                                                   "Only Paraphrases (MRPC)",
-                                                   "Rejected Paraphrases from MRPC",
-                                                   "Corrected Paraphrases from MRPC"])
 st.sidebar.markdown("**Score Filter Options**")
 filter_by = st.sidebar.selectbox("Filter By Scores From", ["MRPC", "MRPC-R1"])
@@ -21,10 +28,7 @@ display_range_ld = st.sidebar.slider(
     "Filter by LD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.4))
 display_scores = st.sidebar.checkbox("Display scores", value=False)
-st.sidebar.markdown("""**Explanation**
-This demo allows you to explore the data inside [MRPC](https://www.microsoft.com/en-us/download/details.aspx?id=52398), showing how we can use Word Position Deviation (WPD) and Lexical Deviation (LD) to find different types of paraphrases. By using what we observe from the data, we can also correct numerous labelling errors inside MRPC, presenting the a revision of MRPC termed as MRPC-R1. This demo accompanies the paper ["Towards Better Characterization of Paraphrases" (ACL 2022)](https://github.com/tlkh/paraphrase-metrics).""")
-st.markdown("**MRPC Paraphrase Data Explorer**")
 def load_df(split):
@@ -85,8 +89,10 @@ def filter_df(df, display, ptype, filter_by, display_scores):
         df_sel.sort_values("new_ld", inplace=True)
         df_sel.sort_values("new_wpd", inplace=True)
     if not display_scores:
-        df_sel.drop(["og_ld", "og_wpd", "new_ld", "new_wpd"], axis=1, inplace=True)
-    label_col = df_sel["og_label"].astype(str)+"->"+df_sel["new_label"].astype(str)
     df_sel["og/new label"] = label_col
     df_sel.drop(["remarks", "og_label", "new_label"], axis=1, inplace=True)
     return df_sel

 st.set_page_config(layout="wide")
+with st.sidebar.expander("Explanation", expanded=False):
+    st.markdown("""This demo allows you to explore the data inside [MRPC](https://www.microsoft.com/en-us/download/details.aspx?id=52398),
+    showing how we can use Word Position Deviation (WPD) and Lexical Deviation (LD) to find different types of paraphrases.
+    By using what we observe from the data, we can also correct numerous labelling errors inside MRPC, presenting the a revision of MRPC termed as MRPC-R1.
+    You can see the rejected and corrected paraphrases by changing the **Display Types** option below.
+    This demo accompanies the paper ["Towards Better Characterization of Paraphrases" (ACL 2022)](https://github.com/tlkh/paraphrase-metrics).""")
+with st.sidebar.expander("Dataset Options", expanded=False):
+    split = st.selectbox("Dataset Split", ["train", "test"])
+    display = st.selectbox("Source", ["All", "Only MRPC", "Only MRPC-R1"])
+ptype = st.sidebar.radio("Display Types", ["All",
+                                           "Only Paraphrases (MRPC-R1)",
+                                           "Only Paraphrases (MRPC)",
+                                           "Rejected Paraphrases from MRPC",
+                                           "Corrected Paraphrases from MRPC"])
 st.sidebar.markdown("**Score Filter Options**")
 filter_by = st.sidebar.selectbox("Filter By Scores From", ["MRPC", "MRPC-R1"])
     "Filter by LD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.4))
 display_scores = st.sidebar.checkbox("Display scores", value=False)
+st.markdown("### MRPC Paraphrase Data Explorer")
 def load_df(split):
         df_sel.sort_values("new_ld", inplace=True)
         df_sel.sort_values("new_wpd", inplace=True)
     if not display_scores:
+        df_sel.drop(["og_ld", "og_wpd", "new_ld", "new_wpd"],
+                    axis=1, inplace=True)
+    label_col = df_sel["og_label"].astype(
+        str)+"->"+df_sel["new_label"].astype(str)
     df_sel["og/new label"] = label_col
     df_sel.drop(["remarks", "og_label", "new_label"], axis=1, inplace=True)
     return df_sel