tlkh committed on
Commit
c1a06e1
·
1 Parent(s): f1e8fe8

update app

Browse files
Files changed (1) hide show
  1. app.py +19 -7
app.py CHANGED
@@ -21,8 +21,9 @@ with st.sidebar.expander("📍 Explanation", expanded=False):
21
 
22
  with st.sidebar.expander("⚙️ Dataset Options", expanded=False):
23
  st.markdown("This allows you to switch between the MRPC train and test sets, as well as choose to display only the original paraphrase pairs (MRPC) and/or the corrected pairs (MRPC-R1).")
24
- split = st.selectbox("Dataset Split", ["train", "test"])
25
- display = st.selectbox("Display only pairs from", ["All", "Only MRPC", "Only MRPC-R1"])
 
26
 
27
  ptype = st.sidebar.radio("Display Types", ["All",
28
  "Only Paraphrases (MRPC-R1)",
@@ -31,12 +32,10 @@ ptype = st.sidebar.radio("Display Types", ["All",
31
  "Corrected Paraphrases from MRPC"])
32
 
33
  st.sidebar.markdown("**WPD/LD Score Filter Options**")
34
- filter_by = st.sidebar.selectbox("Filter By Scores From", ["MRPC", "MRPC-R1"])
35
  display_range_wpd = st.sidebar.slider(
36
  "Filter by WPD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.7))
37
  display_range_ld = st.sidebar.slider(
38
  "Filter by LD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.4))
39
- display_scores = st.sidebar.checkbox("Display scores", value=False)
40
 
41
  with st.sidebar.expander("📍 WPD/LD Score Explanation", expanded=False):
42
  st.markdown("""
@@ -45,9 +44,19 @@ with st.sidebar.expander("📍 WPD/LD Score Explanation", expanded=False):
45
  * WPD measures difference in the sentence structure
46
  * LD measures differences in the words used
47
 
48
- By setting WPD to a high range (eg >0.4) and LD to a low range (eg <0.1), we can find paraphrases that do not change much in words used but have very different structures.
 
 
49
  """)
50
 
 
 
 
 
 
 
 
 
51
  def load_df(split):
52
  if split == "train":
53
  df = pd.read_csv("./mrpc_train_scores.csv")
@@ -114,11 +123,14 @@ def filter_df(df, display, ptype, filter_by, display_scores):
114
  df_sel.drop(["remarks", "og_label", "new_label"], axis=1, inplace=True)
115
  return df_sel
116
 
 
117
  df = load_df(split)
118
 
119
  df_sel = filter_df(df, display, ptype, filter_by, display_scores)
120
- df_sel.rename(columns={"og_s1": "Original S1 (MRPC)", "og_s2": "Original S2 (MRPC)", "new_s1": "New S1 (MRPC-R1)", "new_s2": "New S2 (MRPC-R1)"}, inplace=True)
 
121
 
122
- st.markdown("**MRPC Paraphrase Data Explorer** (Displaying "+str(len(df_sel))+" items)")
 
123
 
124
  st.table(data=df_sel)
 
21
 
22
  with st.sidebar.expander("⚙️ Dataset Options", expanded=False):
23
  st.markdown("This allows you to switch between the MRPC train and test sets, as well as choose to display only the original paraphrase pairs (MRPC) and/or the corrected pairs (MRPC-R1).")
24
+ split = st.radio("Dataset Split", ["train", "test"])
25
+ display = st.radio("Display only pairs from", [
26
+ "All", "Only MRPC", "Only MRPC-R1"])
27
 
28
  ptype = st.sidebar.radio("Display Types", ["All",
29
  "Only Paraphrases (MRPC-R1)",
 
32
  "Corrected Paraphrases from MRPC"])
33
 
34
  st.sidebar.markdown("**WPD/LD Score Filter Options**")
 
35
  display_range_wpd = st.sidebar.slider(
36
  "Filter by WPD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.7))
37
  display_range_ld = st.sidebar.slider(
38
  "Filter by LD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.4))
 
39
 
40
  with st.sidebar.expander("📍 WPD/LD Score Explanation", expanded=False):
41
  st.markdown("""
 
44
  * WPD measures difference in the sentence structure
45
  * LD measures differences in the words used
46
 
47
+ By setting WPD to a high range (>0.4) and LD to a low range (<0.1), we can find paraphrases that do not change much in words used but have very different structures.
48
+
49
+ When LD is set to a high range (>0.5), we can find many pairs labelled as paraphrases in MRPC are not in fact paraphrases.
50
  """)
51
 
52
+ st.markdown("**Additional Options**")
53
+
54
+ filter_by = st.radio(
55
+ "Filter By Scores From", ["MRPC", "MRPC-R1"])
56
+
57
+ display_scores = st.checkbox("Display scores", value=False)
58
+
59
+
60
  def load_df(split):
61
  if split == "train":
62
  df = pd.read_csv("./mrpc_train_scores.csv")
 
123
  df_sel.drop(["remarks", "og_label", "new_label"], axis=1, inplace=True)
124
  return df_sel
125
 
126
+
127
  df = load_df(split)
128
 
129
  df_sel = filter_df(df, display, ptype, filter_by, display_scores)
130
+ df_sel.rename(columns={"og_s1": "Original S1 (MRPC)", "og_s2": "Original S2 (MRPC)",
131
+ "new_s1": "New S1 (MRPC-R1)", "new_s2": "New S2 (MRPC-R1)"}, inplace=True)
132
 
133
+ st.markdown("**MRPC Paraphrase Data Explorer** (Displaying " +
134
+ str(len(df_sel))+" items)")
135
 
136
  st.table(data=df_sel)