tlkh committed on
Commit
f8f27d8
·
1 Parent(s): 0d49845

update app

Browse files
Files changed (1) hide show
  1. app.py +28 -23
app.py CHANGED
@@ -8,21 +8,25 @@ split = st.sidebar.selectbox("Dataset Split", ["train", "test"])
8
  display = st.sidebar.selectbox("Source", ["All", "Only MRPC", "Only MRPC-R1"])
9
 
10
  ptype = st.sidebar.radio("Paraphrase Pair Types", ["All",
11
- "Only Paraphrases (MRPC-R1)",
12
- "Only Paraphrases (MRPC)",
13
- "Rejected Paraphrases from MRPC",
14
- "Corrected Paraphrases from MRPC"])
15
 
16
  st.sidebar.markdown("**Score Filter Options**")
17
  filter_by = st.sidebar.selectbox("Filter By Scores From", ["MRPC", "MRPC-R1"])
18
- display_range_wpd = st.sidebar.slider("Filter by WPD Scores", min_value=0.0, max_value=1.0, value=(0.1,0.7))
19
- display_range_ld = st.sidebar.slider("Filter by LD Scores", min_value=0.0, max_value=1.0, value=(0.1,0.4))
 
 
 
20
 
21
  st.sidebar.markdown("""**Explanation**
22
  This demo allows you to explore the data inside [MRPC](https://www.microsoft.com/en-us/download/details.aspx?id=52398), showing how we can use Word Position Deviation (WPD) and Lexical Deviation (LD) to find different types of paraphrases. By using what we observe from the data, we can also correct numerous labelling errors inside MRPC, presenting the a revision of MRPC termed as MRPC-R1. This demo accompanies the paper ["Towards Better Characterization of Paraphrases" (ACL 2022)](https://github.com/tlkh/paraphrase-metrics).""")
23
 
24
  st.markdown("**MRPC Paraphrase Data Explorer**")
25
 
 
26
  def load_df(split):
27
  if split == "train":
28
  df = pd.read_csv("./mrpc_train_scores.csv")
@@ -31,7 +35,8 @@ def load_df(split):
31
  df.reset_index(drop=True, inplace=True)
32
  return df
33
 
34
- def filter_df(df, display, ptype, filter_by):
 
35
  # filter data
36
  if display == "MRPC":
37
  df = df.drop(["new_s1", "new_s2"], axis=1)
@@ -56,17 +61,21 @@ def filter_df(df, display, ptype, filter_by):
56
  # sort by scores
57
  if filter_by == "MRPC":
58
  # wpd
59
- condition = (df_sel.og_wpd >= display_range_wpd[0]) & (df_sel.og_wpd < display_range_wpd[1])
 
60
  df_sel = df_sel[condition]
61
  # ld
62
- condition = (df_sel.og_ld >= display_range_ld[0]) & (df_sel.og_ld < display_range_ld[1])
 
63
  df_sel = df_sel[condition]
64
  else:
65
  # wpd
66
- condition = (df_sel.new_wpd >= display_range_wpd[0]) & (df_sel.new_wpd < display_range_wpd[1])
 
67
  df_sel = df_sel[condition]
68
  # ld
69
- condition = (df_sel.new_ld >= display_range_ld[0]) & (df_sel.new_ld < display_range_ld[1])
 
70
  df_sel = df_sel[condition]
71
  # filter scores
72
  if filter_by == "MRPC":
@@ -75,22 +84,18 @@ def filter_df(df, display, ptype, filter_by):
75
  else:
76
  df_sel.sort_values("new_ld", inplace=True)
77
  df_sel.sort_values("new_wpd", inplace=True)
 
 
 
 
 
78
  return df_sel
79
-
80
 
81
- df = load_df(split)
82
 
83
- df_sel = filter_df(df, display, ptype, filter_by)
84
 
85
- # CSS to inject contained in a string
86
- hide_table_row_index = """
87
- <style>
88
- tbody th {display:none}
89
- .blank {display:none}
90
- </style>
91
- """
92
 
93
- # inject css at the end
94
- st.markdown("Total "+str(len(df_sel))+" items"+hide_table_row_index, unsafe_allow_html=True)
95
 
96
  st.table(data=df_sel)
 
8
  display = st.sidebar.selectbox("Source", ["All", "Only MRPC", "Only MRPC-R1"])
9
 
10
  ptype = st.sidebar.radio("Paraphrase Pair Types", ["All",
11
+ "Only Paraphrases (MRPC-R1)",
12
+ "Only Paraphrases (MRPC)",
13
+ "Rejected Paraphrases from MRPC",
14
+ "Corrected Paraphrases from MRPC"])
15
 
16
  st.sidebar.markdown("**Score Filter Options**")
17
  filter_by = st.sidebar.selectbox("Filter By Scores From", ["MRPC", "MRPC-R1"])
18
+ display_range_wpd = st.sidebar.slider(
19
+ "Filter by WPD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.7))
20
+ display_range_ld = st.sidebar.slider(
21
+ "Filter by LD Scores", min_value=0.0, max_value=1.0, value=(0.1, 0.4))
22
+ display_scores = st.sidebar.checkbox("Display scores", value=False)
23
 
24
  st.sidebar.markdown("""**Explanation**
25
  This demo allows you to explore the data inside [MRPC](https://www.microsoft.com/en-us/download/details.aspx?id=52398), showing how we can use Word Position Deviation (WPD) and Lexical Deviation (LD) to find different types of paraphrases. By using what we observe from the data, we can also correct numerous labelling errors inside MRPC, presenting the a revision of MRPC termed as MRPC-R1. This demo accompanies the paper ["Towards Better Characterization of Paraphrases" (ACL 2022)](https://github.com/tlkh/paraphrase-metrics).""")
26
 
27
  st.markdown("**MRPC Paraphrase Data Explorer**")
28
 
29
+
30
  def load_df(split):
31
  if split == "train":
32
  df = pd.read_csv("./mrpc_train_scores.csv")
 
35
  df.reset_index(drop=True, inplace=True)
36
  return df
37
 
38
+
39
+ def filter_df(df, display, ptype, filter_by, display_scores):
40
  # filter data
41
  if display == "MRPC":
42
  df = df.drop(["new_s1", "new_s2"], axis=1)
 
61
  # sort by scores
62
  if filter_by == "MRPC":
63
  # wpd
64
+ condition = (df_sel.og_wpd >= display_range_wpd[0]) & (
65
+ df_sel.og_wpd < display_range_wpd[1])
66
  df_sel = df_sel[condition]
67
  # ld
68
+ condition = (df_sel.og_ld >= display_range_ld[0]) & (
69
+ df_sel.og_ld < display_range_ld[1])
70
  df_sel = df_sel[condition]
71
  else:
72
  # wpd
73
+ condition = (df_sel.new_wpd >= display_range_wpd[0]) & (
74
+ df_sel.new_wpd < display_range_wpd[1])
75
  df_sel = df_sel[condition]
76
  # ld
77
+ condition = (df_sel.new_ld >= display_range_ld[0]) & (
78
+ df_sel.new_ld < display_range_ld[1])
79
  df_sel = df_sel[condition]
80
  # filter scores
81
  if filter_by == "MRPC":
 
84
  else:
85
  df_sel.sort_values("new_ld", inplace=True)
86
  df_sel.sort_values("new_wpd", inplace=True)
87
+ if not display_scores:
88
+ df_sel.drop(["og_ld", "og_wpd", "new_ld", "new_wpd"], axis=1, inplace=True)
89
+ label_col = df_sel["og_label"].astype(str)+"->"+df_sel["new_label"].astype(str)
90
+ df_sel["og/new label"] = label_col
91
+ df_sel.drop(["remarks", "og_label", "new_label"], axis=1, inplace=True)
92
  return df_sel
 
93
 
 
94
 
95
+ df = load_df(split)
96
 
97
+ df_sel = filter_df(df, display, ptype, filter_by, display_scores)
 
 
 
 
 
 
98
 
99
+ st.markdown("Displaying "+str(len(df_sel))+" items")
 
100
 
101
  st.table(data=df_sel)