Commit
·
1188a0f
1
Parent(s):
4a454ea
update app
Browse files
app.py
CHANGED
|
@@ -31,12 +31,6 @@ st.markdown(
|
|
| 31 |
margin-bottom: 13px;
|
| 32 |
border-bottom: 2px solid #333;
|
| 33 |
}
|
| 34 |
-
#logo {
|
| 35 |
-
width: auto;
|
| 36 |
-
height: 75px;
|
| 37 |
-
margin-top: -15px;
|
| 38 |
-
margin-bottom: 15px;
|
| 39 |
-
}
|
| 40 |
.container {
|
| 41 |
background-color: #fff;
|
| 42 |
padding: 30px;
|
|
@@ -66,12 +60,6 @@ st.markdown(
|
|
| 66 |
font-size: 24px;
|
| 67 |
margin-bottom: 20px;
|
| 68 |
}
|
| 69 |
-
h4 {
|
| 70 |
-
font-weight: 500;
|
| 71 |
-
font-size: 15px;
|
| 72 |
-
margin-top: 15px;
|
| 73 |
-
margin-bottom: 15px;
|
| 74 |
-
}
|
| 75 |
label {
|
| 76 |
font-weight: 500;
|
| 77 |
}
|
|
@@ -89,22 +77,38 @@ st.markdown(
|
|
| 89 |
border-radius: 5px;
|
| 90 |
display: inline-block;
|
| 91 |
margin-top: 15px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
}
|
| 93 |
</style>
|
| 94 |
""",
|
| 95 |
unsafe_allow_html=True,
|
| 96 |
)
|
| 97 |
|
| 98 |
-
st.markdown(
|
| 99 |
-
"""
|
| 100 |
-
<div class="header">
|
| 101 |
-
<img src="https://raw.githubusercontent.com/ca-roll/ca-roll.github.io/release/images/logopic/caroll.png" alt="Research Group Logo" id="logo">
|
| 102 |
-
<h4>Demonstrating <a href="https://ca-roll.github.io/" target="_blank">CAROLL Research Group</a>'s Language Models</h4>
|
| 103 |
-
</div>
|
| 104 |
-
""",
|
| 105 |
-
unsafe_allow_html=True,
|
| 106 |
-
)
|
| 107 |
-
|
| 108 |
# Initialization for Legal NER
|
| 109 |
tokenizer_legal = AutoTokenizer.from_pretrained("PaDaS-Lab/gbert-legal-ner")
|
| 110 |
model_legal = AutoModelForTokenClassification.from_pretrained(
|
|
@@ -203,7 +207,7 @@ def color_substrings(input_string, model_output, ner_labels, current_classes):
|
|
| 203 |
start, end, label = entity["start"], entity["end"], entity["label"]
|
| 204 |
html_output += input_string[last_end:start]
|
| 205 |
tooltip = current_classes.get(label, "")
|
| 206 |
-
html_output += f'<span style="color: {label_to_color.get(label)}; font-weight: bold;"
|
| 207 |
last_end = end
|
| 208 |
|
| 209 |
html_output += input_string[last_end:]
|
|
@@ -214,7 +218,7 @@ def color_substrings(input_string, model_output, ner_labels, current_classes):
|
|
| 214 |
st.title("CAROLL Language Models - Demo")
|
| 215 |
st.markdown("<hr>", unsafe_allow_html=True)
|
| 216 |
|
| 217 |
-
test_sentence = st.text_area("Enter Text:", height=
|
| 218 |
model_choice = st.selectbox(
|
| 219 |
"Choose a model:", ["Legal NER", "GDPR Privacy Policy NER"], index=0
|
| 220 |
)
|
|
@@ -244,7 +248,7 @@ if st.button("Analyze"):
|
|
| 244 |
)
|
| 245 |
|
| 246 |
st.markdown(
|
| 247 |
-
'<div class="sec"><strong>Analyzed text</strong></div><br
|
| 248 |
colored_html
|
| 249 |
),
|
| 250 |
unsafe_allow_html=True,
|
|
|
|
| 31 |
margin-bottom: 13px;
|
| 32 |
border-bottom: 2px solid #333;
|
| 33 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
.container {
|
| 35 |
background-color: #fff;
|
| 36 |
padding: 30px;
|
|
|
|
| 60 |
font-size: 24px;
|
| 61 |
margin-bottom: 20px;
|
| 62 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
label {
|
| 64 |
font-weight: 500;
|
| 65 |
}
|
|
|
|
| 77 |
border-radius: 5px;
|
| 78 |
display: inline-block;
|
| 79 |
margin-top: 15px;
|
| 80 |
+
margin-bottom: 15px;
|
| 81 |
+
}
|
| 82 |
+
.tooltip {
|
| 83 |
+
position: relative;
|
| 84 |
+
display: inline-block;
|
| 85 |
+
cursor: pointer;
|
| 86 |
+
}
|
| 87 |
+
.tooltip .tooltiptext {
|
| 88 |
+
visibility: hidden;
|
| 89 |
+
width: 120px;
|
| 90 |
+
background-color: #6c757d;
|
| 91 |
+
color: #fff;
|
| 92 |
+
text-align: center;
|
| 93 |
+
border-radius: 3px;
|
| 94 |
+
padding: 3px;
|
| 95 |
+
position: absolute;
|
| 96 |
+
z-index: 1;
|
| 97 |
+
bottom: 125%;
|
| 98 |
+
left: 50%;
|
| 99 |
+
margin-left: -60px;
|
| 100 |
+
opacity: 0;
|
| 101 |
+
transition: opacity 0.3s;
|
| 102 |
+
}
|
| 103 |
+
.tooltip:hover .tooltiptext {
|
| 104 |
+
visibility: visible;
|
| 105 |
+
opacity: 1;
|
| 106 |
}
|
| 107 |
</style>
|
| 108 |
""",
|
| 109 |
unsafe_allow_html=True,
|
| 110 |
)
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# Initialization for Legal NER
|
| 113 |
tokenizer_legal = AutoTokenizer.from_pretrained("PaDaS-Lab/gbert-legal-ner")
|
| 114 |
model_legal = AutoModelForTokenClassification.from_pretrained(
|
|
|
|
| 207 |
start, end, label = entity["start"], entity["end"], entity["label"]
|
| 208 |
html_output += input_string[last_end:start]
|
| 209 |
tooltip = current_classes.get(label, "")
|
| 210 |
+
html_output += f'<span class="tooltip" style="color: {label_to_color.get(label)}; font-weight: bold;">{input_string[start:end]}<span class="tooltiptext">{tooltip}</span></span>'
|
| 211 |
last_end = end
|
| 212 |
|
| 213 |
html_output += input_string[last_end:]
|
|
|
|
| 218 |
st.title("CAROLL Language Models - Demo")
|
| 219 |
st.markdown("<hr>", unsafe_allow_html=True)
|
| 220 |
|
| 221 |
+
test_sentence = st.text_area("Enter Text:", height=150)
|
| 222 |
model_choice = st.selectbox(
|
| 223 |
"Choose a model:", ["Legal NER", "GDPR Privacy Policy NER"], index=0
|
| 224 |
)
|
|
|
|
| 248 |
)
|
| 249 |
|
| 250 |
st.markdown(
|
| 251 |
+
'<div class="sec"><strong>Analyzed text</strong></div><br>{}<br><br>'.format(
|
| 252 |
colored_html
|
| 253 |
),
|
| 254 |
unsafe_allow_html=True,
|