Spaces:
Sleeping
Sleeping
Upload streamlit_app.py
Browse files- src/streamlit_app.py +61 -2
src/streamlit_app.py
CHANGED
@@ -20,6 +20,18 @@ LABEL_COLORS = {
|
|
20 |
'LABEL-8': '#ffc6ff', # I-SET
|
21 |
}
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
@st.cache_resource(show_spinner=True)
|
24 |
def load_model():
|
25 |
tokenizer = AutoTokenizer.from_pretrained('asdc/Bio-RoBERTime')
|
@@ -75,12 +87,59 @@ def colorize_entities(ner_result: List[Tuple[str, str]]) -> str:
|
|
75 |
html += f'{token} '
|
76 |
return html
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
st.title('LLM-powered Named Entity Recognition (NER)')
|
79 |
|
|
|
|
|
|
|
80 |
user_text = st.text_area('Enter text for NER:', height=150)
|
81 |
|
82 |
if user_text:
|
83 |
ner_result = ner_with_robertime(user_text)
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
st.caption('Model: [asdc/Bio-RoBERTime](https://huggingface.co/asdc/Bio-RoBERTime)')
|
|
|
20 |
'LABEL-8': '#ffc6ff', # I-SET
|
21 |
}
|
22 |
|
23 |
+
# Human-readable BIO tag for every raw model label. A tag's position in the
# tuple below is the numeric suffix of its 'LABEL-n' key, so 'NONE' maps to
# 'LABEL-0', 'B-DATE' to 'LABEL-1', and so on up to 'LABEL-8'.
LABEL_MEANINGS = {
    f'LABEL-{index}': tag
    for index, tag in enumerate((
        'NONE',
        'B-DATE', 'I-DATE',
        'B-TIME', 'I-TIME',
        'B-DURATION', 'I-DURATION',
        'B-SET', 'I-SET',
    ))
}
|
34 |
+
|
35 |
@st.cache_resource(show_spinner=True)
|
36 |
def load_model():
|
37 |
tokenizer = AutoTokenizer.from_pretrained('asdc/Bio-RoBERTime')
|
|
|
87 |
html += f'{token} '
|
88 |
return html
|
89 |
|
90 |
+
def extract_entities(ner_result: List[Tuple[str, str]], label_meanings=None) -> List[Tuple[str, str]]:
    """Group consecutive labelled tokens into entity spans.

    The labels follow a BIO scheme (see ``LABEL_MEANINGS``): ``B-X`` and
    ``I-X`` are *different* raw labels that belong to the same entity type.
    Grouping only on identical labels therefore splits a multi-token entity
    such as ``B-DATE I-DATE`` into two pieces. Here a token extends the open
    span when either

    * its label equals the previous token's label (the original rule), or
    * its tag is ``I-X`` and the open span has entity type ``X``.

    Args:
        ner_result: ``(token, label)`` pairs in text order. Tokens labelled
            ``'LABEL-0'`` are outside any entity and close the open span.
        label_meanings: Optional mapping from raw label to BIO tag
            (e.g. ``{'LABEL-1': 'B-DATE'}``). Defaults to the module-level
            ``LABEL_MEANINGS``. Labels missing from the mapping are treated
            as their own tag, so they are only merged with identical labels.

    Returns:
        A list of ``(text, label)`` pairs where ``text`` is the span's tokens
        joined with single spaces and ``label`` is the raw label of the
        span's first token (so it can still be used as a key into
        ``LABEL_COLORS`` / ``LABEL_MEANINGS``).
    """
    if label_meanings is None:
        label_meanings = LABEL_MEANINGS

    entities = []
    span_tokens = []    # tokens of the span currently being built
    span_label = None   # raw label of the span's first token
    span_kind = None    # entity type of the span, e.g. 'DATE'
    prev_label = None   # raw label of the previous in-span token

    for token, label in ner_result:
        if label == 'LABEL-0':
            # An outside token terminates whatever span is open.
            if span_tokens:
                entities.append((' '.join(span_tokens), span_label))
            span_tokens, span_label, span_kind, prev_label = [], None, None, None
            continue

        tag = label_meanings.get(label, label)
        prefix, _, kind = tag.partition('-')
        if prefix not in ('B', 'I'):
            # Not a BIO tag: the whole tag acts as the entity type.
            kind = tag

        continues_span = bool(span_tokens) and (
            label == prev_label
            or (prefix == 'I' and kind == span_kind)
        )
        if continues_span:
            span_tokens.append(token)
        else:
            if span_tokens:
                entities.append((' '.join(span_tokens), span_label))
            span_tokens = [token]
            span_label = label
            span_kind = kind
        prev_label = label

    # Flush a span that runs to the end of the input.
    if span_tokens:
        entities.append((' '.join(span_tokens), span_label))
    return entities
|
112 |
+
|
113 |
+
def legend_html() -> str:
    """Return the HTML colour legend for the entity labels.

    One coloured ``<span>`` is produced for every entry of ``LABEL_COLORS``
    except ``'LABEL-0'``; each shows the tag from ``LABEL_MEANINGS`` followed
    by the raw label in parentheses. The spans are wrapped in a single
    flex-layout ``<div>``.

    Raises:
        KeyError: If a label in ``LABEL_COLORS`` has no entry in
            ``LABEL_MEANINGS``.
    """
    chips = [
        f'<span style="background-color:{color};padding:2px 8px;border-radius:4px;">{LABEL_MEANINGS[label]} ({label})</span>'
        for label, color in LABEL_COLORS.items()
        if label != 'LABEL-0'
    ]
    return '<div style="display:flex;flex-wrap:wrap;gap:8px;">' + ''.join(chips) + '</div>'
|
122 |
+
|
123 |
# Imported here, next to its only use, because the file's import block is not
# part of this section. Needed to escape user-supplied text before it is
# embedded in raw HTML below.
from html import escape

# --- Page layout -----------------------------------------------------------
st.title('LLM-powered Named Entity Recognition (NER)')

# Colour legend; rendered as raw HTML, built only from module constants.
st.markdown('**Legend:**')
st.markdown(legend_html(), unsafe_allow_html=True)

user_text = st.text_area('Enter text for NER:', height=150)

if user_text:
    ner_result = ner_with_robertime(user_text)
    # extract_entities() returns a non-empty list exactly when at least one
    # token carries a label other than 'LABEL-0', so a single check replaces
    # the former has_entity / entities pair (whose inner else was unreachable).
    entities = extract_entities(ner_result)
    if entities:
        st.markdown('#### Entities Highlighted:')
        st.markdown(colorize_entities(ner_result), unsafe_allow_html=True)
        st.markdown('#### Detected Entities:')
        for ent, label in entities:
            # `ent` comes from user-typed text and is rendered with
            # unsafe_allow_html=True, so it must be HTML-escaped.
            st.markdown(f'- <span style="background-color:{LABEL_COLORS[label]};padding:2px 8px;border-radius:4px;">{escape(ent)}</span> <span style="color:#888;">({LABEL_MEANINGS[label]})</span>', unsafe_allow_html=True)
    else:
        st.info('No entities detected.')

# NOTE(review): indentation was lost in the reviewed source; the caption is
# assumed to be unconditional (top level) - confirm.
st.caption('Model: [asdc/Bio-RoBERTime](https://huggingface.co/asdc/Bio-RoBERTime)')
|