asdc committed on
Commit
d9a6a4e
·
verified ·
1 Parent(s): b6ac0c6

Upload streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +61 -2
src/streamlit_app.py CHANGED
@@ -20,6 +20,18 @@ LABEL_COLORS = {
20
  'LABEL-8': '#ffc6ff', # I-SET
21
  }
22
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  @st.cache_resource(show_spinner=True)
24
  def load_model():
25
  tokenizer = AutoTokenizer.from_pretrained('asdc/Bio-RoBERTime')
@@ -75,12 +87,59 @@ def colorize_entities(ner_result: List[Tuple[str, str]]) -> str:
75
  html += f'{token} '
76
  return html
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  st.title('LLM-powered Named Entity Recognition (NER)')
79
 
 
 
 
80
  user_text = st.text_area('Enter text for NER:', height=150)
81
 
82
  if user_text:
83
  ner_result = ner_with_robertime(user_text)
84
- st.markdown('#### Entities:')
85
- st.markdown(colorize_entities(ner_result), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
86
  st.caption('Model: [asdc/Bio-RoBERTime](https://huggingface.co/asdc/Bio-RoBERTime)')
 
20
  'LABEL-8': '#ffc6ff', # I-SET
21
  }
22
 
23
# Human-readable tag for each raw model label. The position of a tag in the
# tuple is the numeric suffix of its 'LABEL-<n>' key.
LABEL_MEANINGS = {
    f'LABEL-{index}': tag
    for index, tag in enumerate((
        'NONE',
        'B-DATE',
        'I-DATE',
        'B-TIME',
        'I-TIME',
        'B-DURATION',
        'I-DURATION',
        'B-SET',
        'I-SET',
    ))
}
34
+
35
  @st.cache_resource(show_spinner=True)
36
  def load_model():
37
  tokenizer = AutoTokenizer.from_pretrained('asdc/Bio-RoBERTime')
 
87
  html += f'{token} '
88
  return html
89
 
90
def extract_entities(
    ner_result: List[Tuple[str, str]],
    label_meanings: "dict[str, str] | None" = None,
) -> List[Tuple[str, str]]:
    """Merge consecutive tagged tokens into whole entity spans.

    Labels are interpreted through ``label_meanings`` as B-/I- tags: a
    ``B-<TYPE>`` token always opens a new span, and an ``I-<TYPE>`` token
    extends the current span when that span has the same ``<TYPE>``.
    Grouping by identical label alone would cut every multi-token entity
    in two, because its first token (B-) and the rest (I-) carry
    different labels.

    Args:
        ner_result: ``(token, label)`` pairs in reading order.
        label_meanings: Maps raw labels to tags such as ``'B-DATE'``.
            Defaults to the module-level ``LABEL_MEANINGS``. Labels that
            are missing from the mapping, or whose tag is not of the form
            ``B-…``/``I-…``, are grouped only with identical neighbours.

    Returns:
        ``(text, label)`` pairs, one per span. ``text`` is the span's
        tokens joined by single spaces and ``label`` is the raw label of
        the span's first token. ``'LABEL-0'`` tokens never appear in a
        span and always close the current one.
    """
    if label_meanings is None:
        label_meanings = LABEL_MEANINGS

    entities = []
    span_tokens = []
    span_label = None  # raw label of the first token in the open span
    span_type = None   # entity type shared by every token in the open span

    for token, label in ner_result:
        if label == 'LABEL-0':
            token_type, begins = None, False
        else:
            prefix, sep, entity_type = label_meanings.get(label, '').partition('-')
            if sep and prefix in ('B', 'I'):
                token_type, begins = entity_type, prefix == 'B'
            else:
                # Unknown tag scheme: fall back to exact-label grouping.
                token_type, begins = label, False

        if token_type is not None and span_tokens and not begins and token_type == span_type:
            span_tokens.append(token)
            continue

        # The open span (if any) ends here.
        if span_tokens:
            entities.append((' '.join(span_tokens), span_label))

        if token_type is None:
            span_tokens, span_label, span_type = [], None, None
        else:
            span_tokens, span_label, span_type = [token], label, token_type

    if span_tokens:
        entities.append((' '.join(span_tokens), span_label))
    return entities
112
+
113
def legend_html() -> str:
    """Return the HTML legend: one colored chip per entity label.

    Walks ``LABEL_COLORS`` in order, skips ``'LABEL-0'``, and captions each
    chip with the matching ``LABEL_MEANINGS`` entry followed by the raw
    label in parentheses. The chips are wrapped in a single flex ``<div>``.
    """
    chips = [
        f'<span style="background-color:{color};padding:2px 8px;border-radius:4px;">'
        f'{LABEL_MEANINGS[label]} ({label})</span>'
        for label, color in LABEL_COLORS.items()
        if label != 'LABEL-0'
    ]
    return '<div style="display:flex;flex-wrap:wrap;gap:8px;">' + ''.join(chips) + '</div>'
122
+
123
# --- Page layout -----------------------------------------------------------
# Local import: only this section needs it. Entity text comes from user
# input, so it is escaped before being placed inside raw HTML.
from html import escape as _escape_html

st.title('LLM-powered Named Entity Recognition (NER)')

st.markdown('**Legend:**')
st.markdown(legend_html(), unsafe_allow_html=True)

user_text = st.text_area('Enter text for NER:', height=150)

if user_text:
    ner_result = ner_with_robertime(user_text)
    # extract_entities returns a non-empty list exactly when at least one
    # token carries a label other than 'LABEL-0', so one check covers both
    # the highlighted view and the entity list.
    entities = extract_entities(ner_result)
    if entities:
        st.markdown('#### Entities Highlighted:')
        st.markdown(colorize_entities(ner_result), unsafe_allow_html=True)
        st.markdown('#### Detected Entities:')
        for ent, label in entities:
            st.markdown(
                f'- <span style="background-color:{LABEL_COLORS[label]};padding:2px 8px;border-radius:4px;">'
                f'{_escape_html(ent)}</span> '
                f'<span style="color:#888;">({LABEL_MEANINGS[label]})</span>',
                unsafe_allow_html=True,
            )
    else:
        st.info('No entities detected.')
st.caption('Model: [asdc/Bio-RoBERTime](https://huggingface.co/asdc/Bio-RoBERTime)')