asdc committed on
Commit
2f2c452
·
verified ·
1 Parent(s): 27472b1

Upload streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +75 -32
src/streamlit_app.py CHANGED
@@ -1,40 +1,83 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
 
 
 
 
 
 
 
 
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
 
 
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
 
 
 
 
 
 
 
18
 
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
 
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
 
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ from typing import List, Tuple
3
+ import re
4
+ import torch
5
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
6
 
7
# Background colour used to highlight each label string in the rendered
# output. The trailing comment on each row is the tag name the table's author
# recorded for that label id.
LABEL_COLORS = dict([
    ('LABEL-0', '#cccccc'),  # NONE
    ('LABEL-1', '#ffadad'),  # B-DATE
    ('LABEL-2', '#ffd6a5'),  # I-DATE
    ('LABEL-3', '#fdffb6'),  # B-TIME
    ('LABEL-4', '#caffbf'),  # I-TIME
    ('LABEL-5', '#9bf6ff'),  # B-DURATION
    ('LABEL-6', '#a0c4ff'),  # I-DURATION
    ('LABEL-7', '#bdb2ff'),  # B-SET
    ('LABEL-8', '#ffc6ff'),  # I-SET
])
19
 
20
@st.cache_resource(show_spinner=True)
def load_model():
    """Load the tokenizer and token-classification model used by the app.

    Both objects come from the same checkpoint name. The function is wrapped
    in ``st.cache_resource`` with the spinner enabled.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = 'asdc/Bio-RoBERTime'
    # Tuple elements are evaluated left to right: tokenizer first, then model.
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForTokenClassification.from_pretrained(checkpoint),
    )
25
 
26
def ner_with_robertime(text: str) -> List[Tuple[str, str]]:
    """Predict one label per word of ``text``.

    The text is tokenized (truncated to the tokenizer's limit), run through
    the model without gradient tracking, and the highest-scoring label of each
    token is looked up in ``model.config.id2label``. Sub-tokens that share a
    word id are then merged back into a single word.

    Args:
        text: Raw input text.

    Returns:
        A list of ``(word, label)`` tuples in reading order, where ``word`` is
        the exact substring of ``text`` covered by the word's sub-tokens and
        ``label`` is the label predicted for the word's first sub-token.
        Special tokens (word id ``None``) are skipped.
    """
    tokenizer, model = load_model()
    encoding = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        is_split_into_words=False,
        return_offsets_mapping=True,
    )
    word_ids = encoding.word_ids(batch_index=0)
    # Offsets are only needed to rebuild the words; remove them so that only
    # the model inputs are forwarded to the model call below.
    offsets = encoding.pop("offset_mapping")[0].tolist()

    with torch.no_grad():
        outputs = model(**encoding)
    predictions = torch.argmax(outputs.logits, dim=2)[0].tolist()
    labels = [model.config.id2label[pred] for pred in predictions]

    return _merge_subwords(text, word_ids, offsets, labels)


def _merge_subwords(text: str, word_ids: list, offsets: list,
                    labels: List[str]) -> List[Tuple[str, str]]:
    """Collapse per-token labels into one ``(word, label)`` pair per word id."""
    # Each span is [start, end, label]; a span grows while the word id repeats.
    spans = []
    last_word_id = None
    for word_id, (start, end), label in zip(word_ids, offsets, labels):
        if word_id is None:
            # Special tokens carry no text.
            continue
        if word_id != last_word_id:
            # First sub-token of a new word: its label stands for the word.
            spans.append([start, end, label])
            last_word_id = word_id
        else:
            # Continuation of the current word: only extend the end offset.
            spans[-1][1] = end

    # Slicing the original text returns each word exactly as typed. Rebuilding
    # it from vocabulary strings put a space between sub-tokens of one word and
    # relied on stripping marker characters ('▁', '#', 'Ġ') that can also be
    # legitimate leading characters.
    # NOTE(review): with a byte-level vocabulary the token strings for
    # non-ASCII characters are presumably not the source characters either --
    # confirm against the checkpoint's tokenizer.
    entities = []
    for start, end, label in spans:
        word = text[start:end].strip()
        if word:
            entities.append((word, label))
    return entities
64
 
65
def colorize_entities(ner_result: List[Tuple[str, str]]) -> str:
    """Render ``(token, label)`` pairs as an HTML fragment.

    Tokens labelled ``'LABEL-0'`` are emitted as plain text; every other token
    is wrapped in a ``<span>`` whose background colour comes from
    ``LABEL_COLORS`` (falling back to ``'#eeeeee'`` for unknown labels). Each
    piece is followed by a single space.

    Args:
        ner_result: Sequence of ``(token, label)`` tuples.

    Returns:
        The concatenated HTML string; empty when ``ner_result`` is empty.
    """
    # Function-scope import: the name ``html`` is not imported at module level.
    from html import escape

    pieces = []
    for token, label in ner_result:
        # The fragment is rendered with raw HTML enabled, so text that came
        # from the user must be escaped before it is embedded.
        safe_token = escape(token)
        # Accept both 'LABEL_0' and 'LABEL-0' spellings when looking up the
        # colour table, whose keys use a hyphen.
        # NOTE(review): transformers' auto-generated label names use an
        # underscore -- confirm which spelling the model's id2label has.
        key = label.replace('_', '-') if isinstance(label, str) else label
        if key == 'LABEL-0':
            pieces.append(f'{safe_token} ')
            continue
        color = LABEL_COLORS.get(key, '#eeeeee')
        pieces.append(
            f'<span style="background-color:{color};padding:2px 4px;'
            f'border-radius:4px;margin:1px;">{safe_token}</span> '
        )
    return ''.join(pieces)
74
 
75
# ---- Page layout: title, input box, highlighted result, model credit ----
st.title('LLM-powered Named Entity Recognition (NER)')

# Free-text input; an empty box yields a falsy value and skips inference.
input_text = st.text_area('Enter text for NER:', height=150)

if input_text:
    # Run the tagger before drawing the heading.
    tagged_words = ner_with_robertime(input_text)
    st.markdown('#### Entities:')
    # The highlighted output is an HTML fragment, so raw HTML must be enabled.
    highlighted = colorize_entities(tagged_words)
    st.markdown(highlighted, unsafe_allow_html=True)
    st.caption('Model: [asdc/Bio-RoBERTime](https://huggingface.co/asdc/Bio-RoBERTime)')