Spaces:

asdc
/

Temporal_expression_extraction

Sleeping

App Files Files Community

asdc committed 30 days ago

Commit

2f2c452

verified ·

1 Parent(s): 27472b1

Upload streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +75 -32

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,83 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+from typing import List, Tuple
+import re
+import torch
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+# Mapping of label to the background colour used when highlighting it in HTML.
+# The trailing comment on each entry names the tag that label id stands for.
+# NOTE(review): label names auto-generated by transformers are spelled with an
+# underscore ("LABEL_0"), not a hyphen -- confirm these keys match the strings
+# in the model's `id2label` config, otherwise no lookup will ever hit.
+LABEL_COLORS = {
+    'LABEL-0': '#cccccc',  # NONE
+    'LABEL-1': '#ffadad',  # B-DATE
+    'LABEL-2': '#ffd6a5',  # I-DATE
+    'LABEL-3': '#fdffb6',  # B-TIME
+    'LABEL-4': '#caffbf',  # I-TIME
+    'LABEL-5': '#9bf6ff',  # B-DURATION
+    'LABEL-6': '#a0c4ff',  # I-DURATION
+    'LABEL-7': '#bdb2ff',  # B-SET
+    'LABEL-8': '#ffc6ff',  # I-SET
+}
+@st.cache_resource(show_spinner=True)
+def load_model():
+    """Load the Bio-RoBERTime tokenizer and token-classification model.
+
+    The function is wrapped in ``st.cache_resource`` so Streamlit can hand
+    back the already-loaded pair instead of loading it again on each call.
+
+    Returns:
+        A ``(tokenizer, model)`` tuple.
+    """
+    checkpoint = 'asdc/Bio-RoBERTime'
+    return (
+        AutoTokenizer.from_pretrained(checkpoint),
+        AutoModelForTokenClassification.from_pretrained(checkpoint),
+    )
+def ner_with_robertime(text: str) -> List[Tuple[str, str]]:
+    """Tag every word of *text* with the label predicted by Bio-RoBERTime.
+
+    Args:
+        text: Raw input text. Input longer than the tokenizer's maximum
+            length is truncated, so trailing words may be missing from
+            the result.
+
+    Returns:
+        One ``(word, label)`` pair per word, in reading order. ``word`` is
+        the exact substring of *text*; ``label`` is the ``id2label`` name
+        predicted for the word's first sub-word token.
+    """
+    tokenizer, model = load_model()
+    encoding = tokenizer(text, return_tensors="pt", truncation=True, is_split_into_words=False)
+    with torch.no_grad():
+        outputs = model(**encoding)
+    predictions = torch.argmax(outputs.logits, dim=2)[0].tolist()
+    id2label = model.config.id2label
+    entities: List[Tuple[str, str]] = []
+    previous_word_id = None
+    for position, word_id in enumerate(encoding.word_ids(batch_index=0)):
+        # Special tokens have no word id; later pieces of a word we have
+        # already emitted are skipped so each word appears exactly once.
+        if word_id is None or word_id == previous_word_id:
+            continue
+        previous_word_id = word_id
+        # Slice the word out of the original text instead of gluing token
+        # strings together: this keeps sub-word pieces joined without stray
+        # spaces, preserves characters such as '#', and avoids showing the
+        # tokenizer's internal byte-level spelling of non-ASCII characters.
+        span = encoding.word_to_chars(word_id)
+        word = text[span.start:span.end].strip()
+        if word:
+            # The first sub-word carries the word's label, so a B- tag is
+            # not overwritten by the I- tag of a continuation piece.
+            entities.append((word, id2label[predictions[position]]))
+    return entities
+def colorize_entities(ner_result: List[Tuple[str, str]]) -> str:
+    """Render ``(text, label)`` pairs as HTML with labelled spans highlighted.
+
+    Args:
+        ner_result: Pairs of text fragment and predicted label name.
+
+    Returns:
+        An HTML string. Fragments labelled ``LABEL-0`` are emitted as plain
+        text; every other fragment is wrapped in a ``<span>`` whose
+        background colour comes from ``LABEL_COLORS`` (``#eeeeee`` when the
+        label is unknown). Each fragment is followed by a single space.
+    """
+    # Imported locally under another name so the module is not confused
+    # with the HTML string being built here.
+    from html import escape as escape_html
+    pieces = []
+    for token, label in ner_result:
+        # Accept the underscore spelling ("LABEL_0") as well as the
+        # hyphenated one used for the keys of LABEL_COLORS.
+        key = label.replace('_', '-')
+        # The text comes straight from user input and the result is rendered
+        # as raw HTML, so markup characters must be escaped.
+        safe_token = escape_html(token)
+        if key != 'LABEL-0':
+            color = LABEL_COLORS.get(key, '#eeeeee')
+            pieces.append(
+                f'<span style="background-color:{color};padding:2px 4px;border-radius:4px;margin:1px;">{safe_token}</span> '
+            )
+        else:
+            pieces.append(f'{safe_token} ')
+    return ''.join(pieces)
+# Page body: a title, a text box, and the highlighted result below it.
+st.title('LLM-powered Named Entity Recognition (NER)')
+user_text = st.text_area('Enter text for NER:', height=150)
+# Skip the model entirely while the text box is still empty.
+if user_text:
+    ner_result = ner_with_robertime(user_text)
+    st.markdown('#### Entities:')
+    # colorize_entities returns HTML, so raw HTML rendering must be enabled.
+    st.markdown(colorize_entities(ner_result), unsafe_allow_html=True)
+    st.caption('Model: [asdc/Bio-RoBERTime](https://huggingface.co/asdc/Bio-RoBERTime)')