Spaces:
Runtime error
Runtime error
File size: 4,271 Bytes
adf4ac7 062527a adf4ac7 58ef0b0 df89742 58ef0b0 50e8f91 dcb64e4 e010ab9 dcb64e4 b8f9703 dcb64e4 3cf2a36 58ef0b0 3cf2a36 83f7a4f 3cf2a36 83f7a4f 3cf2a36 83f7a4f 3cf2a36 83f7a4f 3cf2a36 adf4ac7 96898b2 e010ab9 dcb64e4 44854e8 08b8ceb e010ab9 dcb64e4 3cf2a36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import streamlit as st
from transformers import pipeline
from ipymarkup import format_span_box_markup
# Streamlit re-executes this whole script on every widget interaction, so the
# pipelines are built through a cached loader; without it both models would be
# reloaded from disk on each rerun.
@st.cache_resource
def _load_ner_pipeline(model_path):
    """Return a "ner" pipeline for the checkpoint stored at *model_path*.

    Every checkpoint is paired with the same
    "indolem/indobert-base-uncased" tokenizer.
    """
    return pipeline("ner", model=model_path, tokenizer="indolem/indobert-base-uncased")


# Load the pre-trained NER models: the MendoBERT checkpoint and the base
# checkpoint it is compared against.
model = _load_ner_pipeline("/home/user/app/mendobert/")
basemodel = _load_ner_pipeline("/home/user/app/base-model/")
# Example sentences offered as one-click inputs (runtime strings, shown
# verbatim as the button labels).
_EXAMPLE_TEXTS = (
    'Aspartylglucosaminuria (AGU) adalah gangguan metabolisme glikoprotein langka.',
    'Mutasi germ - line dari gen BRCA1 membuat wanita cenderung mengalami kanker payudara dini dengan mengorbankan fungsi presumtif gen sebagai penekan tumor.',
)
_DEFAULT_TEXT = 'Enter your texts here...'
# Session-state key that owns the content of the input text area.
_TEXT_AREA_KEY = "ner_input_text"


def _load_example(example_text):
    """Button callback: put *example_text* into the input text area.

    Callbacks run before the script reruns, i.e. before the text area is
    instantiated, which is when Streamlit allows a widget's state to be set.
    """
    st.session_state[_TEXT_AREA_KEY] = example_text


st.title(':blue[MendoBERT] - Named Entity Recognition Model :sunglasses:')

with st.container():
    example1 = st.button(
        _EXAMPLE_TEXTS[0],
        use_container_width=True,
        on_click=_load_example,
        args=(_EXAMPLE_TEXTS[0],),
    )
    example2 = st.button(
        _EXAMPLE_TEXTS[1],
        use_container_width=True,
        on_click=_load_example,
        args=(_EXAMPLE_TEXTS[1],),
    )

# A single keyed text area replaces three text areas that differed only in
# their default value. Streamlit treats widgets with different defaults as
# different widgets, so edits made after choosing an example were discarded
# on the next rerun (the button is only True during the run it was clicked).
if _TEXT_AREA_KEY not in st.session_state:
    st.session_state[_TEXT_AREA_KEY] = _DEFAULT_TEXT
text = st.text_area('Enter some text: ', key=_TEXT_AREA_KEY)
# Raw model labels mapped to BIO tags. Matching is by prefix and in this
# order, mirroring an if/elif chain of startswith() checks.
_LABEL_PREFIX_TO_TAG = (
    ("LABEL_0", "O"),
    ("LABEL_1", "B"),
    ("LABEL_2", "I"),
)


def _merge_subword_tokens(token_predictions):
    """Merge "##" continuation tokens into the word that precedes them.

    Args:
        token_predictions: iterable of per-token dicts carrying the keys
            "word", "start", "entity", "index" and "score" (and, when the
            pipeline provides it, "end").

    Returns:
        A list of dicts with keys "start", "end", "entity", "index", "score"
        and "word", one per merged word. A merged word keeps the entity,
        index and score of its first token.
    """
    merged = []
    for token in token_predictions:
        piece = token["word"].replace("##", "")
        # Prefer the offset reported by the pipeline; the token string can
        # differ in length from the source substring it stands for. Fall back
        # to the length-based estimate when no end offset is available.
        end = token.get("end")
        if end is None:
            end = token["start"] + len(piece)
        # `and merged` guards the case where the very first token is a
        # continuation piece: there is no previous word to extend, so it is
        # kept as its own entry instead of raising IndexError.
        if token["word"].startswith("##") and merged:
            merged[-1]["end"] = end
            merged[-1]["word"] += piece
        else:
            merged.append({
                'start': token["start"],
                'end': end,
                'entity': token["entity"],
                'index': token["index"],
                'score': token["score"],
                'word': token["word"],
            })
    return merged


def _to_bio_tag(label):
    """Translate a raw model label into "O"/"B"/"I"; unknown labels pass through."""
    for prefix, tag in _LABEL_PREFIX_TO_TAG:
        if label.startswith(prefix):
            return tag
    return label


def _extract_entities(token_predictions, source_text):
    """Return ``(spans, descriptions)`` for every word not tagged "O".

    ``spans`` holds ``(start, end, tag)`` tuples and ``descriptions`` holds
    the matching human-readable summary strings, in the same order.
    """
    spans = []
    descriptions = []
    for word in _merge_subword_tokens(token_predictions):
        tag = _to_bio_tag(word["entity"])
        if tag.startswith("O"):
            continue
        start, end = word["start"], word["end"]
        spans.append((start, end, tag))
        descriptions.append(f"""Entity: {tag}, Start:{start}, End:{end}, word:{source_text[start:end]}""")
    return spans, descriptions


def _render_results(heading, descriptions, spans_html):
    """Write one model's section: divider, heading, JSON list, highlighted text."""
    st.divider()
    st.subheader(heading)
    st.json(descriptions)
    # NOTE(review): the markup embeds the user's text and is rendered as raw
    # HTML; presumably ipymarkup escapes it -- confirm.
    st.markdown(spans_html, unsafe_allow_html=True)


if text:
    ner_results = model(text)
    ner_results2 = basemodel(text)

    # Both models go through the same post-processing.
    spanMendo, mendo = _extract_entities(ner_results, text)
    spanBase, base = _extract_entities(ner_results2, text)

    htmlMendo = ''.join(format_span_box_markup(text, spanMendo))
    htmlBase = ''.join(format_span_box_markup(text, spanBase))

    _render_results('MendoBERT', mendo, htmlMendo)
    _render_results('IndoLEM', base, htmlBase)
|