File size: 3,837 Bytes
e55dd89
 
 
 
 
8ad2755
5f8de8b
e55dd89
f7e22f8
e55dd89
 
 
e2ce4cb
e55dd89
 
 
da94fdb
e55dd89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2aaaa46
7ef8019
 
e55dd89
 
 
 
 
 
 
 
 
7a47ed9
e55dd89
7a47ed9
e55dd89
 
7a47ed9
f7e22f8
7a47ed9
 
 
 
 
f7e22f8
e55dd89
 
7c22d2b
7a47ed9
d1f7634
e55dd89
7a47ed9
f7e22f8
e55dd89
7a47ed9
e55dd89
 
 
7a47ed9
7c22d2b
e55dd89
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import streamlit as st
from transformers import pipeline
import spacy
from spacy import displacy
import plotly.express as px
import numpy as np
# Page chrome: the browser-tab title and the on-page heading share one string.
APP_TITLE = "Vocabulary Categorizer Two"

st.set_page_config(page_title=APP_TITLE)
st.title(APP_TITLE)
# Introductory blurb listing every entity category the app reports.
st.write(
    "This application can identify, highlight and categorize: "
    "numbers (CARDINAL); dates (DATE); events (EVENT); "
    "geopolitical entities (GPE); locations (LOC); money (MONEY); "
    "national, religious or political groups (NORP); "
    "organizations (ORG); people (PERSON); products (PRODUCT); "
    "and time (TIME) in text."
)

@st.cache(allow_output_mutation=True, show_spinner=False)
def Loading_NLP():
  """Load the spaCy ``en_core_web_trf`` pipeline and return it.

  The result is memoized through ``st.cache`` so the model is not reloaded on
  every script rerun; Streamlit's own spinner is disabled here.

  NOTE(review): ``st.cache`` is a legacy API in newer Streamlit releases
  (``st.cache_resource`` is the usual replacement for models) -- confirm the
  pinned Streamlit version before migrating.

  Returns:
    The object returned by ``spacy.load('en_core_web_trf')``.
  """
  return spacy.load('en_core_web_trf')
@st.cache(allow_output_mutation=True)
def entRecognizer(entDict, typeEnt):
  """Return the keys of ``entDict`` whose value equals ``typeEnt``.

  Args:
    entDict: Mapping of entity text to entity label.
    typeEnt: Label to select (compared with ``==``).

  Returns:
    A list of the matching keys, in the mapping's iteration order; empty when
    nothing matches.
  """
  return [name for name, label in entDict.items() if label == typeEnt]
def plot_result(top_topics, scores):
  """Draw a horizontal bar chart of category scores in the Streamlit page.

  Args:
    top_topics: Sequence of category names, one per bar (y axis).
    scores: Sequence of numeric scores parallel to ``top_topics``. They are
      multiplied by 100 and labelled as percentages -- presumably
      probabilities in [0, 1], given the fixed 0-115 x-axis range.

  Returns:
    None. The figure is emitted with ``st.plotly_chart``.
  """
  top_topics = np.asarray(top_topics)
  # Scale into a fresh float array: the caller's data is never mutated and
  # integer input is handled the same way as float input.
  scores = np.asarray(scores, dtype=float) * 100
  fig = px.bar(x=scores, y=top_topics, orientation='h',
               labels={'x': 'Probability', 'y': 'Category'},
               text=scores,
               # Leave head-room past 100 for the outside text labels.
               range_x=(0, 115),
               title='Top Predictions',
               # Evenly spaced values so each bar gets its own shade.
               color=np.linspace(0, 1, len(scores)),
               # "Blurred" is not a Plotly colour scale; "Bluered" is the
               # built-in scale with the closest name.
               color_continuous_scale="Bluered")
  # The colour bar carries no information here, so hide it. The property is
  # ``layout.coloraxis.showscale`` ("coloraxi" was a typo that Plotly rejects).
  fig.update(layout_coloraxis_showscale=False)
  fig.update_traces(texttemplate='%{text:0.1f}%', textposition='outside')
  st.plotly_chart(fig)
  
# (display heading, spaCy entity label) pairs, in the order they are listed on
# the page. This table drives both the categorized list and the displaCy label
# filter, so the two cannot drift apart.
ENTITY_CATEGORIES = (
    ("Cardinal Numbers", "CARDINAL"),
    ("Event", "EVENT"),
    ("Geopolitical Entities", "GPE"),
    ("Money", "MONEY"),
    ("National or Religious or Political Groups", "NORP"),
    ("People", "PERSON"),
    ("Product", "PRODUCT"),
    ("Organizations", "ORG"),
    ("Dates", "DATE"),
    ("Locations", "LOC"),
    ("Time", "TIME"),
)

# Scrollable, bordered container for the displaCy markup.
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""

with st.spinner(text="Please wait for the models to load. This should take approximately 60 seconds."):
    nlp = Loading_NLP()

default_sentence = "A Russia-led military alliance began deploying paratroopers in Kazakhstan on Thursday to restore order after a night of protests in the Central Asian country turned violent, with the police reporting that dozens of antigovernment demonstrators had been killed and hundreds injured."

text = st.text_area('Enter Text Below:', default_sentence, height=300)
submit = st.button('Generate')
if submit:
    if not text.strip():
        # Nothing to analyze: skip the model run instead of rendering empty lists.
        st.warning("Please enter some text to analyze.")
    else:
        doc = nlp(text)
        # Keyed by entity text, so repeated mentions collapse to one entry. If
        # the same text is tagged with different labels, only the last label
        # is kept.
        entDict = {ent.text: ent.label_ for ent in doc.ents}

        # Restrict highlighting to the categories listed on the page. The label
        # is spelled "EVENT" (not "Event") to match the label used everywhere
        # else in this script.
        # NOTE(review): recent spaCy releases appear to upper-case this filter
        # themselves -- normalized here so it does not depend on that.
        options = {"ents": [label for _, label in ENTITY_CATEGORIES]}

        st.subheader("Categorized Vocabulary List:")
        for heading, label in ENTITY_CATEGORIES:
            st.write(f"{heading} ({label}): " + str(entRecognizer(entDict, label)))

        st.subheader("Original Text with Vocabulary Highlighted")
        html = displacy.render(doc, style="ent", options=options)
        # Flatten the markup to one line -- presumably so the markdown renderer
        # does not break it up at newlines.
        html = html.replace("\n", " ")
        st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)