# Wootang01's picture
# Update app.py
# 2aaaa46
import streamlit as st
from transformers import pipeline
import spacy
from spacy import displacy
import plotly.express as px
import numpy as np
# Page setup: the same title is used for the browser tab and the on-page
# heading, followed by a short description of the supported entity categories.
APP_TITLE = "Vocabulary Categorizer Two"

st.set_page_config(page_title=APP_TITLE)
st.title(APP_TITLE)
st.write(
    "This application can identify, highlight and categorize: numbers (CARDINAL); dates (DATE); events (EVENT); geopolitical entities (GPE); locations (LOC); money (MONEY); national, religious or political groups (NORP); organizations (ORG); people (PERSON); products (PRODUCT); and time (TIME) in text."
)
@st.cache(allow_output_mutation=True, show_spinner=False)
def Loading_NLP():
    """Load and return the spaCy ``en_core_web_trf`` pipeline.

    The function is wrapped in ``st.cache`` with output mutation allowed and
    Streamlit's built-in spinner disabled.

    Returns:
        The object returned by ``spacy.load('en_core_web_trf')``.
    """
    return spacy.load('en_core_web_trf')
@st.cache(allow_output_mutation=True)
def entRecognizer(entDict, typeEnt):
    """Return the keys of ``entDict`` whose value equals ``typeEnt``.

    Args:
        entDict: Mapping of entity text to entity label.
        typeEnt: The label to select (compared with ``==``).

    Returns:
        A list of matching keys, in the mapping's iteration order.
    """
    matches = []
    for name, label in entDict.items():
        if label == typeEnt:
            matches.append(name)
    return matches
def plot_result(top_topics, scores):
    """Draw a horizontal bar chart of category scores on the Streamlit page.

    Args:
        top_topics: Sequence of category names, one per bar.
        scores: Sequence of numeric scores aligned with ``top_topics``. Each
            value is multiplied by 100 and rendered as a percentage label
            (presumably the inputs are probabilities in [0, 1] -- confirm
            against the caller).

    Returns:
        None. The figure is emitted with ``st.plotly_chart``.
    """
    top_topics = np.array(top_topics)
    # Work on a fresh array so the caller's data is never modified.
    scores = np.array(scores) * 100

    fig = px.bar(
        x=scores,
        y=top_topics,
        orientation='h',
        labels={'x': 'Probability', 'y': 'Category'},
        text=scores,
        # Leave headroom past 100 for the text placed outside the bars.
        range_x=(0, 115),
        title='Top Predictions',
        color=np.linspace(0, 1, len(scores)),
        # "Blurred" is not a Plotly colorscale name and is rejected at
        # runtime; "Bluered" is the built-in scale it was a typo for.
        color_continuous_scale="Bluered",
    )
    # Hide the color bar: the color is decorative only. The original spelled
    # the property path "coloraxi", which is not a valid layout attribute.
    fig.update_layout(coloraxis_showscale=False)
    fig.update_traces(texttemplate='%{text:0.1f}%', textposition='outside')
    st.plotly_chart(fig)
# Load the NLP pipeline while showing a spinner, then draw the input widgets.
with st.spinner("Please wait for the models to load. This should take approximately 60 seconds."):
    nlp = Loading_NLP()

# Text pre-filled in the input box.
default_sentence = "A Russia-led military alliance began deploying paratroopers in Kazakhstan on Thursday to restore order after a night of protests in the Central Asian country turned violent, with the police reporting that dozens of antigovernment demonstrators had been killed and hundreds injured."

text = st.text_area(
    'Enter Text Below:',
    value=default_sentence,
    height=300,
)
submit = st.button('Generate')
if submit:
    # (heading shown in the vocabulary list, spaCy entity label), listed in
    # the order the categories are displayed.
    categories = [
        ("Cardinal Numbers", "CARDINAL"),
        ("Event", "EVENT"),
        ("Geopolitical Entities", "GPE"),
        ("Money", "MONEY"),
        ("National or Religious or Political Groups", "NORP"),
        ("People", "PERSON"),
        ("Product", "PRODUCT"),
        ("Organizations", "ORG"),
        ("Dates", "DATE"),
        ("Locations", "LOC"),
        ("Time", "TIME"),
    ]

    doc = nlp(text)

    # Group entity texts by label. Keying a single dict by entity text (as the
    # previous version did) let a later occurrence with a different label
    # overwrite the earlier one, so the text disappeared from the first
    # category while still being highlighted under it. A dict per label is
    # used as an insertion-ordered set, so repeats within a label collapse.
    ents_by_label = {label: {} for _, label in categories}
    for ent in doc.ents:
        if ent.label_ in ents_by_label:
            ents_by_label[ent.label_][ent.text] = None

    # Restrict the highlighting to the same labels that are listed.
    options = {"ents": [label for _, label in categories]}
    HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""

    st.subheader("Categorized Vocabulary List:")
    for heading, label in categories:
        st.write(heading + " (" + label + "): " + str(list(ents_by_label[label])))

    st.subheader("Original Text with Vocabulary Highlighted")
    html = displacy.render(doc, style="ent", options=options)
    # Flatten newlines to spaces before embedding the markup in the wrapper.
    html = html.replace("\n", " ")
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)