# Streamlit app: run spaCy NER (plus a custom entity ruler) over user-supplied
# text, visualize the entities, and list the ones tagged with the SKILLS id.
import spacy
import streamlit as st
from spacy_streamlit import visualize_ner

MODEL_NAME = "en_core_web_sm"

# Load the pipeline once. spaCy raises OSError when the model package is not
# installed; only in that case download it and retry. Any other failure
# (including KeyboardInterrupt) propagates instead of silently triggering a
# download.
try:
    nlp = spacy.load(MODEL_NAME)
except OSError:
    spacy.cli.download(MODEL_NAME)
    nlp = spacy.load(MODEL_NAME)

st.write(""" # Named Entity Recognition for Technology and Science Keywords. """)

# The rule-based matcher runs before the statistical "ner" component; its
# patterns are read from tokens.json, resolved relative to the working
# directory.
ruler = nlp.add_pipe("entity_ruler", before="ner")
ruler.from_disk("tokens.json")

# Default sample text shown in the text area.
description = "Engineering Type Median Salary (May 2020) Job Growth Rate (2020-30) Petroleum Engineers $137,330 8% Computer Hardware Engineers $119,560 2% Aerospace Engineers $118,610 8% Nuclear Engineers $116,140 -8% Chemical Engineers $108,540 9% Electrical and Electronics Engineers $103,390 7% Materials Engineers $95,640 8% Marine Engineers $95,440 4% Health and Safety Engineers $94,240 6% Mining and Geological Engineers $93,800 4% Biomedical Engineers $92,620 6% Environmental Engineers $92,120 4% Mechanical Engineers $90,160 7% Industrial Engineers $88,950 14% Civil Engineers $88,570 8% Agricultural Engineers $84,410 5%"

text = st.text_area(
    label='Engineering Description',
    value=description,
    placeholder='Enter Text for Technology or Engineering',
)

doc = nlp(text)

# NOTE(review): only the labels of the statistical "ner" component are passed
# to the visualizer; labels that exist solely in the entity ruler's patterns
# are presumably not offered there -- confirm this is intended.
visualize_ner(doc, labels=nlp.get_pipe("ner").labels)

st.subheader('Keywords')

# Keep only entities whose entity id is 'SKILLS' (presumably assigned by the
# patterns in tokens.json -- verify against that file). 'start' and 'end' are
# the span's start/end attributes as reported by spaCy.
st.json(
    [
        {
            'label': entity.label_,
            'text': entity.text,
            'start': entity.start,
            'end': entity.end,
        }
        for entity in doc.ents
        if entity.ent_id_ == 'SKILLS'
    ]
)