File size: 1,411 Bytes
640050f
 
 
 
 
 
 
 
 
 
1a4340b
640050f
 
 
 
 
536be9c
2aec4be
640050f
c301132
640050f
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import spacy
import streamlit as st
from spacy_streamlit import visualize_ner

# Make sure the "en_core_web_sm" pipeline package is installed before the app
# builds its pipeline. spacy.load raises OSError when the package cannot be
# found, so only that failure triggers a download; anything else (including
# KeyboardInterrupt / SystemExit) is allowed to propagate instead of being
# swallowed by a bare except.
try:
    spacy.load("en_core_web_sm")
except OSError:
    spacy.cli.download("en_core_web_sm")

# Page heading, rendered by Streamlit as a level-2 markdown header.
st.write("\n## NER AI/NLP for Technology and Science\n")


# Build the processing pipeline: the small English model with an entity ruler
# inserted ahead of the statistical "ner" component.
nlp = spacy.load("en_core_web_sm")
ruler = nlp.add_pipe("entity_ruler", before="ner")
# The ruler is populated from "tokens.jsonl"; the relative path is resolved
# against the process working directory.
# NOTE(review): presumably one pattern per line (JSONL) — confirm against the file.
ruler.from_disk("tokens.jsonl")

# Sample text used as the initial content of the input box below: a tab- and
# comma-separated listing of engineering jobs with salary and growth figures.
description = "Engineering Jobs	Salary, 	Job Growth Rate, 	Petroleum Engineers	$137,330	8%, 	Computer Hardware Engineers	$119,560	2%, Aerospace Engineers	$118,610 	8%, 	Nuclear Engineers	$116,140	-8%, Chemical Engineers	$108,540	9%, 	Electrical and Electronics Engineers	$103,390	7%, 	Materials Engineers	$95,640	8%, 	Marine Engineers	$95,440	4%, 	Health and Safety Engineers	$94,240	6%, 	Mining and Geological Engineers	$93,800	4%, 	Biomedical Engineers	$92,620	6%, 	Environmental Engineers	$92,120	4%, 	Mechanical Engineers	$90,160	7%, 	Industrial Engineers	$88,950	14%, 	Civil Engineers	$88,570	8%, 	Agricultural Engineers	$84,410	5%"

# Editable input box; it starts out filled with the sample description.
text = st.text_area(
    label="Engineering Description",
    value=description,
    placeholder="Enter Text for Technology or Engineering",
)

# Run the pipeline over the current contents of the text area.
doc = nlp(text)

# Show the annotated text, offering the labels of the "ner" component.
# NOTE(review): labels contributed only by the entity ruler are not passed
# here — confirm whether they should be selectable in the visualizer.
ner_labels = nlp.get_pipe("ner").labels
visualize_ner(doc, labels=ner_labels)

# List every entity whose ent_id_ is 'SKILLS' as JSON. 'start' and 'end' are
# the span's start/end attributes as exposed by spaCy.
st.subheader('Keywords')
skill_entities = []
for entity in doc.ents:
    if entity.ent_id_ != 'SKILLS':
        continue
    skill_entities.append(
        {
            'label': entity.label_,
            'text': entity.text,
            'start': entity.start,
            'end': entity.end,
        }
    )
st.json(skill_entities)