vishnu23 committed on
Commit
897b414
·
1 Parent(s): 0a7ad2d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Core Pkgs
2
+ import streamlit as st
3
+
4
+ #NLP Pkgs
5
+ import spacy_streamlit
6
+ import spacy
7
+ # spacy.load('en_core_web_sm')
8
+
9
@st.cache(allow_output_mutation=True)
def _load_model(name="en_core_web_sm"):
    """Load the spaCy pipeline called ``name`` and return it.

    Streamlit re-executes this script from the top on every widget
    interaction, so an uncached ``spacy.load`` would reload the model each
    time. ``allow_output_mutation=True`` tells ``st.cache`` not to hash the
    returned pipeline object to check for mutation.
    """
    return spacy.load(name)


# Shared pipeline used by the rest of the module.
nlp = _load_model()
10
+
11
+
12
+ #Web Scraping Pkgs
13
+ from bs4 import BeautifulSoup
14
+ from urllib.request import urlopen
15
+
16
@st.cache
def get_text(raw_url):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        raw_url: Address of the page to fetch. Only ``http://`` and
            ``https://`` URLs are accepted.

    Returns:
        The text of every ``<p>`` element on the page, joined with single
        spaces. An empty string if the page has no ``<p>`` elements.

    Raises:
        ValueError: If ``raw_url`` is not an http(s) URL.
        urllib.error.URLError: If the page cannot be retrieved.
    """
    # urlopen also understands schemes such as file:// and ftp://. The URL is
    # typed in by the user, so refuse anything that is not a web address.
    if not raw_url.lower().startswith(("http://", "https://")):
        raise ValueError(f"Unsupported URL (expected http/https): {raw_url!r}")

    # The context manager closes the HTTP response even if parsing fails.
    with urlopen(raw_url) as page:
        # Naming the parser avoids bs4's "no parser was explicitly specified"
        # warning and keeps the result independent of which optional parsers
        # happen to be installed.
        soup = BeautifulSoup(page, "html.parser")

    return " ".join(paragraph.text for paragraph in soup.find_all("p"))
22
+
23
+
24
def _show_entities(text):
    """Run the spaCy pipeline over ``text`` and render its named entities."""
    spacy_streamlit.visualize_ner(nlp(text), labels=nlp.get_pipe("ner").labels)


def main():
    """Run the Streamlit named-entity-recognition app.

    The sidebar offers two modes:

    * "NER" analyses the text typed into a text area.
    * "NER for URL" downloads a page with ``get_text`` and analyses the
      first characters of it, as many as the "Length to Preview" slider
      selects.
    """
    st.title("Named Entity Recognition")

    menu = ["NER", "NER for URL"]
    choice = st.sidebar.radio("Pick a choice", menu)

    if choice == "NER":
        raw_text = st.text_area("Enter Text", "")
        if raw_text:
            _show_entities(raw_text)

    elif choice == "NER for URL":
        raw_url = st.text_input("Enter URL", "")
        text_length = st.slider("Length to Preview", 50, 200)
        if not raw_url:
            return

        # A malformed URL raises ValueError and network failures raise
        # URLError (an OSError subclass); report them instead of letting the
        # app die with a traceback.
        try:
            result = get_text(raw_url)
        except (ValueError, OSError) as err:
            st.error(f"Could not fetch {raw_url}: {err}")
            return

        if not result:
            st.warning("No paragraph text was found at that URL.")
            return

        # The slider value is the number of characters to preview. It was
        # previously used as a divisor of the page length, which made a
        # larger "Length to Preview" produce a shorter preview.
        preview = result[:text_length]
        st.subheader("Text to be analyzed:")
        st.write(preview)
        _show_entities(preview)
49
+
50
# Start the app only when this file is executed as a script (for example via
# ``streamlit run app.py``), not when it is imported as a module.
if __name__ == "__main__":
    main()