eldoraboo committed on
Commit
dcf747d
·
1 Parent(s): 4377911

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import spacy
4
+ from itertools import groupby
5
+
6
# Heading and sub-heading shown at the top of the Gradio page.
title = "Named Entity Recognition with spaCy"
description = "en_core_web"

# Load every selectable English pipeline once at start-up so that requests
# only pay for inference; the order matches the names on the left-hand side.
nlp_trf, nlp_sm, nlp_md, nlp_lg = (
    spacy.load(package)
    for package in (
        "en_core_web_trf",
        "en_core_web_sm",
        "en_core_web_md",
        "en_core_web_lg",
    )
)
13
+
14
def doc_to_spans(doc):
    """Collapse entity-tagged tokens into one span per named entity.

    Args:
        doc: Iterable of tokens in document order (normally a spaCy ``Doc``).
            Each token must expose ``text``, ``idx`` (start offset of the
            token in the text), ``ent_type_`` (entity label, empty when the
            token is outside any entity) and ``ent_iob_`` (IOB flag).

    Returns:
        A list of ``{"entity": label, "start": offset, "end": offset}`` dicts
        in document order, where ``end`` is exclusive. Empty when no token
        carries an entity label.
    """
    results = []
    current = None  # span currently being extended, or None between entities
    for tok in doc:
        label = tok.ent_type_
        if not label:
            current = None
            continue
        tok_end = tok.idx + len(tok.text)
        # Start a new span on a label change *or* an explicit "B" flag;
        # comparing labels alone would fuse two back-to-back entities that
        # happen to share a label (e.g. "Paris London") into one span.
        if (
            current is None
            or current["entity"] != label
            or tok.ent_iob_ == "B"
        ):
            current = {"entity": label, "start": tok.idx, "end": tok_end}
            results.append(current)
        else:
            current["end"] = tok_end
    return results
30
+
31
+ #define a function to process your input and output
32
def ner(text, model):
    """Run named entity recognition on ``text`` with the chosen pipeline.

    Args:
        text: Input text to analyse.
        model: Name of the spaCy pipeline to use; one of
            ``"en_core_web_trf"``, ``"en_core_web_sm"``,
            ``"en_core_web_md"`` or ``"en_core_web_lg"``.

    Returns:
        A dict ``{"text": text, "entities": [...]}`` where ``entities`` is
        the list of entity spans produced by ``doc_to_spans``.

    Raises:
        ValueError: If ``model`` is not one of the supported pipeline names
            (for example when no model is selected).
    """
    pipelines = {
        "en_core_web_trf": nlp_trf,
        "en_core_web_sm": nlp_sm,
        "en_core_web_md": nlp_md,
        "en_core_web_lg": nlp_lg,
    }
    try:
        nlp = pipelines[model]
    except (KeyError, TypeError):
        # Without this guard an unknown/None selection left ``nlp`` unbound
        # and surfaced as an opaque UnboundLocalError.
        raise ValueError(
            f"Unknown model {model!r}; expected one of {sorted(pipelines)}"
        ) from None
    doc = nlp(text)
    return {"text": text, "entities": doc_to_spans(doc)}
44
+
45
# --- Input components ------------------------------------------------------
# Free-text field holding the passage to analyse.
input1 = gr.Textbox(label="Text")
# Selector for the spaCy pipeline; the transformer model is preselected.
input2 = gr.Dropdown(
    label="Model",
    choices=[
        "en_core_web_trf",
        "en_core_web_sm",
        "en_core_web_md",
        "en_core_web_lg",
    ],
    value="en_core_web_trf",
)

# --- Output component ------------------------------------------------------
# Displays the input text with the recognised entity spans highlighted.
output = gr.HighlightedText(label="Output")

# --- Example inputs --------------------------------------------------------
# Each row supplies a value for the text field only.
examples = [
    [
        "TDC A/S provides communications and entertainment solutions in Denmark. It operates through Nuuday and TDC NET segments. The company designs, builds, and operates broadband and mobile networks; and provides technical support to customers and networks. It offers services, such as landline voice, TV and streaming, broadband, Internet and network, mobility, and other services. The company provides its products and services under the YouSee, Hiper, Telmore, Blockbuster, TDC Business, TDC Erhverv, Fullrate, NetDesign, and Relatel brands. It serves consumer and business customers. The company was founded in 1882 and is based in Copenhagen, Denmark. TDC A/S is a subsidiary of DK Telekommunikation ApS."
    ],
    [
        "Giddy Inc., doing business as Boxed Wholesale, offers online wholesale and retailing services. The company provides cleaning and laundry, kitchen, paper, skin care, hair care, and grocery products. Additionally, it offers diapers and organic products. Giddy Inc. was founded in 2013 and is based in Edison, New Jersey."
    ],
    [
        "United Iron And Steel Manufacturing Company (P.L.C.) produces and sells iron and steel products in Jordan. It is also involved in trading scrap iron. The company was incorporated in 1992 and is headquartered in Amman, Jordan. United Iron And Steel Manufacturing Company (P.L.C.) is a subsidiary of Manaseer Group Corporation."
    ],
]

# --- Interface -------------------------------------------------------------
# Wire the components to the ``ner`` callback.
gui = gr.Interface(
    fn=ner,
    inputs=[input1, input2],
    outputs=[output],
    examples=examples,
    title=title,
    description=description,
)

# Start serving the app.
gui.launch()