Spaces:

tdubon
/

CustomizeNER

Runtime error

App Files Files Community

tdubon committed on Feb 14, 2022

Commit

3e8d135

1 Parent(s): 4fb44eb

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -70

app.py CHANGED Viewed

@@ -7,78 +7,56 @@ from spacy.tokens import Span
 nlp = spacy.load("en_core_web_md")
-user_input = input(str(""))
-doc1 = nlp(user_input)
-print list of entities captured by pertained model
-for ent in doc1.ents:
-    print(ent.text, ent.label_)
-inspect labels and their meaning
-for ent in doc1.ents:
-    print(ent.label_, spacy.explain(ent.label_))
-Use PhraseMatcher to find all references of interest
-Define the different references to Covid
-user_entries = input(str("")) #gradio text box here to enter sample terms
-pattern_list = []
-for i in user_entries.strip().split():
-   pattern_list.append(i)
-patterns = list(nlp.pipe(pattern_list))
-print("patterns:", patterns)
-#Instantiate PhraseMatcher
-matcher = PhraseMatcher(nlp.vocab)
-#Create label for pattern
-user_named = input(str("").strip()) #gradio text box here to enter pattern label
-matcher.add(user_named, patterns)
-# Define the custom component
-@Language.component("covid_component")
-def covid_component_function(doc):
-  #Apply the matcher to the doc
-  matches = matcher(doc)
-  #Create a Span for each match and assign the label
-  spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
-  # Overwrite the doc.ents with the matched spans
-  doc.ents = spans
-  return doc
-# Add the component to the pipeline after the "ner" component
-nlp.add_pipe((user_named + "component"), after="ner")
-print(nlp.pipe_names)
-#Verify that your model now detects all specified mentions of Covid on another text
-user_doc = input(str("").strip())
-apply_doc = nlp(user_doc)
-print([(ent.text, ent.label_) for ent in apply_doc.ents])
-#Count total mentions of label COVID in the 3rd document
-from collections import Counter
-labels = [ent.label_ for ent in apply_doc.ents]
-Counter(labels)
-iface = gr.Interface(
-    process_text,
-    [gr.inputs.Textbox(lines=10, default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition")],
-    entities,
-    [gr.inputs.Textbox(lines=3, default= ("Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2"), label="Enter entity references")],
-    run,
-    [gr.inputs.Textbox(lines=1, default= ("COVID"), label="Enter entity label")],
-    gr.outputs.HighlightedText(),
-)
-     test,
-    [gr.inputs.Textbox(lines=1, default= ("The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2."), label="Test: Enter new sentence containing named entity")],
-    gr.outputs.HighlightedText(),
 )
-iface.launch()

 nlp = spacy.load("en_core_web_md")
+def load(txt1, txt2, txt3, txt4):
+    """Compare pretrained entities with a user-defined, phrase-matched label.
+
+    Args:
+        txt1: Text analysed with the pretrained pipeline only.
+        txt2: Terms to tag, separated by commas and/or whitespace.
+        txt3: Label assigned to every matched term.
+        txt4: Text analysed with the custom matcher component active.
+
+    Returns:
+        A tuple ``(entities, entities2, lab_counts)``: the (text, label)
+        pairs found in ``txt1``, the (text, label) pairs found in ``txt4``,
+        and a ``Counter`` of the labels found in ``txt4``.
+    """
+    import re
+    from collections import Counter
+    # Skip the custom component for the "before" pass: once an earlier call
+    # has added it to the shared pipeline it would otherwise replace the
+    # pretrained entities here as well.
+    doc1 = nlp(str(txt1.strip()), disable=["added_component"])
+    entities = [(ent.text, ent.label_) for ent in doc1.ents]
+    # Terms are typed as "a, b, c". Splitting on whitespace alone left a
+    # trailing comma on each term, so it only matched when followed by ",".
+    pattern_list = [term for term in re.split(r"[,\s]+", txt2) if term]
+    # make_doc only tokenizes, which is all PhraseMatcher needs for patterns.
+    patterns = [nlp.make_doc(term) for term in pattern_list]
+    matcher = PhraseMatcher(nlp.vocab)
+    user_named = str(txt3.strip())
+    if patterns:
+        matcher.add(user_named, patterns)
+    # The component is added to the shared pipeline only once, so it must not
+    # capture this call's matcher/label in a closure. Publish the current
+    # values on the function object and let the component read them at run
+    # time; otherwise every later request would reuse the first request's rule.
+    load.active_rule = (matcher, user_named)
+    @Language.component("added_component")
+    def component_function(doc):
+        current_matcher, current_label = load.active_rule
+        spans = [
+            Span(doc, start, end, label=current_label)
+            for _match_id, start, end in current_matcher(doc)
+        ]
+        # doc.ents rejects overlapping or duplicate spans, so filter them
+        # before overwriting the pretrained entities with the matched ones.
+        doc.ents = spacy.util.filter_spans(spans)
+        return doc
+    if "added_component" not in nlp.pipe_names:
+        nlp.add_pipe("added_component", after="ner")
+    apply_doc = nlp(str(txt4.strip()))
+    entities2 = [(ent.text, ent.label_) for ent in apply_doc.ents]
+    lab_counts = Counter(ent.label_ for ent in apply_doc.ents)
+    return (entities, entities2, lab_counts)
+# Blurb passed to the interface below as its description.
+description = "Use this space to produce and test your own customized NER"
+# Build the Gradio UI around load(): four text inputs, three text outputs.
+iface = gr.Interface(
+    title = "Customized Named Entity Recognition",
+    description = description,
+    fn = load,
+    interpretation = "shap",
+    # Four textboxes; presumably fed to load() as txt1..txt4 in this order
+    # (sample text, entity terms, entity label, test sentence).
+    inputs = [gr.inputs.Textbox(lines=10, default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition"), gr.inputs.Textbox(lines=3, default= "Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2", label="Enter entity references"), gr.inputs.Textbox(lines=1, default="COVID", label="Enter entity label"), gr.inputs.Textbox(lines=10, default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.", label="Enter new sentence containing named entity")],
+    # Three textboxes, matching the three values load() returns.
+    # NOTE(review): the second label is misspelled ("Entites").
+    outputs = [gr.outputs.Textbox(type="str", label="Entities recognized before"),
+              gr.outputs.Textbox(type="str", label="Entites recognized after"),
+              gr.outputs.Textbox(type="str", label="Count of entities captured for new label")],
+    theme = "dark"
 )
+# Start the app.
+iface.launch()