"""Gradio demo that runs the ``d4data/biomedical-ner-all`` NER model on a note.

The submitted text is shown with its recognised entities highlighted, and the
number of entities per group is reported both as a bar chart and as a table.
"""

import json
from collections import defaultdict, Counter

import matplotlib.pyplot as plt
import gradio as gr
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification

MODEL_NAME = "d4data/biomedical-ner-all"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)

# Non-interactive backend: figures are only rendered off-screen for the web UI.
plt.switch_backend("Agg")

# Maps each example text to its label, as stored in examples.json.
# JSON is read explicitly as UTF-8 so the result does not depend on the
# platform's default encoding.
with open("examples.json", "r", encoding="utf-8") as f:
    example_labels = {item["text"]: item["label"] for item in json.load(f)}

pipe = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)


def plot_to_figure(grouped):
    """Draw a bar chart of entity counts and return the matplotlib figure.

    Args:
        grouped: Mapping of entity-group name to the number of occurrences.

    Returns:
        The matplotlib ``Figure`` holding the bar chart.
    """
    fig, ax = plt.subplots()
    ax.bar(x=list(grouped.keys()), height=list(grouped.values()))
    ax.margins(0.2)
    # Leave room below the axes for the vertically rotated tick labels.
    fig.subplots_adjust(bottom=0.4)
    ax.tick_params(axis="x", labelrotation=90)
    # Unregister the figure from pyplot so that figures do not pile up across
    # requests; the Figure object itself stays valid and can still be rendered.
    plt.close(fig)
    return fig


def run_ner(text):
    """Run the NER pipeline on ``text`` and build the three UI outputs.

    Args:
        text: The note text entered by the user.

    Returns:
        A tuple ``(ner_content, rows, figure)`` where ``ner_content`` is a
        dict with the original text and its entity spans, ``rows`` is a list
        of ``[entity_group, count]`` pairs, and ``figure`` is the bar chart
        of those counts.
    """
    # Skip the model entirely for blank input; there is nothing to tag.
    raw = pipe(text) if text and text.strip() else []

    ner_content = {
        "text": text,
        "entities": [
            {
                "entity": x["entity_group"],
                "word": x["word"],
                "score": x["score"],
                "start": x["start"],
                "end": x["end"],
            }
            for x in raw
        ],
    }

    grouped = Counter(x["entity_group"] for x in raw)
    rows = [[k, v] for k, v in grouped.items()]
    figure = plot_to_figure(grouped)
    return ner_content, rows, figure


with gr.Blocks() as demo:
    note = gr.Textbox(label="Note text")

    with gr.Accordion("Examples", open=False):
        # The component result is intentionally not bound to a module-level
        # name, so the loaded example data is not shadowed.
        gr.Examples(examples=list(example_labels.keys()), inputs=note)

    with gr.Tab("NER"):
        highlight = gr.HighlightedText(label="NER", combine_adjacent=True)
    with gr.Tab("Bar"):
        plot = gr.Plot(label="Bar")
    with gr.Tab("Table"):
        table = gr.Dataframe(headers=["Entity", "Count"])

    note.submit(run_ner, [note], [highlight, table, plot])

demo.launch()