Spaces:

hksung
/

ASC_tagger_V2

Running

App Files Community

Hakyung Sung committed on Apr 9

Commit

0bd74cf

1 Parent(s): f54d77f

Correct app.py

Browse files

Files changed (1) hide show

app.py +38 -108

app.py CHANGED Viewed

@@ -18,143 +18,73 @@ model_path = snapshot_download(model_repo)  # Assumes the repo is public; add to
 nlp = spacy.load(os.path.join(model_path, 'model-best'))
 # Make sure the pipeline can split into sentences
 if 'parser' not in nlp.pipe_names and 'senter' not in nlp.pipe_names:
     nlp.add_pipe('sentencizer')
 def get_highlighted_text(doc):
     """
-    For each sentence in the document, check if there are any entities.
-    If so, insert HTML spans with a class and data attribute to highlight them.
-    Returns a list of HTML strings (one per sentence).
     """
     highlighted_sentences = []
     for sent in doc.sents:
-        sent_start = sent.start_char
-        sent_end = sent.end_char
         sent_text = sent.text
-        # Find entities that fall completely within this sentence.
-        ents_in_sent = [ent for ent in doc.ents if ent.start_char >= sent_start and ent.end_char <= sent_end]
         if ents_in_sent:
-            # Process entities from the end to the start so that
-            # replacing text doesn’t mess up subsequent character indices.
             ents_in_sent = sorted(ents_in_sent, key=lambda x: x.start_char, reverse=True)
             s = sent_text
             for ent in ents_in_sent:
-                ent_start = ent.start_char - sent_start
-                ent_end = ent.end_char - sent_start
-                ent_label = ent.label_
-                # Wrap the entity text in a span tag with a CSS class and data attribute
                 s = (
                     s[:ent_start]
-                    + f'<span class="entity" data-entity="{ent_label}">{s[ent_start:ent_end]}</span>'
                     + s[ent_end:]
                 )
             highlighted_sentences.append(s)
         else:
             highlighted_sentences.append(sent_text)
-    return highlighted_sentences
-def create_tag_count_plot(tag_counts):
-    """
-    Create a Plotly bar chart that shows the count for each entity tag.
-    Returns the bar chart as a base64 encoded PNG image.
-    """
-    sorted_tags = sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)
-    tags, counts = zip(*sorted_tags)
-    fig = go.Figure(data=[
-        go.Bar(
-            x=tags,
-            y=counts,
-            text=counts,
-            textposition='auto',
-            marker=dict(color='#8ABB40'),
-            hoverinfo='x+y'
-        )
-    ])
-    fig.update_layout(
-        xaxis_title='Tag',
-        yaxis_title='Count',
-        template='none',
-        font=dict(size=10, family="Arial, sans-serif"),
-        xaxis_tickangle=-45,
-        margin=dict(l=50, r=50, t=50, b=80),
-        plot_bgcolor='white',
-        paper_bgcolor='white',
-        width=400,
-        height=550
-    )
-    img_buffer = io.BytesIO()
-    pio.write_image(fig, img_buffer, format='png')
-    img_buffer.seek(0)
-    plot_b64 = base64.b64encode(img_buffer.getvalue()).decode('utf8')
-    return plot_b64
-def base64_to_pil(b64_str):
-    """
-    Convert a base64 encoded string to a PIL Image.
-    """
-    img_data = base64.b64decode(b64_str)
-    return Image.open(io.BytesIO(img_data))
 def process_text(input_text):
     """
-    Process the user-input text:
-     - If the text is empty or has no valid sentences/entities, set an error message.
-     - Otherwise, produce HTML with highlighted entities and a bar chart image of tag counts.
     """
-    error_message = ""
-    html_output = ""
-    plot_image = None
     if not input_text.strip():
-        error_message = "No text provided. Please enter some text."
-        return html_output, plot_image, error_message
     doc = nlp(input_text)
-    sentences = list(doc.sents)
-    if len(sentences) < 1:
-        error_message = "Please enter at least one sentence."
-        return html_output, plot_image, error_message
-    if len(doc.ents) == 0:
-        error_message = "No entities were detected. Please try again with a different input."
-        return html_output, plot_image, error_message
-    # Generate highlighted sentences and join them with HTML breaks.
-    highlighted = get_highlighted_text(doc)
-    html_output = "<br><br>".join(highlighted)
-    # Get a counter for entity tags and create a bar chart.
-    tag_counts = Counter([ent.label_ for ent in doc.ents])
-    plot_b64 = create_tag_count_plot(tag_counts)
-    plot_image = base64_to_pil(plot_b64)
-    return html_output, plot_image, error_message
 # Build the Gradio interface.
 with gr.Blocks() as demo:
-    gr.Markdown("# Named Entity Highlighter and Tag Counter")
-    gr.Markdown(
-        "Enter some text to visualize named entities. The app will highlight any detected entities in each sentence and show a bar chart of entity tag counts."
-    )
-    # Input textbox for user text.
     input_textbox = gr.Textbox(lines=10, label="Input Text", placeholder="Enter text here...")
-    # Button to trigger the analysis.
-    analyze_btn = gr.Button("Analyze Text")
-    # Three outputs: highlighted text (as HTML), tag count plot image, and error message display.
-    highlighted_output = gr.HTML(label="Highlighted Sentences")
-    tag_plot_output = gr.Image(label="Tag Count Plot")
-    error_output = gr.Textbox(label="Error Message", interactive=False)
-    analyze_btn.click(
-        fn=process_text,
-        inputs=input_textbox,
-        outputs=[highlighted_output, tag_plot_output, error_output]
-    )
 if __name__ == "__main__":
-    demo.launch()

 nlp = spacy.load(os.path.join(model_path, 'model-best'))
 # Make sure the pipeline can split into sentences
+if 'parser' not in nlp.pipe_names and 'senter' not in nlp.pipe_names:
+    nlp.add_pipe('sentencizer')
+# If the pipeline is missing a sentence splitter, add one
 if 'parser' not in nlp.pipe_names and 'senter' not in nlp.pipe_names:
     nlp.add_pipe('sentencizer')
 def get_highlighted_text(doc):
     """
+    Wrap detected ASCs (entities) in each sentence with a span tag that has a custom inline style.
+    Here, we assume all entities from the model correspond to ASCs.
     """
     highlighted_sentences = []
     for sent in doc.sents:
         sent_text = sent.text
+        # Get entities that are fully contained within the sentence.
+        ents_in_sent = [ent for ent in doc.ents if ent.start_char >= sent.start_char and ent.end_char <= sent.end_char]
         if ents_in_sent:
+            # Process entities in reverse order to avoid messing up character indices.
             ents_in_sent = sorted(ents_in_sent, key=lambda x: x.start_char, reverse=True)
             s = sent_text
             for ent in ents_in_sent:
+                # Compute positions relative to the sentence start
+                ent_start = ent.start_char - sent.start_char
+                ent_end = ent.end_char - sent.start_char
+                # Wrap the entity in a span with a custom style. Adjust color & style as needed.
                 s = (
                     s[:ent_start]
+                    + f'<span style="background-color: #add8e6; font-weight: bold;" title="{ent.label_}">'
+                    + s[ent_start:ent_end]
+                    + '</span>'
                     + s[ent_end:]
                 )
             highlighted_sentences.append(s)
         else:
             highlighted_sentences.append(sent_text)
+    # Join sentences with HTML breaks so the output preserves sentence separations.
+    return "<br><br>".join(highlighted_sentences)
 def process_text(input_text):
     """
+    Process the user input text to detect and tag ASCs.
+    Returns an HTML string with tagged entities.
     """
     if not input_text.strip():
+        return "No text provided. Please enter some text."
     doc = nlp(input_text)
+    # Check if there are sentences; if not, return a message.
+    if len(list(doc.sents)) == 0:
+        return "Please enter at least one sentence."
+    # If no entities (ASCs) are found, let the user know.
+    if not doc.ents:
+        return "No ASCs were detected."
+    # Get the HTML with highlighted ASCs.
+    return get_highlighted_text(doc)
 # Build the Gradio interface.
 with gr.Blocks() as demo:
+    gr.Markdown("# ASC Tagger")
+    gr.Markdown("Enter some text to have ASCs tagged (highlighted with a custom color scheme).")
     input_textbox = gr.Textbox(lines=10, label="Input Text", placeholder="Enter text here...")
+    output_html = gr.HTML(label="Tagged Text")
+    tag_btn = gr.Button("Tag ASCs")
+    tag_btn.click(fn=process_text, inputs=input_textbox, outputs=output_html)
 if __name__ == "__main__":
+    demo.launch()