SorrelC committed on
Commit
d7a6294
·
verified ·
1 Parent(s): 659fd45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -12
app.py CHANGED
@@ -30,6 +30,22 @@ STANDARD_COLORS = {
30
  'WORK OF ART': '#DDA0DD' # Plum
31
  }
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # Additional colours for custom entities
34
  CUSTOM_COLOR_PALETTE = [
35
  '#FF9F43', '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD',
@@ -730,9 +746,34 @@ def process_text(text, standard_entities, custom_entities_str, confidence_thresh
730
 
731
  return summary, legend_html + highlighted_html, results_html
732
 
 
 
 
 
 
 
 
 
 
 
 
733
  # Create Gradio interface
734
  def create_interface():
735
- with gr.Blocks(title="Hybrid NER + GLiNER Tool", theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
736
  gr.Markdown("""
737
  # 🎯 Hybrid NER + Custom GLiNER Entity Recognition Tool
738
 
@@ -744,7 +785,7 @@ def create_interface():
744
  1. **📝 Enter your text** in the text area below
745
  2. **🎯 Select a model** from the dropdown for common entities
746
  3. **☑️ Select common entities** you want to find (PER, ORG, LOC, etc.)
747
- 4. **✨ Add custom entities** (comma-separated) like "relationships, occupations, skills"
748
  5. **⚙️ Adjust confidence threshold**
749
  6. **🔍 Click "Analyse Text"** to see results with organized output
750
  """)
@@ -771,6 +812,14 @@ def create_interface():
771
  with gr.Column():
772
  gr.Markdown("### 🎯 Common Entity Types")
773
 
 
 
 
 
 
 
 
 
774
  # Model selector
775
  model_dropdown = gr.Dropdown(
776
  choices=ner_manager.model_names,
@@ -779,11 +828,39 @@ def create_interface():
779
  info="Choose which model to use for common NER"
780
  )
781
 
782
- # Common entities with select all functionality
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
783
  standard_entities = gr.CheckboxGroup(
784
- choices=STANDARD_ENTITIES,
785
- value=['PER', 'ORG', 'LOC', 'MISC'], # Default selection
786
- label="Select Common Entities"
 
787
  )
788
 
789
  # Select/Deselect All button
@@ -797,7 +874,7 @@ def create_interface():
797
  return [], "β˜‘οΈ Select All"
798
  else:
799
  # If none selected, select all
800
- return STANDARD_ENTITIES, "πŸ”˜ Deselect All"
801
 
802
  select_all_btn.click(
803
  fn=toggle_all_entities,
@@ -806,7 +883,7 @@ def create_interface():
806
  )
807
 
808
  with gr.Column():
809
- gr.Markdown("### ✨ Custom Entity Types")
810
  custom_entities = gr.Textbox(
811
  label="Custom Entities (comma-separated)",
812
  placeholder="e.g. relationships, occupations, skills, emotions",
@@ -818,6 +895,8 @@ def create_interface():
818
  - emotions, actions, objects
819
  - medical conditions, treatments
820
  - financial terms, business roles
 
 
821
  """)
822
 
823
  analyse_btn = gr.Button("πŸ” Analyse Text", variant="primary", size="lg")
@@ -835,9 +914,15 @@ def create_interface():
835
  gr.Markdown("### πŸ“‹ Detailed Results")
836
  results_output = gr.HTML(label="Entity Results")
837
 
 
 
 
 
 
 
838
  # Connect the button to the processing function
839
  analyse_btn.click(
840
- fn=process_text,
841
  inputs=[
842
  text_input,
843
  standard_entities,
@@ -853,21 +938,21 @@ def create_interface():
853
  examples=[
854
  [
855
  "John Smith works at Google in New York. He graduated from Stanford University in 2015 and specialises in artificial intelligence research. His wife Sarah is a doctor at Mount Sinai Hospital.",
856
- ["PER", "ORG", "LOC", "DATE"],
857
  "relationships, occupations, educational background",
858
  0.3,
859
  "entities_spacy_en_core_web_trf"
860
  ],
861
  [
862
  "The meeting between CEO Jane Doe and the board of directors at Microsoft headquarters in Seattle discussed the Q4 financial results and the new AI strategy for 2024.",
863
- ["PER", "ORG", "LOC", "DATE"],
864
  "corporate roles, business events, financial terms",
865
  0.4,
866
  "entities_flair_ner-ontonotes-large"
867
  ],
868
  [
869
  "Dr. Emily Watson published a research paper on machine learning algorithms at MIT. She collaborates with her colleague Prof. David Chen on natural language processing projects.",
870
- ["PER", "ORG", "Work of Art"],
871
  "academic titles, research topics, collaborations",
872
  0.3,
873
  "entities_gliner_knowledgator/modern-gliner-bi-large-v1.0"
@@ -881,6 +966,37 @@ def create_interface():
881
  model_dropdown
882
  ]
883
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
884
 
885
  return demo
886
 
 
30
  'WORK OF ART': '#DDA0DD' # Plum
31
  }
32
 
33
+ # Entity definitions for glossary
34
+ ENTITY_DEFINITIONS = {
35
+ 'DATE': 'Absolute or relative dates or periods',
36
+ 'EVENT': 'Named hurricanes, battles, wars, sports events, etc.',
37
+ 'FAC': 'Facilities - Buildings, airports, highways, bridges, etc.',
38
+ 'GPE': 'Geopolitical entities - Countries, cities, states',
39
+ 'LANG': 'Any named language',
40
+ 'LOC': 'Non-GPE locations - Mountain ranges, bodies of water',
41
+ 'MISC': 'Miscellaneous entities - Things that don\'t fit elsewhere',
42
+ 'NORP': 'Nationalities or religious or political groups',
43
+ 'ORG': 'Organizations - Companies, agencies, institutions, etc.',
44
+ 'PER': 'People, including fictional characters',
45
+ 'PRODUCT': 'Objects, vehicles, foods, etc. (Not services)',
46
+ 'Work of Art': 'Titles of books, songs, movies, paintings, etc.'
47
+ }
48
+
49
  # Additional colours for custom entities
50
  CUSTOM_COLOR_PALETTE = [
51
  '#FF9F43', '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD',
 
746
 
747
  return summary, legend_html + highlighted_html, results_html
748
 
749
+ # Create colored checkbox labels
750
+ def create_colored_checkbox_labels():
751
+ """Create checkbox labels with color indicators"""
752
+ labels = []
753
+ for entity in STANDARD_ENTITIES:
754
+ colour_key = "WORK OF ART" if entity == "Work of Art" else entity.upper()
755
+ colour = STANDARD_COLORS.get(colour_key, '#CCCCCC')
756
+ # Create label with color dot
757
+ labels.append(f"● {entity}")
758
+ return labels
759
+
760
  # Create Gradio interface
761
  def create_interface():
762
+ # Custom CSS for colored checkboxes
763
+ custom_css = """
764
+ /* Color the checkbox labels based on entity type */
765
+ """
766
+ for i, entity in enumerate(STANDARD_ENTITIES):
767
+ colour_key = "WORK OF ART" if entity == "Work of Art" else entity.upper()
768
+ colour = STANDARD_COLORS.get(colour_key, '#CCCCCC')
769
+ custom_css += f"""
770
+ label[for="checkbox-{i}"] span:first-child {{
771
+ color: {colour} !important;
772
+ font-weight: bold;
773
+ }}
774
+ """
775
+
776
+ with gr.Blocks(title="Hybrid NER + GLiNER Tool", theme=gr.themes.Soft(), css=custom_css) as demo:
777
  gr.Markdown("""
778
  # 🎯 Hybrid NER + Custom GLiNER Entity Recognition Tool
779
 
 
785
  1. **📝 Enter your text** in the text area below
786
  2. **🎯 Select a model** from the dropdown for common entities
787
  3. **☑️ Select common entities** you want to find (PER, ORG, LOC, etc.)
788
+ 4. **✨ Add custom entities** (comma-separated) like "relationships, occupations, skills" - powered by GLiNER
789
  5. **⚙️ Adjust confidence threshold**
790
  6. **🔍 Click "Analyse Text"** to see results with organized output
791
  """)
 
812
  with gr.Column():
813
  gr.Markdown("### 🎯 Common Entity Types")
814
 
815
+ # Add the inline help text
816
+ gr.HTML("""
817
+ <div style="background-color: #e3f2fd; padding: 10px; border-radius: 5px; margin-bottom: 10px; font-size: 13px;">
818
+ <strong>Quick Guide:</strong> PER = People | ORG = Organizations | LOC = Locations |
819
+ GPE = Countries/Cities | FAC = Facilities | DATE = Dates | EVENT = Named Events
820
+ </div>
821
+ """)
822
+
823
  # Model selector
824
  model_dropdown = gr.Dropdown(
825
  choices=ner_manager.model_names,
 
828
  info="Choose which model to use for common NER"
829
  )
830
 
831
+ # Add collapsible glossary with colors
832
+ glossary_html = """
833
+ <details style="margin: 10px 0; padding: 10px; background-color: #f8f9fa; border-radius: 8px; border: 1px solid #ddd;">
834
+ <summary style="cursor: pointer; font-weight: bold; padding: 5px; color: #1976d2;">
835
+ ℹ️ Detailed Entity Type Definitions (Click to expand)
836
+ </summary>
837
+ <div style="margin-top: 10px; padding: 10px;">
838
+ <dl style="margin: 0; font-size: 14px;">
839
+ """
840
+
841
+ for entity, definition in ENTITY_DEFINITIONS.items():
842
+ colour_key = "WORK OF ART" if entity == "Work of Art" else entity.upper()
843
+ colour = STANDARD_COLORS.get(colour_key, '#CCCCCC')
844
+ glossary_html += f"""
845
+ <div style="margin-bottom: 8px;">
846
+ <dt style="font-weight: bold; display: inline; color: {colour};">{entity}:</dt>
847
+ <dd style="display: inline; margin-left: 5px;">{definition}</dd>
848
+ </div>
849
+ """
850
+
851
+ glossary_html += """
852
+ </dl>
853
+ </div>
854
+ </details>
855
+ """
856
+ gr.HTML(glossary_html)
857
+
858
+ # Common entities checkboxes with colored labels
859
  standard_entities = gr.CheckboxGroup(
860
+ choices=create_colored_checkbox_labels(),
861
+ value=[f"● {entity}" for entity in ['PER', 'ORG', 'LOC', 'MISC']], # Default selection
862
+ label="Select Common Entities",
863
+ elem_id="standard-entities-checkbox"
864
  )
865
 
866
  # Select/Deselect All button
 
874
  return [], "β˜‘οΈ Select All"
875
  else:
876
  # If none selected, select all
877
+ return create_colored_checkbox_labels(), "πŸ”˜ Deselect All"
878
 
879
  select_all_btn.click(
880
  fn=toggle_all_entities,
 
883
  )
884
 
885
  with gr.Column():
886
+ gr.Markdown("### ✨ Custom Entity Types (Powered by GLiNER)")
887
  custom_entities = gr.Textbox(
888
  label="Custom Entities (comma-separated)",
889
  placeholder="e.g. relationships, occupations, skills, emotions",
 
895
  - emotions, actions, objects
896
  - medical conditions, treatments
897
  - financial terms, business roles
898
+
899
+ *GLiNER model will extract these custom entity types from your text*
900
  """)
901
 
902
  analyse_btn = gr.Button("πŸ” Analyse Text", variant="primary", size="lg")
 
914
  gr.Markdown("### πŸ“‹ Detailed Results")
915
  results_output = gr.HTML(label="Entity Results")
916
 
917
+ # Function to process with colored labels
918
+ def process_with_colored_labels(text, colored_labels, custom_entities, confidence_threshold, selected_model, progress=gr.Progress()):
919
+ # Remove the color dots from labels
920
+ standard_entities = [label.replace("● ", "") for label in colored_labels]
921
+ return process_text(text, standard_entities, custom_entities, confidence_threshold, selected_model, progress)
922
+
923
  # Connect the button to the processing function
924
  analyse_btn.click(
925
+ fn=process_with_colored_labels,
926
  inputs=[
927
  text_input,
928
  standard_entities,
 
938
  examples=[
939
  [
940
  "John Smith works at Google in New York. He graduated from Stanford University in 2015 and specialises in artificial intelligence research. His wife Sarah is a doctor at Mount Sinai Hospital.",
941
+ [f"● {entity}" for entity in ["PER", "ORG", "LOC", "DATE"]],
942
  "relationships, occupations, educational background",
943
  0.3,
944
  "entities_spacy_en_core_web_trf"
945
  ],
946
  [
947
  "The meeting between CEO Jane Doe and the board of directors at Microsoft headquarters in Seattle discussed the Q4 financial results and the new AI strategy for 2024.",
948
+ [f"● {entity}" for entity in ["PER", "ORG", "LOC", "DATE"]],
949
  "corporate roles, business events, financial terms",
950
  0.4,
951
  "entities_flair_ner-ontonotes-large"
952
  ],
953
  [
954
  "Dr. Emily Watson published a research paper on machine learning algorithms at MIT. She collaborates with her colleague Prof. David Chen on natural language processing projects.",
955
+ [f"● {entity}" for entity in ["PER", "ORG", "Work of Art"]],
956
  "academic titles, research topics, collaborations",
957
  0.3,
958
  "entities_gliner_knowledgator/modern-gliner-bi-large-v1.0"
 
966
  model_dropdown
967
  ]
968
  )
969
+
970
+ # Add model information links
971
+ gr.HTML("""
972
+ <hr style="margin-top: 40px; margin-bottom: 20px;">
973
+ <div style="background-color: #f8f9fa; padding: 20px; border-radius: 8px; margin-top: 20px;">
974
+ <h4 style="margin-top: 0;">📚 Model Information & Documentation</h4>
975
+ <p style="font-size: 14px; margin-bottom: 15px;">Learn more about the models used in this tool:</p>
976
+ <ul style="font-size: 14px; line-height: 1.8;">
977
+ <li><strong>entities_flair_ner-large:</strong>
978
+ <a href="https://huggingface.co/flair/ner-english-large" target="_blank" style="color: #1976d2;">
979
+ Flair NER English Large Model ↗
980
+ </a>
981
+ </li>
982
+ <li><strong>entities_spacy_en_core_web_trf:</strong>
983
+ <a href="https://spacy.io/models/en#en_core_web_trf" target="_blank" style="color: #1976d2;">
984
+ spaCy English Transformer Model ↗
985
+ </a>
986
+ </li>
987
+ <li><strong>entities_flair_ner-ontonotes-large:</strong>
988
+ <a href="https://huggingface.co/flair/ner-english-ontonotes-large" target="_blank" style="color: #1976d2;">
989
+ Flair OntoNotes Large Model ↗
990
+ </a>
991
+ </li>
992
+ <li><strong>entities_gliner_knowledgator/modern-gliner-bi-large-v1.0:</strong>
993
+ <a href="https://github.com/urchade/GLiNER/blob/main/README_Extended.md" target="_blank" style="color: #1976d2;">
994
+ GLiNER Extended Documentation ↗
995
+ </a>
996
+ </li>
997
+ </ul>
998
+ </div>
999
+ """)
1000
 
1001
  return demo
1002