SorrelC committed on
Commit
2ec2f56
·
verified ·
1 Parent(s): e7a516c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -14
app.py CHANGED
@@ -30,6 +30,22 @@ STANDARD_COLORS = {
30
  'WORK OF ART': '#DDA0DD' # Plum
31
  }
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # Additional colours for custom entities
34
  CUSTOM_COLOR_PALETTE = [
35
  '#FF9F43', '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD',
@@ -734,19 +750,25 @@ def process_text(text, standard_entities, custom_entities_str, confidence_thresh
734
  def create_interface():
735
  with gr.Blocks(title="Hybrid NER + GLiNER Tool", theme=gr.themes.Soft()) as demo:
736
  gr.Markdown("""
737
- # 🎯 Named Entity Recognition (NER) Explorer Tool
738
 
739
  Combine common NER categories with your own custom entity types! This tool uses both traditional NER models and GLiNER for comprehensive entity extraction.
740
-
741
- ## 🤝 Overlapping entities are automatically shared with split-colour highlighting!
742
-
743
  ### How to use:
744
  1. **📝 Enter your text** in the text area below
745
  2. **🎯 Select a model** from the dropdown for common entities
746
  3. **☑️ Select common entities** you want to find (PER, ORG, LOC, etc.)
747
  4. **✨ Add custom entities** (comma-separated) like "relationships, occupations, skills" - powered by GLiNER
748
  5. **⚙️ Adjust confidence threshold**
749
- 6. **🔍 Click "Analyse Text"** to see results with organized output
 
 
 
 
 
 
 
 
750
  """)
751
 
752
  with gr.Row():
@@ -806,7 +828,7 @@ def create_interface():
806
  )
807
 
808
  with gr.Column():
809
- gr.Markdown("### ✨ Custom Entity Types")
810
  custom_entities = gr.Textbox(
811
  label="Custom Entities (comma-separated)",
812
  placeholder="e.g. relationships, occupations, skills, emotions",
@@ -818,8 +840,71 @@ def create_interface():
818
  - emotions, actions, objects
819
  - medical conditions, treatments
820
  - financial terms, business roles
 
 
821
  """)
822
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
823
  analyse_btn = gr.Button("🔍 Analyse Text", variant="primary", size="lg")
824
 
825
  # Output sections
@@ -848,7 +933,7 @@ def create_interface():
848
  outputs=[summary_output, highlighted_output, results_output]
849
  )
850
 
851
- # Add examples
852
  gr.Examples(
853
  examples=[
854
  [
@@ -858,13 +943,6 @@ def create_interface():
858
  0.3,
859
  "entities_spacy_en_core_web_trf"
860
  ],
861
- [
862
- "The meeting between CEO Jane Doe and the board of directors at Microsoft headquarters in Seattle discussed the Q4 financial results and the new AI strategy for 2024.",
863
- ["PER", "ORG", "LOC", "DATE"],
864
- "corporate roles, business events, financial terms",
865
- 0.4,
866
- "entities_flair_ner-ontonotes-large"
867
- ],
868
  [
869
  "Dr. Emily Watson published a research paper on machine learning algorithms at MIT. She collaborates with her colleague Prof. David Chen on natural language processing projects.",
870
  ["PER", "ORG", "Work of Art"],
@@ -881,6 +959,7 @@ def create_interface():
881
  model_dropdown
882
  ]
883
  )
 
884
  # Add model information links
885
  gr.HTML("""
886
  <hr style="margin-top: 40px; margin-bottom: 20px;">
 
30
  'WORK OF ART': '#DDA0DD' # Plum
31
  }
32
 
33
+ # Entity definitions for glossary
34
+ ENTITY_DEFINITIONS = {
35
+ 'DATE': 'Absolute or relative dates or periods',
36
+ 'EVENT': 'Named hurricanes, battles, wars, sports events, etc.',
37
+ 'FAC': 'Facilities - Buildings, airports, highways, bridges, etc.',
38
+ 'GPE': 'Geopolitical entities - Countries, cities, states',
39
+ 'LANG': 'Any named language',
40
+ 'LOC': 'Non-GPE locations - Mountain ranges, bodies of water',
41
+ 'MISC': 'Miscellaneous entities - Things that don\'t fit elsewhere',
42
+ 'NORP': 'Nationalities or religious or political groups',
43
+ 'ORG': 'Organizations - Companies, agencies, institutions, etc.',
44
+ 'PER': 'People, including fictional characters',
45
+ 'PRODUCT': 'Objects, vehicles, foods, etc. (Not services)',
46
+ 'Work of Art': 'Titles of books, songs, movies, paintings, etc.'
47
+ }
48
+
49
  # Additional colours for custom entities
50
  CUSTOM_COLOR_PALETTE = [
51
  '#FF9F43', '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD',
 
750
  def create_interface():
751
  with gr.Blocks(title="Hybrid NER + GLiNER Tool", theme=gr.themes.Soft()) as demo:
752
  gr.Markdown("""
753
+ # Named Entity Recognition (NER) Explorer Tool
754
 
755
  Combine common NER categories with your own custom entity types! This tool uses both traditional NER models and GLiNER for comprehensive entity extraction.
756
+
 
 
757
  ### How to use:
758
  1. **📝 Enter your text** in the text area below
759
  2. **🎯 Select a model** from the dropdown for common entities
760
  3. **☑️ Select common entities** you want to find (PER, ORG, LOC, etc.)
761
  4. **✨ Add custom entities** (comma-separated) like "relationships, occupations, skills" - powered by GLiNER
762
  5. **⚙️ Adjust confidence threshold**
763
+ 6. **🔍 Click "Analyse Text"** to see results with organized output
764
+ (Common/custom entities which overlap are shown with split-colour highlighting)
765
+ """)
766
+
767
+ # Add tip box
768
+ gr.HTML("""
769
+ <div style="background-color: #fff3cd; border: 1px solid #ffeaa7; border-radius: 8px; padding: 12px; margin: 15px 0;">
770
+ <strong style="color: #856404;">💡 Top tip:</strong> All models can both miss entities and/or miss categorise entity types - so keep an eye out for this.
771
+ </div>
772
  """)
773
 
774
  with gr.Row():
 
828
  )
829
 
830
  with gr.Column():
831
+ gr.Markdown("### ✨ Custom Entity Types (Powered by GLiNER)")
832
  custom_entities = gr.Textbox(
833
  label="Custom Entities (comma-separated)",
834
  placeholder="e.g. relationships, occupations, skills, emotions",
 
840
  - emotions, actions, objects
841
  - medical conditions, treatments
842
  - financial terms, business roles
843
+
844
+ *GLiNER model will extract these custom entity types from your text*
845
  """)
846
 
847
+ # Add glossary here (Option 1: below selection box but above analyse button)
848
+ gr.HTML("""
849
+ <details style="margin: 20px 0; padding: 10px; background-color: #f8f9fa; border-radius: 8px; border: 1px solid #ddd;">
850
+ <summary style="cursor: pointer; font-weight: bold; padding: 5px; color: #1976d2;">
851
+ ℹ️ Entity Type Definitions (Click to expand)
852
+ </summary>
853
+ <div style="margin-top: 10px; padding: 10px;">
854
+ <dl style="margin: 0; font-size: 14px;">
855
+ <div style="margin-bottom: 8px;">
856
+ <dt style="font-weight: bold; display: inline; color: #00B894;">PER:</dt>
857
+ <dd style="display: inline; margin-left: 5px;">People, including fictional characters</dd>
858
+ </div>
859
+ <div style="margin-bottom: 8px;">
860
+ <dt style="font-weight: bold; display: inline; color: #55A3FF;">ORG:</dt>
861
+ <dd style="display: inline; margin-left: 5px;">Organizations - Companies, agencies, institutions, etc.</dd>
862
+ </div>
863
+ <div style="margin-bottom: 8px;">
864
+ <dt style="font-weight: bold; display: inline; color: #A0E7E5;">LOC:</dt>
865
+ <dd style="display: inline; margin-left: 5px;">Non-GPE locations - Mountain ranges, bodies of water</dd>
866
+ </div>
867
+ <div style="margin-bottom: 8px;">
868
+ <dt style="font-weight: bold; display: inline; color: #F9CA24;">GPE:</dt>
869
+ <dd style="display: inline; margin-left: 5px;">Geopolitical entities - Countries, cities, states</dd>
870
+ </div>
871
+ <div style="margin-bottom: 8px;">
872
+ <dt style="font-weight: bold; display: inline; color: #45B7D1;">FAC:</dt>
873
+ <dd style="display: inline; margin-left: 5px;">Facilities - Buildings, airports, highways, bridges, etc.</dd>
874
+ </div>
875
+ <div style="margin-bottom: 8px;">
876
+ <dt style="font-weight: bold; display: inline; color: #FF6B6B;">DATE:</dt>
877
+ <dd style="display: inline; margin-left: 5px;">Absolute or relative dates or periods</dd>
878
+ </div>
879
+ <div style="margin-bottom: 8px;">
880
+ <dt style="font-weight: bold; display: inline; color: #4ECDC4;">EVENT:</dt>
881
+ <dd style="display: inline; margin-left: 5px;">Named hurricanes, battles, wars, sports events, etc.</dd>
882
+ </div>
883
+ <div style="margin-bottom: 8px;">
884
+ <dt style="font-weight: bold; display: inline; color: #8E8E93;">NORP:</dt>
885
+ <dd style="display: inline; margin-left: 5px;">Nationalities or religious or political groups</dd>
886
+ </div>
887
+ <div style="margin-bottom: 8px;">
888
+ <dt style="font-weight: bold; display: inline; color: #6C5CE7;">LANG:</dt>
889
+ <dd style="display: inline; margin-left: 5px;">Any named language</dd>
890
+ </div>
891
+ <div style="margin-bottom: 8px;">
892
+ <dt style="font-weight: bold; display: inline; color: #FD79A8;">MISC:</dt>
893
+ <dd style="display: inline; margin-left: 5px;">Miscellaneous entities - Things that don't fit elsewhere</dd>
894
+ </div>
895
+ <div style="margin-bottom: 8px;">
896
+ <dt style="font-weight: bold; display: inline; color: #E17055;">PRODUCT:</dt>
897
+ <dd style="display: inline; margin-left: 5px;">Objects, vehicles, foods, etc. (Not services)</dd>
898
+ </div>
899
+ <div style="margin-bottom: 8px;">
900
+ <dt style="font-weight: bold; display: inline; color: #DDA0DD;">Work of Art:</dt>
901
+ <dd style="display: inline; margin-left: 5px;">Titles of books, songs, movies, paintings, etc.</dd>
902
+ </div>
903
+ </dl>
904
+ </div>
905
+ </details>
906
+ """)
907
+
908
  analyse_btn = gr.Button("🔍 Analyse Text", variant="primary", size="lg")
909
 
910
  # Output sections
 
933
  outputs=[summary_output, highlighted_output, results_output]
934
  )
935
 
936
+ # Add examples (removed the financial terms example as requested)
937
  gr.Examples(
938
  examples=[
939
  [
 
943
  0.3,
944
  "entities_spacy_en_core_web_trf"
945
  ],
 
 
 
 
 
 
 
946
  [
947
  "Dr. Emily Watson published a research paper on machine learning algorithms at MIT. She collaborates with her colleague Prof. David Chen on natural language processing projects.",
948
  ["PER", "ORG", "Work of Art"],
 
959
  model_dropdown
960
  ]
961
  )
962
+
963
  # Add model information links
964
  gr.HTML("""
965
  <hr style="margin-top: 40px; margin-bottom: 20px;">