SorrelC committed on
Commit
9709aea
·
verified ·
1 Parent(s): 2640e78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -30
app.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
5
  import warnings
6
  import random
7
  import re
 
8
  warnings.filterwarnings('ignore')
9
 
10
  # Common NER entity types
@@ -236,7 +237,7 @@ class HybridNERManager:
236
  return []
237
 
238
  def find_overlapping_entities(entities):
239
- """Find and share overlapping entities"""
240
  if not entities:
241
  return []
242
 
@@ -254,7 +255,7 @@ def find_overlapping_entities(entities):
254
  while j < len(sorted_entities):
255
  next_entity = sorted_entities[j]
256
 
257
- # Check if entities overlap
258
  if (current_entity['start'] <= next_entity['start'] < current_entity['end'] or
259
  next_entity['start'] <= current_entity['start'] < current_entity['end'] or
260
  current_entity['text'].lower() == next_entity['text'].lower()):
@@ -263,12 +264,28 @@ def find_overlapping_entities(entities):
263
  else:
264
  j += 1
265
 
266
- # Create shared entity
267
  if len(overlapping_entities) == 1:
268
  shared_entities.append(overlapping_entities[0])
269
  else:
270
- shared_entity = share_entities(overlapping_entities)
271
- shared_entities.append(shared_entity)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
  i += 1
274
 
@@ -386,7 +403,7 @@ def create_shared_entity_html(entity, entity_colors):
386
  f'border-radius: 3px; margin: 0 1px; '
387
  f'border: 2px solid #333; color: white; font-weight: bold;" '
388
  f'title="SHARED: {tooltip}">'
389
- f'{entity["text"]} 🧩🧩</span>')
390
 
391
  def create_entity_table_gradio_tabs(entities, entity_colors):
392
  """Create Gradio tabs for entity results"""
@@ -417,7 +434,7 @@ def create_entity_table_gradio_tabs(entities, entity_colors):
417
  for entity_type, entities_of_type in entity_groups.items():
418
  if entity_type == 'SHARED_ENTITIES':
419
  colour = '#666666'
420
- header = f"🧩🧩 Shared Entities ({len(entities_of_type)} found)"
421
 
422
  # Create table for shared entities
423
  table_html = f"""
@@ -453,7 +470,7 @@ def create_entity_table_gradio_tabs(entities, entity_colors):
453
  """
454
 
455
  table_html += "</tbody></table></div>"
456
- tab_contents[f"🧩🧩 SHARED ({len(entities_of_type)})"] = table_html
457
 
458
  else:
459
  colour = entity_colors.get(entity_type.upper(), '#f0f0f0')
@@ -589,6 +606,7 @@ def process_text(text, standard_entities, custom_entities_str, confidence_thresh
589
  progress(0.9, desc="Creating summary...")
590
 
591
  # Create summary with shared entities terminology
 
592
  total_entities = len(all_entities)
593
  shared_entities = find_overlapping_entities(all_entities)
594
  final_count = len(shared_entities)
@@ -614,7 +632,7 @@ def create_interface():
614
 
615
  Combine common NER categories with your own custom entity types! This tool uses both traditional NER models and GLiNER for comprehensive entity extraction.
616
 
617
- ## 🧩🧩 NEW: Overlapping entities are automatically shared with split-colour highlighting!
618
 
619
  ### How to use:
620
  1. **📝 Enter your text** in the text area below
@@ -737,8 +755,12 @@ def create_interface():
737
 
738
  # Create tabs HTML manually since Gradio dynamic tabs are complex
739
  if isinstance(tab_contents, dict) and tab_contents:
740
- tabs_html = """
741
- <div style="margin: 20px 0;">
 
 
 
 
742
  <div style="border-bottom: 2px solid #ddd; margin-bottom: 20px;">
743
  """
744
 
@@ -746,10 +768,14 @@ def create_interface():
746
  tab_names = list(tab_contents.keys())
747
  for i, tab_name in enumerate(tab_names):
748
  active_style = "background-color: #f8f9fa; border-bottom: 3px solid #4ECDC4;" if i == 0 else "background-color: #fff;"
 
749
  tabs_html += f"""
750
- <button onclick="showResultTab('{i}')" id="result-tab-{i}"
751
  style="padding: 12px 24px; margin-right: 5px; border: 1px solid #ddd;
752
- border-bottom: none; cursor: pointer; font-weight: bold; {active_style}">
 
 
 
753
  {tab_name}
754
  </button>
755
  """
@@ -760,36 +786,66 @@ def create_interface():
760
  for i, (tab_name, content) in enumerate(tab_contents.items()):
761
  display_style = "display: block;" if i == 0 else "display: none;"
762
  tabs_html += f"""
763
- <div id="result-content-{i}" style="{display_style}">
764
  {content}
765
  </div>
766
  """
767
 
768
- # Add JavaScript for tab switching
769
- tabs_html += """
770
  <script>
771
- function showResultTab(tabIndex) {
772
- // Hide all content
773
- var contents = document.querySelectorAll('[id^="result-content-"]');
774
- contents.forEach(function(content) {
 
 
775
  content.style.display = 'none';
776
- });
777
 
778
- // Reset all tab styles
779
- var tabs = document.querySelectorAll('[id^="result-tab-"]');
780
- tabs.forEach(function(tab) {
781
  tab.style.backgroundColor = '#fff';
782
  tab.style.borderBottom = 'none';
783
- });
784
 
785
  // Show selected content
786
- document.getElementById('result-content-' + tabIndex).style.display = 'block';
 
 
 
787
 
788
  // Highlight selected tab
789
- var activeTab = document.getElementById('result-tab-' + tabIndex);
790
- activeTab.style.backgroundColor = '#f8f9fa';
791
- activeTab.style.borderBottom = '3px solid #4ECDC4';
792
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793
  </script>
794
  </div>
795
  """
 
5
  import warnings
6
  import random
7
  import re
8
+ import time
9
  warnings.filterwarnings('ignore')
10
 
11
  # Common NER entity types
 
237
  return []
238
 
239
  def find_overlapping_entities(entities):
240
+ """Find and share overlapping entities - specifically entities found by BOTH common NER models AND custom entities"""
241
  if not entities:
242
  return []
243
 
 
255
  while j < len(sorted_entities):
256
  next_entity = sorted_entities[j]
257
 
258
+ # Check if entities overlap (same text span or overlapping positions)
259
  if (current_entity['start'] <= next_entity['start'] < current_entity['end'] or
260
  next_entity['start'] <= current_entity['start'] < current_entity['end'] or
261
  current_entity['text'].lower() == next_entity['text'].lower()):
 
264
  else:
265
  j += 1
266
 
267
+ # Create shared entity only if we have BOTH common and custom entities
268
  if len(overlapping_entities) == 1:
269
  shared_entities.append(overlapping_entities[0])
270
  else:
271
+ # Check if this is a true "shared" entity (common + custom)
272
+ has_common = False
273
+ has_custom = False
274
+
275
+ for entity in overlapping_entities:
276
+ source = entity.get('source', '')
277
+ if source in ['spaCy', 'GLiNER-Common'] or source.startswith('Flair-'):
278
+ has_common = True
279
+ elif source == 'GLiNER-Custom':
280
+ has_custom = True
281
+
282
+ if has_common and has_custom:
283
+ # This is a true shared entity (common + custom)
284
+ shared_entity = share_entities(overlapping_entities)
285
+ shared_entities.append(shared_entity)
286
+ else:
287
+ # These are just overlapping entities from the same source type, keep separate
288
+ shared_entities.extend(overlapping_entities)
289
 
290
  i += 1
291
 
 
403
  f'border-radius: 3px; margin: 0 1px; '
404
  f'border: 2px solid #333; color: white; font-weight: bold;" '
405
  f'title="SHARED: {tooltip}">'
406
+ f'{entity["text"]} 🀝</span>')
407
 
408
  def create_entity_table_gradio_tabs(entities, entity_colors):
409
  """Create Gradio tabs for entity results"""
 
434
  for entity_type, entities_of_type in entity_groups.items():
435
  if entity_type == 'SHARED_ENTITIES':
436
  colour = '#666666'
437
+ header = f"🀝 Shared Entities ({len(entities_of_type)} found)"
438
 
439
  # Create table for shared entities
440
  table_html = f"""
 
470
  """
471
 
472
  table_html += "</tbody></table></div>"
473
+ tab_contents[f"🀝 SHARED ({len(entities_of_type)})"] = table_html
474
 
475
  else:
476
  colour = entity_colors.get(entity_type.upper(), '#f0f0f0')
 
606
  progress(0.9, desc="Creating summary...")
607
 
608
  # Create summary with shared entities terminology
609
+ # Note: Shared entities are those found by BOTH common NER models AND custom GLiNER
610
  total_entities = len(all_entities)
611
  shared_entities = find_overlapping_entities(all_entities)
612
  final_count = len(shared_entities)
 
632
 
633
  Combine common NER categories with your own custom entity types! This tool uses both traditional NER models and GLiNER for comprehensive entity extraction.
634
 
635
+ ## 🀝 NEW: Overlapping entities are automatically shared with split-colour highlighting!
636
 
637
  ### How to use:
638
  1. **📝 Enter your text** in the text area below
 
755
 
756
  # Create tabs HTML manually since Gradio dynamic tabs are complex
757
  if isinstance(tab_contents, dict) and tab_contents:
758
+ # Generate unique IDs to avoid conflicts
759
+ import time
760
+ timestamp = str(int(time.time() * 1000))
761
+
762
+ tabs_html = f"""
763
+ <div style="margin: 20px 0;" id="tab-container-{timestamp}">
764
  <div style="border-bottom: 2px solid #ddd; margin-bottom: 20px;">
765
  """
766
 
 
768
  tab_names = list(tab_contents.keys())
769
  for i, tab_name in enumerate(tab_names):
770
  active_style = "background-color: #f8f9fa; border-bottom: 3px solid #4ECDC4;" if i == 0 else "background-color: #fff;"
771
+ default_bg = '#f8f9fa' if i == 0 else '#fff'
772
  tabs_html += f"""
773
+ <button onclick="showResultTab{timestamp}('{i}')" id="result-tab-{timestamp}-{i}"
774
  style="padding: 12px 24px; margin-right: 5px; border: 1px solid #ddd;
775
+ border-bottom: none; cursor: pointer; font-weight: bold; {active_style}
776
+ transition: all 0.3s ease;"
777
+ onmouseover="this.style.backgroundColor='#e9ecef'"
778
+ onmouseout="this.style.backgroundColor='{default_bg}'">
779
  {tab_name}
780
  </button>
781
  """
 
786
  for i, (tab_name, content) in enumerate(tab_contents.items()):
787
  display_style = "display: block;" if i == 0 else "display: none;"
788
  tabs_html += f"""
789
+ <div id="result-content-{timestamp}-{i}" style="{display_style}">
790
  {content}
791
  </div>
792
  """
793
 
794
+ # Add JavaScript for tab switching with unique function name
795
+ tabs_html += f"""
796
  <script>
797
+ function showResultTab{timestamp}(tabIndex) {{
798
+ console.log('Tab clicked:', tabIndex);
799
+
800
+ // Hide all content for this specific tab container
801
+ var contents = document.querySelectorAll('[id^="result-content-{timestamp}-"]');
802
+ contents.forEach(function(content) {{
803
  content.style.display = 'none';
804
+ }});
805
 
806
+ // Reset all tab styles for this specific tab container
807
+ var tabs = document.querySelectorAll('[id^="result-tab-{timestamp}-"]');
808
+ tabs.forEach(function(tab) {{
809
  tab.style.backgroundColor = '#fff';
810
  tab.style.borderBottom = 'none';
811
+ }});
812
 
813
  // Show selected content
814
+ var targetContent = document.getElementById('result-content-{timestamp}-' + tabIndex);
815
+ if (targetContent) {{
816
+ targetContent.style.display = 'block';
817
+ }}
818
 
819
  // Highlight selected tab
820
+ var activeTab = document.getElementById('result-tab-{timestamp}-' + tabIndex);
821
+ if (activeTab) {{
822
+ activeTab.style.backgroundColor = '#f8f9fa';
823
+ activeTab.style.borderBottom = '3px solid #4ECDC4';
824
+ }}
825
+ }}
826
+
827
+ // Ensure tabs are clickable after DOM load
828
+ document.addEventListener('DOMContentLoaded', function() {{
829
+ var tabs = document.querySelectorAll('[id^="result-tab-{timestamp}-"]');
830
+ tabs.forEach(function(tab, index) {{
831
+ tab.addEventListener('click', function(e) {{
832
+ e.preventDefault();
833
+ showResultTab{timestamp}(index.toString());
834
+ }});
835
+ }});
836
+ }});
837
+
838
+ // Also try immediate setup in case DOM is already loaded
839
+ setTimeout(function() {{
840
+ var tabs = document.querySelectorAll('[id^="result-tab-{timestamp}-"]');
841
+ tabs.forEach(function(tab, index) {{
842
+ tab.onclick = function(e) {{
843
+ e.preventDefault();
844
+ showResultTab{timestamp}(index.toString());
845
+ return false;
846
+ }};
847
+ }});
848
+ }}, 100);
849
  </script>
850
  </div>
851
  """