SorrelC committed on
Commit
6830d7b
·
verified ·
1 Parent(s): b2cfaad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +300 -239
app.py CHANGED
@@ -7,13 +7,13 @@ import random
7
  import re
8
  warnings.filterwarnings('ignore')
9
 
10
- # Standard NER entity types
11
  STANDARD_ENTITIES = [
12
  'DATE', 'EVENT', 'FAC', 'GPE', 'LANG', 'LOC',
13
- 'MISC', 'NORP', 'ORG', 'PER', 'PRODUCT', 'WORK_OF_ART'
14
  ]
15
 
16
- # Color schemes
17
  STANDARD_COLORS = {
18
  'DATE': '#FF6B6B', # Red
19
  'EVENT': '#4ECDC4', # Teal
@@ -26,10 +26,10 @@ STANDARD_COLORS = {
26
  'ORG': '#55A3FF', # Light Blue
27
  'PER': '#00B894', # Green
28
  'PRODUCT': '#E17055', # Orange-Red
29
- 'WORK_OF_ART': '#DDA0DD' # Plum
30
  }
31
 
32
- # Additional colors for custom entities
33
  CUSTOM_COLOR_PALETTE = [
34
  '#FF9F43', '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD',
35
  '#FF3838', '#2F3640', '#3742FA', '#2ED573', '#FFA502',
@@ -44,28 +44,16 @@ class HybridNERManager:
44
  self.flair_models = {}
45
  self.all_entity_colors = {}
46
  self.model_names = [
47
- 'spacy_en_core_web_sm',
48
- 'flair_ner-ontonotes-large',
49
- 'flair_ner-large',
50
- 'gliner_medium-v2.1'
51
  ]
52
 
53
- def load_gliner_model(self):
54
- """Load GLiNER model for custom entities"""
55
- if self.gliner_model is None:
56
- try:
57
- # Use a more stable model for HF Spaces
58
- self.gliner_model = GLiNER.from_pretrained("urchade/gliner_medium-v2.1")
59
- print("βœ“ GLiNER model loaded successfully")
60
- except Exception as e:
61
- print(f"Error loading GLiNER model: {str(e)}")
62
- return None
63
- return self.gliner_model
64
-
65
  def load_model(self, model_name):
66
  """Load the specified model"""
67
  try:
68
- if model_name == 'spacy_en_core_web_sm':
69
  return self.load_spacy_model()
70
  elif 'flair' in model_name:
71
  return self.load_flair_model(model_name)
@@ -76,16 +64,21 @@ class HybridNERManager:
76
  return None
77
 
78
  def load_spacy_model(self):
79
- """Load spaCy model for standard NER"""
80
  if self.spacy_model is None:
81
  try:
82
  import spacy
83
  try:
84
- self.spacy_model = spacy.load("en_core_web_sm")
85
- print("βœ“ spaCy model loaded successfully")
 
86
  except OSError:
87
- print("spaCy model not found. Using GLiNER for all entity types.")
88
- return None
 
 
 
 
89
  except Exception as e:
90
  print(f"Error loading spaCy model: {str(e)}")
91
  return None
@@ -98,15 +91,66 @@ class HybridNERManager:
98
  from flair.models import SequenceTagger
99
  if 'ontonotes' in model_name:
100
  model = SequenceTagger.load("flair/ner-english-ontonotes-large")
 
101
  else:
102
- model = SequenceTagger.load("flair/ner-english-large")
 
103
  self.flair_models[model_name] = model
104
- print(f"βœ“ {model_name} loaded successfully")
105
  except Exception as e:
106
  print(f"Error loading {model_name}: {str(e)}")
107
- return None
 
108
  return self.flair_models[model_name]
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def extract_spacy_entities(self, text, entity_types):
111
  """Extract entities using spaCy"""
112
  model = self.load_spacy_model()
@@ -131,35 +175,6 @@ class HybridNERManager:
131
  print(f"Error with spaCy extraction: {str(e)}")
132
  return []
133
 
134
- def assign_colors(self, standard_entities, custom_entities):
135
- """Assign colors to all entity types"""
136
- self.all_entity_colors = {}
137
-
138
- # Assign standard colors
139
- for entity in standard_entities:
140
- self.all_entity_colors[entity.upper()] = STANDARD_COLORS.get(entity, '#CCCCCC')
141
-
142
- # Assign custom colors
143
- for i, entity in enumerate(custom_entities):
144
- if i < len(CUSTOM_COLOR_PALETTE):
145
- self.all_entity_colors[entity.upper()] = CUSTOM_COLOR_PALETTE[i]
146
- else:
147
- # Generate random color if we run out
148
- self.all_entity_colors[entity.upper()] = f"#{random.randint(0, 0xFFFFFF):06x}"
149
-
150
- return self.all_entity_colors
151
-
152
- def extract_entities_by_model(self, text, entity_types, model_name, threshold=0.3):
153
- """Extract entities using the specified model"""
154
- if model_name == 'spacy_en_core_web_sm':
155
- return self.extract_spacy_entities(text, entity_types)
156
- elif 'flair' in model_name:
157
- return self.extract_flair_entities(text, entity_types, model_name)
158
- elif 'gliner' in model_name:
159
- return self.extract_gliner_entities(text, entity_types, threshold, is_custom=False)
160
- else:
161
- return []
162
-
163
  def extract_flair_entities(self, text, entity_types, model_name):
164
  """Extract entities using Flair"""
165
  model = self.load_flair_model(model_name)
@@ -172,7 +187,7 @@ class HybridNERManager:
172
  model.predict(sentence)
173
  entities = []
174
  for entity in sentence.get_spans('ner'):
175
- # Map Flair labels to our standard set
176
  label = entity.tag
177
  if label == 'PERSON':
178
  label = 'PER'
@@ -213,7 +228,7 @@ class HybridNERManager:
213
  'start': entity['start'],
214
  'end': entity['end'],
215
  'confidence': entity.get('score', 0.0),
216
- 'source': 'GLiNER-Custom' if is_custom else 'GLiNER-Standard'
217
  })
218
  return result
219
  except Exception as e:
@@ -241,7 +256,7 @@ def find_overlapping_entities(entities):
241
 
242
  # Check if entities overlap
243
  if (current_entity['start'] <= next_entity['start'] < current_entity['end'] or
244
- next_entity['start'] <= current_entity['start'] < next_entity['end'] or
245
  current_entity['text'].lower() == next_entity['text'].lower()):
246
  overlapping_entities.append(next_entity)
247
  sorted_entities.pop(j)
@@ -303,7 +318,7 @@ def create_highlighted_html(text, entities, entity_colors):
303
  html_parts.append(text[last_end:entity['start']])
304
 
305
  if entity.get('is_shared', False):
306
- # Handle shared entity with multiple colors
307
  html_parts.append(create_shared_entity_html(entity, entity_colors))
308
  else:
309
  # Handle single entity
@@ -326,39 +341,39 @@ def create_highlighted_html(text, entities, entity_colors):
326
  def create_single_entity_html(entity, entity_colors):
327
  """Create HTML for a single entity"""
328
  label = entity['label']
329
- color = entity_colors.get(label.upper(), '#CCCCCC')
330
  confidence = entity.get('confidence', 0.0)
331
  source = entity.get('source', 'Unknown')
332
 
333
- return (f'<span style="background-color: {color}; padding: 2px 4px; '
334
  f'border-radius: 3px; margin: 0 1px; '
335
- f'border: 1px solid {color}; color: white; font-weight: bold;" '
336
  f'title="{label} ({source}) - confidence: {confidence:.2f}">'
337
  f'{entity["text"]}</span>')
338
 
339
  def create_shared_entity_html(entity, entity_colors):
340
- """Create HTML for a shared entity with multiple colors"""
341
  labels = entity['labels']
342
  sources = entity['sources']
343
  confidences = entity['confidences']
344
 
345
- # Get colors for each label
346
- colors = []
347
  for label in labels:
348
- color = entity_colors.get(label.upper(), '#CCCCCC')
349
- colors.append(color)
350
 
351
  # Create gradient background
352
- if len(colors) == 2:
353
- gradient = f"linear-gradient(to right, {colors[0]} 50%, {colors[1]} 50%)"
354
  else:
355
- # For more colors, create equal segments
356
- segment_size = 100 / len(colors)
357
  gradient_parts = []
358
- for i, color in enumerate(colors):
359
  start = i * segment_size
360
  end = (i + 1) * segment_size
361
- gradient_parts.append(f"{color} {start}%, {color} {end}%")
362
  gradient = f"linear-gradient(to right, {', '.join(gradient_parts)})"
363
 
364
  # Create tooltip
@@ -371,12 +386,12 @@ def create_shared_entity_html(entity, entity_colors):
371
  f'border-radius: 3px; margin: 0 1px; '
372
  f'border: 2px solid #333; color: white; font-weight: bold;" '
373
  f'title="SHARED: {tooltip}">'
374
- f'{entity["text"]} πŸ”—</span>')
375
 
376
- def create_entity_table_html(entities, entity_colors):
377
- """Create HTML table with tabbed interface like the original"""
378
  if not entities:
379
- return "<p>No entities found.</p>"
380
 
381
  # Share overlapping entities
382
  shared_entities = find_overlapping_entities(entities)
@@ -394,80 +409,37 @@ def create_entity_table_html(entities, entity_colors):
394
  entity_groups[key].append(entity)
395
 
396
  if not entity_groups:
397
- return "<p>No entities found.</p>"
398
-
399
- # Create tabbed interface
400
- tab_html = "<div style='margin: 20px 0;'>"
401
-
402
- # Tab headers
403
- tab_html += "<div style='border-bottom: 2px solid #ddd; margin-bottom: 20px;'>"
404
- tab_headers = []
405
-
406
- for i, entity_type in enumerate(sorted(entity_groups.keys())):
407
- count = len(entity_groups[entity_type])
408
-
409
- if entity_type == 'SHARED_ENTITIES':
410
- color = '#666666'
411
- icon = "πŸ”—"
412
- display_name = "SHARED"
413
- else:
414
- color = entity_colors.get(entity_type.upper(), '#f0f0f0')
415
- # Determine if it's standard or custom
416
- is_standard = entity_type in STANDARD_ENTITIES
417
- icon = "🎯" if is_standard else "✨"
418
- display_name = entity_type
419
-
420
- active_style = f"background-color: #f8f9fa; border-bottom: 3px solid {color};" if i == 0 else "background-color: #fff;"
421
- tab_headers.append(f"""
422
- <button onclick="showTab('{entity_type}')" id="tab-{entity_type}"
423
- style="padding: 12px 24px; margin-right: 5px; border: 1px solid #ddd;
424
- border-bottom: none; cursor: pointer; font-weight: bold; {active_style}">
425
- {icon} {display_name} ({count})
426
- </button>
427
- """)
428
-
429
- tab_html += ''.join(tab_headers)
430
- tab_html += "</div>"
431
-
432
- # Tab content
433
- for i, entity_type in enumerate(sorted(entity_groups.keys())):
434
- entities_of_type = entity_groups[entity_type]
435
- display_style = "display: block;" if i == 0 else "display: none;"
436
 
 
 
 
 
437
  if entity_type == 'SHARED_ENTITIES':
438
- color = '#666666'
439
- header_text = f"πŸ”— Shared Entities ({len(entities_of_type)} found)"
440
- else:
441
- color = entity_colors.get(entity_type.upper(), '#f0f0f0')
442
- source_type = entities_of_type[0].get('source', 'Unknown')
443
- is_standard = entity_type in STANDARD_ENTITIES
444
- source_icon = "🎯 Standard NER" if is_standard else "✨ Custom GLiNER"
445
- header_text = f"{source_icon} - {entity_type} Entities ({len(entities_of_type)} found)"
446
-
447
- tab_html += f"""
448
- <div id="content-{entity_type}" style="{display_style}">
449
- <h4 style="color: {color}; margin-bottom: 15px;">{header_text}</h4>
450
- <table style="width: 100%; border-collapse: collapse; margin-bottom: 20px;">
451
- <thead>
452
- """
453
-
454
- if entity_type == 'SHARED_ENTITIES':
455
- tab_html += f"""
456
- <tr style="background-color: {color}; color: white;">
457
- <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Entity Text</th>
458
- <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">All Labels</th>
459
- <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Sources</th>
460
- <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Count</th>
461
- </tr>
462
- </thead>
463
- <tbody>
464
  """
465
-
466
  for entity in entities_of_type:
467
  labels_text = " | ".join(entity['labels'])
468
  sources_text = " | ".join(entity['sources'])
469
-
470
- tab_html += f"""
471
  <tr style="background-color: #fff;">
472
  <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{entity['text']}</td>
473
  <td style="padding: 10px; border: 1px solid #ddd;">{labels_text}</td>
@@ -479,32 +451,48 @@ def create_entity_table_html(entities, entity_colors):
479
  </td>
480
  </tr>
481
  """
 
 
 
 
482
  else:
483
- tab_html += f"""
484
- <tr style="background-color: {color}; color: white;">
485
- <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Entity Text</th>
486
- <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Confidence</th>
487
- <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Type</th>
488
- <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Source</th>
489
- </tr>
490
- </thead>
491
- <tbody>
 
 
 
 
 
 
 
 
 
 
 
 
492
  """
493
-
494
  # Sort by confidence score
495
  entities_of_type.sort(key=lambda x: x.get('confidence', 0), reverse=True)
496
-
497
  for entity in entities_of_type:
498
  confidence = entity.get('confidence', 0.0)
499
- confidence_color = "#28a745" if confidence > 0.7 else "#ffc107" if confidence > 0.4 else "#dc3545"
500
  source = entity.get('source', 'Unknown')
501
  source_badge = f"<span style='background-color: #007bff; color: white; padding: 2px 6px; border-radius: 10px; font-size: 11px;'>{source}</span>"
502
-
503
- tab_html += f"""
504
  <tr style="background-color: #fff;">
505
  <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{entity['text']}</td>
506
  <td style="padding: 10px; border: 1px solid #ddd;">
507
- <span style="color: {confidence_color}; font-weight: bold;">
508
  {confidence:.3f}
509
  </span>
510
  </td>
@@ -512,46 +500,15 @@ def create_entity_table_html(entities, entity_colors):
512
  <td style="padding: 10px; border: 1px solid #ddd;">{source_badge}</td>
513
  </tr>
514
  """
515
-
516
- tab_html += """
517
- </tbody>
518
- </table>
519
- </div>
520
- """
521
-
522
- # JavaScript for tab switching
523
- tab_html += """
524
- <script>
525
- function showTab(entityType) {
526
- // Hide all content
527
- var contents = document.querySelectorAll('[id^="content-"]');
528
- contents.forEach(function(content) {
529
- content.style.display = 'none';
530
- });
531
-
532
- // Reset all tab styles
533
- var tabs = document.querySelectorAll('[id^="tab-"]');
534
- tabs.forEach(function(tab) {
535
- tab.style.backgroundColor = '#fff';
536
- tab.style.borderBottom = 'none';
537
- });
538
-
539
- // Show selected content
540
- document.getElementById('content-' + entityType).style.display = 'block';
541
-
542
- // Highlight selected tab
543
- var activeTab = document.getElementById('tab-' + entityType);
544
- activeTab.style.backgroundColor = '#f8f9fa';
545
- activeTab.style.borderBottom = '3px solid #4ECDC4';
546
- }
547
- </script>
548
- """
549
-
550
- tab_html += "</div>"
551
- return tab_html
552
 
553
  def create_legend_html(entity_colors, standard_entities, custom_entities):
554
- """Create a legend showing entity colors"""
555
  if not entity_colors:
556
  return ""
557
 
@@ -560,11 +517,11 @@ def create_legend_html(entity_colors, standard_entities, custom_entities):
560
 
561
  if standard_entities:
562
  html += "<div style='margin-bottom: 15px;'>"
563
- html += "<h5 style='margin: 0 0 8px 0;'>🎯 Standard Entities:</h5>"
564
  html += "<div style='display: flex; flex-wrap: wrap; gap: 8px;'>"
565
  for entity_type in standard_entities:
566
- color = entity_colors.get(entity_type.upper(), '#ccc')
567
- html += f"<span style='background-color: {color}; padding: 4px 8px; border-radius: 15px; color: white; font-weight: bold; font-size: 12px;'>{entity_type}</span>"
568
  html += "</div></div>"
569
 
570
  if custom_entities:
@@ -572,8 +529,8 @@ def create_legend_html(entity_colors, standard_entities, custom_entities):
572
  html += "<h5 style='margin: 0 0 8px 0;'>✨ Custom Entities:</h5>"
573
  html += "<div style='display: flex; flex-wrap: wrap; gap: 8px;'>"
574
  for entity_type in custom_entities:
575
- color = entity_colors.get(entity_type.upper(), '#ccc')
576
- html += f"<span style='background-color: {color}; padding: 4px 8px; border-radius: 15px; color: white; font-weight: bold; font-size: 12px;'>{entity_type}</span>"
577
  html += "</div></div>"
578
 
579
  html += "</div>"
@@ -582,45 +539,55 @@ def create_legend_html(entity_colors, standard_entities, custom_entities):
582
  # Initialize the NER manager
583
  ner_manager = HybridNERManager()
584
 
585
- def process_text(text, standard_entities, custom_entities_str, confidence_threshold, selected_model):
586
- """Main processing function for Gradio interface"""
587
  if not text.strip():
588
- return "❌ Please enter some text to analyze", "", ""
589
 
 
 
590
  # Parse custom entities
591
  custom_entities = []
592
  if custom_entities_str.strip():
593
  custom_entities = [entity.strip() for entity in custom_entities_str.split(',') if entity.strip()]
594
 
595
- # Parse standard entities
596
  selected_standard = [entity for entity in standard_entities if entity]
597
 
598
  if not selected_standard and not custom_entities:
599
- return "❌ Please select at least one standard entity type OR enter custom entity types", "", ""
600
 
 
 
601
  all_entities = []
602
 
603
- # Extract standard entities using selected model
604
  if selected_standard and selected_model:
 
605
  standard_entities_results = ner_manager.extract_entities_by_model(text, selected_standard, selected_model, confidence_threshold)
606
  all_entities.extend(standard_entities_results)
607
 
608
  # Extract custom entities using GLiNER
609
  if custom_entities:
 
610
  custom_entity_results = ner_manager.extract_gliner_entities(text, custom_entities, confidence_threshold, is_custom=True)
611
  all_entities.extend(custom_entity_results)
612
 
613
  if not all_entities:
614
- return "❌ No entities found. Try lowering the confidence threshold or using different entity types.", "", ""
615
 
616
- # Assign colors
617
- entity_colors = ner_manager.assign_colors(selected_standard, custom_entities)
 
 
618
 
619
  # Create outputs
620
  legend_html = create_legend_html(entity_colors, selected_standard, custom_entities)
621
  highlighted_html = create_highlighted_html(text, all_entities, entity_colors)
622
- table_html = create_entity_table_html(all_entities, entity_colors)
623
 
 
 
624
  # Create summary with shared entities terminology
625
  total_entities = len(all_entities)
626
  shared_entities = find_overlapping_entities(all_entities)
@@ -635,7 +602,9 @@ def process_text(text, standard_entities, custom_entities_str, confidence_thresh
635
  - **Average confidence:** {sum(e.get('confidence', 0) for e in all_entities) / total_entities:.3f}
636
  """
637
 
638
- return summary, legend_html + highlighted_html, table_html
 
 
639
 
640
  # Create Gradio interface
641
  def create_interface():
@@ -643,23 +612,23 @@ def create_interface():
643
  gr.Markdown("""
644
  # 🎯 Hybrid NER + Custom GLiNER Entity Recognition Tool
645
 
646
- Combine standard NER categories with your own custom entity types! This tool uses both traditional NER models and GLiNER for comprehensive entity extraction.
647
 
648
- ## πŸ”— NEW: Overlapping entities are automatically shared with split-color highlighting!
649
 
650
  ### How to use:
651
  1. **πŸ“ Enter your text** in the text area below
652
- 2. **🎯 Select a model** from the dropdown for standard entities
653
- 3. **β˜‘οΈ Select standard entities** you want to find (PER, ORG, LOC, etc.)
654
  4. **✨ Add custom entities** (comma-separated) like "relationships, occupations, skills"
655
  5. **βš™οΈ Adjust confidence threshold**
656
- 6. **πŸ” Click "Analyze Text"** to see results with tabbed output
657
  """)
658
 
659
  with gr.Row():
660
  with gr.Column(scale=2):
661
  text_input = gr.Textbox(
662
- label="πŸ“ Text to Analyze",
663
  placeholder="Enter your text here...",
664
  lines=6,
665
  max_lines=10
@@ -676,21 +645,21 @@ def create_interface():
676
 
677
  with gr.Row():
678
  with gr.Column():
679
- gr.Markdown("### 🎯 Standard Entity Types")
680
 
681
  # Model selector
682
  model_dropdown = gr.Dropdown(
683
  choices=ner_manager.model_names,
684
  value=ner_manager.model_names[0],
685
- label="Select Model for Standard Entities",
686
- info="Choose which model to use for standard NER"
687
  )
688
 
689
- # Standard entities with select all functionality
690
  standard_entities = gr.CheckboxGroup(
691
  choices=STANDARD_ENTITIES,
692
  value=['PER', 'ORG', 'LOC', 'MISC'], # Default selection
693
- label="Select Standard Entities"
694
  )
695
 
696
  # Select/Deselect All button
@@ -727,7 +696,7 @@ def create_interface():
727
  - financial terms, business roles
728
  """)
729
 
730
- analyze_btn = gr.Button("πŸ” Analyze Text", variant="primary", size="lg")
731
 
732
  # Output sections
733
  with gr.Row():
@@ -736,12 +705,104 @@ def create_interface():
736
  with gr.Row():
737
  highlighted_output = gr.HTML(label="Highlighted Text")
738
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
739
  with gr.Row():
740
- table_output = gr.HTML(label="Detailed Results (Tabbed)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
 
742
  # Connect the button to the processing function
743
- analyze_btn.click(
744
- fn=process_text,
745
  inputs=[
746
  text_input,
747
  standard_entities,
@@ -749,32 +810,32 @@ def create_interface():
749
  confidence_threshold,
750
  model_dropdown
751
  ],
752
- outputs=[summary_output, highlighted_output, table_output]
753
  )
754
 
755
  # Add examples
756
  gr.Examples(
757
  examples=[
758
  [
759
- "John Smith works at Google in New York. He graduated from Stanford University in 2015 and specializes in artificial intelligence research. His wife Sarah is a doctor at Mount Sinai Hospital.",
760
  ["PER", "ORG", "LOC", "DATE"],
761
  "relationships, occupations, educational background",
762
  0.3,
763
- "spacy_en_core_web_sm"
764
  ],
765
  [
766
  "The meeting between CEO Jane Doe and the board of directors at Microsoft headquarters in Seattle discussed the Q4 financial results and the new AI strategy for 2024.",
767
  ["PER", "ORG", "LOC", "DATE"],
768
  "corporate roles, business events, financial terms",
769
  0.4,
770
- "flair_ner-ontonotes-large"
771
  ],
772
  [
773
  "Dr. Emily Watson published a research paper on machine learning algorithms at MIT. She collaborates with her colleague Prof. David Chen on natural language processing projects.",
774
- ["PER", "ORG", "WORK_OF_ART"],
775
  "academic titles, research topics, collaborations",
776
  0.3,
777
- "gliner_medium-v2.1"
778
  ]
779
  ],
780
  inputs=[
 
7
  import re
8
  warnings.filterwarnings('ignore')
9
 
10
+ # Common NER entity types
11
  STANDARD_ENTITIES = [
12
  'DATE', 'EVENT', 'FAC', 'GPE', 'LANG', 'LOC',
13
+ 'MISC', 'NORP', 'ORG', 'PER', 'PRODUCT', 'Work of Art'
14
  ]
15
 
16
+ # Colour schemes
17
  STANDARD_COLORS = {
18
  'DATE': '#FF6B6B', # Red
19
  'EVENT': '#4ECDC4', # Teal
 
26
  'ORG': '#55A3FF', # Light Blue
27
  'PER': '#00B894', # Green
28
  'PRODUCT': '#E17055', # Orange-Red
29
+ 'WORK OF ART': '#DDA0DD' # Plum
30
  }
31
 
32
+ # Additional colours for custom entities
33
  CUSTOM_COLOR_PALETTE = [
34
  '#FF9F43', '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD',
35
  '#FF3838', '#2F3640', '#3742FA', '#2ED573', '#FFA502',
 
44
  self.flair_models = {}
45
  self.all_entity_colors = {}
46
  self.model_names = [
47
+ 'entities_flair_ner-large',
48
+ 'entities_spacy_en_core_web_trf',
49
+ 'entities_flair_ner-ontonotes-large',
50
+ 'entities_gliner_knowledgator/modern-gliner-bi-large-v1.0'
51
  ]
52
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def load_model(self, model_name):
54
  """Load the specified model"""
55
  try:
56
+ if 'spacy' in model_name:
57
  return self.load_spacy_model()
58
  elif 'flair' in model_name:
59
  return self.load_flair_model(model_name)
 
64
  return None
65
 
66
  def load_spacy_model(self):
67
+ """Load spaCy model for common NER"""
68
  if self.spacy_model is None:
69
  try:
70
  import spacy
71
  try:
72
+ # Try transformer model first, fallback to small model
73
+ self.spacy_model = spacy.load("en_core_web_trf")
74
+ print("βœ“ spaCy transformer model loaded successfully")
75
  except OSError:
76
+ try:
77
+ self.spacy_model = spacy.load("en_core_web_sm")
78
+ print("βœ“ spaCy common model loaded successfully")
79
+ except OSError:
80
+ print("spaCy model not found. Using GLiNER for all entity types.")
81
+ return None
82
  except Exception as e:
83
  print(f"Error loading spaCy model: {str(e)}")
84
  return None
 
91
  from flair.models import SequenceTagger
92
  if 'ontonotes' in model_name:
93
  model = SequenceTagger.load("flair/ner-english-ontonotes-large")
94
+ print("βœ“ Flair OntoNotes model loaded successfully")
95
  else:
96
+ model = SequenceTagger.load("flair/ner-english-large")
97
+ print("βœ“ Flair large model loaded successfully")
98
  self.flair_models[model_name] = model
 
99
  except Exception as e:
100
  print(f"Error loading {model_name}: {str(e)}")
101
+ # Fallback to GLiNER
102
+ return self.load_gliner_model()
103
  return self.flair_models[model_name]
104
 
105
+ def load_gliner_model(self):
106
+ """Load GLiNER model for custom entities"""
107
+ if self.gliner_model is None:
108
+ try:
109
+ # Try the modern GLiNER model first, fallback to stable model
110
+ self.gliner_model = GLiNER.from_pretrained("knowledgator/gliner-bi-large-v1.0")
111
+ print("βœ“ GLiNER knowledgator model loaded successfully")
112
+ except Exception as e:
113
+ print(f"Primary GLiNER model failed: {str(e)}")
114
+ try:
115
+ # Fallback to stable model
116
+ self.gliner_model = GLiNER.from_pretrained("urchade/gliner_medium-v2.1")
117
+ print("βœ“ GLiNER fallback model loaded successfully")
118
+ except Exception as e2:
119
+ print(f"Error loading GLiNER model: {str(e2)}")
120
+ return None
121
+ return self.gliner_model
122
+
123
+ def assign_colours(self, standard_entities, custom_entities):
124
+ """Assign colours to all entity types"""
125
+ self.all_entity_colors = {}
126
+
127
+ # Assign common colours
128
+ for entity in standard_entities:
129
+ # Handle the special case of "Work of Art"
130
+ colour_key = "WORK OF ART" if entity == "Work of Art" else entity.upper()
131
+ self.all_entity_colors[entity.upper()] = STANDARD_COLORS.get(colour_key, '#CCCCCC')
132
+
133
+ # Assign custom colours
134
+ for i, entity in enumerate(custom_entities):
135
+ if i < len(CUSTOM_COLOR_PALETTE):
136
+ self.all_entity_colors[entity.upper()] = CUSTOM_COLOR_PALETTE[i]
137
+ else:
138
+ # Generate random colour if we run out
139
+ self.all_entity_colors[entity.upper()] = f"#{random.randint(0, 0xFFFFFF):06x}"
140
+
141
+ return self.all_entity_colors
142
+
143
+ def extract_entities_by_model(self, text, entity_types, model_name, threshold=0.3):
144
+ """Extract entities using the specified model"""
145
+ if 'spacy' in model_name:
146
+ return self.extract_spacy_entities(text, entity_types)
147
+ elif 'flair' in model_name:
148
+ return self.extract_flair_entities(text, entity_types, model_name)
149
+ elif 'gliner' in model_name:
150
+ return self.extract_gliner_entities(text, entity_types, threshold, is_custom=False)
151
+ else:
152
+ return []
153
+
154
  def extract_spacy_entities(self, text, entity_types):
155
  """Extract entities using spaCy"""
156
  model = self.load_spacy_model()
 
175
  print(f"Error with spaCy extraction: {str(e)}")
176
  return []
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  def extract_flair_entities(self, text, entity_types, model_name):
179
  """Extract entities using Flair"""
180
  model = self.load_flair_model(model_name)
 
187
  model.predict(sentence)
188
  entities = []
189
  for entity in sentence.get_spans('ner'):
190
+ # Map Flair labels to our common set
191
  label = entity.tag
192
  if label == 'PERSON':
193
  label = 'PER'
 
228
  'start': entity['start'],
229
  'end': entity['end'],
230
  'confidence': entity.get('score', 0.0),
231
+ 'source': 'GLiNER-Custom' if is_custom else 'GLiNER-Common'
232
  })
233
  return result
234
  except Exception as e:
 
256
 
257
  # Check if entities overlap
258
  if (current_entity['start'] <= next_entity['start'] < current_entity['end'] or
259
+ next_entity['start'] <= current_entity['start'] < current_entity['end'] or
260
  current_entity['text'].lower() == next_entity['text'].lower()):
261
  overlapping_entities.append(next_entity)
262
  sorted_entities.pop(j)
 
318
  html_parts.append(text[last_end:entity['start']])
319
 
320
  if entity.get('is_shared', False):
321
+ # Handle shared entity with multiple colours
322
  html_parts.append(create_shared_entity_html(entity, entity_colors))
323
  else:
324
  # Handle single entity
 
341
  def create_single_entity_html(entity, entity_colors):
342
  """Create HTML for a single entity"""
343
  label = entity['label']
344
+ colour = entity_colors.get(label.upper(), '#CCCCCC')
345
  confidence = entity.get('confidence', 0.0)
346
  source = entity.get('source', 'Unknown')
347
 
348
+ return (f'<span style="background-color: {colour}; padding: 2px 4px; '
349
  f'border-radius: 3px; margin: 0 1px; '
350
+ f'border: 1px solid {colour}; color: white; font-weight: bold;" '
351
  f'title="{label} ({source}) - confidence: {confidence:.2f}">'
352
  f'{entity["text"]}</span>')
353
 
354
  def create_shared_entity_html(entity, entity_colors):
355
+ """Create HTML for a shared entity with multiple colours"""
356
  labels = entity['labels']
357
  sources = entity['sources']
358
  confidences = entity['confidences']
359
 
360
+ # Get colours for each label
361
+ colours = []
362
  for label in labels:
363
+ colour = entity_colors.get(label.upper(), '#CCCCCC')
364
+ colours.append(colour)
365
 
366
  # Create gradient background
367
+ if len(colours) == 2:
368
+ gradient = f"linear-gradient(to right, {colours[0]} 50%, {colours[1]} 50%)"
369
  else:
370
+ # For more colours, create equal segments
371
+ segment_size = 100 / len(colours)
372
  gradient_parts = []
373
+ for i, colour in enumerate(colours):
374
  start = i * segment_size
375
  end = (i + 1) * segment_size
376
+ gradient_parts.append(f"{colour} {start}%, {colour} {end}%")
377
  gradient = f"linear-gradient(to right, {', '.join(gradient_parts)})"
378
 
379
  # Create tooltip
 
386
  f'border-radius: 3px; margin: 0 1px; '
387
  f'border: 2px solid #333; color: white; font-weight: bold;" '
388
  f'title="SHARED: {tooltip}">'
389
+ f'{entity["text"]} 🧩🧩</span>')
390
 
391
+ def create_entity_table_gradio_tabs(entities, entity_colors):
392
+ """Create Gradio tabs for entity results"""
393
  if not entities:
394
+ return "No entities found."
395
 
396
  # Share overlapping entities
397
  shared_entities = find_overlapping_entities(entities)
 
409
  entity_groups[key].append(entity)
410
 
411
  if not entity_groups:
412
+ return "No entities found."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
 
414
+ # Create content for each tab
415
+ tab_contents = {}
416
+
417
+ for entity_type, entities_of_type in entity_groups.items():
418
  if entity_type == 'SHARED_ENTITIES':
419
+ colour = '#666666'
420
+ header = f"🧩🧩 Shared Entities ({len(entities_of_type)} found)"
421
+
422
+ # Create table for shared entities
423
+ table_html = f"""
424
+ <div style="margin: 15px 0;">
425
+ <h4 style="color: {colour}; margin-bottom: 15px;">{header}</h4>
426
+ <table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd;">
427
+ <thead>
428
+ <tr style="background-color: {colour}; color: white;">
429
+ <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Entity Text</th>
430
+ <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">All Labels</th>
431
+ <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Sources</th>
432
+ <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Count</th>
433
+ </tr>
434
+ </thead>
435
+ <tbody>
 
 
 
 
 
 
 
 
 
436
  """
437
+
438
  for entity in entities_of_type:
439
  labels_text = " | ".join(entity['labels'])
440
  sources_text = " | ".join(entity['sources'])
441
+
442
+ table_html += f"""
443
  <tr style="background-color: #fff;">
444
  <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{entity['text']}</td>
445
  <td style="padding: 10px; border: 1px solid #ddd;">{labels_text}</td>
 
451
  </td>
452
  </tr>
453
  """
454
+
455
+ table_html += "</tbody></table></div>"
456
+ tab_contents[f"🧩🧩 SHARED ({len(entities_of_type)})"] = table_html
457
+
458
  else:
459
+ colour = entity_colors.get(entity_type.upper(), '#f0f0f0')
460
+ # Determine if it's common or custom
461
+ is_standard = entity_type in STANDARD_ENTITIES
462
+ icon = "🎯" if is_standard else "✨"
463
+ source_text = "Common NER" if is_standard else "Custom GLiNER"
464
+ header = f"{icon} {source_text} - {entity_type} ({len(entities_of_type)} found)"
465
+
466
+ # Create table for this entity type
467
+ table_html = f"""
468
+ <div style="margin: 15px 0;">
469
+ <h4 style="color: {colour}; margin-bottom: 15px;">{header}</h4>
470
+ <table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd;">
471
+ <thead>
472
+ <tr style="background-color: {colour}; color: white;">
473
+ <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Entity Text</th>
474
+ <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Confidence</th>
475
+ <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Type</th>
476
+ <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Source</th>
477
+ </tr>
478
+ </thead>
479
+ <tbody>
480
  """
481
+
482
  # Sort by confidence score
483
  entities_of_type.sort(key=lambda x: x.get('confidence', 0), reverse=True)
484
+
485
  for entity in entities_of_type:
486
  confidence = entity.get('confidence', 0.0)
487
+ confidence_colour = "#28a745" if confidence > 0.7 else "#ffc107" if confidence > 0.4 else "#dc3545"
488
  source = entity.get('source', 'Unknown')
489
  source_badge = f"<span style='background-color: #007bff; color: white; padding: 2px 6px; border-radius: 10px; font-size: 11px;'>{source}</span>"
490
+
491
+ table_html += f"""
492
  <tr style="background-color: #fff;">
493
  <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{entity['text']}</td>
494
  <td style="padding: 10px; border: 1px solid #ddd;">
495
+ <span style="color: {confidence_colour}; font-weight: bold;">
496
  {confidence:.3f}
497
  </span>
498
  </td>
 
500
  <td style="padding: 10px; border: 1px solid #ddd;">{source_badge}</td>
501
  </tr>
502
  """
503
+
504
+ table_html += "</tbody></table></div>"
505
+ tab_label = f"{icon} {entity_type} ({len(entities_of_type)})"
506
+ tab_contents[tab_label] = table_html
507
+
508
+ return tab_contents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
 
510
  def create_legend_html(entity_colors, standard_entities, custom_entities):
511
+ """Create a legend showing entity colours"""
512
  if not entity_colors:
513
  return ""
514
 
 
517
 
518
  if standard_entities:
519
  html += "<div style='margin-bottom: 15px;'>"
520
+ html += "<h5 style='margin: 0 0 8px 0;'>🎯 Common Entities:</h5>"
521
  html += "<div style='display: flex; flex-wrap: wrap; gap: 8px;'>"
522
  for entity_type in standard_entities:
523
+ colour = entity_colors.get(entity_type.upper(), '#ccc')
524
+ html += f"<span style='background-color: {colour}; padding: 4px 8px; border-radius: 15px; color: white; font-weight: bold; font-size: 12px;'>{entity_type}</span>"
525
  html += "</div></div>"
526
 
527
  if custom_entities:
 
529
  html += "<h5 style='margin: 0 0 8px 0;'>✨ Custom Entities:</h5>"
530
  html += "<div style='display: flex; flex-wrap: wrap; gap: 8px;'>"
531
  for entity_type in custom_entities:
532
+ colour = entity_colors.get(entity_type.upper(), '#ccc')
533
+ html += f"<span style='background-color: {colour}; padding: 4px 8px; border-radius: 15px; color: white; font-weight: bold; font-size: 12px;'>{entity_type}</span>"
534
  html += "</div></div>"
535
 
536
  html += "</div>"
 
539
  # Initialize the NER manager
540
  ner_manager = HybridNERManager()
541
 
542
+ def process_text(text, standard_entities, custom_entities_str, confidence_threshold, selected_model, progress=gr.Progress()):
543
+ """Main processing function for Gradio interface with progress tracking"""
544
  if not text.strip():
545
+ return "❌ Please enter some text to analyse", "", {}
546
 
547
+ progress(0.1, desc="Initialising...")
548
+
549
  # Parse custom entities
550
  custom_entities = []
551
  if custom_entities_str.strip():
552
  custom_entities = [entity.strip() for entity in custom_entities_str.split(',') if entity.strip()]
553
 
554
+ # Parse common entities
555
  selected_standard = [entity for entity in standard_entities if entity]
556
 
557
  if not selected_standard and not custom_entities:
558
+ return "❌ Please select at least one common entity type OR enter custom entity types", "", {}
559
 
560
+ progress(0.2, desc="Loading models...")
561
+
562
  all_entities = []
563
 
564
+ # Extract common entities using selected model
565
  if selected_standard and selected_model:
566
+ progress(0.4, desc="Extracting common entities...")
567
  standard_entities_results = ner_manager.extract_entities_by_model(text, selected_standard, selected_model, confidence_threshold)
568
  all_entities.extend(standard_entities_results)
569
 
570
  # Extract custom entities using GLiNER
571
  if custom_entities:
572
+ progress(0.6, desc="Extracting custom entities...")
573
  custom_entity_results = ner_manager.extract_gliner_entities(text, custom_entities, confidence_threshold, is_custom=True)
574
  all_entities.extend(custom_entity_results)
575
 
576
  if not all_entities:
577
+ return "❌ No entities found. Try lowering the confidence threshold or using different entity types.", "", {}
578
 
579
+ progress(0.8, desc="Processing results...")
580
+
581
+ # Assign colours
582
+ entity_colors = ner_manager.assign_colours(selected_standard, custom_entities)
583
 
584
  # Create outputs
585
  legend_html = create_legend_html(entity_colors, selected_standard, custom_entities)
586
  highlighted_html = create_highlighted_html(text, all_entities, entity_colors)
587
+ tab_contents = create_entity_table_gradio_tabs(all_entities, entity_colors)
588
 
589
+ progress(0.9, desc="Creating summary...")
590
+
591
  # Create summary with shared entities terminology
592
  total_entities = len(all_entities)
593
  shared_entities = find_overlapping_entities(all_entities)
 
602
  - **Average confidence:** {sum(e.get('confidence', 0) for e in all_entities) / total_entities:.3f}
603
  """
604
 
605
+ progress(1.0, desc="Complete!")
606
+
607
+ return summary, legend_html + highlighted_html, tab_contents
608
 
609
  # Create Gradio interface
610
  def create_interface():
 
612
  gr.Markdown("""
613
  # 🎯 Hybrid NER + Custom GLiNER Entity Recognition Tool
614
 
615
+ Combine common NER categories with your own custom entity types! This tool uses both traditional NER models and GLiNER for comprehensive entity extraction.
616
 
617
+ ## 🧩🧩 NEW: Overlapping entities are automatically shared with split-colour highlighting!
618
 
619
  ### How to use:
620
  1. **πŸ“ Enter your text** in the text area below
621
+ 2. **🎯 Select a model** from the dropdown for common entities
622
+ 3. **β˜‘οΈ Select common entities** you want to find (PER, ORG, LOC, etc.)
623
  4. **✨ Add custom entities** (comma-separated) like "relationships, occupations, skills"
624
  5. **βš™οΈ Adjust confidence threshold**
625
+ 6. **πŸ” Click "Analyse Text"** to see results with tabbed output
626
  """)
627
 
628
  with gr.Row():
629
  with gr.Column(scale=2):
630
  text_input = gr.Textbox(
631
+ label="πŸ“ Text to Analyse",
632
  placeholder="Enter your text here...",
633
  lines=6,
634
  max_lines=10
 
645
 
646
  with gr.Row():
647
  with gr.Column():
648
+ gr.Markdown("### 🎯 Common Entity Types")
649
 
650
  # Model selector
651
  model_dropdown = gr.Dropdown(
652
  choices=ner_manager.model_names,
653
  value=ner_manager.model_names[0],
654
+ label="Select Model for Common Entities",
655
+ info="Choose which model to use for common NER"
656
  )
657
 
658
+ # Common entities with select all functionality
659
  standard_entities = gr.CheckboxGroup(
660
  choices=STANDARD_ENTITIES,
661
  value=['PER', 'ORG', 'LOC', 'MISC'], # Default selection
662
+ label="Select Common Entities"
663
  )
664
 
665
  # Select/Deselect All button
 
696
  - financial terms, business roles
697
  """)
698
 
699
+ analyse_btn = gr.Button("πŸ” Analyse Text", variant="primary", size="lg")
700
 
701
  # Output sections
702
  with gr.Row():
 
705
  with gr.Row():
706
  highlighted_output = gr.HTML(label="Highlighted Text")
707
 
708
+ # Create dynamic tabs for results
709
+ results_tabs = gr.State({})
710
+
711
def update_tabs(tab_contents):
    """Wrap each tab's HTML string in a ``gr.HTML`` component.

    Args:
        tab_contents: Mapping of tab label -> HTML markup for that tab.

    Returns:
        A dict of tab label -> ``gr.HTML`` component when ``tab_contents``
        is a non-empty dict. Otherwise a single-entry dict whose key is a
        placeholder ``gr.HTML("No results to display")`` component and whose
        value is ``gr.update(visible=True)``.

    NOTE(review): no event wiring visible in this part of the file calls
    this helper -- confirm whether it is still needed.
    """
    has_results = isinstance(tab_contents, dict) and bool(tab_contents)
    if not has_results:
        # Nothing usable to render: return the placeholder mapping.
        return {gr.HTML("No results to display"): gr.update(visible=True)}

    # One HTML component per tab, keyed by the tab's label.
    return {label: gr.HTML(markup) for label, markup in tab_contents.items()}
722
+
723
+ # Results section with tabs
724
  with gr.Row():
725
+ with gr.Column():
726
+ gr.Markdown("### πŸ“‹ Detailed Results")
727
+
728
+ # We'll update this section dynamically
729
+ results_container = gr.HTML(label="Results")
730
+
731
+ # Function to process and display results
732
def process_and_display(text, standard_entities, custom_entities, confidence_threshold, selected_model):
    """Run the NER pipeline and render the per-type tables as an HTML tab widget.

    Delegates the analysis to ``process_text`` and converts the returned
    ``tab_contents`` mapping (tab label -> table HTML) into one HTML string
    with a row of tab buttons and one panel per tab. The first tab is shown
    initially.

    Args:
        text: Text to analyse.
        standard_entities: Selected common entity labels.
        custom_entities: Comma-separated custom entity labels (raw string).
        confidence_threshold: Minimum confidence passed to the extractors.
        selected_model: Name of the model used for the common entities.

    Returns:
        A ``(summary, highlighted, results_display)`` tuple. ``summary`` and
        ``highlighted`` are passed through from ``process_text``;
        ``results_display`` is the tab widget HTML, or a plain message when
        ``tab_contents`` is not a non-empty dict.
    """
    # Get results from the main processing function.
    summary, highlighted, tab_contents = process_text(
        text, standard_entities, custom_entities, confidence_threshold, selected_model
    )

    if not (isinstance(tab_contents, dict) and tab_contents):
        # process_text / the table builder hand back a message string (or an
        # empty dict) when there is nothing to tabulate.
        results_display = str(tab_contents) if tab_contents else "No results to display"
        return summary, highlighted, results_display

    # Tab switching is done by an inline handler rather than a <script>
    # element: markup injected into the page as HTML does not have its
    # <script> tags executed, so a globally defined switch function would
    # never exist and the buttons would be dead. The handler is scoped to
    # the widget's own wrapper via closest(), and contains no double quotes
    # so it can sit inside a double-quoted attribute.
    switch_js = (
        "var btn=this;"
        "var root=btn.closest('.ner-result-tabs');"
        "var idx=btn.getAttribute('data-tab');"
        "root.querySelectorAll('.ner-result-content').forEach(function(c){"
        "c.style.display=(c.getAttribute('data-tab')===idx)?'block':'none';"
        "});"
        "root.querySelectorAll('.ner-result-tab').forEach(function(t){"
        "var on=(t===btn);"
        "t.style.backgroundColor=on?'#f8f9fa':'#fff';"
        "t.style.borderBottom=on?'3px solid #4ECDC4':'none';"
        "});"
    )

    parts = [
        """
        <div class="ner-result-tabs" style="margin: 20px 0;">
        <div style="border-bottom: 2px solid #ddd; margin-bottom: 20px;">
        """
    ]

    # Tab buttons: the first one starts out highlighted.
    for i, tab_name in enumerate(tab_contents):
        active_style = (
            "background-color: #f8f9fa; border-bottom: 3px solid #4ECDC4;"
            if i == 0
            else "background-color: #fff;"
        )
        parts.append(f"""
        <button type="button" class="ner-result-tab" data-tab="{i}" id="result-tab-{i}"
                onclick="{switch_js}"
                style="padding: 12px 24px; margin-right: 5px; border: 1px solid #ddd;
                       border-bottom: none; cursor: pointer; font-weight: bold; {active_style}">
            {tab_name}
        </button>
        """)

    parts.append("</div>")

    # Tab panels: only the first one is visible initially.
    for i, content in enumerate(tab_contents.values()):
        display_style = "display: block;" if i == 0 else "display: none;"
        parts.append(f"""
        <div class="ner-result-content" data-tab="{i}" id="result-content-{i}" style="{display_style}">
            {content}
        </div>
        """)

    parts.append("</div>")

    results_display = "".join(parts)
    return summary, highlighted, results_display
802
 
803
  # Connect the button to the processing function
804
+ analyse_btn.click(
805
+ fn=process_and_display,
806
  inputs=[
807
  text_input,
808
  standard_entities,
 
810
  confidence_threshold,
811
  model_dropdown
812
  ],
813
+ outputs=[summary_output, highlighted_output, results_container]
814
  )
815
 
816
  # Add examples
817
  gr.Examples(
818
  examples=[
819
  [
820
+ "John Smith works at Google in New York. He graduated from Stanford University in 2015 and specialises in artificial intelligence research. His wife Sarah is a doctor at Mount Sinai Hospital.",
821
  ["PER", "ORG", "LOC", "DATE"],
822
  "relationships, occupations, educational background",
823
  0.3,
824
+ "entities_spacy_en_core_web_trf"
825
  ],
826
  [
827
  "The meeting between CEO Jane Doe and the board of directors at Microsoft headquarters in Seattle discussed the Q4 financial results and the new AI strategy for 2024.",
828
  ["PER", "ORG", "LOC", "DATE"],
829
  "corporate roles, business events, financial terms",
830
  0.4,
831
+ "entities_flair_ner-ontonotes-large"
832
  ],
833
  [
834
  "Dr. Emily Watson published a research paper on machine learning algorithms at MIT. She collaborates with her colleague Prof. David Chen on natural language processing projects.",
835
+ ["PER", "ORG", "Work of Art"],
836
  "academic titles, research topics, collaborations",
837
  0.3,
838
+ "entities_gliner_knowledgator/modern-gliner-bi-large-v1.0"
839
  ]
840
  ],
841
  inputs=[