Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import pandas as pd
|
|
| 5 |
import warnings
|
| 6 |
import random
|
| 7 |
import re
|
|
|
|
| 8 |
warnings.filterwarnings('ignore')
|
| 9 |
|
| 10 |
# Common NER entity types
|
|
@@ -236,7 +237,7 @@ class HybridNERManager:
|
|
| 236 |
return []
|
| 237 |
|
| 238 |
def find_overlapping_entities(entities):
|
| 239 |
-
"""Find and share overlapping entities"""
|
| 240 |
if not entities:
|
| 241 |
return []
|
| 242 |
|
|
@@ -254,7 +255,7 @@ def find_overlapping_entities(entities):
|
|
| 254 |
while j < len(sorted_entities):
|
| 255 |
next_entity = sorted_entities[j]
|
| 256 |
|
| 257 |
-
# Check if entities overlap
|
| 258 |
if (current_entity['start'] <= next_entity['start'] < current_entity['end'] or
|
| 259 |
next_entity['start'] <= current_entity['start'] < current_entity['end'] or
|
| 260 |
current_entity['text'].lower() == next_entity['text'].lower()):
|
|
@@ -263,12 +264,28 @@ def find_overlapping_entities(entities):
|
|
| 263 |
else:
|
| 264 |
j += 1
|
| 265 |
|
| 266 |
-
# Create shared entity
|
| 267 |
if len(overlapping_entities) == 1:
|
| 268 |
shared_entities.append(overlapping_entities[0])
|
| 269 |
else:
|
| 270 |
-
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
i += 1
|
| 274 |
|
|
@@ -386,7 +403,7 @@ def create_shared_entity_html(entity, entity_colors):
|
|
| 386 |
f'border-radius: 3px; margin: 0 1px; '
|
| 387 |
f'border: 2px solid #333; color: white; font-weight: bold;" '
|
| 388 |
f'title="SHARED: {tooltip}">'
|
| 389 |
-
f'{entity["text"]}
|
| 390 |
|
| 391 |
def create_entity_table_gradio_tabs(entities, entity_colors):
|
| 392 |
"""Create Gradio tabs for entity results"""
|
|
@@ -417,7 +434,7 @@ def create_entity_table_gradio_tabs(entities, entity_colors):
|
|
| 417 |
for entity_type, entities_of_type in entity_groups.items():
|
| 418 |
if entity_type == 'SHARED_ENTITIES':
|
| 419 |
colour = '#666666'
|
| 420 |
-
header = f"
|
| 421 |
|
| 422 |
# Create table for shared entities
|
| 423 |
table_html = f"""
|
|
@@ -453,7 +470,7 @@ def create_entity_table_gradio_tabs(entities, entity_colors):
|
|
| 453 |
"""
|
| 454 |
|
| 455 |
table_html += "</tbody></table></div>"
|
| 456 |
-
tab_contents[f"
|
| 457 |
|
| 458 |
else:
|
| 459 |
colour = entity_colors.get(entity_type.upper(), '#f0f0f0')
|
|
@@ -589,6 +606,7 @@ def process_text(text, standard_entities, custom_entities_str, confidence_thresh
|
|
| 589 |
progress(0.9, desc="Creating summary...")
|
| 590 |
|
| 591 |
# Create summary with shared entities terminology
|
|
|
|
| 592 |
total_entities = len(all_entities)
|
| 593 |
shared_entities = find_overlapping_entities(all_entities)
|
| 594 |
final_count = len(shared_entities)
|
|
@@ -614,7 +632,7 @@ def create_interface():
|
|
| 614 |
|
| 615 |
Combine common NER categories with your own custom entity types! This tool uses both traditional NER models and GLiNER for comprehensive entity extraction.
|
| 616 |
|
| 617 |
-
##
|
| 618 |
|
| 619 |
### How to use:
|
| 620 |
1. **📝 Enter your text** in the text area below
|
|
@@ -737,8 +755,12 @@ def create_interface():
|
|
| 737 |
|
| 738 |
# Create tabs HTML manually since Gradio dynamic tabs are complex
|
| 739 |
if isinstance(tab_contents, dict) and tab_contents:
|
| 740 |
-
|
| 741 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 742 |
<div style="border-bottom: 2px solid #ddd; margin-bottom: 20px;">
|
| 743 |
"""
|
| 744 |
|
|
@@ -746,10 +768,14 @@ def create_interface():
|
|
| 746 |
tab_names = list(tab_contents.keys())
|
| 747 |
for i, tab_name in enumerate(tab_names):
|
| 748 |
active_style = "background-color: #f8f9fa; border-bottom: 3px solid #4ECDC4;" if i == 0 else "background-color: #fff;"
|
|
|
|
| 749 |
tabs_html += f"""
|
| 750 |
-
<button onclick="showResultTab('{i}')" id="result-tab-{i}"
|
| 751 |
style="padding: 12px 24px; margin-right: 5px; border: 1px solid #ddd;
|
| 752 |
-
border-bottom: none; cursor: pointer; font-weight: bold; {active_style}
|
|
|
|
|
|
|
|
|
|
| 753 |
{tab_name}
|
| 754 |
</button>
|
| 755 |
"""
|
|
@@ -760,36 +786,66 @@ def create_interface():
|
|
| 760 |
for i, (tab_name, content) in enumerate(tab_contents.items()):
|
| 761 |
display_style = "display: block;" if i == 0 else "display: none;"
|
| 762 |
tabs_html += f"""
|
| 763 |
-
<div id="result-content-{i}" style="{display_style}">
|
| 764 |
{content}
|
| 765 |
</div>
|
| 766 |
"""
|
| 767 |
|
| 768 |
-
# Add JavaScript for tab switching
|
| 769 |
-
tabs_html += """
|
| 770 |
<script>
|
| 771 |
-
function showResultTab(tabIndex) {
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
|
|
|
|
|
|
| 775 |
content.style.display = 'none';
|
| 776 |
-
});
|
| 777 |
|
| 778 |
-
// Reset all tab styles
|
| 779 |
-
var tabs = document.querySelectorAll('[id^="result-tab-"]');
|
| 780 |
-
tabs.forEach(function(tab) {
|
| 781 |
tab.style.backgroundColor = '#fff';
|
| 782 |
tab.style.borderBottom = 'none';
|
| 783 |
-
});
|
| 784 |
|
| 785 |
// Show selected content
|
| 786 |
-
document.getElementById('result-content-' + tabIndex)
|
|
|
|
|
|
|
|
|
|
| 787 |
|
| 788 |
// Highlight selected tab
|
| 789 |
-
var activeTab = document.getElementById('result-tab-' + tabIndex);
|
| 790 |
-
activeTab
|
| 791 |
-
|
| 792 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
</script>
|
| 794 |
</div>
|
| 795 |
"""
|
|
|
|
| 5 |
import warnings
|
| 6 |
import random
|
| 7 |
import re
|
| 8 |
+
import time
|
| 9 |
warnings.filterwarnings('ignore')
|
| 10 |
|
| 11 |
# Common NER entity types
|
|
|
|
| 237 |
return []
|
| 238 |
|
| 239 |
def find_overlapping_entities(entities):
|
| 240 |
+
"""Find and share overlapping entities - specifically entities found by BOTH common NER models AND custom entities"""
|
| 241 |
if not entities:
|
| 242 |
return []
|
| 243 |
|
|
|
|
| 255 |
while j < len(sorted_entities):
|
| 256 |
next_entity = sorted_entities[j]
|
| 257 |
|
| 258 |
+
# Check if entities overlap (same text span or overlapping positions)
|
| 259 |
if (current_entity['start'] <= next_entity['start'] < current_entity['end'] or
|
| 260 |
next_entity['start'] <= current_entity['start'] < current_entity['end'] or
|
| 261 |
current_entity['text'].lower() == next_entity['text'].lower()):
|
|
|
|
| 264 |
else:
|
| 265 |
j += 1
|
| 266 |
|
| 267 |
+
# Create shared entity only if we have BOTH common and custom entities
|
| 268 |
if len(overlapping_entities) == 1:
|
| 269 |
shared_entities.append(overlapping_entities[0])
|
| 270 |
else:
|
| 271 |
+
# Check if this is a true "shared" entity (common + custom)
|
| 272 |
+
has_common = False
|
| 273 |
+
has_custom = False
|
| 274 |
+
|
| 275 |
+
for entity in overlapping_entities:
|
| 276 |
+
source = entity.get('source', '')
|
| 277 |
+
if source in ['spaCy', 'GLiNER-Common'] or source.startswith('Flair-'):
|
| 278 |
+
has_common = True
|
| 279 |
+
elif source == 'GLiNER-Custom':
|
| 280 |
+
has_custom = True
|
| 281 |
+
|
| 282 |
+
if has_common and has_custom:
|
| 283 |
+
# This is a true shared entity (common + custom)
|
| 284 |
+
shared_entity = share_entities(overlapping_entities)
|
| 285 |
+
shared_entities.append(shared_entity)
|
| 286 |
+
else:
|
| 287 |
+
# These are just overlapping entities from the same source type, keep separate
|
| 288 |
+
shared_entities.extend(overlapping_entities)
|
| 289 |
|
| 290 |
i += 1
|
| 291 |
|
|
|
|
| 403 |
f'border-radius: 3px; margin: 0 1px; '
|
| 404 |
f'border: 2px solid #333; color: white; font-weight: bold;" '
|
| 405 |
f'title="SHARED: {tooltip}">'
|
| 406 |
+
f'{entity["text"]} 🤝</span>')
|
| 407 |
|
| 408 |
def create_entity_table_gradio_tabs(entities, entity_colors):
|
| 409 |
"""Create Gradio tabs for entity results"""
|
|
|
|
| 434 |
for entity_type, entities_of_type in entity_groups.items():
|
| 435 |
if entity_type == 'SHARED_ENTITIES':
|
| 436 |
colour = '#666666'
|
| 437 |
+
header = f"🤝 Shared Entities ({len(entities_of_type)} found)"
|
| 438 |
|
| 439 |
# Create table for shared entities
|
| 440 |
table_html = f"""
|
|
|
|
| 470 |
"""
|
| 471 |
|
| 472 |
table_html += "</tbody></table></div>"
|
| 473 |
+
tab_contents[f"🤝 SHARED ({len(entities_of_type)})"] = table_html
|
| 474 |
|
| 475 |
else:
|
| 476 |
colour = entity_colors.get(entity_type.upper(), '#f0f0f0')
|
|
|
|
| 606 |
progress(0.9, desc="Creating summary...")
|
| 607 |
|
| 608 |
# Create summary with shared entities terminology
|
| 609 |
+
# Note: Shared entities are those found by BOTH common NER models AND custom GLiNER
|
| 610 |
total_entities = len(all_entities)
|
| 611 |
shared_entities = find_overlapping_entities(all_entities)
|
| 612 |
final_count = len(shared_entities)
|
|
|
|
| 632 |
|
| 633 |
Combine common NER categories with your own custom entity types! This tool uses both traditional NER models and GLiNER for comprehensive entity extraction.
|
| 634 |
|
| 635 |
+
## 🤝 NEW: Overlapping entities are automatically shared with split-colour highlighting!
|
| 636 |
|
| 637 |
### How to use:
|
| 638 |
1. **📝 Enter your text** in the text area below
|
|
|
|
| 755 |
|
| 756 |
# Create tabs HTML manually since Gradio dynamic tabs are complex
|
| 757 |
if isinstance(tab_contents, dict) and tab_contents:
|
| 758 |
+
# Generate unique IDs to avoid conflicts
|
| 759 |
+
import time
|
| 760 |
+
timestamp = str(int(time.time() * 1000))
|
| 761 |
+
|
| 762 |
+
tabs_html = f"""
|
| 763 |
+
<div style="margin: 20px 0;" id="tab-container-{timestamp}">
|
| 764 |
<div style="border-bottom: 2px solid #ddd; margin-bottom: 20px;">
|
| 765 |
"""
|
| 766 |
|
|
|
|
| 768 |
tab_names = list(tab_contents.keys())
|
| 769 |
for i, tab_name in enumerate(tab_names):
|
| 770 |
active_style = "background-color: #f8f9fa; border-bottom: 3px solid #4ECDC4;" if i == 0 else "background-color: #fff;"
|
| 771 |
+
default_bg = '#f8f9fa' if i == 0 else '#fff'
|
| 772 |
tabs_html += f"""
|
| 773 |
+
<button onclick="showResultTab{timestamp}('{i}')" id="result-tab-{timestamp}-{i}"
|
| 774 |
style="padding: 12px 24px; margin-right: 5px; border: 1px solid #ddd;
|
| 775 |
+
border-bottom: none; cursor: pointer; font-weight: bold; {active_style}
|
| 776 |
+
transition: all 0.3s ease;"
|
| 777 |
+
onmouseover="this.style.backgroundColor='#e9ecef'"
|
| 778 |
+
onmouseout="this.style.backgroundColor='{default_bg}'">
|
| 779 |
{tab_name}
|
| 780 |
</button>
|
| 781 |
"""
|
|
|
|
| 786 |
for i, (tab_name, content) in enumerate(tab_contents.items()):
|
| 787 |
display_style = "display: block;" if i == 0 else "display: none;"
|
| 788 |
tabs_html += f"""
|
| 789 |
+
<div id="result-content-{timestamp}-{i}" style="{display_style}">
|
| 790 |
{content}
|
| 791 |
</div>
|
| 792 |
"""
|
| 793 |
|
| 794 |
+
# Add JavaScript for tab switching with unique function name
|
| 795 |
+
tabs_html += f"""
|
| 796 |
<script>
|
| 797 |
+
function showResultTab{timestamp}(tabIndex) {{
|
| 798 |
+
console.log('Tab clicked:', tabIndex);
|
| 799 |
+
|
| 800 |
+
// Hide all content for this specific tab container
|
| 801 |
+
var contents = document.querySelectorAll('[id^="result-content-{timestamp}-"]');
|
| 802 |
+
contents.forEach(function(content) {{
|
| 803 |
content.style.display = 'none';
|
| 804 |
+
}});
|
| 805 |
|
| 806 |
+
// Reset all tab styles for this specific tab container
|
| 807 |
+
var tabs = document.querySelectorAll('[id^="result-tab-{timestamp}-"]');
|
| 808 |
+
tabs.forEach(function(tab) {{
|
| 809 |
tab.style.backgroundColor = '#fff';
|
| 810 |
tab.style.borderBottom = 'none';
|
| 811 |
+
}});
|
| 812 |
|
| 813 |
// Show selected content
|
| 814 |
+
var targetContent = document.getElementById('result-content-{timestamp}-' + tabIndex);
|
| 815 |
+
if (targetContent) {{
|
| 816 |
+
targetContent.style.display = 'block';
|
| 817 |
+
}}
|
| 818 |
|
| 819 |
// Highlight selected tab
|
| 820 |
+
var activeTab = document.getElementById('result-tab-{timestamp}-' + tabIndex);
|
| 821 |
+
if (activeTab) {{
|
| 822 |
+
activeTab.style.backgroundColor = '#f8f9fa';
|
| 823 |
+
activeTab.style.borderBottom = '3px solid #4ECDC4';
|
| 824 |
+
}}
|
| 825 |
+
}}
|
| 826 |
+
|
| 827 |
+
// Ensure tabs are clickable after DOM load
|
| 828 |
+
document.addEventListener('DOMContentLoaded', function() {{
|
| 829 |
+
var tabs = document.querySelectorAll('[id^="result-tab-{timestamp}-"]');
|
| 830 |
+
tabs.forEach(function(tab, index) {{
|
| 831 |
+
tab.addEventListener('click', function(e) {{
|
| 832 |
+
e.preventDefault();
|
| 833 |
+
showResultTab{timestamp}(index.toString());
|
| 834 |
+
}});
|
| 835 |
+
}});
|
| 836 |
+
}});
|
| 837 |
+
|
| 838 |
+
// Also try immediate setup in case DOM is already loaded
|
| 839 |
+
setTimeout(function() {{
|
| 840 |
+
var tabs = document.querySelectorAll('[id^="result-tab-{timestamp}-"]');
|
| 841 |
+
tabs.forEach(function(tab, index) {{
|
| 842 |
+
tab.onclick = function(e) {{
|
| 843 |
+
e.preventDefault();
|
| 844 |
+
showResultTab{timestamp}(index.toString());
|
| 845 |
+
return false;
|
| 846 |
+
}};
|
| 847 |
+
}});
|
| 848 |
+
}}, 100);
|
| 849 |
</script>
|
| 850 |
</div>
|
| 851 |
"""
|