Spaces:
Sleeping
Sleeping
"""
Differential visualization enhancements for Tibetan legal manuscript analysis.
Provides a structural-change summary table (Plotly) and change detection
reports in HTML, JSON, or Markdown.
"""
import plotly.graph_objects as go | |
from typing import Dict, List | |
import pandas as pd | |
from .structural_analysis import detect_structural_changes, generate_structural_alignment | |
def create_differential_heatmap(texts_dict: Dict[str, str],
                                chapter_key: str,
                                metric_results: pd.DataFrame,
                                highlight_threshold: float = 0.7) -> go.Figure:
    """
    Build a Plotly summary table of structural changes for each text pair.

    Despite the function name, the returned figure contains a ``go.Table``
    (one row per text pair), not a heatmap trace.

    Args:
        texts_dict: Dictionary mapping text names to their content. A name
            that is missing from the dictionary is analysed as an empty string.
        chapter_key: Chapter identifier being analyzed; repeated in every row.
        metric_results: DataFrame with similarity metrics. Only its
            'Text Pair' column is read. Entries are split on ' vs ' and
            skipped unless that yields exactly two names.
        highlight_threshold: Threshold for highlighting significant changes.
            Currently not used by this function; kept so existing callers
            keep working.

    Returns:
        A figure titled "Structural Analysis Summary" holding the table.
    """
    columns = ['Text Pair', 'Chapter', 'Total Changes', 'Modifications',
               'Insertions/Deletions', 'Alignment Quality',
               'Significant Differences']

    summary_rows = []
    for pair in metric_results['Text Pair'].unique():
        names = pair.split(' vs ')
        if len(names) != 2:
            continue
        text1_name, text2_name = names

        # Get actual text content
        text1_content = texts_dict.get(text1_name, '')
        text2_content = texts_dict.get(text2_name, '')

        # Perform structural analysis
        changes = detect_structural_changes(text1_content, text2_content)
        alignment = generate_structural_alignment(text1_content, text2_content)

        insertions = len(changes['insertions'])
        deletions = len(changes['deletions'])
        modifications = len(changes['modifications'])

        # Ratio of matches to the combined segment count; max(..., 1) guards
        # against division by zero when both texts produce no segments.
        segment_total = len(alignment['segments1']) + len(alignment['segments2'])
        alignment_quality = len(alignment['matches']) / max(segment_total, 1)

        summary_rows.append({
            'Text Pair': pair,
            'Chapter': chapter_key,
            'Total Changes': insertions + deletions + modifications,
            'Modifications': modifications,
            'Insertions/Deletions': insertions + deletions,
            # The value is a ratio, so let the '%' format type scale it by 100
            # (a plain ".1f" followed by '%' would print 0.3 as "0.3%").
            'Alignment Quality': f"{alignment_quality:.1%}",
            'Significant Differences': sum(
                1 for mod in changes['modifications'] if len(mod['original']) > 10
            ),
        })

    # Passing `columns` keeps the column lookups below valid even when no
    # text pair produced a row.
    summary_df = pd.DataFrame(summary_rows, columns=columns)

    # Table fill colours given as a flat list are applied per *column*; a
    # 2D list (one inner list per column) is needed to stripe the *rows*.
    row_colors = ['lightgrey' if i % 2 == 0 else 'white'
                  for i in range(len(summary_df))]
    cell_fill = [row_colors] * len(columns) if row_colors else 'white'

    fig = go.Figure(data=[go.Table(
        header=dict(
            values=columns,
            font=dict(size=12, color='white'),
            fill_color='darkblue',
            align='left'
        ),
        cells=dict(
            values=[summary_df[column] for column in columns],
            font=dict(size=11),
            align='left',
            fill_color=cell_fill
        )
    )])
    fig.update_layout(
        title="Structural Analysis Summary",
        height=400,
        margin=dict(l=10, r=10, t=40, b=10)
    )
    return fig
def create_change_detection_report(texts_dict: Dict[str, str],
                                   chapter_key: str,
                                   output_format: str = 'html') -> str:
    """
    Produce a change detection report covering every pair of texts.

    Each unordered pair of entries in ``texts_dict`` (in dictionary order) is
    passed to ``generate_differential_report``; the collected reports are
    then rendered in the requested format.

    Args:
        texts_dict: Dictionary mapping text names to content
        chapter_key: Chapter identifier
        output_format: Format for output ('html', 'json', 'markdown').
            Any value other than 'html' or 'json' yields markdown.

    Returns:
        The rendered report as a string.
    """
    from .structural_analysis import generate_differential_report

    named_texts = list(texts_dict.items())
    pairwise_reports = [
        generate_differential_report(first_text, second_text,
                                     first_name, second_name)
        for position, (first_name, first_text) in enumerate(named_texts)
        for second_name, second_text in named_texts[position + 1:]
    ]

    if output_format == 'json':
        import json
        return json.dumps(pairwise_reports, indent=2, ensure_ascii=False)
    if output_format == 'html':
        return create_html_report(pairwise_reports, chapter_key)
    # Everything else falls through to markdown.
    return create_markdown_report(pairwise_reports, chapter_key)
def create_html_report(reports: List[Dict], chapter_key: str) -> str:
    """Create HTML report for structural analysis.

    Args:
        reports: One dictionary per compared text pair. Each must provide
            'file1', 'file2', 'scores' (with 'structural_similarity' and
            'alignment_score') and 'changes' (with 'insertions',
            'deletions' and 'modifications' lists).
        chapter_key: Chapter identifier shown in the page title and heading.

    Returns:
        A complete HTML document as a string. The chapter key and file names
        are HTML-escaped; the per-change fragments are inserted exactly as
        returned by ``format_changes_html``.
    """
    # Imported under its own name so it cannot clash with a local called `html`.
    from html import escape

    safe_chapter = escape(str(chapter_key))

    # The charset declaration matters here: the report carries Tibetan text,
    # which is garbled if a browser falls back to a legacy default encoding.
    parts = [f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Structural Analysis Report - Chapter {safe_chapter}</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 20px; }}
        .report {{ max-width: 1200px; margin: 0 auto; }}
        .comparison {{ border: 1px solid #ddd; margin: 20px 0; padding: 15px; }}
        .changes {{ display: flex; gap: 20px; }}
        .change-type {{ flex: 1; padding: 10px; border: 1px solid #eee; }}
        .insertion {{ background-color: #e8f5e8; }}
        .deletion {{ background-color: #ffe8e8; }}
        .modification {{ background-color: #fff3e0; }}
        .highlight {{ background-color: yellow; padding: 2px 4px; }}
    </style>
</head>
<body>
    <div class="report">
        <h1>Structural Analysis Report - Chapter {safe_chapter}</h1>
"""]

    for report in reports:
        scores = report['scores']
        insertions = report['changes']['insertions']
        deletions = report['changes']['deletions']
        modifications = report['changes']['modifications']
        # File names come from user-supplied texts, so escape them.
        file1 = escape(str(report['file1']))
        file2 = escape(str(report['file2']))

        parts.append(f"""
        <div class="comparison">
            <h2>{file1} vs {file2}</h2>
            <div class="scores">
                <p><strong>Structural Similarity:</strong> {scores['structural_similarity']:.2f}</p>
                <p><strong>Alignment Score:</strong> {scores['alignment_score']:.2f}</p>
            </div>
            <div class="changes">
                <div class="change-type insertion">
                    <h3>Insertions ({len(insertions)})</h3>
                    {format_changes_html(insertions)}
                </div>
                <div class="change-type deletion">
                    <h3>Deletions ({len(deletions)})</h3>
                    {format_changes_html(deletions)}
                </div>
                <div class="change-type modification">
                    <h3>Modifications ({len(modifications)})</h3>
                    {format_changes_html(modifications, is_modification=True)}
                </div>
            </div>
        </div>
""")

    parts.append("""
    </div>
</body>
</html>
""")
    # Join once instead of growing a string inside the loop.
    return "".join(parts)
def format_changes_html(changes: List[Dict], is_modification: bool = False) -> str:
    """Format changes for HTML display.

    Args:
        changes: Change dictionaries. Modifications are read through their
            'original' and 'replacement' keys, other changes through 'word';
            a missing key renders as an empty string.
        is_modification: When True, render each change as
            "original -> replacement" instead of a single word.

    Returns:
        An HTML fragment showing at most the first five changes, followed by
        a "... and N more" line when the list is longer. Returns a
        "No changes detected." paragraph for an empty list. All change text
        is HTML-escaped.
    """
    # Imported under its own name so it cannot clash with a local called `html`.
    from html import escape

    if not changes:
        return "<p>No changes detected.</p>"

    max_shown = 5  # Limit to first 5 for brevity

    def _cell(change: Dict, key: str) -> str:
        # Manuscript text may contain '<', '>' or '&'; escape it so it is
        # displayed literally rather than parsed as markup.
        return escape(str(change.get(key, '')))

    fragments = []
    for change in changes[:max_shown]:
        if is_modification:
            # &rarr; is used for the arrow so the separator survives any
            # encoding the surrounding document is served in.
            fragments.append(f"""
            <div class="change">
                <span class="highlight">{_cell(change, 'original')}</span> &rarr;
                <span class="highlight">{_cell(change, 'replacement')}</span>
            </div>
            """)
        else:
            fragments.append(f"""
            <div class="change">
                <span class="highlight">{_cell(change, 'word')}</span>
            </div>
            """)

    if len(changes) > max_shown:
        fragments.append(f"<p>... and {len(changes) - max_shown} more</p>")
    return "".join(fragments)
def create_markdown_report(reports: List[Dict], chapter_key: str) -> str:
    """Create markdown report for structural analysis.

    Args:
        reports: One dictionary per compared text pair. Each must provide
            'file1', 'file2', 'scores' (with 'structural_similarity' and
            'alignment_score') and 'changes' (with 'insertions',
            'deletions' and 'modifications' lists).
        chapter_key: Chapter identifier shown in the top-level heading.

    Returns:
        The markdown document as a string. For each pair it lists the two
        scores, the three change counts and, when modifications exist, at
        most the first three as "original -> replacement" bullets.
    """
    arrow = "\u2192"  # RIGHTWARDS ARROW between original and replacement

    lines = [f"# Structural Analysis Report - Chapter {chapter_key}\n\n"]
    for report in reports:
        scores = report['scores']
        changes = report['changes']
        modifications = changes['modifications']

        lines.append(f"## {report['file1']} vs {report['file2']}\n\n")
        lines.append(f"- **Structural Similarity**: {scores['structural_similarity']:.2f}\n")
        lines.append(f"- **Alignment Score**: {scores['alignment_score']:.2f}\n")
        lines.append(f"- **Insertions**: {len(changes['insertions'])}\n")
        lines.append(f"- **Deletions**: {len(changes['deletions'])}\n")
        lines.append(f"- **Modifications**: {len(modifications)}\n\n")

        if modifications:
            lines.append("### Significant Modifications:\n")
            # Only the first three modifications are listed.
            lines.extend(
                f"- **{mod.get('original', '')}** {arrow} **{mod.get('replacement', '')}**\n"
                for mod in modifications[:3]
            )
    # Join once instead of growing a string inside the loop.
    return "".join(lines)