"""
Differential visualization enhancements for Tibetan legal manuscript analysis.

Provides enhanced heatmaps with structural change highlighting.
"""

import json
from html import escape
from itertools import combinations
from typing import Dict, List

import pandas as pd
import plotly.graph_objects as go

from .structural_analysis import detect_structural_changes, generate_structural_alignment

# Column order shared by the summary DataFrame and the rendered table header.
_SUMMARY_COLUMNS = [
    'Text Pair',
    'Chapter',
    'Total Changes',
    'Modifications',
    'Insertions/Deletions',
    'Alignment Quality',
    'Significant Differences',
]

# A modification is counted as "significant" when its original text is
# longer than this many characters.
_SIGNIFICANT_ORIGINAL_LENGTH = 10

# Maximum number of individual changes listed per category in the HTML report.
_MAX_CHANGES_LISTED = 5


def _summarize_pair(pair: str, chapter_key: str,
                    text1_content: str, text2_content: str) -> Dict[str, object]:
    """Run the structural comparison for one text pair and build its summary row."""
    changes = detect_structural_changes(text1_content, text2_content)
    alignment = generate_structural_alignment(text1_content, text2_content)

    insertions = len(changes['insertions'])
    deletions = len(changes['deletions'])
    modifications = len(changes['modifications'])

    # Guard the denominator so two empty texts yield 0 instead of dividing by zero.
    segment_total = len(alignment['segments1']) + len(alignment['segments2'])
    alignment_quality = len(alignment['matches']) / max(segment_total, 1)

    significant = sum(
        1 for mod in changes['modifications']
        if len(mod['original']) > _SIGNIFICANT_ORIGINAL_LENGTH
    )

    return {
        'Text Pair': pair,
        'Chapter': chapter_key,
        'Total Changes': insertions + deletions + modifications,
        'Modifications': modifications,
        'Insertions/Deletions': insertions + deletions,
        # alignment_quality is a ratio, so let the '%' format type scale it by 100.
        'Alignment Quality': f"{alignment_quality:.1%}",
        'Significant Differences': significant,
    }


def create_differential_heatmap(texts_dict: Dict[str, str],
                                chapter_key: str,
                                metric_results: pd.DataFrame,
                                highlight_threshold: float = 0.7) -> go.Figure:
    """
    Create a summary table figure of structural changes between text pairs.

    Despite its name, this function renders a ``go.Table`` with one row per
    text pair, not a heatmap trace.

    Args:
        texts_dict: Dictionary mapping text names to their content. Names
            missing from the dictionary are analyzed as empty strings.
        chapter_key: Chapter identifier being analyzed.
        metric_results: DataFrame with a 'Text Pair' column whose values have
            the form ``"<name1> vs <name2>"``. Values that do not split into
            exactly two names are skipped.
        highlight_threshold: Threshold for highlighting significant changes.
            Currently not used by this function; kept for interface
            compatibility.

    Returns:
        A Plotly figure containing the summary table. The table is empty
        (header only) when no usable text pair is found.
    """
    summary_rows = []
    for pair in metric_results['Text Pair'].unique():
        names = pair.split(' vs ')
        if len(names) != 2:
            continue
        text1_name, text2_name = names
        summary_rows.append(_summarize_pair(
            pair,
            chapter_key,
            texts_dict.get(text1_name, ''),
            texts_dict.get(text2_name, ''),
        ))

    # Explicit columns keep the column lookups below valid even with no rows.
    summary_df = pd.DataFrame(summary_rows, columns=_SUMMARY_COLUMNS)

    # Plotly reads a 1-D fill colour list as one colour per *column*; a 2-D
    # list (one inner list of per-row colours) is needed to stripe the rows.
    row_colors = ['lightgrey' if i % 2 == 0 else 'white'
                  for i in range(len(summary_df))]

    fig = go.Figure(data=[go.Table(
        header=dict(
            values=_SUMMARY_COLUMNS,
            font=dict(size=12, color='white'),
            fill_color='darkblue',
            align='left'
        ),
        cells=dict(
            values=[summary_df[column] for column in _SUMMARY_COLUMNS],
            font=dict(size=11),
            align='left',
            fill_color=[row_colors] if row_colors else 'white'
        )
    )])

    fig.update_layout(
        title="Structural Analysis Summary",
        height=400,
        margin=dict(l=10, r=10, t=40, b=10)
    )

    return fig


def create_change_detection_report(texts_dict: Dict[str, str],
                                   chapter_key: str,
                                   output_format: str = 'html') -> str:
    """
    Create detailed change detection report for a chapter.

    Every unordered pair of texts is compared once, in the iteration order of
    ``texts_dict``.

    Args:
        texts_dict: Dictionary mapping text names to content.
        chapter_key: Chapter identifier.
        output_format: Format for output ('html', 'json', 'markdown'). Any
            value other than 'html' or 'json' produces markdown.

    Returns:
        The rendered report as a string.
    """
    from .structural_analysis import generate_differential_report

    reports = [
        generate_differential_report(text1_content, text2_content,
                                     text1_name, text2_name)
        for (text1_name, text1_content), (text2_name, text2_content)
        in combinations(texts_dict.items(), 2)
    ]

    if output_format == 'html':
        return create_html_report(reports, chapter_key)
    if output_format == 'json':
        return json.dumps(reports, indent=2, ensure_ascii=False)
    return create_markdown_report(reports, chapter_key)


def create_html_report(reports: List[Dict], chapter_key: str) -> str:
    """
    Create HTML report for structural analysis.

    Args:
        reports: Report dictionaries; each is read for 'file1', 'file2',
            'scores' ('structural_similarity', 'alignment_score') and
            'changes' ('insertions', 'deletions', 'modifications').
        chapter_key: Chapter identifier shown in the report title.

    Returns:
        The report text. File names and the chapter key are HTML-escaped
        before being inserted.
    """
    # NOTE(review): the template text below contains no markup in the version
    # under review - confirm whether tags were lost before this file was saved.
    chapter = escape(str(chapter_key))
    parts = [
        f" Structural Analysis Report - Chapter {chapter}\n"
        "\n"
        f"Structural Analysis Report - Chapter {chapter}\n"
        "\n"
    ]

    for report in reports:
        scores = report['scores']
        changes = report['changes']
        file1 = escape(str(report['file1']))
        file2 = escape(str(report['file2']))
        parts.append(
            "\n"
            "\n"
            f"{file1} vs {file2}\n"
            "\n"
            f"Structural Similarity: {scores['structural_similarity']:.2f}\n"
            "\n"
            f"Alignment Score: {scores['alignment_score']:.2f}\n"
            "\n"
            f"Insertions ({len(changes['insertions'])})\n"
            "\n"
            f"{format_changes_html(changes['insertions'])}\n"
            "\n"
            f"Deletions ({len(changes['deletions'])})\n"
            "\n"
            f"{format_changes_html(changes['deletions'])}\n"
            "\n"
            f"Modifications ({len(changes['modifications'])})\n"
            "\n"
            f"{format_changes_html(changes['modifications'], is_modification=True)}\n"
        )

    parts.append("\n")
    return "".join(parts)


def format_changes_html(changes: List[Dict], is_modification: bool = False) -> str:
    """
    Format changes for HTML display.

    Args:
        changes: Change dictionaries. Modifications are read for 'original'
            and 'replacement'; other changes are read for 'word'. Missing
            keys render as empty text.
        is_modification: Whether ``changes`` holds modifications rather than
            insertions or deletions.

    Returns:
        The formatted fragment, listing at most the first five changes
        followed by a count of the remainder. All change text is
        HTML-escaped.
    """
    if not changes:
        return "\n\nNo changes detected.\n\n"

    parts = []
    for change in changes[:_MAX_CHANGES_LISTED]:  # Limit output for brevity
        if is_modification:
            original = escape(str(change.get('original', '')))
            replacement = escape(str(change.get('replacement', '')))
            parts.append(f"\n{original}{replacement}\n")
        else:
            word = escape(str(change.get('word', '')))
            parts.append(f"\n{word}\n")

    remaining = len(changes) - _MAX_CHANGES_LISTED
    if remaining > 0:
        parts.append(f"\n\n... and {remaining} more\n\n")

    return "".join(parts)


def create_markdown_report(reports: List[Dict], chapter_key: str) -> str:
    """
    Create markdown report for structural analysis.

    Args:
        reports: Report dictionaries; each is read for 'file1', 'file2',
            'scores' ('structural_similarity', 'alignment_score') and
            'changes' ('insertions', 'deletions', 'modifications').
        chapter_key: Chapter identifier shown in the report heading.

    Returns:
        The markdown text. At most the first three modifications of each
        pair are listed individually.
    """
    lines = [f"# Structural Analysis Report - Chapter {chapter_key}\n\n"]

    for report in reports:
        scores = report['scores']
        changes = report['changes']
        modifications = changes['modifications']

        lines.append(f"## {report['file1']} vs {report['file2']}\n\n")
        lines.append(f"- **Structural Similarity**: {scores['structural_similarity']:.2f}\n")
        lines.append(f"- **Alignment Score**: {scores['alignment_score']:.2f}\n")
        lines.append(f"- **Insertions**: {len(changes['insertions'])}\n")
        lines.append(f"- **Deletions**: {len(changes['deletions'])}\n")
        lines.append(f"- **Modifications**: {len(modifications)}\n\n")

        if modifications:
            lines.append("### Significant Modifications:\n")
            lines.extend(
                f"- **{mod.get('original', '')}** → **{mod.get('replacement', '')}**\n"
                for mod in modifications[:3]
            )

    return "".join(lines)