Spaces:

daniel-wojahn
/

ttm-webapp-hf

Sleeping

File size: 10,064 Bytes

"""
Differential visualization enhancements for Tibetan legal manuscript analysis.
Provides enhanced heatmaps with structural change highlighting.
"""

import plotly.graph_objects as go
from typing import Dict, List
import pandas as pd
from .structural_analysis import detect_structural_changes, generate_structural_alignment


def create_differential_heatmap(texts_dict: Dict[str, str],
                                chapter_key: str,
                                metric_results: pd.DataFrame,
                                highlight_threshold: float = 0.7) -> go.Figure:
    """
    Build a summary table figure of structural changes per text pair.

    Despite the name, the returned figure contains a single ``go.Table``
    (one row per text pair), not a heatmap trace.

    Args:
        texts_dict: Dictionary mapping text names to their content. Names
            missing from the dictionary are analyzed as empty strings.
        chapter_key: Chapter identifier being analyzed; copied into the
            'Chapter' column of every row.
        metric_results: DataFrame with a 'Text Pair' column whose values have
            the form ``"<name1> vs <name2>"``. Values that do not split into
            exactly two names are skipped.
        highlight_threshold: Currently unused by this function; kept so that
            existing callers passing it keep working.

    Returns:
        A Plotly figure titled "Structural Analysis Summary". When no valid
        text pair is found the table has headers only.
    """
    columns = [
        'Text Pair', 'Chapter', 'Total Changes', 'Modifications',
        'Insertions/Deletions', 'Alignment Quality', 'Significant Differences',
    ]

    summary_rows = []
    for pair in metric_results['Text Pair'].unique():
        names = pair.split(' vs ')
        if len(names) != 2:
            continue
        text1_name, text2_name = names

        text1_content = texts_dict.get(text1_name, '')
        text2_content = texts_dict.get(text2_name, '')

        # Both helpers are read as dicts of sized collections under the keys
        # used below.
        changes = detect_structural_changes(text1_content, text2_content)
        alignment = generate_structural_alignment(text1_content, text2_content)

        insertions = len(changes['insertions'])
        deletions = len(changes['deletions'])
        modifications = len(changes['modifications'])

        # Ratio of matches to the combined segment count; the max() guard
        # avoids dividing by zero when both texts have no segments.
        segment_total = len(alignment['segments1']) + len(alignment['segments2'])
        alignment_quality = len(alignment['matches']) / max(segment_total, 1)

        # A modification counts as significant when its original text is
        # longer than 10 characters; a missing 'original' counts as empty.
        significant = sum(
            1 for change in changes['modifications']
            if len(change.get('original', '')) > 10
        )

        summary_rows.append({
            'Text Pair': pair,
            'Chapter': chapter_key,
            'Total Changes': insertions + deletions + modifications,
            'Modifications': modifications,
            'Insertions/Deletions': insertions + deletions,
            # The value is a 0-1 ratio, so use the percent format type, which
            # multiplies by 100 before appending '%'.
            'Alignment Quality': f"{alignment_quality:.1%}",
            'Significant Differences': significant,
        })

    # Passing columns explicitly keeps the column lookups below valid even
    # when there are no rows.
    summary_df = pd.DataFrame(summary_rows, columns=columns)

    # A flat colour list is applied per column by go.Table; a list of
    # per-column lists is needed to stripe rows.
    row_colors = ['lightgrey' if i % 2 == 0 else 'white'
                  for i in range(len(summary_df))]
    cell_fill = [row_colors] * len(columns) if row_colors else 'white'

    fig = go.Figure(data=[go.Table(
        header=dict(
            values=columns,
            font=dict(size=12, color='white'),
            fill_color='darkblue',
            align='left'
        ),
        cells=dict(
            values=[summary_df[name] for name in columns],
            font=dict(size=11),
            align='left',
            fill_color=cell_fill
        )
    )])

    fig.update_layout(
        title="Structural Analysis Summary",
        height=400,
        margin=dict(l=10, r=10, t=40, b=10)
    )

    return fig


def create_change_detection_report(texts_dict: Dict[str, str],
                                 chapter_key: str,
                                 output_format: str = 'html') -> str:
    """
    Produce a change detection report covering every pair of texts.

    Each unordered pair of entries in ``texts_dict`` (in dictionary order) is
    passed to ``generate_differential_report``; the collected results are then
    rendered in the requested format.

    Args:
        texts_dict: Dictionary mapping text names to content
        chapter_key: Chapter identifier
        output_format: 'html' or 'json'; any other value (including
            'markdown') yields the Markdown report

    Returns:
        The rendered report as a string.
    """

    from .structural_analysis import generate_differential_report

    names = list(texts_dict.keys())

    # Pair each text with every text that follows it, so each pair is
    # compared exactly once.
    pairwise_reports = [
        generate_differential_report(
            texts_dict[first], texts_dict[second], first, second
        )
        for position, first in enumerate(names)
        for second in names[position + 1:]
    ]

    if output_format == 'html':
        return create_html_report(pairwise_reports, chapter_key)

    if output_format == 'json':
        import json
        return json.dumps(pairwise_reports, indent=2, ensure_ascii=False)

    return create_markdown_report(pairwise_reports, chapter_key)


def create_html_report(reports: List[Dict], chapter_key: str) -> str:
    """
    Create HTML report for structural analysis.

    Args:
        reports: One dict per compared pair. Each is read for the keys
            'file1', 'file2', 'scores' (with numeric 'structural_similarity'
            and 'alignment_score') and 'changes' (with 'insertions',
            'deletions' and 'modifications' lists).
        chapter_key: Chapter identifier shown in the page title and heading.

    Returns:
        A complete HTML document as a string. The chapter key and file names
        are HTML-escaped before being inserted into the markup.
    """
    # Imported locally under a distinct name so it cannot clash with
    # variables called `html`.
    from html import escape

    safe_chapter = escape(str(chapter_key))

    # The charset declaration lets non-ASCII (e.g. Tibetan) text render
    # correctly when the report is saved and opened as a standalone file.
    parts = [f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Structural Analysis Report - Chapter {safe_chapter}</title>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 20px; }}
            .report {{ max-width: 1200px; margin: 0 auto; }}
            .comparison {{ border: 1px solid #ddd; margin: 20px 0; padding: 15px; }}
            .changes {{ display: flex; gap: 20px; }}
            .change-type {{ flex: 1; padding: 10px; border: 1px solid #eee; }}
            .insertion {{ background-color: #e8f5e8; }}
            .deletion {{ background-color: #ffe8e8; }}
            .modification {{ background-color: #fff3e0; }}
            .highlight {{ background-color: yellow; padding: 2px 4px; }}
        </style>
    </head>
    <body>
        <div class="report">
            <h1>Structural Analysis Report - Chapter {safe_chapter}</h1>
    """]

    for report in reports:
        file1 = escape(str(report['file1']))
        file2 = escape(str(report['file2']))
        scores = report['scores']
        insertions = report['changes']['insertions']
        deletions = report['changes']['deletions']
        modifications = report['changes']['modifications']

        parts.append(f"""
            <div class="comparison">
                <h2>{file1} vs {file2}</h2>
                <div class="scores">
                    <p><strong>Structural Similarity:</strong> {scores['structural_similarity']:.2f}</p>
                    <p><strong>Alignment Score:</strong> {scores['alignment_score']:.2f}</p>
                </div>
                
                <div class="changes">
                    <div class="change-type insertion">
                        <h3>Insertions ({len(insertions)})</h3>
                        {format_changes_html(insertions)}
                    </div>
                    <div class="change-type deletion">
                        <h3>Deletions ({len(deletions)})</h3>
                        {format_changes_html(deletions)}
                    </div>
                    <div class="change-type modification">
                        <h3>Modifications ({len(modifications)})</h3>
                        {format_changes_html(modifications, is_modification=True)}
                    </div>
                </div>
            </div>
        """)

    parts.append("""
        </div>
    </body>
    </html>
    """)

    return "".join(parts)


def format_changes_html(changes: List[Dict], is_modification: bool = False,
                        max_items: int = 5) -> str:
    """
    Format changes for HTML display.

    Args:
        changes: Change dicts. Modifications are read for 'original' and
            'replacement'; other changes are read for 'word'. Missing keys
            render as empty text.
        is_modification: When True, render each change as
            ``original → replacement``; otherwise render the single 'word'.
        max_items: Maximum number of changes to render in full (default 5).
            Any remainder is summarized in a trailing "... and N more" line.

    Returns:
        An HTML fragment. All change text is HTML-escaped so characters such
        as ``<`` or ``&`` in the source texts cannot break the markup.
    """
    # Imported locally under a distinct name so it cannot clash with
    # variables called `html`.
    from html import escape

    if not changes:
        return "<p>No changes detected.</p>"

    # A negative limit would otherwise slice from the end of the list.
    shown = changes[:max(max_items, 0)]

    fragments = []
    for change in shown:
        if is_modification:
            original = escape(str(change.get('original', '')))
            replacement = escape(str(change.get('replacement', '')))
            fragments.append(f"""
            <div class="change">
                <span class="highlight">{original}</span> → 
                <span class="highlight">{replacement}</span>
            </div>
            """)
        else:
            word = escape(str(change.get('word', '')))
            fragments.append(f"""
            <div class="change">
                <span class="highlight">{word}</span>
            </div>
            """)

    remaining = len(changes) - len(shown)
    if remaining > 0:
        fragments.append(f"<p>... and {remaining} more</p>")

    return "".join(fragments)


def create_markdown_report(reports: List[Dict], chapter_key: str) -> str:
    """
    Render the structural analysis reports as a Markdown document.

    Args:
        reports: One dict per compared pair, read for 'file1', 'file2',
            'scores' ('structural_similarity', 'alignment_score') and
            'changes' ('insertions', 'deletions', 'modifications').
        chapter_key: Chapter identifier used in the top-level heading.

    Returns:
        Markdown text: a title, then one section per report listing the two
        scores, the three change counts and up to three modifications.
    """
    chunks = [f"# Structural Analysis Report - Chapter {chapter_key}\n\n"]

    for entry in reports:
        chunks.append(f"## {entry['file1']} vs {entry['file2']}\n\n")

        scores = entry['scores']
        chunks.append(f"- **Structural Similarity**: {scores['structural_similarity']:.2f}\n")
        chunks.append(f"- **Alignment Score**: {scores['alignment_score']:.2f}\n")

        changes = entry['changes']
        chunks.append(f"- **Insertions**: {len(changes['insertions'])}\n")
        chunks.append(f"- **Deletions**: {len(changes['deletions'])}\n")
        chunks.append(f"- **Modifications**: {len(changes['modifications'])}\n\n")

        modifications = changes['modifications']
        if not modifications:
            continue

        # Only the first three modifications are listed.
        chunks.append("### Significant Modifications:\n")
        chunks.extend(
            f"- **{item.get('original', '')}** → **{item.get('replacement', '')}**\n"
            for item in modifications[:3]
        )

    return "".join(chunks)