Spaces:

daniel-wojahn
/

ttm-webapp-hf

Sleeping

App Files Files Community

ttm-webapp-hf / pipeline /differential_viz.py

daniel-wojahn

cleanup and expansion of structural analysis

edd4b9d 11 days ago

raw

history blame contribute delete

10.1 kB

	"""
	Differential visualization enhancements for Tibetan legal manuscript analysis.
	Provides enhanced heatmaps with structural change highlighting.
	"""

	import plotly.graph_objects as go
	from typing import Dict, List
	import pandas as pd
	from .structural_analysis import detect_structural_changes, generate_structural_alignment


	def create_differential_heatmap(texts_dict: Dict[str, str],
	                                chapter_key: str,
	                                metric_results: pd.DataFrame,
	                                highlight_threshold: float = 0.7) -> go.Figure:
	    """
	    Create a summary table figure of structural changes for each text pair.

	    Despite the name, the figure produced is a ``go.Table`` with one row per
	    entry in the 'Text Pair' column of ``metric_results``. Pairs that do not
	    split into exactly two names on ' vs ' are skipped.

	    Args:
	        texts_dict: Dictionary mapping text names to their content. Names
	            missing from the dictionary are analyzed as empty strings.
	        chapter_key: Chapter identifier being analyzed (shown in every row).
	        metric_results: DataFrame with similarity metrics; only its
	            'Text Pair' column is read here.
	        highlight_threshold: Threshold for highlighting significant changes.
	            Kept for interface compatibility; this function does not
	            currently use it.

	    Returns:
	        A Plotly figure containing the summary table. When no valid pair is
	        found the table has headers but no rows.
	    """
	    columns = ['Text Pair', 'Chapter', 'Total Changes', 'Modifications',
	               'Insertions/Deletions', 'Alignment Quality', 'Significant Differences']

	    summary_rows = []
	    for pair in metric_results['Text Pair'].unique():
	        names = pair.split(' vs ')
	        if len(names) != 2:
	            continue
	        text1_name, text2_name = names

	        # Get actual text content
	        text1_content = texts_dict.get(text1_name, '')
	        text2_content = texts_dict.get(text2_name, '')

	        # Perform structural analysis
	        changes = detect_structural_changes(text1_content, text2_content)
	        alignment = generate_structural_alignment(text1_content, text2_content)

	        insertions = len(changes['insertions'])
	        deletions = len(changes['deletions'])
	        modifications = len(changes['modifications'])

	        # Ratio of matches to the combined segment count; max(..., 1) avoids
	        # division by zero when both texts produce no segments.
	        segment_total = len(alignment['segments1']) + len(alignment['segments2'])
	        alignment_quality = len(alignment['matches']) / max(segment_total, 1)

	        # A modification counts as significant when its original text is
	        # longer than 10 characters.
	        significant = sum(
	            1 for change in changes['modifications'] if len(change['original']) > 10
	        )

	        summary_rows.append({
	            'Text Pair': pair,
	            'Chapter': chapter_key,
	            'Total Changes': insertions + deletions + modifications,
	            'Modifications': modifications,
	            'Insertions/Deletions': insertions + deletions,
	            # The value is a ratio, so use the percent format (scales by 100)
	            # instead of appending '%' to the raw fraction.
	            'Alignment Quality': f"{alignment_quality:.1%}",
	            'Significant Differences': significant,
	        })

	    # Passing explicit columns keeps the lookups below valid even when no
	    # rows were produced (an empty list alone yields a column-less frame).
	    summary_df = pd.DataFrame(summary_rows, columns=columns)

	    # Plotly treats a flat list of fill colors as one color per *column*;
	    # alternating *row* colors need a nested list (one inner list of per-row
	    # colors, reused for every column).
	    row_colors = ['lightgrey' if i % 2 == 0 else 'white'
	                  for i in range(len(summary_df))]

	    fig = go.Figure(data=[go.Table(
	        header=dict(
	            values=columns,
	            font=dict(size=12, color='white'),
	            fill_color='darkblue',
	            align='left'
	        ),
	        cells=dict(
	            values=[summary_df[column] for column in columns],
	            font=dict(size=11),
	            align='left',
	            fill_color=[row_colors] if row_colors else 'white'
	        )
	    )])

	    fig.update_layout(
	        title="Structural Analysis Summary",
	        height=400,
	        margin=dict(l=10, r=10, t=40, b=10)
	    )

	    return fig


	def create_change_detection_report(texts_dict: Dict[str, str],
	                                   chapter_key: str,
	                                   output_format: str = 'html') -> str:
	    """
	    Build a change detection report covering every pair of texts in a chapter.

	    Each unordered pair of entries in ``texts_dict`` (in dictionary order) is
	    passed to ``generate_differential_report``; the collected reports are then
	    rendered in the requested format.

	    Args:
	        texts_dict: Dictionary mapping text names to content
	        chapter_key: Chapter identifier
	        output_format: 'html' or 'json'; any other value (including
	            'markdown') produces the markdown report.

	    Returns:
	        The rendered report as a string.
	    """
	    from .structural_analysis import generate_differential_report

	    names = list(texts_dict)

	    # Compare each text with every text that follows it, so each pair is
	    # reported exactly once.
	    pairwise_reports = [
	        generate_differential_report(
	            texts_dict[first_name], texts_dict[second_name], first_name, second_name
	        )
	        for position, first_name in enumerate(names)
	        for second_name in names[position + 1:]
	    ]

	    if output_format == 'html':
	        return create_html_report(pairwise_reports, chapter_key)

	    if output_format == 'json':
	        import json
	        return json.dumps(pairwise_reports, indent=2, ensure_ascii=False)

	    return create_markdown_report(pairwise_reports, chapter_key)


	def create_html_report(reports: List[Dict], chapter_key: str) -> str:
	    """
	    Create HTML report for structural analysis.

	    Args:
	        reports: Pairwise report dictionaries. Each is read for 'file1',
	            'file2', 'scores' (with numeric 'structural_similarity' and
	            'alignment_score') and 'changes' (with 'insertions', 'deletions'
	            and 'modifications' lists).
	        chapter_key: Chapter identifier shown in the page title and heading.

	    Returns:
	        A complete HTML document as a string. File names and the chapter key
	        are HTML-escaped before being embedded, so markup characters in them
	        are displayed literally instead of being interpreted by the browser.
	    """
	    # Imported locally under a distinct name so the stdlib module does not
	    # clash with variables holding markup.
	    from html import escape

	    safe_chapter = escape(str(chapter_key))

	    # Collect fragments and join once instead of growing a string with +=.
	    parts = [f"""
	<!DOCTYPE html>
	<html>
	<head>
	    <title>Structural Analysis Report - Chapter {safe_chapter}</title>
	    <style>
	        body {{ font-family: Arial, sans-serif; margin: 20px; }}
	        .report {{ max-width: 1200px; margin: 0 auto; }}
	        .comparison {{ border: 1px solid #ddd; margin: 20px 0; padding: 15px; }}
	        .changes {{ display: flex; gap: 20px; }}
	        .change-type {{ flex: 1; padding: 10px; border: 1px solid #eee; }}
	        .insertion {{ background-color: #e8f5e8; }}
	        .deletion {{ background-color: #ffe8e8; }}
	        .modification {{ background-color: #fff3e0; }}
	        .highlight {{ background-color: yellow; padding: 2px 4px; }}
	    </style>
	</head>
	<body>
	    <div class="report">
	        <h1>Structural Analysis Report - Chapter {safe_chapter}</h1>
	"""]

	    for report in reports:
	        first_name = escape(str(report['file1']))
	        second_name = escape(str(report['file2']))
	        scores = report['scores']
	        changes = report['changes']
	        insertions = changes['insertions']
	        deletions = changes['deletions']
	        modifications = changes['modifications']

	        # format_changes_html returns ready-made markup, so its output is
	        # embedded as-is rather than escaped again.
	        parts.append(f"""
	        <div class="comparison">
	            <h2>{first_name} vs {second_name}</h2>
	            <div class="scores">
	                <p><strong>Structural Similarity:</strong> {scores['structural_similarity']:.2f}</p>
	                <p><strong>Alignment Score:</strong> {scores['alignment_score']:.2f}</p>
	            </div>

	            <div class="changes">
	                <div class="change-type insertion">
	                    <h3>Insertions ({len(insertions)})</h3>
	                    {format_changes_html(insertions)}
	                </div>
	                <div class="change-type deletion">
	                    <h3>Deletions ({len(deletions)})</h3>
	                    {format_changes_html(deletions)}
	                </div>
	                <div class="change-type modification">
	                    <h3>Modifications ({len(modifications)})</h3>
	                    {format_changes_html(modifications, is_modification=True)}
	                </div>
	            </div>
	        </div>
	""")

	    parts.append("""
	    </div>
	</body>
	</html>
	""")

	    return "".join(parts)


	def format_changes_html(changes: List[Dict], is_modification: bool = False,
	                        max_items: int = 5) -> str:
	    """
	    Format changes for HTML display.

	    Args:
	        changes: Change dictionaries. Modifications are read for 'original'
	            and 'replacement'; other changes are read for 'word'. Missing
	            keys render as empty text.
	        is_modification: Render each entry as "original → replacement"
	            instead of a single highlighted word.
	        max_items: Maximum number of entries to render (default 5, for
	            brevity); negative values are treated as 0. Remaining entries
	            are summarized in a trailing "... and N more" line.

	    Returns:
	        An HTML fragment. Change text is HTML-escaped so that markup
	        characters in the compared texts are shown literally.
	    """
	    # Imported locally under a distinct name so the stdlib module does not
	    # clash with variables holding markup.
	    from html import escape

	    if not changes:
	        return "<p>No changes detected.</p>"

	    limit = max(max_items, 0)
	    fragments = []

	    for change in changes[:limit]:
	        if is_modification:
	            original = escape(str(change.get('original', '')))
	            replacement = escape(str(change.get('replacement', '')))
	            fragments.append(f"""
	<div class="change">
	    <span class="highlight">{original}</span> →
	    <span class="highlight">{replacement}</span>
	</div>
	""")
	        else:
	            word = escape(str(change.get('word', '')))
	            fragments.append(f"""
	<div class="change">
	    <span class="highlight">{word}</span>
	</div>
	""")

	    hidden = len(changes) - limit
	    if hidden > 0:
	        fragments.append(f"<p>... and {hidden} more</p>")

	    return "".join(fragments)


	def create_markdown_report(reports: List[Dict], chapter_key: str) -> str:
	    """
	    Create markdown report for structural analysis.

	    Args:
	        reports: Pairwise report dictionaries. Each is read for 'file1',
	            'file2', 'scores' (with numeric 'structural_similarity' and
	            'alignment_score') and 'changes' (with 'insertions', 'deletions'
	            and 'modifications' lists).
	        chapter_key: Chapter identifier shown in the top-level heading.

	    Returns:
	        The markdown document as a string. Every per-pair section ends with
	        a blank line, including sections that list modifications, so the
	        next heading never directly follows a list item.
	    """
	    # Collect fragments and join once instead of growing a string with +=.
	    parts = [f"# Structural Analysis Report - Chapter {chapter_key}\n\n"]

	    for report in reports:
	        scores = report['scores']
	        changes = report['changes']
	        modifications = changes['modifications']

	        parts.append(f"## {report['file1']} vs {report['file2']}\n\n")
	        parts.append(f"- Structural Similarity: {scores['structural_similarity']:.2f}\n")
	        parts.append(f"- Alignment Score: {scores['alignment_score']:.2f}\n")
	        parts.append(f"- Insertions: {len(changes['insertions'])}\n")
	        parts.append(f"- Deletions: {len(changes['deletions'])}\n")
	        parts.append(f"- Modifications: {len(modifications)}\n\n")

	        if modifications:
	            parts.append("### Significant Modifications:\n")
	            # Only the first three modifications are listed.
	            parts.extend(
	                f"- {mod.get('original', '')} → {mod.get('replacement', '')}\n"
	                for mod in modifications[:3]
	            )
	            # Close the list with a blank line so the next section's heading
	            # is separated from it.
	            parts.append("\n")

	    return "".join(parts)