akera committed on
Commit
23201ae
·
verified ·
1 Parent(s): ad7599c

Update src/plotting.py

Browse files
Files changed (1) hide show
  1. src/plotting.py +632 -489
src/plotting.py CHANGED
@@ -1,8 +1,6 @@
1
  # src/plotting.py
2
  import matplotlib.pyplot as plt
3
  import matplotlib.gridspec as gridspec
4
- import matplotlib.colors as mcolors
5
- from colorsys import rgb_to_hls, hls_to_rgb
6
  import plotly.graph_objects as go
7
  import plotly.express as px
8
  from plotly.subplots import make_subplots
@@ -10,587 +8,732 @@ import pandas as pd
10
  import numpy as np
11
  from collections import defaultdict
12
  from typing import Dict, List, Optional, Union
13
- from config import LANGUAGE_NAMES, ALL_UG40_LANGUAGES, GOOGLE_SUPPORTED_LANGUAGES, METRICS_CONFIG
 
 
 
 
 
 
 
 
 
14
 
15
- plt.style.use('default')
16
- plt.rcParams['figure.facecolor'] = 'white'
17
- plt.rcParams['axes.facecolor'] = 'white'
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- def create_leaderboard_ranking_plot(df: pd.DataFrame, metric: str = 'quality_score', top_n: int = 15) -> go.Figure:
20
- """Create interactive leaderboard ranking plot using Plotly."""
21
-
22
  if df.empty:
23
  fig = go.Figure()
24
  fig.add_annotation(
25
- text="No data available",
26
- xref="paper", yref="paper",
27
- x=0.5, y=0.5, showarrow=False,
28
- font=dict(size=16)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  )
30
- fig.update_layout(title="No Data Available")
31
  return fig
32
-
33
- # Get top N models
34
- top_models = df.head(top_n)
35
-
36
- # Create horizontal bar chart
37
- fig = go.Figure(data=[
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  go.Bar(
39
- y=top_models['model_name'],
40
- x=top_models[metric],
41
- orientation='h',
42
- marker=dict(
43
- color=top_models[metric],
44
- colorscale='Viridis',
45
- showscale=True,
46
- colorbar=dict(title=metric.replace('_', ' ').title())
47
- ),
48
- text=[f"{score:.3f}" for score in top_models[metric]],
49
- textposition='auto',
50
  hovertemplate=(
51
- "<b>%{y}</b><br>" +
52
- f"{metric.replace('_', ' ').title()}: %{{x:.4f}}<br>" +
53
- "Author: %{customdata[0]}<br>" +
54
- "Coverage: %{customdata[1]:.1%}<br>" +
55
- "<extra></extra>"
 
 
 
 
 
 
 
 
56
  ),
57
- customdata=list(zip(top_models['author'], top_models['coverage_rate']))
58
  )
59
- ])
60
-
 
 
61
  fig.update_layout(
62
- title=f"🏆 SALT Translation Leaderboard - {metric.replace('_', ' ').title()}",
63
- xaxis_title=f"{metric.replace('_', ' ').title()} Score",
64
  yaxis_title="Models",
65
- height=max(400, len(top_models) * 30 + 100),
66
  margin=dict(l=20, r=20, t=60, b=20),
67
- plot_bgcolor='white',
68
- paper_bgcolor='white'
 
69
  )
70
-
71
  # Reverse y-axis to show best model at top
72
  fig.update_yaxes(autorange="reversed")
73
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  return fig
75
 
76
- def create_metrics_comparison_plot(df: pd.DataFrame, models: List[str] = None, max_models: int = 8) -> go.Figure:
77
- """Create radar chart comparing multiple metrics across models."""
78
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  if df.empty:
80
  fig = go.Figure()
81
  fig.add_annotation(text="No data available", x=0.5, y=0.5, showarrow=False)
82
- fig.update_layout(title="No Data Available")
83
  return fig
84
-
85
- # Select models to compare
86
- if models is None:
87
- selected_models = df.head(max_models)
88
- else:
89
- selected_models = df[df['model_name'].isin(models)].head(max_models)
90
-
91
- if len(selected_models) == 0:
 
 
 
92
  fig = go.Figure()
93
- fig.add_annotation(text="No models found", x=0.5, y=0.5, showarrow=False)
94
- fig.update_layout(title="No Models Found")
 
95
  return fig
96
-
97
- # Metrics to include in radar chart
98
- metrics = ['quality_score', 'bleu', 'chrf', 'rouge1', 'rougeL']
99
- metric_labels = ['Quality Score', 'BLEU (/100)', 'ChrF', 'ROUGE-1', 'ROUGE-L']
100
-
101
  fig = go.Figure()
102
-
103
- colors = px.colors.qualitative.Set1[:len(selected_models)]
104
-
105
- for i, (_, model) in enumerate(selected_models.iterrows()):
106
- # Normalize BLEU to 0-1 scale for radar chart
107
- values = []
108
- for metric in metrics:
109
- value = model[metric]
110
- if metric == 'bleu':
111
- value = value / 100.0 # Normalize BLEU
112
- values.append(value)
113
-
114
- # Close the radar chart
115
- values += values[:1]
116
- metric_labels_closed = metric_labels + [metric_labels[0]]
117
-
118
- fig.add_trace(go.Scatterpolar(
119
- r=values,
120
- theta=metric_labels_closed,
121
- fill='toself',
122
- name=model['model_name'],
123
- line_color=colors[i % len(colors)],
124
- fillcolor=colors[i % len(colors)],
125
- opacity=0.6
126
- ))
127
-
128
- fig.update_layout(
129
- polar=dict(
130
- radialaxis=dict(
131
- visible=True,
132
- range=[0, 1]
 
 
 
 
 
 
 
133
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  ),
135
- showlegend=True,
136
- title="📊 Multi-Metric Model Comparison",
137
- height=600
138
  )
139
-
140
  return fig
141
 
142
- def create_language_pair_heatmap(results_dict: Dict, metric: str = 'quality_score') -> go.Figure:
143
- """Create heatmap showing performance across language pairs."""
144
-
145
- if not results_dict or 'pair_metrics' not in results_dict:
 
146
  fig = go.Figure()
147
- fig.add_annotation(text="No language pair data available", x=0.5, y=0.5, showarrow=False)
148
- fig.update_layout(title="No Language Pair Data Available")
149
  return fig
150
-
151
- pair_metrics = results_dict['pair_metrics']
152
-
153
- # Create matrix for heatmap
154
- languages = ALL_UG40_LANGUAGES
155
- matrix = np.zeros((len(languages), len(languages)))
156
-
157
- for i, src_lang in enumerate(languages):
158
- for j, tgt_lang in enumerate(languages):
159
- if src_lang != tgt_lang:
160
- pair_key = f"{src_lang}_to_{tgt_lang}"
161
- if pair_key in pair_metrics and metric in pair_metrics[pair_key]:
162
- matrix[i, j] = pair_metrics[pair_key][metric]
163
- else:
164
- matrix[i, j] = np.nan
165
- else:
166
- matrix[i, j] = np.nan
167
-
168
- # Create language labels
169
- lang_labels = [LANGUAGE_NAMES.get(lang, lang) for lang in languages]
170
-
171
- fig = go.Figure(data=go.Heatmap(
172
- z=matrix,
173
- x=lang_labels,
174
- y=lang_labels,
175
- colorscale='Viridis',
176
- showscale=True,
177
- colorbar=dict(title=metric.replace('_', ' ').title()),
178
- hovertemplate=(
179
- "Source: %{y}<br>" +
180
- "Target: %{x}<br>" +
181
- f"{metric.replace('_', ' ').title()}: %{{z:.3f}}<br>" +
182
- "<extra></extra>"
183
  )
184
- ))
185
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  fig.update_layout(
187
- title=f"🗺️ Language Pair Performance - {metric.replace('_', ' ').title()}",
188
- xaxis_title="Target Language",
189
- yaxis_title="Source Language",
190
- height=600,
191
- width=700
 
 
192
  )
193
-
194
  return fig
195
 
196
- def create_coverage_analysis_plot(df: pd.DataFrame) -> go.Figure:
197
- """Create plot analyzing test set coverage across submissions."""
198
-
 
199
  if df.empty:
200
  fig = go.Figure()
201
  fig.add_annotation(text="No data available", x=0.5, y=0.5, showarrow=False)
202
- fig.update_layout(title="No Data Available")
203
  return fig
204
-
205
  fig = make_subplots(
206
- rows=2, cols=2,
 
207
  subplot_titles=(
208
- "Coverage Distribution",
209
- "Language Pairs Covered",
210
- "Sample Count vs Quality",
211
- "Google Comparable Coverage"
212
  ),
213
- specs=[[{"type": "bar"}, {"type": "scatter"}],
214
- [{"type": "scatter"}, {"type": "bar"}]]
 
 
215
  )
216
-
217
- # Coverage distribution
218
- coverage_bins = pd.cut(df['coverage_rate'],
219
- bins=[0, 0.5, 0.8, 0.9, 0.95, 1.0],
220
- labels=['<50%', '50-80%', '80-90%', '90-95%', '95-100%'])
221
- coverage_counts = coverage_bins.value_counts()
222
-
223
- fig.add_trace(
224
- go.Bar(x=coverage_counts.index, y=coverage_counts.values, name="Coverage"),
225
- row=1, col=1
 
 
 
 
 
 
 
 
 
 
 
 
226
  )
227
-
228
- # Language pairs covered vs quality
 
 
 
 
 
 
 
 
 
 
 
 
229
  fig.add_trace(
230
- go.Scatter(
231
- x=df['language_pairs_covered'],
232
- y=df['quality_score'],
233
- mode='markers',
234
- text=df['model_name'],
235
- name="Quality vs Coverage"
236
  ),
237
- row=1, col=2
 
238
  )
239
-
240
- # Sample count vs quality
 
 
 
 
 
 
241
  fig.add_trace(
242
- go.Scatter(
243
- x=df['total_samples'],
244
- y=df['quality_score'],
245
- mode='markers',
246
- text=df['model_name'],
247
- name="Quality vs Samples"
248
  ),
249
- row=2, col=1
 
250
  )
251
-
252
- # Google comparable coverage
253
- google_coverage = df['google_pairs_covered'].value_counts().sort_index()
254
- fig.add_trace(
255
- go.Bar(x=google_coverage.index, y=google_coverage.values, name="Google Coverage"),
256
- row=2, col=2
257
- )
258
-
259
  fig.update_layout(
260
- title="📈 Test Set Coverage Analysis",
261
- height=800,
262
- showlegend=False
263
  )
264
-
265
  return fig
266
 
267
- def create_model_performance_timeline(df: pd.DataFrame) -> go.Figure:
268
- """Create timeline showing model performance over time."""
269
-
 
270
  if df.empty:
271
  fig = go.Figure()
272
  fig.add_annotation(text="No data available", x=0.5, y=0.5, showarrow=False)
273
- fig.update_layout(title="No Data Available")
274
  return fig
275
-
276
- # Convert submission_date to datetime
277
- df_copy = df.copy()
278
- df_copy['submission_date'] = pd.to_datetime(df_copy['submission_date'])
279
- df_copy = df_copy.sort_values('submission_date')
280
-
281
- fig = go.Figure()
282
-
283
- # Add scatter plot for each submission
284
- fig.add_trace(go.Scatter(
285
- x=df_copy['submission_date'],
286
- y=df_copy['quality_score'],
287
- mode='markers+lines',
288
- marker=dict(
289
- size=10,
290
- color=df_copy['quality_score'],
291
- colorscale='Viridis',
292
- showscale=True,
293
- colorbar=dict(title="Quality Score")
294
- ),
295
- text=df_copy['model_name'],
296
- hovertemplate=(
297
- "<b>%{text}</b><br>" +
298
- "Date: %{x}<br>" +
299
- "Quality Score: %{y:.4f}<br>" +
300
- "<extra></extra>"
301
- ),
302
- name="Models"
303
- ))
304
-
305
- # Add trend line
306
- if len(df_copy) > 1:
307
- z = np.polyfit(range(len(df_copy)), df_copy['quality_score'], 1)
308
- trend_line = np.poly1d(z)(range(len(df_copy)))
309
-
310
- fig.add_trace(go.Scatter(
311
- x=df_copy['submission_date'],
312
- y=trend_line,
313
- mode='lines',
314
- line=dict(dash='dash', color='red'),
315
- name="Trend",
316
- hoverinfo='skip'
317
- ))
318
-
319
- fig.update_layout(
320
- title="📅 Model Performance Timeline",
321
- xaxis_title="Submission Date",
322
- yaxis_title="Quality Score",
323
- height=500
324
- )
325
-
326
- return fig
327
 
328
- def create_google_comparison_plot(df: pd.DataFrame) -> go.Figure:
329
- """Create plot comparing models on Google Translate-comparable language pairs."""
330
-
331
- # Filter models that have Google comparable results
332
- google_models = df[df['google_pairs_covered'] > 0].copy()
333
-
334
- if google_models.empty:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  fig = go.Figure()
336
  fig.add_annotation(
337
- text="No models with Google Translate comparable results",
338
- x=0.5, y=0.5, showarrow=False
339
  )
340
- fig.update_layout(title="No Google Comparable Models")
341
  return fig
342
-
 
 
 
 
 
343
  fig = go.Figure()
344
-
345
- # Create scatter plot
346
- fig.add_trace(go.Scatter(
347
- x=google_models['google_bleu'],
348
- y=google_models['google_quality_score'],
349
- mode='markers+text',
350
- marker=dict(
351
- size=12,
352
- color=google_models['google_chrf'],
353
- colorscale='Plasma',
354
- showscale=True,
355
- colorbar=dict(title="ChrF Score")
356
- ),
357
- text=google_models['model_name'],
358
- textposition="top center",
359
- hovertemplate=(
360
- "<b>%{text}</b><br>" +
361
- "BLEU: %{x:.2f}<br>" +
362
- "Quality: %{y:.4f}<br>" +
363
- "ChrF: %{marker.color:.4f}<br>" +
364
- "<extra></extra>"
365
- ),
366
- name="Models"
367
- ))
368
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  fig.update_layout(
370
- title="🤖 Google Translate Comparable Performance",
371
- xaxis_title="BLEU Score",
372
- yaxis_title="Quality Score",
373
- height=500
 
 
 
374
  )
375
-
376
  return fig
377
 
378
- def create_detailed_model_analysis(model_results: Dict, model_name: str) -> go.Figure:
379
- """Create detailed analysis plot for a specific model - FIXED version."""
380
-
381
- if not model_results or 'pair_metrics' not in model_results:
 
 
 
 
 
 
 
 
 
 
 
382
  fig = go.Figure()
383
- fig.add_annotation(text="No detailed results available", x=0.5, y=0.5, showarrow=False)
384
- fig.update_layout(title=f"No Data for {model_name}")
 
385
  return fig
386
-
387
- pair_metrics = model_results['pair_metrics']
388
-
389
- # Extract language pair data
 
390
  pairs = []
391
- bleu_scores = []
392
- quality_scores = []
 
393
  sample_counts = []
394
- google_comparable = []
395
-
396
- for pair_key, metrics in pair_metrics.items():
397
- if 'sample_count' in metrics and metrics['sample_count'] > 0:
398
- src, tgt = pair_key.split('_to_')
399
- pair_label = f"{LANGUAGE_NAMES.get(src, src)} → {LANGUAGE_NAMES.get(tgt, tgt)}"
400
-
401
- pairs.append(pair_label)
402
- bleu_scores.append(metrics.get('bleu', 0))
403
- quality_scores.append(metrics.get('quality_score', 0))
404
- sample_counts.append(metrics.get('sample_count', 0))
405
-
406
- is_google = (src in GOOGLE_SUPPORTED_LANGUAGES and tgt in GOOGLE_SUPPORTED_LANGUAGES)
407
- google_comparable.append(is_google)
408
-
 
 
 
 
 
 
 
 
 
 
409
  if not pairs:
410
  fig = go.Figure()
411
- fig.add_annotation(text="No language pair data found", x=0.5, y=0.5, showarrow=False)
412
- fig.update_layout(title=f"No Language Pair Data for {model_name}")
 
413
  return fig
414
-
415
- # Create subplot with proper spacing and titles
416
  fig = make_subplots(
417
- rows=2, cols=1,
 
418
  subplot_titles=(
419
- f"BLEU Scores by Language Pair",
420
- f"Quality Scores by Language Pair"
421
  ),
422
  vertical_spacing=0.15,
423
- row_heights=[0.45, 0.45]
424
  )
425
-
426
- # Color code by Google comparable
427
- colors = ['#1f77b4' if gc else '#ff7f0e' for gc in google_comparable]
428
-
429
- # BLEU scores (top subplot)
 
 
 
 
 
 
430
  fig.add_trace(
431
  go.Bar(
432
  x=pairs,
433
- y=bleu_scores,
434
- marker_color=colors,
435
- name="BLEU",
436
- text=[f"{score:.1f}" for score in bleu_scores],
437
- textposition='outside',
438
- textfont=dict(size=10),
439
- showlegend=True
 
 
 
 
 
 
440
  ),
441
- row=1, col=1
 
442
  )
443
-
444
- # Quality scores (bottom subplot)
445
  fig.add_trace(
446
  go.Bar(
447
  x=pairs,
448
- y=quality_scores,
449
- marker_color=colors,
450
- name="Quality",
451
- text=[f"{score:.3f}" for score in quality_scores],
452
- textposition='outside',
453
- textfont=dict(size=10),
454
- showlegend=False
455
  ),
456
- row=2, col=1
 
457
  )
458
-
459
- # Update layout
 
460
  fig.update_layout(
 
461
  height=900,
462
- title=dict(
463
- text=f"📊 Detailed Analysis: {model_name}",
464
- x=0.5,
465
- xanchor='center'
466
- ),
467
- showlegend=True,
468
- margin=dict(l=50, r=50, t=100, b=150)
469
- )
470
-
471
- # Update x-axes to rotate labels properly
472
- fig.update_xaxes(
473
- tickangle=45,
474
- tickfont=dict(size=10),
475
- row=1, col=1
476
  )
477
- fig.update_xaxes(
478
- tickangle=45,
479
- tickfont=dict(size=10),
480
- row=2, col=1
481
- )
482
-
483
- # Update y-axes
484
- fig.update_yaxes(title_text="BLEU Score", row=1, col=1)
485
- fig.update_yaxes(title_text="Quality Score", row=2, col=1)
486
-
487
- # Add legend manually for Google vs UG40 only
488
- fig.add_trace(
489
- go.Scatter(
490
- x=[None], y=[None],
491
- mode='markers',
492
- marker=dict(size=15, color='#1f77b4', symbol='square'),
493
- name="Google Comparable",
494
- showlegend=True
495
- )
496
- )
497
-
498
- fig.add_trace(
499
- go.Scatter(
500
- x=[None], y=[None],
501
- mode='markers',
502
- marker=dict(size=15, color='#ff7f0e', symbol='square'),
503
- name="UG40 Only",
504
- showlegend=True
505
- )
506
- )
507
-
508
- return fig
509
 
510
- def create_submission_summary_plot(validation_info: Dict, evaluation_results: Dict) -> go.Figure:
511
- """Create summary plot for a new submission."""
512
-
513
- fig = make_subplots(
514
- rows=2, cols=2,
515
- subplot_titles=(
516
- "Sample Distribution",
517
- "Primary Metrics",
518
- "Error Analysis",
519
- "Coverage Summary"
520
- ),
521
- specs=[[{"type": "pie"}, {"type": "bar"}],
522
- [{"type": "bar"}, {"type": "bar"}]]
523
- )
524
-
525
- # Sample distribution (pie chart)
526
- coverage = validation_info.get('coverage', 0.8)
527
- fig.add_trace(
528
- go.Pie(
529
- labels=["Evaluated", "Missing"],
530
- values=[coverage * 100, (1 - coverage) * 100],
531
- name="Samples"
532
- ),
533
- row=1, col=1
534
- )
535
-
536
- # Primary metrics
537
- if 'summary' in evaluation_results:
538
- metrics_data = evaluation_results['summary']['primary_metrics']
539
- metric_names = list(metrics_data.keys())
540
- metric_values = list(metrics_data.values())
541
-
542
- fig.add_trace(
543
- go.Bar(
544
- x=metric_names,
545
- y=metric_values,
546
- name="Metrics",
547
- text=[f"{val:.3f}" for val in metric_values],
548
- textposition='auto'
549
- ),
550
- row=1, col=2
551
- )
552
-
553
- # Error analysis (CER, WER)
554
- if 'averages' in evaluation_results:
555
- error_metrics = ['cer', 'wer']
556
- error_values = [evaluation_results['averages'].get(m, 0) for m in error_metrics]
557
-
558
- fig.add_trace(
559
- go.Bar(
560
- x=error_metrics,
561
- y=error_values,
562
- name="Errors",
563
- text=[f"{val:.3f}" for val in error_values],
564
- textposition='auto'
565
- ),
566
- row=2, col=1
567
- )
568
-
569
- # Coverage summary
570
- if 'summary' in evaluation_results:
571
- summary = evaluation_results['summary']
572
- coverage_labels = ["Total Samples", "Lang Pairs", "Google Pairs"]
573
- coverage_values = [
574
- summary.get('total_samples', 0),
575
- summary.get('language_pairs_covered', 0),
576
- summary.get('google_comparable_pairs', 0)
577
- ]
578
-
579
- fig.add_trace(
580
- go.Bar(
581
- x=coverage_labels,
582
- y=coverage_values,
583
- name="Coverage",
584
- text=[f"{val}" for val in coverage_values],
585
- textposition='auto'
586
- ),
587
- row=2, col=2
588
- )
589
-
590
- fig.update_layout(
591
- title="📋 Submission Summary",
592
- height=700,
593
- showlegend=False
594
- )
595
-
596
- return fig
 
1
  # src/plotting.py
2
  import matplotlib.pyplot as plt
3
  import matplotlib.gridspec as gridspec
 
 
4
  import plotly.graph_objects as go
5
  import plotly.express as px
6
  from plotly.subplots import make_subplots
 
8
  import numpy as np
9
  from collections import defaultdict
10
  from typing import Dict, List, Optional, Union
11
+ from config import (
12
+ LANGUAGE_NAMES,
13
+ ALL_UG40_LANGUAGES,
14
+ GOOGLE_SUPPORTED_LANGUAGES,
15
+ METRICS_CONFIG,
16
+ EVALUATION_TRACKS,
17
+ MODEL_CATEGORIES,
18
+ CHART_CONFIG,
19
+ STATISTICAL_CONFIG,
20
+ )
21
 
22
# Publication-style matplotlib defaults: start from the stock style, then
# force white backgrounds and fix the font sizes used by axes and ticks.
plt.style.use("default")
plt.rcParams.update(
    {
        "figure.facecolor": "white",
        "axes.facecolor": "white",
        "font.size": 10,
        "axes.labelsize": 12,
        "axes.titlesize": 14,
        "xtick.labelsize": 10,
        "ytick.labelsize": 10,
    }
)
31
+
32
+
33
def create_scientific_leaderboard_plot(
    df: pd.DataFrame, track: str, metric: str = "quality", top_n: int = 15
) -> go.Figure:
    """Create a horizontal leaderboard bar chart for one evaluation track.

    Args:
        df: Leaderboard table. Reads the ``{track}_{metric}`` and
            ``model_name`` columns and, when present,
            ``{track}_ci_lower`` / ``{track}_ci_upper``,
            ``{track}_samples``, ``model_category`` and ``author``.
        track: Track key used to build the column names and to look up the
            display name in ``EVALUATION_TRACKS``.
        metric: Metric suffix; the plotted column is ``{track}_{metric}``.
        top_n: Maximum number of models to show.

    Returns:
        A Plotly figure. When there is nothing to plot (empty table, missing
        metric column, or no positive scores) the figure only contains a
        centred explanatory annotation.
    """

    def _message_figure(text: str, **annotation_kwargs) -> go.Figure:
        # Paper coordinates keep the message centred; without them the
        # annotation is placed in data coordinates of an empty axis.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=text,
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            **annotation_kwargs,
        )
        return placeholder

    def _column_or(frame: pd.DataFrame, name: str, default) -> list:
        # Optional columns fall back to a constant so hover data stays aligned.
        if name in frame.columns:
            return frame[name].tolist()
        return [default] * len(frame)

    if df.empty:
        fig = _message_figure(
            "No models available for this track", font=dict(size=16)
        )
        fig.update_layout(title=f"No Data Available - {track.title()} Track")
        return fig

    metric_col = f"{track}_{metric}"
    ci_lower_col = f"{track}_ci_lower"
    ci_upper_col = f"{track}_ci_upper"

    if metric_col not in df.columns:
        return _message_figure(f"Metric {metric} not available for {track} track")

    # Keep positive scores only, then rank by the plotted metric so "top N"
    # does not depend on the caller's row order. mergesort is stable, so
    # ties keep their incoming order.
    valid_models = (
        df[df[metric_col] > 0]
        .sort_values(metric_col, ascending=False, kind="mergesort")
        .head(top_n)
    )

    if valid_models.empty:
        return _message_figure("No valid models found")

    categories = _column_or(valid_models, "model_category", "unknown")
    authors = _column_or(valid_models, "author", "")
    samples = _column_or(valid_models, f"{track}_samples", 0)

    # Unknown categories are drawn in neutral grey.
    colors = [
        MODEL_CATEGORIES.get(cat, {}).get("color", "#808080") for cat in categories
    ]

    # Asymmetric horizontal error bars, only when both CI bounds exist.
    has_ci = (
        ci_lower_col in valid_models.columns and ci_upper_col in valid_models.columns
    )
    error_x = None
    if has_ci:
        error_x = dict(
            type="data",
            symmetric=False,
            array=valid_models[ci_upper_col] - valid_models[metric_col],
            arrayminus=valid_models[metric_col] - valid_models[ci_lower_col],
            visible=True,
            thickness=2,
            width=4,
        )

    metric_label = metric.title()

    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            y=valid_models["model_name"],
            x=valid_models[metric_col],
            orientation="h",
            marker=dict(color=colors, line=dict(color="black", width=0.5)),
            error_x=error_x,
            text=[f"{score:.3f}" for score in valid_models[metric_col]],
            textposition="auto",
            hovertemplate=(
                "<b>%{y}</b><br>"
                + f"{metric_label}: %{{x:.4f}}<br>"
                + "Category: %{customdata[0]}<br>"
                + "Author: %{customdata[1]}<br>"
                + "Samples: %{customdata[2]}<br>"
                + "<extra></extra>"
            ),
            customdata=list(zip(categories, authors, samples)),
            # The legend is reserved for the per-category entries added below.
            showlegend=False,
        )
    )

    # Fall back to a prettified key when the track is not configured.
    track_name = EVALUATION_TRACKS.get(track, {}).get(
        "name", track.replace("_", " ").title()
    )
    # Only advertise confidence intervals when error bars are actually drawn.
    axis_title = f"{metric_label} Score"
    if has_ci:
        axis_title += " (with 95% CI)"

    fig.update_layout(
        title=f"🏆 {track_name} - {metric_label} Score",
        xaxis_title=axis_title,
        yaxis_title="Models",
        height=max(400, len(valid_models) * 35 + 100),
        margin=dict(l=20, r=20, t=60, b=20),
        plot_bgcolor="white",
        paper_bgcolor="white",
        font=dict(size=12),
    )

    # Reverse y-axis to show best model at top
    fig.update_yaxes(autorange="reversed")

    # Data-less marker traces act as legend entries for the categories shown.
    shown_categories = set(categories)
    for category, info in MODEL_CATEGORIES.items():
        if category in shown_categories:
            fig.add_trace(
                go.Scatter(
                    x=[None],
                    y=[None],
                    mode="markers",
                    marker=dict(size=10, color=info["color"]),
                    name=info["name"],
                    showlegend=True,
                )
            )

    return fig
160
 
161
+
162
def create_language_pair_heatmap_scientific(
    model_results: Dict, track: str, metric: str = "quality_score"
) -> go.Figure:
    """Create a source-by-target heatmap of one metric for one track.

    Args:
        model_results: Result mapping; reads
            ``model_results["tracks"][track]["pair_metrics"]``, keyed by
            ``"{src}_to_{tgt}"``. Each pair maps metric names either to a
            mapping with a ``"mean"`` entry or to a plain number.
        track: Track key; its language list and display name come from
            ``EVALUATION_TRACKS``.
        metric: Metric name to plot.

    Returns:
        A Plotly figure. Cells without data (including the diagonal) are
        NaN. When the results or the track are unavailable the figure only
        contains a centred explanatory annotation.
    """

    def _message_figure(text: str) -> go.Figure:
        # Paper coordinates keep the message centred on an empty figure.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=text, xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
        )
        return placeholder

    if not model_results or "tracks" not in model_results:
        return _message_figure("No model results available")

    track_data = model_results["tracks"].get(track) or {}
    track_info = EVALUATION_TRACKS.get(track)
    if (
        track_info is None
        or track_data.get("error")
        or "pair_metrics" not in track_data
    ):
        return _message_figure(f"No data available for {track} track")

    pair_metrics = track_data["pair_metrics"]
    track_languages = track_info["languages"]

    # Rows are source languages, columns are targets; NaN marks "no data".
    n_langs = len(track_languages)
    matrix = np.full((n_langs, n_langs), np.nan)

    for i, src_lang in enumerate(track_languages):
        for j, tgt_lang in enumerate(track_languages):
            if src_lang == tgt_lang:
                continue
            pair_key = f"{src_lang}_to_{tgt_lang}"
            if pair_key not in pair_metrics or metric not in pair_metrics[pair_key]:
                continue
            value = pair_metrics[pair_key][metric]
            # Accept both {"mean": ...} summaries and bare numbers.
            if isinstance(value, dict):
                value = value.get("mean")
            if value is not None:
                matrix[i, j] = value

    lang_labels = [LANGUAGE_NAMES.get(lang, lang.upper()) for lang in track_languages]
    metric_label = metric.replace("_", " ").title()

    fig = go.Figure(
        data=go.Heatmap(
            z=matrix,
            x=lang_labels,
            y=lang_labels,
            colorscale="Viridis",
            showscale=True,
            colorbar=dict(
                # `titleside` is deprecated; the supported form is title.side.
                title=dict(text=metric_label, side="right"),
                len=0.8,
            ),
            hovertemplate=(
                "Source: %{y}<br>"
                + "Target: %{x}<br>"
                + f"{metric_label}: %{{z:.3f}}<br>"
                + "<extra></extra>"
            ),
            zmin=0,
            # Only quality_score gets a fixed upper bound of 1; other metrics
            # autoscale.
            zmax=1 if metric == "quality_score" else None,
        )
    )

    track_name = track_info.get("name", track.replace("_", " ").title())
    fig.update_layout(
        title=f"🗺️ {track_name} - {metric_label} by Language Pair",
        xaxis_title="Target Language",
        yaxis_title="Source Language",
        height=600,
        width=700,
        font=dict(size=12),
        xaxis=dict(side="bottom"),
        yaxis=dict(autorange="reversed"),  # Source languages from top to bottom
    )

    return fig
237
+
238
+
239
def create_statistical_comparison_plot(
    df: pd.DataFrame, track: str, top_n: int = 10
) -> go.Figure:
    """Create a dot-and-interval plot of quality scores with confidence bounds.

    Args:
        df: Leaderboard table. Reads ``{track}_quality``,
            ``{track}_ci_lower``, ``{track}_ci_upper`` and ``model_name``;
            ``model_category`` is used for colouring when present.
        track: Track key used to build the column names and to look up the
            display name in ``EVALUATION_TRACKS``.
        top_n: Maximum number of models to show, taken in the table's row
            order. Defaults to 10.

    Returns:
        A Plotly figure with one row per model. When the table is empty, the
        required columns are missing, or no row has a positive score with
        both bounds, the figure only contains a centred annotation.
    """

    def _message_figure(text: str) -> go.Figure:
        # Paper coordinates keep the message centred on an empty figure.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=text, xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
        )
        return placeholder

    if df.empty:
        return _message_figure("No data available")

    metric_col = f"{track}_quality"
    ci_lower_col = f"{track}_ci_lower"
    ci_upper_col = f"{track}_ci_upper"

    # Degrade to a message instead of raising KeyError on missing columns.
    required = (metric_col, ci_lower_col, ci_upper_col)
    if any(col not in df.columns for col in required):
        return _message_figure("No models with confidence intervals")

    # Keep models with a positive score and both interval bounds.
    has_interval = df[ci_lower_col].notna() & df[ci_upper_col].notna()
    valid_models = df[(df[metric_col] > 0) & has_interval].head(top_n)

    if valid_models.empty:
        return _message_figure("No models with confidence intervals")

    fig = go.Figure()

    # Each model occupies one integer y position; tick labels are set below.
    for row_pos, (_, model) in enumerate(valid_models.iterrows()):
        category = model.get("model_category", "unknown")
        color = MODEL_CATEGORIES.get(category, {}).get("color", "#808080")
        lower, upper = model[ci_lower_col], model[ci_upper_col]

        # Point estimate
        fig.add_trace(
            go.Scatter(
                x=[model[metric_col]],
                y=[row_pos],
                mode="markers",
                marker=dict(
                    size=12,
                    color=color,
                    line=dict(color="black", width=1),
                ),
                name=model["model_name"],
                showlegend=False,
                hovertemplate=(
                    f"<b>{model['model_name']}</b><br>"
                    + f"Quality: {model[metric_col]:.4f}<br>"
                    + f"95% CI: [{lower:.4f}, {upper:.4f}]<br>"
                    + f"Category: {category}<br>"
                    + "<extra></extra>"
                ),
            )
        )

        # Confidence interval line
        fig.add_trace(
            go.Scatter(
                x=[lower, upper],
                y=[row_pos, row_pos],
                mode="lines",
                line=dict(color=color, width=3),
                showlegend=False,
                hoverinfo="skip",
            )
        )

        # Interval end caps. "line-ns" is a stroke-only symbol, so the stroke
        # colour is set explicitly to match the interval line.
        fig.add_trace(
            go.Scatter(
                x=[lower, upper],
                y=[row_pos, row_pos],
                mode="markers",
                marker=dict(
                    symbol="line-ns",
                    size=10,
                    color=color,
                    line=dict(width=2, color=color),
                ),
                showlegend=False,
                hoverinfo="skip",
            )
        )

    # Fall back to a prettified key when the track is not configured.
    track_name = EVALUATION_TRACKS.get(track, {}).get(
        "name", track.replace("_", " ").title()
    )
    fig.update_layout(
        title=f"📊 {track_name} - Statistical Comparison",
        xaxis_title="Quality Score",
        yaxis_title="Models",
        height=max(400, len(valid_models) * 40 + 100),
        yaxis=dict(
            tickmode="array",
            tickvals=list(range(len(valid_models))),
            ticktext=valid_models["model_name"].tolist(),
            autorange="reversed",  # first row at the top
        ),
        showlegend=False,
        plot_bgcolor="white",
        paper_bgcolor="white",
    )

    return fig
341
 
342
+
343
def create_category_comparison_plot(df: pd.DataFrame, track: str) -> go.Figure:
    """Create one box plot of quality scores per model category.

    Args:
        df: Leaderboard table. Reads ``{track}_quality``,
            ``{track}_adequate``, ``model_category`` and ``model_name``.
        track: Track key used to build the column names and to look up the
            display name in ``EVALUATION_TRACKS``.

    Returns:
        A Plotly figure with a box (and all individual points) for every
        category in ``MODEL_CATEGORIES`` that has at least one adequate
        model with a positive score. When the table is empty, required
        columns are missing, or no model qualifies, the figure only contains
        a centred annotation.
    """

    def _message_figure(text: str) -> go.Figure:
        # Paper coordinates keep the message centred on an empty figure.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=text, xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
        )
        return placeholder

    if df.empty:
        return _message_figure("No data available")

    metric_col = f"{track}_quality"
    adequate_col = f"{track}_adequate"

    # Degrade to a message instead of raising KeyError on missing columns.
    required = (metric_col, adequate_col, "model_category", "model_name")
    if any(col not in df.columns for col in required):
        return _message_figure("No adequate models found")

    # `.eq(True)` yields a clean boolean mask even when the adequacy column
    # is object-typed or contains NaN, which a bare `&` would not tolerate.
    is_adequate = df[adequate_col].eq(True)
    valid_models = df[is_adequate & (df[metric_col] > 0)]

    if valid_models.empty:
        return _message_figure("No adequate models found")

    fig = go.Figure()

    # One box per configured category, in configuration order.
    for category, info in MODEL_CATEGORIES.items():
        category_models = valid_models[valid_models["model_category"] == category]
        if category_models.empty:
            continue

        fig.add_trace(
            go.Box(
                y=category_models[metric_col],
                name=info["name"],
                marker_color=info["color"],
                boxpoints="all",  # Show all points
                jitter=0.3,
                pointpos=-1.8,
                hovertemplate=(
                    f"<b>{info['name']}</b><br>"
                    + "Quality: %{y:.4f}<br>"
                    + "Model: %{customdata}<br>"
                    + "<extra></extra>"
                ),
                customdata=category_models["model_name"],
            )
        )

    # Fall back to a prettified key when the track is not configured.
    track_name = EVALUATION_TRACKS.get(track, {}).get(
        "name", track.replace("_", " ").title()
    )
    fig.update_layout(
        title=f"📈 {track_name} - Performance by Category",
        xaxis_title="Model Category",
        yaxis_title="Quality Score",
        height=500,
        showlegend=False,
        plot_bgcolor="white",
        paper_bgcolor="white",
    )

    return fig
402
 
403
+
404
def create_adequacy_analysis_plot(df: pd.DataFrame) -> go.Figure:
    """Create a 2x2 overview of sample sizes, adequacy and model categories.

    Panels:
        1. Bar: summed positive ``{track}_samples`` per track in
           ``EVALUATION_TRACKS`` (tracks without that column are omitted).
        2. Pie: ``scientific_adequacy_score`` binned into Poor / Fair /
           Good / Excellent.
        3. Histogram of ``scientific_adequacy_score``.
        4. Bar: number of models per ``model_category``.

    Args:
        df: Leaderboard table. Panels whose source column is missing are
            left empty instead of raising.

    Returns:
        A Plotly figure; for an empty table, a figure containing only a
        centred "No data available" annotation.
    """

    if df.empty:
        fig = go.Figure()
        # Paper coordinates keep the message centred on an empty figure.
        fig.add_annotation(
            text="No data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
        )
        return fig

    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=(
            "Sample Sizes by Track",
            "Statistical Adequacy Distribution",
            "Scientific Adequacy Scores",
            "Model Categories Distribution",
        ),
        specs=[
            [{"type": "bar"}, {"type": "pie"}],
            [{"type": "histogram"}, {"type": "bar"}],
        ],
    )

    # Panel 1: total positive sample counts per track.
    track_names = []
    sample_counts = []
    for track in EVALUATION_TRACKS:
        samples_col = f"{track}_samples"
        if samples_col in df.columns:
            positive = df.loc[df[samples_col] > 0, samples_col]
            track_names.append(track.replace("_", " ").title())
            sample_counts.append(positive.sum())

    if track_names:
        fig.add_trace(
            go.Bar(x=track_names, y=sample_counts, name="Samples"), row=1, col=1
        )

    if "scientific_adequacy_score" in df.columns:
        scores = df["scientific_adequacy_score"]

        # Panel 2: include_lowest=True so a score of exactly 0 lands in
        # "Poor" instead of falling outside the first (0, 0.3] interval.
        adequacy_bins = pd.cut(
            scores,
            bins=[0, 0.3, 0.6, 0.8, 1.0],
            labels=["Poor", "Fair", "Good", "Excellent"],
            include_lowest=True,
        )
        # sort=False keeps the ordinal label order. A categorical
        # value_counts lists every label, so emptiness is judged after
        # dropping the zero counts.
        adequacy_counts = adequacy_bins.value_counts(sort=False)
        adequacy_counts = adequacy_counts[adequacy_counts > 0]

        if not adequacy_counts.empty:
            fig.add_trace(
                go.Pie(
                    labels=[str(label) for label in adequacy_counts.index],
                    values=adequacy_counts.tolist(),
                    name="Adequacy",
                    sort=False,  # keep Poor -> Excellent order
                ),
                row=1,
                col=2,
            )

        # Panel 3: raw score distribution.
        fig.add_trace(
            go.Histogram(x=scores, nbinsx=20, name="Adequacy Scores"),
            row=2,
            col=1,
        )

    # Panel 4: model count per category, coloured like the other plots.
    if "model_category" in df.columns:
        category_counts = df["model_category"].value_counts()
        category_colors = [
            MODEL_CATEGORIES.get(cat, {}).get("color", "#808080")
            for cat in category_counts.index
        ]

        fig.add_trace(
            go.Bar(
                x=category_counts.index,
                y=category_counts.values,
                marker_color=category_colors,
                name="Categories",
            ),
            row=2,
            col=2,
        )

    fig.update_layout(
        title="📊 Scientific Evaluation Analysis", height=800, showlegend=False
    )

    return fig
494
 
495
+
496
def create_cross_track_analysis_plot(df: pd.DataFrame) -> go.Figure:
    """Create a scatter plot comparing model quality on two tracks.

    The first two ``<track>_quality`` columns (in ``EVALUATION_TRACKS``
    order) that exist in ``df`` become the x and y axes. Points are coloured
    by ``model_category`` using ``MODEL_CATEGORIES``; models whose category is
    missing or unknown are drawn in gray under the legend entry "Other" so
    that every model used for the axis range is also visible. A dashed
    y = x reference line is added.

    Args:
        df: Leaderboard table; reads the ``<track>_quality`` columns,
            ``model_name`` and (when present) ``model_category``.

    Returns:
        The scatter figure, or a figure holding only an explanatory
        annotation when ``df`` is empty, fewer than two quality columns
        exist, or fewer than three models qualify.
    """

    def _message_figure(text: str) -> go.Figure:
        # Paper coordinates keep the message centred regardless of the
        # (empty) axis ranges.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=text, xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
        )
        return placeholder

    def _scatter_trace(models: pd.DataFrame, color: str, label: str) -> go.Scatter:
        # One marker trace per category so each gets its own legend entry.
        return go.Scatter(
            x=models[x_col],
            y=models[y_col],
            mode="markers",
            marker=dict(
                size=10,
                color=color,
                line=dict(color="black", width=1),
            ),
            name=label,
            text=models["model_name"],
            hovertemplate=(
                "<b>%{text}</b><br>"
                + f"{x_track}: %{{x:.4f}}<br>"
                + f"{y_track}: %{{y:.4f}}<br>"
                + f"Category: {label}<br>"
                + "<extra></extra>"
            ),
        )

    if df.empty:
        return _message_figure("No data available")

    # Quality columns that actually exist, in EVALUATION_TRACKS order.
    available_cols = [
        col
        for col in (f"{track}_quality" for track in EVALUATION_TRACKS)
        if col in df.columns
    ]
    if len(available_cols) < 2:
        return _message_figure("Need at least 2 tracks for comparison")

    # Keep models with a positive score in every available track.
    # NOTE(review): only the first pair is plotted below, yet a model must
    # score on *all* available tracks to be kept - confirm this is intended.
    has_all_tracks = (df[available_cols] > 0).all(axis=1)
    multi_track_models = df[has_all_tracks]

    if len(multi_track_models) < 3:
        return _message_figure("Insufficient models for cross-track analysis")

    # Only the first pair of tracks is visualised.
    x_col, y_col = available_cols[0], available_cols[1]
    x_track = x_col.replace("_quality", "").replace("_", " ").title()
    y_track = y_col.replace("_quality", "").replace("_", " ").title()

    fig = go.Figure()

    # Track which rows were drawn so the remainder can be shown as "Other".
    plotted = pd.Series(False, index=multi_track_models.index)
    if "model_category" in multi_track_models.columns:
        for category, info in MODEL_CATEGORIES.items():
            in_category = multi_track_models["model_category"] == category
            if in_category.any():
                plotted |= in_category
                fig.add_trace(
                    _scatter_trace(
                        multi_track_models[in_category], info["color"], info["name"]
                    )
                )

    uncategorized = multi_track_models[~plotted]
    if not uncategorized.empty:
        fig.add_trace(_scatter_trace(uncategorized, "#808080", "Other"))

    # Dashed y = x line spanning the observed score range.
    min_val = min(multi_track_models[x_col].min(), multi_track_models[y_col].min())
    max_val = max(multi_track_models[x_col].max(), multi_track_models[y_col].max())

    fig.add_trace(
        go.Scatter(
            x=[min_val, max_val],
            y=[min_val, max_val],
            mode="lines",
            line=dict(dash="dash", color="gray", width=2),
            name="Perfect Correlation",
            showlegend=False,
            hoverinfo="skip",
        )
    )

    fig.update_layout(
        title=f"🔄 Cross-Track Performance: {x_track} vs {y_track}",
        xaxis_title=f"{x_track} Quality Score",
        yaxis_title=f"{y_track} Quality Score",
        height=600,
        width=600,
        plot_bgcolor="white",
        paper_bgcolor="white",
    )

    return fig
607
 
608
+
609
def create_scientific_model_detail_plot(
    model_results: Dict, model_name: str, track: str
) -> go.Figure:
    """Create a per-language-pair detail figure for one model on one track.

    The figure has two stacked bar charts: mean quality score per language
    pair with its confidence interval as error bars, and mean BLEU per
    language pair. Pairs are every ordered (src, tgt) combination of the
    track's languages for which ``pair_metrics["<src>_to_<tgt>"]`` provides
    both ``quality_score`` and ``sample_count``.

    Args:
        model_results: Evaluation results; reads
            ``model_results["tracks"][track]["pair_metrics"]``. Each pair
            entry is expected to hold ``quality_score`` (with ``mean`` and,
            optionally, ``ci_lower`` / ``ci_upper``), ``sample_count`` and an
            optional ``bleu`` dict with ``mean``.
        model_name: Name shown in the figure title.
        track: Key into ``EVALUATION_TRACKS`` and ``model_results["tracks"]``.

    Returns:
        The detail figure, or a figure holding only an explanatory
        annotation when results, track data or pair data are unavailable
        (including a ``track`` that is not configured in
        ``EVALUATION_TRACKS``).
    """

    def _message_figure(text: str) -> go.Figure:
        # Paper coordinates keep the message centred regardless of the
        # (empty) axis ranges.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=text, xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
        )
        return placeholder

    if not model_results or "tracks" not in model_results:
        return _message_figure("No model results available")

    # ``or {}`` also covers a track explicitly stored as None.
    track_data = model_results["tracks"].get(track) or {}
    track_info = EVALUATION_TRACKS.get(track)
    if (
        track_info is None
        or track_data.get("error")
        or "pair_metrics" not in track_data
    ):
        return _message_figure(f"No data for {track} track")

    pair_metrics = track_data["pair_metrics"]
    track_languages = track_info["languages"]

    # Collect the plotted series, one entry per usable language pair.
    pairs = []
    quality_means = []
    ci_above = []  # upper error-bar length: ci_upper - mean
    ci_below = []  # lower error-bar length: mean - ci_lower
    bleu_means = []
    sample_counts = []

    for src in track_languages:
        for tgt in track_languages:
            if src == tgt:
                continue

            metrics = pair_metrics.get(f"{src}_to_{tgt}")
            if (
                not metrics
                or "quality_score" not in metrics
                or "sample_count" not in metrics
            ):
                continue

            src_name = LANGUAGE_NAMES.get(src, src)
            tgt_name = LANGUAGE_NAMES.get(tgt, tgt)
            pairs.append(f"{src_name} → {tgt_name}")

            quality_stats = metrics["quality_score"]
            mean = quality_stats["mean"]
            quality_means.append(mean)
            # A missing CI bound collapses to the mean (zero-length bar)
            # rather than aborting the whole figure.
            ci_above.append(quality_stats.get("ci_upper", mean) - mean)
            ci_below.append(mean - quality_stats.get("ci_lower", mean))

            # BLEU is optional; absent values are plotted as 0.
            bleu_means.append((metrics.get("bleu") or {}).get("mean", 0))

            sample_counts.append(metrics["sample_count"])

    if not pairs:
        return _message_figure("No language pair data available")

    fig = make_subplots(
        rows=2,
        cols=1,
        subplot_titles=(
            "Quality Scores by Language Pair (with 95% CI)",
            "BLEU Scores by Language Pair",
        ),
        vertical_spacing=0.15,
    )

    # Row 1: quality scores with asymmetric confidence-interval error bars.
    fig.add_trace(
        go.Bar(
            x=pairs,
            y=quality_means,
            error_y=dict(
                type="data",
                array=ci_above,
                arrayminus=ci_below,
                visible=True,
                thickness=2,
                width=4,
            ),
            name="Quality Score",
            marker_color="steelblue",
            text=[f"{score:.3f}" for score in quality_means],
            textposition="outside",
            hovertemplate=(
                "<b>%{x}</b><br>"
                + "Quality: %{y:.4f}<br>"
                + "Samples: %{customdata}<br>"
                + "<extra></extra>"
            ),
            customdata=sample_counts,
        ),
        row=1,
        col=1,
    )

    # Row 2: BLEU scores.
    fig.add_trace(
        go.Bar(
            x=pairs,
            y=bleu_means,
            name="BLEU Score",
            marker_color="coral",
            text=[f"{score:.1f}" for score in bleu_means],
            textposition="outside",
        ),
        row=2,
        col=1,
    )

    fig.update_layout(
        title=f"🔬 Detailed Analysis: {model_name} - {track_info['name']}",
        height=900,
        showlegend=False,
        # Extra bottom margin leaves room for the rotated pair labels.
        margin=dict(l=50, r=50, t=100, b=150),
    )

    # Rotate the language-pair labels on both subplots.
    fig.update_xaxes(tickangle=45, row=1, col=1)
    fig.update_xaxes(tickangle=45, row=2, col=1)

    return fig