Spaces:

mlfoundations
/

OpenThoughts_data_explorer

Running

App Files Files Community

jmercat committed on Jun 2

Commit

f7fb142

1 Parent(s): 7362f99

Uncertainty aware correlation heatmap

Browse files

Files changed (1) hide show

app.py +545 -19

app.py CHANGED Viewed

@@ -216,7 +216,7 @@ def create_interactive_heatmap(corr_matrix, title="Correlation Heatmap"):
         hover_row = []
         for j, bench2 in enumerate(corr_matrix.columns):
             if i == j:
-                hover_row.append(f"{clean_names[i]}<br>Reliability: 100%")
             else:
                 corr_val = corr_matrix_pct.iloc[i, j]
                 if pd.isna(corr_val):
@@ -461,6 +461,361 @@ def create_consensus_ranking(df, method='spearman', use_rank_imputation=True):
     return ranking_df, df_ranks, metadata
 def main():
     """Main application."""
     st.markdown('<h1 class="main-header">OpenThoughts Evalchemy Benchmark Explorer</h1>',
@@ -681,32 +1036,181 @@ def show_overview_dashboard(df, stderr_df):
         st.write(f"Total pairs analyzed: {len(pairs)}")
 def show_interactive_heatmap(df):
-    """Show the interactive heatmap."""
     st.header("🔥 Interactive Correlation Heatmap")
-    # Correlation method selection
-    col1, col2 = st.columns([3, 1])
     with col2:
-        corr_method = st.selectbox(
-            "Correlation Method",
-            ["pearson", "spearman", "kendall"],
-            help="**Pearson's r** is a parametric measure of linear correlation that is sensitive to outliers and can be less appropriate for ordinal data.\n" +
-                 "**Spearman's rho** is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data.\n" +
-                 "**Kendall's tau** is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data."
-        )
     # Compute correlation matrix
-    corr_matrix = compute_correlations(df, corr_method)
-    # Create and display heatmap
-    fig = create_interactive_heatmap(corr_matrix, f"{corr_method.capitalize()} Correlation Matrix")
     st.plotly_chart(fig, use_container_width=True)
-    # Correlation statistics
     st.subheader("Correlation Statistics")
-    # Get all off-diagonal correlations
     mask = np.triu(np.ones_like(corr_matrix, dtype=bool), k=1)
     corr_values = corr_matrix.where(mask).stack().dropna()
@@ -724,14 +1228,36 @@ def show_interactive_heatmap(df):
     with col4:
         st.metric("Min Correlation", f"{corr_values.min():.3f}")
-    # Distribution of correlations
     st.subheader("Correlation Distribution")
-    fig = px.histogram(corr_values,
                        nbins=20,
                        title="Distribution of Pairwise Correlations",
                        labels={'value': 'Correlation Coefficient', 'count': 'Frequency'})
-    st.plotly_chart(fig, use_container_width=True)
 def show_scatter_explorer(df, stderr_df):
     """Show the scatter plot explorer."""

         hover_row = []
         for j, bench2 in enumerate(corr_matrix.columns):
             if i == j:
+                hover_row.append(f"{clean_names[i]}<br>Correlation: 100%")
             else:
                 corr_val = corr_matrix_pct.iloc[i, j]
                 if pd.isna(corr_val):
     return ranking_df, df_ranks, metadata
def weighted_correlation(x, y, weights):
    """Compute a weighted Pearson correlation coefficient and its p-value.

    Observations for which ``x``, ``y`` or ``weights`` is NaN are dropped
    before anything is computed.

    Args:
        x: 1-D array-like with the values of the first variable.
        y: 1-D array-like with the values of the second variable.
        weights: 1-D array-like with one weight per observation.

    Returns:
        A ``(corr, p_value)`` tuple. Both entries are NaN when fewer than
        three valid observations remain, when the total weight is not
        positive, or when either variable has zero weighted variance.
        ``p_value`` alone is NaN when the effective sample size is 3 or less.
    """
    from scipy.stats import t

    # Accept lists / Series as well as arrays: boolean-mask indexing below
    # needs real ndarrays.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    weights = np.asarray(weights, dtype=float)

    # Remove NaN values
    valid_mask = ~(np.isnan(x) | np.isnan(y) | np.isnan(weights))
    if valid_mask.sum() < 3:
        return np.nan, np.nan
    x_clean = x[valid_mask]
    y_clean = y[valid_mask]
    w_clean = weights[valid_mask]

    # np.average raises ZeroDivisionError when the weights sum to zero;
    # report "no estimate" instead of crashing.
    sum_w = np.sum(w_clean)
    if not sum_w > 0:
        return np.nan, np.nan

    # Weighted means
    x_mean = np.average(x_clean, weights=w_clean)
    y_mean = np.average(y_clean, weights=w_clean)

    # Weighted covariance and variances
    x_dev = x_clean - x_mean
    y_dev = y_clean - y_mean
    cov = np.average(x_dev * y_dev, weights=w_clean)
    var_x = np.average(x_dev**2, weights=w_clean)
    var_y = np.average(y_dev**2, weights=w_clean)
    if var_x == 0 or var_y == 0:
        return np.nan, np.nan

    # Rounding can push |corr| marginally above 1, which would make the
    # standard error below the square root of a negative number.
    corr = np.clip(cov / np.sqrt(var_x * var_y), -1.0, 1.0)

    # Effective sample size approximation for weighted data
    eff_n = sum_w**2 / np.sum(w_clean**2)
    if not eff_n > 3:
        return corr, np.nan

    if abs(corr) == 1.0:
        # Perfect correlation: the standard error is zero and the t statistic
        # infinite, so the two-sided p-value is exactly zero.
        return corr, 0.0

    # Approximate standard error and two-sided t-test with eff_n - 2 degrees
    # of freedom. The survival function keeps precision for tiny p-values
    # where ``1 - cdf`` would round to exactly 0.
    se_corr = np.sqrt((1 - corr**2) / (eff_n - 2))
    t_stat = corr / se_corr
    p_value = 2 * t.sf(abs(t_stat), eff_n - 2)
    return corr, p_value
def match_scores_with_stderr(scores_df, stderr_df, target_benchmarks, min_models=10):
    """Match score columns with their corresponding stderr columns.

    For every benchmark column name, a list of candidate standard-error
    column names is tried in order of preference; the first candidate that
    exists in ``stderr_df`` and has at least ``min_models`` non-null entries
    is selected.

    Args:
        scores_df: Score table. Not read by this function; kept so existing
            callers keep working.
        stderr_df: DataFrame whose columns hold standard errors.
        target_benchmarks: Iterable of score column names to look up.
        min_models: Minimum number of non-null standard errors a candidate
            column needs in order to be accepted (default 10).

    Returns:
        Dict mapping each matched score column name to the name of its
        standard-error column. Benchmarks without a usable match are omitted.
    """
    score_to_stderr_mapping = {}
    stderr_columns = set(stderr_df.columns)

    for col in target_benchmarks:
        base_name = col.replace('_accuracy_avg', '').replace('_accuracy', '')
        # Candidate names in order of preference. The 'x2' variant also covers
        # columns such as MATH500x2_accuracy_std_err.
        candidates = [
            f"{col}_std_err",  # Direct match
            col.replace('_accuracy', '_accuracy_std_err'),  # Handle _accuracy
            col.replace('_accuracy_avg', '_accuracy_std_err'),  # Handle _accuracy_avg
            f"{base_name}x2_accuracy_std_err",
            f"{base_name}_accuracy_std_err",
        ]

        # dict.fromkeys de-duplicates while preserving preference order.
        for stderr_name in dict.fromkeys(candidates):
            # A str.replace that matches nothing yields the score column name
            # itself; a score column must never act as its own stderr column.
            if stderr_name == col or stderr_name not in stderr_columns:
                continue
            # Require sufficient data before accepting the column
            if stderr_df[stderr_name].notna().sum() >= min_models:
                score_to_stderr_mapping[col] = stderr_name
                break

    return score_to_stderr_mapping
def create_uncertainty_aware_correlation_matrix(scores_df, stderr_df, score_to_stderr_mapping):
    """Create correlation matrices accounting for measurement uncertainties.

    Args:
        scores_df: DataFrame of benchmark scores (rows are observations,
            columns are benchmarks).
        stderr_df: DataFrame of standard errors. Rows are matched to
            ``scores_df`` by index label.
        score_to_stderr_mapping: Dict mapping each score column name to the
            name of its standard-error column in ``stderr_df``.

    Returns:
        ``(corr_df, pvalue_df, weighted_corr_df, weighted_pvalue_df)``, four
        square DataFrames indexed by the keys of ``score_to_stderr_mapping``.

        * ``corr_df`` / ``pvalue_df``: unweighted Pearson correlation and its
          p-value (diagonal fixed to 1.0 / 0.0).
        * ``weighted_corr_df`` / ``weighted_pvalue_df``: correlation weighted
          by ``1 / (stderr1**2 + stderr2**2)``. The diagonal holds the
          reliability coefficient
          ``max(0, 1 - mean(stderr**2) / var(scores))`` instead of 1.0.
          When fewer than three rows have both standard errors, the
          unweighted values are used as a fallback.

        Cells without enough data (fewer than three usable rows) stay NaN.
    """
    benchmarks = list(score_to_stderr_mapping.keys())
    n_benchmarks = len(benchmarks)

    # Off-diagonal cells pair rows positionally, so make sure both tables
    # share the same row order before extracting raw arrays.
    if not stderr_df.index.equals(scores_df.index):
        stderr_df = stderr_df.reindex(scores_df.index)

    # Initialize matrices
    corr_matrix = np.full((n_benchmarks, n_benchmarks), np.nan)
    pvalue_matrix = np.full((n_benchmarks, n_benchmarks), np.nan)
    weighted_corr_matrix = np.full((n_benchmarks, n_benchmarks), np.nan)
    weighted_pvalue_matrix = np.full((n_benchmarks, n_benchmarks), np.nan)

    # Extract every column once instead of once per pair.
    score_values = {
        bench: scores_df[bench].to_numpy(dtype=float) for bench in benchmarks
    }
    stderr_values = {
        bench: stderr_df[score_to_stderr_mapping[bench]].to_numpy(dtype=float)
        for bench in benchmarks
    }

    for i, bench1 in enumerate(benchmarks):
        # --- Diagonal: reliability coefficient ---------------------------
        # For the regular matrix the diagonal is 1.0 by definition.
        corr_matrix[i, i] = 1.0
        pvalue_matrix[i, i] = 0.0

        scores = scores_df[bench1].dropna()
        stderrs = stderr_df[score_to_stderr_mapping[bench1]].dropna()
        common_idx = scores.index.intersection(stderrs.index)
        if len(common_idx) >= 3:
            # reliability = 1 - (measurement_error_variance / total_variance)
            total_variance = scores.loc[common_idx].var()
            mean_error_variance = (stderrs.loc[common_idx] ** 2).mean()
            if total_variance > 0:
                reliability = max(0, 1 - (mean_error_variance / total_variance))
            else:
                # No spread in the observed scores: nothing is "true" signal.
                reliability = 0.0
            weighted_corr_matrix[i, i] = reliability
            weighted_pvalue_matrix[i, i] = 0.0
        # else: insufficient data, weighted diagonal stays NaN

        # --- Off-diagonal: both statistics are symmetric, so compute the
        # upper triangle only and mirror it. -------------------------------
        x = score_values[bench1]
        stderr1 = stderr_values[bench1]
        for j in range(i + 1, n_benchmarks):
            bench2 = benchmarks[j]
            y = score_values[bench2]
            stderr2 = stderr_values[bench2]

            # Standard (unweighted) correlation
            valid_mask = ~(np.isnan(x) | np.isnan(y))
            has_scores = valid_mask.sum() >= 3
            if has_scores:
                corr, p_val = pearsonr(x[valid_mask], y[valid_mask])
                corr_matrix[i, j] = corr_matrix[j, i] = corr
                pvalue_matrix[i, j] = pvalue_matrix[j, i] = p_val

            # Weighted correlation using inverse variance weighting:
            # weight = 1 / (stderr1^2 + stderr2^2), accounting for error in
            # both variables.
            valid_stderr_mask = ~(np.isnan(stderr1) | np.isnan(stderr2)) & valid_mask
            if valid_stderr_mask.sum() >= 3:
                combined_variance = (
                    stderr1[valid_stderr_mask] ** 2 + stderr2[valid_stderr_mask] ** 2
                )
                # Rows with zero combined variance get weight 0; ``where``
                # skips the division there instead of dividing by zero.
                weights = np.divide(
                    1.0,
                    combined_variance,
                    out=np.zeros_like(combined_variance),
                    where=combined_variance > 0,
                )
                if weights.sum() > 0:
                    w_corr, w_p_val = weighted_correlation(
                        x[valid_stderr_mask],
                        y[valid_stderr_mask],
                        weights,
                    )
                    weighted_corr_matrix[i, j] = weighted_corr_matrix[j, i] = w_corr
                    weighted_pvalue_matrix[i, j] = weighted_pvalue_matrix[j, i] = w_p_val
            elif has_scores:
                # Not enough standard errors: use the regular correlation
                # for the weighted matrix too.
                weighted_corr_matrix[i, j] = weighted_corr_matrix[j, i] = corr_matrix[i, j]
                weighted_pvalue_matrix[i, j] = weighted_pvalue_matrix[j, i] = pvalue_matrix[i, j]

    # Convert to DataFrames
    corr_df = pd.DataFrame(corr_matrix, index=benchmarks, columns=benchmarks)
    pvalue_df = pd.DataFrame(pvalue_matrix, index=benchmarks, columns=benchmarks)
    weighted_corr_df = pd.DataFrame(weighted_corr_matrix, index=benchmarks, columns=benchmarks)
    weighted_pvalue_df = pd.DataFrame(weighted_pvalue_matrix, index=benchmarks, columns=benchmarks)
    return corr_df, pvalue_df, weighted_corr_df, weighted_pvalue_df
def create_uncertainty_weighted_heatmap_plotly(weighted_corr_df, title_prefix="Uncertainty-Weighted Correlation Analysis"):
    """Create a single uncertainty-weighted heatmap using Plotly.

    Args:
        weighted_corr_df: Square DataFrame of weighted correlations whose
            diagonal holds reliability coefficients. Column names are passed
            through ``clean_benchmark_name`` for display.
        title_prefix: Text placed at the start of the figure title.

    Returns:
        A ``go.Figure`` containing one heatmap. Cell text shows the values as
        percentages rounded to one decimal; hover text labels diagonal cells
        as reliability and off-diagonal cells as weighted correlation.
    """
    # Get clean names for display
    clean_names = [clean_benchmark_name(name) for name in weighted_corr_df.columns]

    # Percent values for cell annotations and hover text
    weighted_corr_pct = (weighted_corr_df * 100).round(1)

    # Create hover text for weighted correlations
    hover_text_weighted = []
    for i, row_name in enumerate(clean_names):
        hover_row = []
        for j, col_name in enumerate(clean_names):
            if i == j:
                # The diagonal carries the reliability coefficient
                reliability = weighted_corr_df.iloc[i, j]
                if pd.isna(reliability):
                    hover_row.append(f"{row_name}<br>Reliability: Unknown")
                else:
                    hover_row.append(f"{row_name}<br>Reliability: {reliability*100:.1f}%")
            else:
                corr_val = weighted_corr_pct.iloc[i, j]
                if pd.isna(corr_val):
                    hover_row.append(f"{row_name} vs {col_name}<br>No weighted data")
                else:
                    hover_row.append(f"{row_name} vs {col_name}<br>Weighted correlation: {corr_val:.1f}%")
        hover_text_weighted.append(hover_row)

    # Create the heatmap
    fig = go.Figure(data=go.Heatmap(
        z=weighted_corr_df.values,
        x=clean_names,
        y=clean_names,
        colorscale='RdBu_r',
        zmid=0,
        text=weighted_corr_pct.values,
        texttemplate="%{text}",
        textfont={"size": 10},
        hoverinfo='text',
        hovertext=hover_text_weighted,
        colorbar=dict(title="Correlation")
    ))

    # Update layout
    fig.update_layout(
        title=f"{title_prefix}<br><sub>Diagonal shows reliability coefficients (signal-to-noise ratios)</sub>",
        width=800,
        height=700,
        font=dict(size=12),
        xaxis=dict(tickangle=45),
        yaxis=dict(tickangle=0)
    )
    return fig
def _build_hover_grid(names, cell_text):
    """Return a square list-of-lists of hover strings, one per (row, column) cell."""
    size = len(names)
    return [[cell_text(i, j) for j in range(size)] for i in range(size)]


def create_uncertainty_aware_heatmap_plotly(corr_df, weighted_corr_df, title_prefix="Correlation Analysis"):
    """Create side-by-side interactive heatmaps comparing regular vs weighted correlations using Plotly.

    Args:
        corr_df: Square DataFrame of unweighted correlations (left panel).
        weighted_corr_df: Square DataFrame of weighted correlations whose
            diagonal holds reliability coefficients (right panel).
        title_prefix: Text placed at the start of the figure title.

    Returns:
        A Plotly figure with two heatmap subplots. Axis labels for both
        panels are derived from the columns of ``corr_df`` via
        ``clean_benchmark_name``. Only the right panel shows a colorbar.
    """
    # Get clean names for display
    clean_names = [clean_benchmark_name(name) for name in corr_df.columns]

    # Create subplots
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=('Regular Correlation Matrix<br>(Equal weighting)',
                       'Uncertainty-Weighted Correlation Matrix<br>(Inverse variance weighting)'),
        horizontal_spacing=0.15
    )

    # Percent values used for cell annotations and hover text
    corr_matrix_pct = (corr_df * 100).round(1)
    weighted_corr_pct = (weighted_corr_df * 100).round(1)

    def regular_cell(i, j):
        """Hover text for one cell of the regular correlation panel."""
        if i == j:
            return f"{clean_names[i]}<br>Self-correlation: 100%"
        corr_val = corr_matrix_pct.iloc[i, j]
        if pd.isna(corr_val):
            return f"{clean_names[i]} vs {clean_names[j]}<br>No data"
        return f"{clean_names[i]} vs {clean_names[j]}<br>Correlation: {corr_val:.1f}%"

    def weighted_cell(i, j):
        """Hover text for one cell of the weighted correlation panel."""
        if i == j:
            # The diagonal carries the reliability coefficient
            reliability = weighted_corr_df.iloc[i, j]
            if pd.isna(reliability):
                return f"{clean_names[i]}<br>Reliability: Unknown"
            return f"{clean_names[i]}<br>Reliability: {reliability*100:.1f}%"
        corr_val = weighted_corr_pct.iloc[i, j]
        if pd.isna(corr_val):
            return f"{clean_names[i]} vs {clean_names[j]}<br>No weighted data"
        return f"{clean_names[i]} vs {clean_names[j]}<br>Weighted correlation: {corr_val:.1f}%"

    # Regular correlation heatmap (no colorbar; it shares the right panel's)
    fig.add_trace(go.Heatmap(
        z=corr_df.values,
        x=clean_names,
        y=clean_names,
        colorscale='RdBu_r',
        zmid=0,
        text=corr_matrix_pct.values,
        texttemplate="%{text}",
        textfont={"size": 8},
        hoverinfo='text',
        hovertext=_build_hover_grid(clean_names, regular_cell),
        showscale=False,
        name="Regular"
    ), row=1, col=1)

    # Weighted correlation heatmap
    fig.add_trace(go.Heatmap(
        z=weighted_corr_df.values,
        x=clean_names,
        y=clean_names,
        colorscale='RdBu_r',
        zmid=0,
        text=weighted_corr_pct.values,
        texttemplate="%{text}",
        textfont={"size": 8},
        hoverinfo='text',
        hovertext=_build_hover_grid(clean_names, weighted_cell),
        showscale=True,
        colorbar=dict(title="Correlation", x=1.02),
        name="Weighted"
    ), row=1, col=2)

    # Update layout
    fig.update_layout(
        title=f"{title_prefix}<br><sub>Diagonal shows reliability coefficients for weighted matrix</sub>",
        width=1400,
        height=700,
        font=dict(size=12)
    )

    # Called without row/col, these apply to the axes of both subplots
    fig.update_xaxes(tickangle=45)
    fig.update_yaxes(tickangle=0)
    return fig
 def main():
     """Main application."""
     st.markdown('<h1 class="main-header">OpenThoughts Evalchemy Benchmark Explorer</h1>',
         st.write(f"Total pairs analyzed: {len(pairs)}")
 def show_interactive_heatmap(df):
+    """Display interactive correlation heatmap with various options."""
     st.header("🔥 Interactive Correlation Heatmap")
+    # Check if stderr data is available
+    stderr_df = load_stderr_data()
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        # Check if stderr data is available for the uncertainty-aware checkbox
+        stderr_available = stderr_df is not None
+        uncertainty_aware = False
+        if stderr_available:
+            uncertainty_aware = st.checkbox(
+                "🔬 Uncertainty-Aware Analysis",
+                value=False,
+                help="Use measurement uncertainties to weight correlations (requires standard error data)"
+            )
+        # Adjust method selector based on uncertainty-aware mode
+        if uncertainty_aware:
+            st.selectbox(
+                "Correlation Method",
+                ["pearson"],
+                index=0,
+                disabled=True,
+                help="**Uncertainty-aware analysis uses Pearson correlations only**\n\nWeighted correlations require parametric methods to properly account for measurement uncertainties."
+            )
+            method = "pearson"  # Force Pearson for uncertainty-aware analysis
+        else:
+            method = st.selectbox(
+                "Correlation Method",
+                ["kendall", "spearman", "pearson"],
+                index=0,
+                help="**Pearson's r** is a parametric measure of linear correlation that is sensitive to outliers and can be less appropriate for ordinal data.\n" +
+                     "**Spearman's rho** is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data.\n" +
+                     "**Kendall's tau** is a non-parametric measure of rank correlation that is less sensitive to outliers and can be more appropriate for ordinal data."
+            )
     with col2:
+        show_values = st.checkbox("Show correlation values", value=True)
+    # Additional options
+    if uncertainty_aware and stderr_df is not None:
+        st.info("🔬 **Uncertainty-Aware Mode**: Correlations are weighted by inverse measurement variance. "
+                "Diagonal shows reliability coefficients (proportion of variance that is 'true signal' vs measurement error).")
+        # Match scores with stderr data
+        available_benchmarks = list(df.columns)
+        score_to_stderr_mapping = match_scores_with_stderr(df, stderr_df, available_benchmarks)
+        if len(score_to_stderr_mapping) == 0:
+            st.warning("No matching standard error data found for the selected benchmarks. "
+                      "Falling back to regular correlation analysis.")
+            uncertainty_aware = False
+        else:
+            # Filter to benchmarks with stderr data
+            benchmarks_with_stderr = list(score_to_stderr_mapping.keys())
+            df_stderr = df[benchmarks_with_stderr].copy()
+            st.success(f"Found standard error data for {len(score_to_stderr_mapping)} benchmarks: "
+                      f"{', '.join([clean_benchmark_name(b) for b in benchmarks_with_stderr])}")
+            # Align dataframes
+            common_models = df_stderr.index.intersection(stderr_df.index)
+            df_aligned = df_stderr.loc[common_models]
+            stderr_aligned = stderr_df.loc[common_models]
+            st.write(f"**Analysis scope**: {len(common_models)} models with both scores and standard errors")
+            # Compute uncertainty-aware correlations
+            with st.spinner("Computing uncertainty-weighted correlations..."):
+                corr_df, pvalue_df, weighted_corr_df, weighted_pvalue_df = create_uncertainty_aware_correlation_matrix(
+                    df_aligned, stderr_aligned, score_to_stderr_mapping
+                )
+            # Create and display uncertainty-aware heatmap
+            fig = create_uncertainty_weighted_heatmap_plotly(
+                weighted_corr_df,
+                title_prefix=f"Uncertainty-Weighted {method.capitalize()} Correlations"
+            )
+            st.plotly_chart(fig, use_container_width=True)
+            # Show reliability statistics
+            with st.expander("📊 Reliability Statistics", expanded=False):
+                st.write("**Benchmark Reliability Coefficients** (proportion of variance that is true signal):")
+                reliability_data = []
+                for bench in weighted_corr_df.columns:
+                    diag_val = weighted_corr_df.loc[bench, bench]
+                    if not pd.isna(diag_val):
+                        reliability_data.append({
+                            'Benchmark': clean_benchmark_name(bench),
+                            'Reliability': f"{diag_val*100:.1f}%",
+                            'Category': next((cat for cat, benchs in get_focused_benchmark_mapping()[1].items()
+                                            for b in benchs if get_focused_benchmark_mapping()[0].get(b) == bench), 'Unknown')
+                        })
+                if reliability_data:
+                    reliability_df = pd.DataFrame(reliability_data)
+                    st.dataframe(reliability_df, use_container_width=True)
+                    avg_reliability = pd.to_numeric([d['Reliability'].rstrip('%') for d in reliability_data]).mean() / 100
+                    st.metric("Average Reliability", f"{avg_reliability:.3f} ({avg_reliability*100:.1f}%)")
+            # Show correlation differences
+            with st.expander("📈 Impact of Uncertainty Weighting", expanded=False):
+                st.write("**Correlation Changes** (Weighted - Regular):")
+                diff_data = []
+                for i, bench1 in enumerate(corr_df.columns):
+                    for j, bench2 in enumerate(corr_df.columns):
+                        if i < j:  # Only upper triangle
+                            regular_corr = corr_df.iloc[i, j]
+                            weighted_corr = weighted_corr_df.iloc[i, j]
+                            if not (pd.isna(regular_corr) or pd.isna(weighted_corr)):
+                                diff = weighted_corr - regular_corr
+                                diff_data.append({
+                                    'Benchmark Pair': f"{clean_benchmark_name(bench1)} vs {clean_benchmark_name(bench2)}",
+                                    'Regular': f"{regular_corr:.3f}",
+                                    'Weighted': f"{weighted_corr:.3f}",
+                                    'Difference': f"{diff:+.3f}",
+                                    'Abs Difference': abs(diff)
+                                })
+                if diff_data:
+                    diff_df = pd.DataFrame(diff_data)
+                    # Sort by absolute difference
+                    diff_df_sorted = diff_df.sort_values('Abs Difference', ascending=False)
+                    st.dataframe(diff_df_sorted.drop('Abs Difference', axis=1), use_container_width=True)
+                    # Summary stats
+                    diffs = [float(d['Difference']) for d in diff_data]
+                    col1, col2, col3 = st.columns(3)
+                    with col1:
+                        st.metric("Mean Change", f"{np.mean(diffs):+.4f}")
+                    with col2:
+                        st.metric("Max |Change|", f"{max(abs(d) for d in diffs):.4f}")
+                    with col3:
+                        st.metric("Large Changes (|Δ| > 0.1)", f"{sum(1 for d in diffs if abs(d) > 0.1)}")
+            return  # Exit early for uncertainty-aware analysis
+    # Regular correlation analysis (original functionality restored)
+    if df.empty:
+        st.error("No data available.")
+        return
     # Compute correlation matrix
+    corr_matrix = compute_correlations(df, method)
+    if corr_matrix.empty:
+        st.error("Unable to compute correlations.")
+        return
+    # Create and display regular heatmap (original way)
+    fig = create_interactive_heatmap(corr_matrix, f"{method.capitalize()} Correlation Matrix")
+    # Add correlation values as text annotations if requested
+    if show_values:
+        # Convert correlations to percentages for display
+        corr_text = (corr_matrix * 100).round().astype(str)
+        fig.update_traces(
+            text=corr_text.values,
+            texttemplate="%{text}",
+            textfont={"size": 8}
+        )
     st.plotly_chart(fig, use_container_width=True)
+    # Correlation statistics (original)
     st.subheader("Correlation Statistics")
+    # Get all off-diagonal correlations (original method)
     mask = np.triu(np.ones_like(corr_matrix, dtype=bool), k=1)
     corr_values = corr_matrix.where(mask).stack().dropna()
     with col4:
         st.metric("Min Correlation", f"{corr_values.min():.3f}")
+    # Distribution of correlations (original)
     st.subheader("Correlation Distribution")
+    fig_hist = px.histogram(corr_values,
                        nbins=20,
                        title="Distribution of Pairwise Correlations",
                        labels={'value': 'Correlation Coefficient', 'count': 'Frequency'})
+    st.plotly_chart(fig_hist, use_container_width=True)
+    # Methodology note
+    with st.expander("ℹ️ About Correlation Methods", expanded=False):
+        st.markdown("""
+        **Pearson**: Measures linear relationships. Values range from -1 to +1.
+        - +1: Perfect positive linear relationship
+        - 0: No linear relationship
+        - -1: Perfect negative linear relationship
+        **Spearman**: Measures monotonic relationships using ranks. More robust to outliers.
+        - Good for non-linear but monotonic relationships
+        - Less sensitive to extreme values
+        **Kendall**: Measures ordinal association using concordant/discordant pairs.
+        - More robust than Spearman for small samples
+        - Better for data with many tied values
+        **Uncertainty-Aware Analysis**: When available, uses measurement standard errors to:
+        - Weight correlations by inverse measurement variance
+        - Show reliability coefficients (signal-to-noise ratios) on diagonal
+        - Provide more accurate correlation estimates for noisy data
+        """)
 def show_scatter_explorer(df, stderr_df):
     """Show the scatter plot explorer."""