de-Rodrigo committed on
Commit
0598719
1 Parent(s): 8386048

Explained Variance Section for PCA

Browse files
Files changed (1) hide show
  1. app.py +19 -1
app.py CHANGED
@@ -312,6 +312,12 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
312
  learning_rate=tsne_params["learning_rate"])
313
 
314
  reduced = reducer.fit_transform(df_combined[embedding_cols].values)
 
 
 
 
 
 
315
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
316
 
317
  df_distances = compute_wasserstein_distances_synthetic_individual(
@@ -380,9 +386,11 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
380
  "scatter_fig": scatter_fig,
381
  "dfs_reduced": dfs_reduced,
382
  "unique_subsets": unique_subsets,
383
- "df_distances": df_distances
 
384
  }
385
 
 
386
  # =============================================================================
387
  # Función de optimización (grid search) para TSNE, usando la misma pipeline
388
  # =============================================================================
@@ -476,6 +484,15 @@ def run_model(model_name):
476
  })
477
  st.table(reg_metrics)
478
 
 
 
 
 
 
 
 
 
 
479
  data_table, df_table, source_table = create_table(result["df_distances"])
480
  real_subset_names = list(df_table.columns[1:])
481
  real_select = Select(title="", value=real_subset_names[0], options=real_subset_names)
@@ -537,6 +554,7 @@ def run_model(model_name):
537
  key=f"download_button_excel_{model_name}"
538
  )
539
 
 
540
  def main():
541
  config_style()
542
  tabs = st.tabs(["Donut", "Idefics2"])
 
312
  learning_rate=tsne_params["learning_rate"])
313
 
314
  reduced = reducer.fit_transform(df_combined[embedding_cols].values)
315
+
316
+ # Si se usa PCA, capturamos la varianza explicada
317
+ explained_variance = None
318
+ if reduction_method == "PCA":
319
+ explained_variance = reducer.explained_variance_ratio_
320
+
321
  dfs_reduced, unique_subsets = split_versions(df_combined, reduced)
322
 
323
  df_distances = compute_wasserstein_distances_synthetic_individual(
 
386
  "scatter_fig": scatter_fig,
387
  "dfs_reduced": dfs_reduced,
388
  "unique_subsets": unique_subsets,
389
+ "df_distances": df_distances,
390
+ "explained_variance": explained_variance # Se incluye la varianza explicada (solo para PCA)
391
  }
392
 
393
+
394
  # =============================================================================
395
  # Función de optimización (grid search) para TSNE, usando la misma pipeline
396
  # =============================================================================
 
484
  })
485
  st.table(reg_metrics)
486
 
487
+ # Si se ha utilizado PCA, mostramos la varianza explicada
488
+ if reduction_method == "PCA" and result["explained_variance"] is not None:
489
+ st.subheader("Explained Variance Ratio")
490
+ variance_df = pd.DataFrame({
491
+ "Component": ["PC1", "PC2"],
492
+ "Explained Variance": result["explained_variance"]
493
+ })
494
+ st.table(variance_df)
495
+
496
  data_table, df_table, source_table = create_table(result["df_distances"])
497
  real_subset_names = list(df_table.columns[1:])
498
  real_select = Select(title="", value=real_subset_names[0], options=real_subset_names)
 
554
  key=f"download_button_excel_{model_name}"
555
  )
556
 
557
+
558
  def main():
559
  config_style()
560
  tabs = st.tabs(["Donut", "Idefics2"])