de-Rodrigo committed on
Commit
3139646
1 Parent(s): 566ef77

Include PCA Components Weights

Browse files
Files changed (1) hide show
  1. app.py +38 -3
app.py CHANGED
@@ -409,9 +409,9 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
409
  learning_rate=tsne_params["learning_rate"])
410
 
411
  reduced = reducer.fit_transform(df_combined[embedding_cols].values)
412
- # Guardamos el embedding completo (4 dimensiones para PCA)
413
  df_combined['embedding'] = list(reduced)
414
- # Si el embedding es 2D (por t-SNE o PCA con 2 componentes) asignamos x e y para visualización
415
  if reduced.shape[1] == 2:
416
  df_combined['x'] = reduced[:, 0]
417
  df_combined['y'] = reduced[:, 1]
@@ -489,7 +489,7 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
489
  y_line = model_global.predict(x_line.reshape(-1, 1))
490
  scatter_fig.line(x_line, y_line, line_width=2, line_color="black", legend_label="Global Regression")
491
 
492
- return {
493
  "R2": r2,
494
  "slope": slope,
495
  "intercept": intercept,
@@ -501,6 +501,11 @@ def compute_global_regression(df_combined, embedding_cols, tsne_params, df_f1, r
501
  "trustworthiness": trust,
502
  "continuity": cont
503
  }
 
 
 
 
 
504
 
505
  def optimize_tsne_params(df_combined, embedding_cols, df_f1, distance_metric):
506
  perplexity_range = np.linspace(30, 50, 10)
@@ -606,6 +611,36 @@ def run_model(model_name):
606
  st.write(f"Trustworthiness: {result['trustworthiness']:.4f}")
607
  st.write(f"Continuity: {result['continuity']:.4f}")
608
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
 
610
  data_table, df_table, source_table = create_table(result["df_distances"])
611
  real_subset_names = list(df_table.columns[1:])
 
409
  learning_rate=tsne_params["learning_rate"])
410
 
411
  reduced = reducer.fit_transform(df_combined[embedding_cols].values)
412
+ # Guardamos el embedding completo (por ejemplo, 4 dimensiones en PCA)
413
  df_combined['embedding'] = list(reduced)
414
+ # Si el embedding es 2D, asignamos x e y para visualización
415
  if reduced.shape[1] == 2:
416
  df_combined['x'] = reduced[:, 0]
417
  df_combined['y'] = reduced[:, 1]
 
489
  y_line = model_global.predict(x_line.reshape(-1, 1))
490
  scatter_fig.line(x_line, y_line, line_width=2, line_color="black", legend_label="Global Regression")
491
 
492
+ results = {
493
  "R2": r2,
494
  "slope": slope,
495
  "intercept": intercept,
 
501
  "trustworthiness": trust,
502
  "continuity": cont
503
  }
504
+
505
+ if reduction_method == "PCA":
506
+ results["pca_model"] = reducer # Agregamos el objeto PCA para usarlo luego en los plots
507
+
508
+ return results
509
 
510
  def optimize_tsne_params(df_combined, embedding_cols, df_f1, distance_metric):
511
  perplexity_range = np.linspace(30, 50, 10)
 
611
  st.write(f"Trustworthiness: {result['trustworthiness']:.4f}")
612
  st.write(f"Continuity: {result['continuity']:.4f}")
613
 
614
+ if reduction_method == "PCA" and result.get("pca_model") is not None:
615
+ pca_model = result["pca_model"]
616
+ components = pca_model.components_ # Shape: (n_components, n_features)
617
+
618
+ st.subheader("Pesos de las Componentes Principales (Loadings)")
619
+ # Para cada componente principal, se crea un plot de barras
620
+ for i, comp in enumerate(components):
621
+ # Fuente de datos con nombres de dimensiones y pesos
622
+ source = ColumnDataSource(data=dict(
623
+ dimensions=embedding_cols, # Ej: ["dim_0", "dim_1", "dim_2", ...]
624
+ weight=comp
625
+ ))
626
+ # Definir la figura usando el rango en x, pero ocultamos las etiquetas del eje
627
+ p = figure(x_range=embedding_cols, title=f"Componente Principal {i+1}",
628
+ plot_height=400, plot_width=600,
629
+ toolbar_location=None, tools="")
630
+ p.vbar(x='dimensions', top='weight', width=0.8, source=source)
631
+ # Ocultar las etiquetas del eje x para que el plot quede más limpio
632
+ p.xaxis.major_label_text_font_size = '0pt'
633
+
634
+ # Agregar HoverTool para que al pasar el mouse se muestren los datos
635
+ hover = HoverTool(tooltips=[("Dimensi贸n", "@dimensions"), ("Peso", "@weight")])
636
+ p.add_tools(hover)
637
+
638
+ # Opcionalmente, puedes seguir definiendo las etiquetas de los ejes (aunque en x no se mostrarán)
639
+ p.xaxis.axis_label = "Dimensiones originales"
640
+ p.yaxis.axis_label = "Peso"
641
+
642
+ st.bokeh_chart(p)
643
+
644
 
645
  data_table, df_table, source_table = create_table(result["df_distances"])
646
  real_subset_names = list(df_table.columns[1:])