Spaces:
Running
Running
Commit
·
a685ec6
1
Parent(s):
76f3be3
Selector to Include Pretrained Datasets
Browse files
app.py
CHANGED
@@ -537,7 +537,7 @@ def optimize_tsne_params(df_combined, embedding_cols, df_f1, distance_metric):
|
|
537 |
|
538 |
def run_model(model_name):
|
539 |
version = st.selectbox("Select Model Version:", options=["vanilla", "finetuned_real"], key=f"version_{model_name}")
|
540 |
-
#
|
541 |
embedding_computation = st.selectbox("驴C贸mo se computa el embedding?", options=["weighted", "averaged"], key=f"embedding_method_{model_name}")
|
542 |
# Se asigna el prefijo correspondiente
|
543 |
prefijo_embedding = "weighted_" if embedding_computation == "weighted" else "averaged_"
|
@@ -545,7 +545,16 @@ def run_model(model_name):
|
|
545 |
embeddings = load_embeddings(model_name, version, prefijo_embedding)
|
546 |
if embeddings is None:
|
547 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
548 |
embedding_cols = [col for col in embeddings["real"].columns if col.startswith("dim_")]
|
|
|
549 |
df_combined = pd.concat(list(embeddings.values()), ignore_index=True)
|
550 |
|
551 |
try:
|
@@ -611,36 +620,30 @@ def run_model(model_name):
|
|
611 |
st.write(f"Trustworthiness: {result['trustworthiness']:.4f}")
|
612 |
st.write(f"Continuity: {result['continuity']:.4f}")
|
613 |
|
|
|
614 |
if reduction_method == "PCA" and result.get("pca_model") is not None:
|
615 |
pca_model = result["pca_model"]
|
616 |
components = pca_model.components_ # Shape: (n_components, n_features)
|
617 |
|
618 |
st.subheader("Pesos de las Componentes Principales (Loadings)")
|
619 |
-
#
|
620 |
for i, comp in enumerate(components):
|
621 |
-
# Fuente de datos con nombres de dimensiones y pesos
|
622 |
source = ColumnDataSource(data=dict(
|
623 |
-
dimensions=embedding_cols,
|
624 |
weight=comp
|
625 |
))
|
626 |
-
# Definir la figura usando el rango en x, pero ocultamos las etiquetas del eje
|
627 |
p = figure(x_range=embedding_cols, title=f"Componente Principal {i+1}",
|
628 |
-
|
629 |
-
|
630 |
p.vbar(x='dimensions', top='weight', width=0.8, source=source)
|
631 |
-
# Ocultar
|
632 |
p.xaxis.major_label_text_font_size = '0pt'
|
633 |
-
|
634 |
-
# Agregar HoverTool para que al pasar el mouse se muestren los datos
|
635 |
hover = HoverTool(tooltips=[("Dimensi贸n", "@dimensions"), ("Peso", "@weight")])
|
636 |
p.add_tools(hover)
|
637 |
-
|
638 |
-
# Opcionalmente, puedes seguir definiendo las etiquetas de los ejes (aunque en x no se mostrarán)
|
639 |
p.xaxis.axis_label = "Dimensiones originales"
|
640 |
p.yaxis.axis_label = "Peso"
|
641 |
-
|
642 |
st.bokeh_chart(p)
|
643 |
-
|
644 |
|
645 |
data_table, df_table, source_table = create_table(result["df_distances"])
|
646 |
real_subset_names = list(df_table.columns[1:])
|
|
|
537 |
|
538 |
def run_model(model_name):
|
539 |
version = st.selectbox("Select Model Version:", options=["vanilla", "finetuned_real"], key=f"version_{model_name}")
|
540 |
+
# Selector para el método de cómputo del embedding
|
541 |
embedding_computation = st.selectbox("驴C贸mo se computa el embedding?", options=["weighted", "averaged"], key=f"embedding_method_{model_name}")
|
542 |
# Se asigna el prefijo correspondiente
|
543 |
prefijo_embedding = "weighted_" if embedding_computation == "weighted" else "averaged_"
|
|
|
545 |
embeddings = load_embeddings(model_name, version, prefijo_embedding)
|
546 |
if embeddings is None:
|
547 |
return
|
548 |
+
|
549 |
+
# Nuevo selector para incluir o excluir el dataset pretrained
|
550 |
+
include_pretrained = st.checkbox("Incluir dataset pretrained", value=True)
|
551 |
+
if not include_pretrained:
|
552 |
+
# Removemos la entrada pretrained del diccionario, si existe.
|
553 |
+
embeddings.pop("pretrained", None)
|
554 |
+
|
555 |
+
# Extraer columnas de embedding de los datos "real"
|
556 |
embedding_cols = [col for col in embeddings["real"].columns if col.startswith("dim_")]
|
557 |
+
# Concatenamos los datasets disponibles (ahora, sin pretrained si se deseleccionó)
|
558 |
df_combined = pd.concat(list(embeddings.values()), ignore_index=True)
|
559 |
|
560 |
try:
|
|
|
620 |
st.write(f"Trustworthiness: {result['trustworthiness']:.4f}")
|
621 |
st.write(f"Continuity: {result['continuity']:.4f}")
|
622 |
|
623 |
+
# Si se usó PCA, se muestran los plots de loadings con Bokeh (con hover para ver la etiqueta)
|
624 |
if reduction_method == "PCA" and result.get("pca_model") is not None:
|
625 |
pca_model = result["pca_model"]
|
626 |
components = pca_model.components_ # Shape: (n_components, n_features)
|
627 |
|
628 |
st.subheader("Pesos de las Componentes Principales (Loadings)")
|
629 |
+
# Se crea un plot de barras por cada componente
|
630 |
for i, comp in enumerate(components):
|
|
|
631 |
source = ColumnDataSource(data=dict(
|
632 |
+
dimensions=embedding_cols,
|
633 |
weight=comp
|
634 |
))
|
|
|
635 |
p = figure(x_range=embedding_cols, title=f"Componente Principal {i+1}",
|
636 |
+
plot_height=400, plot_width=600,
|
637 |
+
toolbar_location=None, tools="")
|
638 |
p.vbar(x='dimensions', top='weight', width=0.8, source=source)
|
639 |
+
# Ocultar etiquetas del eje x para un aspecto más limpio
|
640 |
p.xaxis.major_label_text_font_size = '0pt'
|
641 |
+
# Agregar HoverTool para mostrar la dimensión y su peso
|
|
|
642 |
hover = HoverTool(tooltips=[("Dimensi贸n", "@dimensions"), ("Peso", "@weight")])
|
643 |
p.add_tools(hover)
|
|
|
|
|
644 |
p.xaxis.axis_label = "Dimensiones originales"
|
645 |
p.yaxis.axis_label = "Peso"
|
|
|
646 |
st.bokeh_chart(p)
|
|
|
647 |
|
648 |
data_table, df_table, source_table = create_table(result["df_distances"])
|
649 |
real_subset_names = list(df_table.columns[1:])
|