Embeddings / app.py
de-Rodrigo's picture
Revert "Revert "Choose t-sne or PCA""
c6959dd
raw
history blame
4.47 kB
import streamlit as st
import pandas as pd
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10
# HTML template passed to the Bokeh figure as its hover tooltip.
# `@img` and `@label` are filled from the `img` and `label` columns of the
# hovered glyph's data source; `{safe}` asks Bokeh to insert the value
# without HTML-escaping it, so `img` is used verbatim as the image URL.
TOOLTIPS = """
<div>
<div>
<img src="@img{safe}" style="width:128px; height:auto; float: left; margin: 0px 15px 15px 0px;" alt="@img" border="2"></img>
</div>
<div>
<span style="font-size: 17px; font-weight: bold;">@label</span>
</div>
</div>
"""
def render_plot(selected_labels, df, plot_placeholder):
    """Draw a Bokeh scatter plot with one glyph series per selected label.

    Args:
        selected_labels: Labels to display. Each one gets its own colour and
            its own (click-to-hide) legend entry.
        df: DataFrame providing the columns ``x``, ``y``, ``label`` and
            ``img`` that feed the glyphs and the hover tooltip.
        plot_placeholder: Streamlit element the chart is written into via
            ``bokeh_chart``; callers in this file pass an ``st.empty()`` slot.

    Returns:
        None. When ``selected_labels`` is empty, the placeholder is cleared
        and a short notice is written instead of a chart.
    """
    if not selected_labels:
        # Remove any chart previously rendered into this slot so a stale plot
        # is not left on screen next to the "no data" notice.
        plot_placeholder.empty()
        st.write("No data to display. Please select at least one subset.")
        return

    filtered_data = df[df['label'].isin(selected_labels)]
    p = figure(width=400, height=400, tooltips=TOOLTIPS)

    num_labels = len(selected_labels)
    # Category10 only defines palettes with 3 to 10 colours, so clamp the
    # lookup to that range.  Fewer than 3 labels use the first colours of the
    # 3-colour palette; more than 10 labels cycle through the 10-colour one so
    # that no label is silently dropped by zip() below.
    base_palette = Category10[min(max(num_labels, 3), 10)]
    palette = [base_palette[i % len(base_palette)] for i in range(num_labels)]

    # Plot every label as its own series so it can be toggled from the legend.
    for label, color in zip(selected_labels, palette):
        subset = filtered_data[filtered_data['label'] == label]
        source = ColumnDataSource(data=dict(
            x=subset['x'],
            y=subset['y'],
            label=subset['label'],
            img=subset['img']
        ))
        # legend_label must be a string; labels read from CSV may be numeric.
        p.scatter('x', 'y', size=12, source=source, color=color, legend_label=str(label))

    p.legend.title = "Subsets"
    p.legend.location = "top_right"
    p.legend.click_policy = "hide"

    plot_placeholder.bokeh_chart(p)
def config_style():
    """Inject the page CSS and render the page title, Donut heading and intro.

    Four ``st.markdown`` calls are made, all with ``unsafe_allow_html=True``
    so the raw HTML/CSS is passed through: a ``<style>`` block defining the
    ``main-title``, ``sub-title`` and ``custom-text`` classes, followed by the
    elements that use those classes.
    """
    st.markdown(
        """
<style>
.main-title {
font-size: 50px;
color: #4CAF50;
text-align: center;
}
.sub-title {
font-size: 30px;
color: #555;
}
.custom-text {
font-size: 18px;
line-height: 1.5;
}
</style>
""",
        unsafe_allow_html=True
    )
    # The emoji in the title were stored as mis-decoded bytes (mojibake);
    # they are restored to the intended characters here.
    st.markdown('<h1 class="main-title">Merit Secret Embeddings 🎒📃🏆</h1>', unsafe_allow_html=True)
    st.markdown('<h2 class="sub-title">Donut</h2>', unsafe_allow_html=True)
    st.markdown(
        """
<p class="custom-text">
Explore how Donut perceives real data.
</p>
""",
        unsafe_allow_html=True
    )
def _render_model_section(model_name, csv_by_mode, mode_key=None, subsets_key=None):
    """Render the projection selector, subset filter and scatter plot for one model.

    Args:
        model_name: Name interpolated into the widget labels (e.g. "Donut").
        csv_by_mode: Mapping from projection name (shown in the selectbox, in
            insertion order) to the CSV file holding that projection.
        mode_key: Optional Streamlit widget key for the projection selectbox.
        subsets_key: Optional Streamlit widget key for the subset multiselect.
    """
    mode = st.selectbox(
        f"Seleccione visualización para {model_name}:",
        options=list(csv_by_mode),
        key=mode_key,
    )
    # Only the projection that is actually displayed is read from disk.
    current_df = pd.read_csv(csv_by_mode[mode])
    unique_labels = current_df['label'].unique().tolist()

    # Reserve the chart's slot above the subset filter.  The chart is drawn
    # once, after the filter value is known; drawing it beforehand with all
    # labels would build the figure twice per run and leave a stale plot on
    # screen when the selection is empty.
    plot_placeholder = st.empty()
    selected_labels = st.multiselect(
        f"Seleccione subsets para visualizar ({model_name}):",
        options=unique_labels,
        default=unique_labels,
        key=subsets_key,
    )
    render_plot(selected_labels, current_df, plot_placeholder)


if __name__ == "__main__":
    config_style()

    # --- First chart: Donut data (its heading is emitted by config_style) ---
    _render_model_section(
        "Donut",
        {
            "PCA": "data/data_donut_pca.csv",
            "t-SNE": "data/data_donut_tsne.csv",
        },
    )

    # --- Second chart: Idefics2 data ---
    st.markdown('<h2 class="sub-title">Idefics2</h2>', unsafe_allow_html=True)
    _render_model_section(
        "Idefics2",
        {
            "PCA": "data/data_idefics2_pca.csv",
            "t-SNE": "data/data_idefics2_tsne.csv",
        },
        mode_key="idefics2_mode",
        subsets_key="idefics2",
    )