Embeddings / app.py
de-Rodrigo's picture
Update Requirements
5498932
raw
history blame
5.35 kB
import streamlit as st
import pandas as pd
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# HTML template passed to the Bokeh figure as its hover tooltip.
# `@img` and `@label` are filled in per point from the same-named columns of
# the ColumnDataSource built in render_plot. The `{safe}` suffix is Bokeh's
# format flag for inserting the value without HTML escaping.
TOOLTIPS = """
<div>
<div>
<img src="@img{safe}" style="width:128px; height:auto; float: left; margin: 0px 15px 15px 0px;" alt="@img" border="2"></img>
</div>
<div>
<span style="font-size: 17px; font-weight: bold;">@label</span>
</div>
</div>
"""
def render_plot(selected_labels, df, plot_placeholder):
    """Render a scatter plot of the selected label subsets into a placeholder.

    Each selected label is drawn as its own glyph with its own color and
    legend entry, so individual subsets can be hidden by clicking the legend.

    Args:
        selected_labels: Labels to display. When empty, the placeholder is
            cleared and a short notice is written instead of a chart.
        df: DataFrame providing the ``x``, ``y``, ``label`` and ``img``
            columns read below.
        plot_placeholder: Streamlit container (e.g. the result of
            ``st.empty()``) that receives the Bokeh chart.
    """
    if not selected_labels:
        # Remove any chart left over from a previous render so the notice is
        # not displayed next to a stale plot.
        plot_placeholder.empty()
        st.write("No data to display. Please select at least one subset.")
        return

    filtered_data = df[df['label'].isin(selected_labels)]
    p = figure(width=400, height=400, tooltips=TOOLTIPS)

    # Category10 offers at most ten colors. Cycle through them so that every
    # label gets a color; a shorter palette would make zip() silently drop
    # all labels past the tenth.
    base_colors = Category10[10]
    palette = [
        base_colors[i % len(base_colors)]
        for i in range(len(selected_labels))
    ]

    # Plot each label separately so it gets its own legend entry.
    for label, color in zip(selected_labels, palette):
        subset = filtered_data[filtered_data['label'] == label]
        source = ColumnDataSource(data=dict(
            x=subset['x'],
            y=subset['y'],
            label=subset['label'],
            img=subset['img'],
        ))
        # legend_label must be a string; labels read from a CSV may be numeric.
        p.scatter(
            'x', 'y',
            size=12,
            source=source,
            color=color,
            legend_label=str(label),
        )

    p.legend.title = "Subsets"
    p.legend.location = "top_right"
    p.legend.click_policy = "hide"
    plot_placeholder.bokeh_chart(p)
def config_style():
    """Inject the page CSS and write the title, first section heading and intro.

    Everything is emitted through ``st.markdown`` with
    ``unsafe_allow_html=True`` because the content is raw HTML/CSS.
    """
    st.markdown(
        """
<style>
.main-title {
font-size: 50px;
color: #4CAF50;
text-align: center;
}
.sub-title {
font-size: 30px;
color: #555;
}
.custom-text {
font-size: 18px;
line-height: 1.5;
}
</style>
""",
        unsafe_allow_html=True
    )
    # The emojis were stored as mojibake (UTF-8 bytes decoded as GBK);
    # restored to the intended characters.
    st.markdown('<h1 class="main-title">Merit Secret Embeddings 🎒📃🏆</h1>', unsafe_allow_html=True)
    st.markdown('<h2 class="sub-title">Donut</h2>', unsafe_allow_html=True)
    st.markdown(
        """
<p class="custom-text">
Explore how Donut perceives real data.
</p>
""",
        unsafe_allow_html=True
    )
def _reduce_to_2d(embeddings, mode):
    """Project an embedding matrix onto two dimensions with PCA or t-SNE.

    Args:
        embeddings: 2-D array with one row per sample.
        mode: ``"PCA"`` selects PCA; any other value selects t-SNE.

    Returns:
        Array with two columns holding the reduced coordinates.
    """
    if mode == "PCA":
        return PCA(n_components=2).fit_transform(embeddings)

    # scikit-learn requires perplexity to be smaller than the number of
    # samples, so cap the usual value of 30 for small datasets.
    perplexity = min(30, max(1, len(embeddings) - 1))
    tsne = TSNE(
        n_components=2,
        random_state=42,
        perplexity=perplexity,
        learning_rate=200,
    )
    return tsne.fit_transform(embeddings)


def _render_embedding_section(csv_path, model_name, mode_key=None, subset_key=None):
    """Load one embeddings CSV and render its interactive 2-D scatter plot.

    The CSV is expected to contain the embedding columns ``dim_0`` ...
    ``dim_N`` plus the ``label`` and ``img`` columns used by render_plot.

    Args:
        csv_path: Path of the CSV file to read.
        model_name: Name inserted into the widget labels.
        mode_key: Optional Streamlit key for the projection selectbox.
        subset_key: Optional Streamlit key for the subset multiselect.
    """
    df = pd.read_csv(csv_path)

    # Projection method selector.
    mode = st.selectbox(
        f"Seleccione visualización para {model_name}:",
        options=["PCA", "t-SNE"],
        key=mode_key,
    )

    # Embedding columns are the ones whose name starts with "dim_".
    embedding_cols = [col for col in df.columns if col.startswith("dim_")]
    if not embedding_cols:
        # Without features the reducers would fail with an obscure error.
        st.error(f"No embedding columns (dim_*) found in {csv_path}.")
        return

    # Attach the reduced coordinates to the DataFrame for plotting.
    reduced = _reduce_to_2d(df[embedding_cols].values, mode)
    df['x'] = reduced[:, 0]
    df['y'] = reduced[:, 1]

    unique_labels = df['label'].unique().tolist()

    # Reserve the chart's slot above the filter widget. The chart is rendered
    # only once, after the selection is known: the multiselect defaults to all
    # labels, so an earlier "initial" render would be redundant and would
    # linger on screen when the selection is emptied.
    plot_placeholder = st.empty()
    selected_labels = st.multiselect(
        f"Seleccione subsets para visualizar ({model_name}):",
        options=unique_labels,
        default=unique_labels,
        key=subset_key,
    )
    render_plot(selected_labels, df, plot_placeholder)


if __name__ == "__main__":
    config_style()

    # --- First chart: Donut embeddings (its heading is written by config_style) ---
    _render_embedding_section(
        "data/donut_de_Rodrigo_merit_secret_all_embeddings.csv",
        "Donut",
    )

    # --- Second chart: Idefics2 embeddings ---
    st.markdown('<h2 class="sub-title">Idefics2</h2>', unsafe_allow_html=True)
    _render_embedding_section(
        "data/embeddings_idefics2.csv",
        "Idefics2",
        mode_key="idefics2_mode",
        subset_key="idefics2",
    )