Spaces:
Sleeping
Sleeping
File size: 5,353 Bytes
9392036 913507e 7dae805 5be3cef 5498932 1280fd8 9392036 1280fd8 9392036 2ee3fae 9392036 1280fd8 913507e 574aa10 7dae805 574aa10 7dae805 9392036 574aa10 7dae805 2ee3fae 7dae805 574aa10 6071181 574aa10 c6959dd 5498932 c6959dd 5498932 c6959dd 5498932 c6959dd 5498932 574aa10 5498932 c6959dd 5498932 c6959dd 574aa10 c6959dd 574aa10 5498932 c6959dd 574aa10 5498932 c6959dd 5498932 c6959dd 5498932 c6959dd 5498932 574aa10 5498932 574aa10 c6959dd 574aa10 5498932 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import streamlit as st
import pandas as pd
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# HTML template for the Bokeh hover tooltip; it is passed as ``tooltips=`` to
# ``figure`` in ``render_plot``. The ``@img`` and ``@label`` placeholders refer
# to the same-named columns of the ColumnDataSource built there: the tooltip
# shows the sample image (128 px wide) followed by its label in bold.
TOOLTIPS = """
<div>
<div>
<img src="@img{safe}" style="width:128px; height:auto; float: left; margin: 0px 15px 15px 0px;" alt="@img" border="2"></img>
</div>
<div>
<span style="font-size: 17px; font-weight: bold;">@label</span>
</div>
</div>
"""
def render_plot(selected_labels, df, plot_placeholder):
    """Draw a 2-D scatter plot of the selected label subsets into a placeholder.

    Each selected label is plotted as its own glyph series with its own color
    and legend entry; clicking a legend entry hides that series.

    Args:
        selected_labels: Labels to display. When empty, a "no data" message is
            written into ``plot_placeholder`` instead of a chart.
        df: DataFrame providing the columns ``x``, ``y``, ``label`` and
            ``img`` (``img`` feeds the hover tooltip).
        plot_placeholder: Streamlit container (e.g. the result of
            ``st.empty()``) that receives the chart or the message.

    Returns:
        None.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from itertools import cycle

    if not selected_labels:
        # Write into the placeholder (not the page) so a chart rendered into
        # it earlier in the same run is replaced rather than left stale.
        plot_placeholder.write("No data to display. Please select at least one subset.")
        return

    filtered_data = df[df['label'].isin(selected_labels)]
    p = figure(width=400, height=400, tooltips=TOOLTIPS)

    # Category10 is only indexed here for sizes 3..10, so clamp the requested
    # size into that range. Cycling the palette guarantees every label gets a
    # color: with more than 10 labels a plain zip against a 10-color palette
    # would silently drop the extra labels from the plot.
    palette_size = min(max(len(selected_labels), 3), 10)
    colors = cycle(Category10[palette_size])

    # Plot each label separately so it gets its own legend entry.
    for label, color in zip(selected_labels, colors):
        subset = filtered_data[filtered_data['label'] == label]
        source = ColumnDataSource(data=dict(
            x=subset['x'],
            y=subset['y'],
            label=subset['label'],
            img=subset['img'],
        ))
        # legend_label must be a string; labels read from a CSV may be numeric.
        p.scatter('x', 'y', size=12, source=source, color=color, legend_label=str(label))

    p.legend.title = "Subsets"
    p.legend.location = "top_right"
    p.legend.click_policy = "hide"
    plot_placeholder.bokeh_chart(p)
def config_style():
    """Inject the page CSS and render the page header.

    Emits, in order: a ``<style>`` block defining the ``main-title``,
    ``sub-title`` and ``custom-text`` classes, the main page title, the
    "Donut" section subtitle, and a short introductory paragraph. All of them
    are raw HTML, hence ``unsafe_allow_html=True`` on every call.

    Returns:
        None.
    """
    page_css = (
        "\n"
        "<style>\n"
        ".main-title {\n"
        "font-size: 50px;\n"
        "color: #4CAF50;\n"
        "text-align: center;\n"
        "}\n"
        ".sub-title {\n"
        "font-size: 30px;\n"
        "color: #555;\n"
        "}\n"
        ".custom-text {\n"
        "font-size: 18px;\n"
        "line-height: 1.5;\n"
        "}\n"
        "</style>\n"
    )
    intro_html = (
        "\n"
        '<p class="custom-text">\n'
        "Explore how Donut perceives real data.\n"
        "</p>\n"
    )

    st.markdown(page_css, unsafe_allow_html=True)
    # The emoji were stored as mojibake (UTF-8 bytes decoded as GBK); these
    # are the intended code points U+1F392, U+1F4C3 and U+1F3C6.
    st.markdown(
        '<h1 class="main-title">Merit Secret Embeddings 🎒📃🏆</h1>',
        unsafe_allow_html=True,
    )
    st.markdown('<h2 class="sub-title">Donut</h2>', unsafe_allow_html=True)
    st.markdown(intro_html, unsafe_allow_html=True)
def _project_to_2d(embeddings, mode):
    """Reduce an embedding matrix to two dimensions with PCA or t-SNE.

    Args:
        embeddings: 2-D array-like with one row per sample.
        mode: ``"PCA"`` selects PCA; any other value selects t-SNE.

    Returns:
        Array with one row per sample and two columns (x, y).
    """
    if mode == "PCA":
        return PCA(n_components=2).fit_transform(embeddings)

    # t-SNE requires perplexity < n_samples; keep the usual 30 but clamp it so
    # small datasets do not make the fit fail.
    perplexity = min(30, max(1, len(embeddings) - 1))
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity, learning_rate=200)
    return tsne.fit_transform(embeddings)


def _render_embedding_section(csv_path, model_name, mode_key=None, subsets_key=None):
    """Load one model's embeddings and render its interactive scatter plot.

    The CSV is expected to contain the embedding columns ``dim_0`` ..
    ``dim_N`` plus the ``label`` and ``img`` columns used by ``render_plot``.

    Args:
        csv_path: Path of the CSV file holding the embeddings.
        model_name: Model name interpolated into the widget labels.
        mode_key: Optional Streamlit widget key for the projection selectbox.
        subsets_key: Optional Streamlit widget key for the subset multiselect.

    Returns:
        None.
    """
    df = pd.read_csv(csv_path)

    mode = st.selectbox(
        f"Seleccione visualización para {model_name}:",
        options=["PCA", "t-SNE"],
        key=mode_key,
    )

    # Embedding columns are the ones whose name starts with "dim_".
    embedding_cols = [col for col in df.columns if col.startswith("dim_")]
    reduced = _project_to_2d(df[embedding_cols].values, mode)

    # Attach the 2-D coordinates so render_plot can read them as x / y.
    df['x'] = reduced[:, 0]
    df['y'] = reduced[:, 1]

    unique_labels = df['label'].unique().tolist()

    # Reserve the chart slot before the multiselect so the chart appears above
    # the filter. The chart is rendered once, after the selection is known; a
    # preliminary render with all labels would be overwritten in the same run.
    plot_placeholder = st.empty()
    selected_labels = st.multiselect(
        f"Seleccione subsets para visualizar ({model_name}):",
        options=unique_labels,
        default=unique_labels,
        key=subsets_key,
    )
    render_plot(selected_labels, df, plot_placeholder)


if __name__ == "__main__":
    config_style()

    # --- First chart: Donut embeddings (subtitle is emitted by config_style) ---
    _render_embedding_section(
        "data/donut_de_Rodrigo_merit_secret_all_embeddings.csv",
        "Donut",
    )

    # --- Second chart: Idefics2 embeddings (CSV has the same structure) ---
    st.markdown('<h2 class="sub-title">Idefics2</h2>', unsafe_allow_html=True)
    _render_embedding_section(
        "data/embeddings_idefics2.csv",
        "Idefics2",
        mode_key="idefics2_mode",
        subsets_key="idefics2",
    )
|