File size: 5,353 Bytes
9392036
 
913507e
7dae805
5be3cef
5498932
 
1280fd8
9392036
1280fd8
9392036
2ee3fae
9392036
 
 
 
1280fd8
 
913507e
574aa10
7dae805
 
 
 
 
574aa10
7dae805
 
 
 
 
 
 
 
 
9392036
574aa10
7dae805
 
 
 
 
 
 
 
2ee3fae
7dae805
 
 
 
 
 
 
574aa10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6071181
574aa10
c6959dd
5498932
 
 
 
c6959dd
 
 
 
 
5498932
 
 
 
c6959dd
5498932
 
c6959dd
5498932
 
 
 
 
 
 
 
574aa10
5498932
c6959dd
5498932
 
c6959dd
574aa10
c6959dd
574aa10
 
 
5498932
 
c6959dd
574aa10
5498932
 
 
 
c6959dd
 
 
 
 
 
5498932
 
 
c6959dd
5498932
 
c6959dd
5498932
 
 
 
 
 
 
574aa10
5498932
 
 
574aa10
c6959dd
574aa10
 
 
 
5498932
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import streamlit as st
import pandas as pd
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# HTML template handed to Bokeh's ``figure(tooltips=...)`` in ``render_plot``.
# ``@img`` and ``@label`` name the ``img`` and ``label`` columns of the
# ColumnDataSource built there, so each hovered point shows its image
# thumbnail next to its label.
# NOTE(review): the ``{safe}`` suffix presumably tells Bokeh not to
# HTML-escape the image URL -- confirm against the Bokeh hover-tool docs.
TOOLTIPS = """
<div>
    <div>
        <img src="@img{safe}" style="width:128px; height:auto; float: left; margin: 0px 15px 15px 0px;" alt="@img" border="2"></img>
    </div>
    <div>
        <span style="font-size: 17px; font-weight: bold;">@label</span>
    </div>
</div>
"""

def render_plot(selected_labels, df, plot_placeholder):
    """Draw a scatter plot of the selected label subsets into a placeholder.

    Args:
        selected_labels: Values of ``df['label']`` to draw. One glyph series
            and one legend entry is created per label, in the given order.
        df: DataFrame providing the ``x``, ``y``, ``label`` and ``img``
            columns.
        plot_placeholder: Streamlit container that receives the output; it
            must expose ``write`` and ``bokeh_chart``.

    Returns:
        None. The chart (or a "no data" message) is written to
        ``plot_placeholder``.
    """
    if not selected_labels:
        # Write into the placeholder rather than the page so that a chart
        # previously rendered in this slot is replaced instead of lingering
        # next to the "no data" message.
        plot_placeholder.write("No data to display. Please select at least one subset.")
        return

    filtered_data = df[df['label'].isin(selected_labels)]
    p = figure(width=400, height=400, tooltips=TOOLTIPS)

    # Category10 tops out at ten colours. Cycle through them so that every
    # selected label gets a colour; truncating the palette would make zip()
    # silently drop all labels after the tenth.
    base_colors = Category10[10]
    palette = [
        base_colors[index % len(base_colors)]
        for index in range(len(selected_labels))
    ]

    # Plot each label as its own series so the legend can toggle it.
    for label, color in zip(selected_labels, palette):
        subset = filtered_data[filtered_data['label'] == label]
        source = ColumnDataSource(data=dict(
            x=subset['x'],
            y=subset['y'],
            label=subset['label'],
            img=subset['img']
        ))
        # Bokeh only accepts string legend labels; CSV labels may be numeric.
        p.scatter('x', 'y', size=12, source=source, color=color, legend_label=str(label))

    p.legend.title = "Subsets"
    p.legend.location = "top_right"
    p.legend.click_policy = "hide"

    plot_placeholder.bokeh_chart(p)

def config_style():
    """Inject the page's CSS and render the title, subtitle and intro text.

    Emits four ``st.markdown`` calls with ``unsafe_allow_html=True``: a
    ``<style>`` block defining the ``main-title``, ``sub-title`` and
    ``custom-text`` classes, followed by the elements that use them.

    Returns:
        None.
    """
    st.markdown(
        """
        <style>
        .main-title {
            font-size: 50px;
            color: #4CAF50;
            text-align: center;
        }
        .sub-title {
            font-size: 30px;
            color: #555;
        }
        .custom-text {
            font-size: 18px;
            line-height: 1.5;
        }
        </style>
        """,
        unsafe_allow_html=True
    )

    # The emoji were stored as mis-decoded bytes (mojibake); restored here.
    st.markdown('<h1 class="main-title">Merit Secret Embeddings 🎒📃🏆</h1>', unsafe_allow_html=True)
    st.markdown('<h2 class="sub-title">Donut</h2>', unsafe_allow_html=True)
    st.markdown(
        """
        <p class="custom-text">
        Explore how Donut perceives real data. 
        </p>
        """,
        unsafe_allow_html=True
    )

def _project_to_2d(embeddings, mode):
    """Reduce an embedding matrix (one row per sample) to two columns.

    Args:
        embeddings: 2-D array of embedding vectors.
        mode: ``"PCA"`` selects PCA; any other value selects t-SNE.

    Returns:
        Array with one row per sample and two columns.
    """
    if mode == "PCA":
        return PCA(n_components=2).fit_transform(embeddings)

    # scikit-learn rejects a perplexity that is not smaller than the number
    # of samples, so clamp the default of 30 for small datasets.
    perplexity = min(30, len(embeddings) - 1)
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity, learning_rate=200)
    return tsne.fit_transform(embeddings)


def _render_embedding_section(csv_path, mode_label, subsets_label, mode_key=None, subsets_key=None):
    """Load one embeddings CSV and render its controls and scatter plot.

    Args:
        csv_path: CSV file to load. Columns whose name starts with ``dim_``
            are used as the embedding; ``label`` and ``img`` are also read.
        mode_label: Caption of the PCA / t-SNE select box.
        subsets_label: Caption of the label multiselect.
        mode_key: Optional Streamlit widget key for the select box.
        subsets_key: Optional Streamlit widget key for the multiselect.

    Returns:
        None.
    """
    df = pd.read_csv(csv_path)

    mode = st.selectbox(mode_label, options=["PCA", "t-SNE"], key=mode_key)

    embedding_cols = [col for col in df.columns if col.startswith("dim_")]
    reduced = _project_to_2d(df[embedding_cols].values, mode)
    df['x'] = reduced[:, 0]
    df['y'] = reduced[:, 1]

    unique_labels = df['label'].unique().tolist()

    # Reserve the chart slot before the multiselect so the plot is laid out
    # above the filter. Every label is selected by default, so one render
    # after the widget already shows the full data set on first load.
    plot_placeholder = st.empty()
    selected_labels = st.multiselect(
        subsets_label,
        options=unique_labels,
        default=unique_labels,
        key=subsets_key,
    )
    render_plot(selected_labels, df, plot_placeholder)


if __name__ == "__main__":
    config_style()

    # --- First chart: Donut embeddings ---
    _render_embedding_section(
        "data/donut_de_Rodrigo_merit_secret_all_embeddings.csv",
        "Seleccione visualización para Donut:",
        "Seleccione subsets para visualizar (Donut):",
    )

    # --- Second chart: Idefics2 embeddings ---
    st.markdown('<h2 class="sub-title">Idefics2</h2>', unsafe_allow_html=True)
    _render_embedding_section(
        "data/embeddings_idefics2.csv",
        "Seleccione visualización para Idefics2:",
        "Seleccione subsets para visualizar (Idefics2):",
        mode_key="idefics2_mode",
        subsets_key="idefics2",
    )