# Hugging Face Space: de-Rodrigo/Embeddings (status: Running)
# File size: 4,473 bytes

import streamlit as st
import pandas as pd
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10

# HTML template passed to the Bokeh figure as its hover tooltip.
# `@img` and `@label` are placeholders for the `img` and `label` columns of
# the ColumnDataSource built in render_plot: the tooltip shows the image at
# 128px width next to the label in bold.
# NOTE(review): `{safe}` is understood to tell Bokeh to insert the value
# without HTML-escaping it -- confirm against the Bokeh hover-tool docs.
TOOLTIPS = """
<div>
    <div>
        <img src="@img{safe}" style="width:128px; height:auto; float: left; margin: 0px 15px 15px 0px;" alt="@img" border="2"></img>
    </div>
    <div>
        <span style="font-size: 17px; font-weight: bold;">@label</span>
    </div>
</div>
"""

def render_plot(selected_labels, df, plot_placeholder):
    """Draw one scatter series per selected label into a Streamlit placeholder.

    Args:
        selected_labels: Values of the ``label`` column to display. Their
            order determines colour assignment and legend order.
        df: DataFrame providing the ``x``, ``y``, ``label`` and ``img``
            columns.
        plot_placeholder: Streamlit container (for example the result of
            ``st.empty()``) that receives either the chart or the
            "no data" message.

    Returns:
        None. The result is rendered into ``plot_placeholder``.
    """
    from itertools import cycle

    if not selected_labels:
        # Write into the placeholder (not the page) so that a chart rendered
        # earlier into the same placeholder is replaced instead of being left
        # on screen next to a "no data" message.
        plot_placeholder.write("No data to display. Please select at least one subset.")
        return

    p = figure(width=400, height=400, tooltips=TOOLTIPS)

    # Category10[10] is the full ten-colour palette. Cycling it gives the
    # first ten labels distinct colours and reuses colours beyond that,
    # rather than silently dropping every label past the tenth.
    colors = cycle(Category10[10])

    # Plot each label as its own glyph so it gets its own legend entry.
    for label, color in zip(selected_labels, colors):
        subset = df[df['label'] == label]
        source = ColumnDataSource(data=dict(
            x=subset['x'],
            y=subset['y'],
            label=subset['label'],
            img=subset['img']
        ))
        # legend_label is passed as text so non-string labels (e.g. integers
        # read from a CSV) are accepted as well.
        p.scatter('x', 'y', size=12, source=source, color=color, legend_label=str(label))

    p.legend.title = "Subsets"
    p.legend.location = "top_right"
    # Clicking a legend entry toggles the visibility of that subset.
    p.legend.click_policy = "hide"

    plot_placeholder.bokeh_chart(p)

def config_style():
    """Inject the page CSS and render the page header.

    Emits, in order: a ``<style>`` block defining the ``main-title``,
    ``sub-title`` and ``custom-text`` classes, the main title, the "Donut"
    sub-title and a short introductory paragraph. Every fragment is sent
    through ``st.markdown`` with ``unsafe_allow_html=True``.
    """
    stylesheet = """
        <style>
        .main-title {
            font-size: 50px;
            color: #4CAF50;
            text-align: center;
        }
        .sub-title {
            font-size: 30px;
            color: #555;
        }
        .custom-text {
            font-size: 18px;
            line-height: 1.5;
        }
        </style>
        """
    heading = '<h1 class="main-title">Merit Secret Embeddings 🎒📃🏆</h1>'
    subheading = '<h2 class="sub-title">Donut</h2>'
    intro = """
        <p class="custom-text">
        Explore how Donut perceives real data. 
        </p>
        """

    # The stylesheet goes first so the classes exist before they are used.
    for fragment in (stylesheet, heading, subheading, intro):
        st.markdown(fragment, unsafe_allow_html=True)

def _embedding_section(model_name, file_stem, mode_key=None, subset_key=None):
    """Render one model's projection selector, scatter plot and subset filter.

    Args:
        model_name: Display name interpolated into the widget labels.
        file_stem: Model part of the CSV file name; the data is read from
            ``data/data_<file_stem>_pca.csv`` or
            ``data/data_<file_stem>_tsne.csv``.
        mode_key: Optional Streamlit widget key for the projection selectbox.
        subset_key: Optional Streamlit widget key for the subset multiselect.
    """
    # Dropdown to choose the projection to visualise.
    mode = st.selectbox(
        f"Seleccione visualización para {model_name}:",
        options=["PCA", "t-SNE"],
        key=mode_key,
    )

    # Only the CSV for the chosen projection is read.
    suffix = "pca" if mode == "PCA" else "tsne"
    current_df = pd.read_csv(f"data/data_{file_stem}_{suffix}.csv")

    unique_labels = current_df['label'].unique().tolist()

    # Reserve the chart slot before the multiselect so the plot is laid out
    # above the filter widget even though it is drawn afterwards.
    plot_placeholder = st.empty()

    # Filter widget; every label is selected by default, so the first run
    # shows the full data set without a separate initial render.
    selected_labels = st.multiselect(
        f"Seleccione subsets para visualizar ({model_name}):",
        options=unique_labels,
        default=unique_labels,
        key=subset_key,
    )

    render_plot(selected_labels, current_df, plot_placeholder)


if __name__ == "__main__":
    config_style()

    # --- First chart: Donut data ---
    _embedding_section("Donut", "donut")

    # --- Second chart: Idefics2 data ---
    st.markdown('<h2 class="sub-title">Idefics2</h2>', unsafe_allow_html=True)
    _embedding_section(
        "Idefics2",
        "idefics2",
        mode_key="idefics2_mode",
        subset_key="idefics2",
    )