# -*- coding: utf-8 -*-
# @Date : 2025/2/5 16:26
# @Author : q275343119
# @File : data_page.py
# @Description:
import io
from st_aggrid import AgGrid, JsCode, ColumnsAutoSizeMode
import streamlit as st
import streamlit.components.v1 as components
from utils.st_copy_to_clipboard import st_copy_to_clipboard
from streamlit_theme import st_theme
from app.backend.app_init_func import LEADERBOARD_MAP
from app.backend.constant import LEADERBOARD_ICON_MAP, BASE_URL
from app.backend.json_util import compress_msgpack, decompress_msgpack
# Column identifiers. Not referenced in the visible part of this module --
# presumably the model-metadata columns of the leaderboard table
# (TODO confirm against callers).
COLUMNS = [
    'model_name',
    'vendor',
    'embd_dtype',
    'embd_dim',
    'num_params',
    'max_tokens',
    'similarity',
    'query_instruct',
    'corpus_instruct',
    'reference',
]

# Style applied to the column-group headers ("Open Datasets" / "Closed Datasets").
LARGER_HEADER_STYLE = {'fontSize': '18px'}

# Style applied to individual column headers.
HEADER_STYLE = {'fontSize': '14px'}

# Style applied to grid cells. Kept as its own dict (not an alias of
# HEADER_STYLE) so the two can be changed independently.
CELL_STYLE = {'fontSize': '14px'}
def is_section(group_name):
    """Return True when ``group_name`` matches a leaderboard name in LEADERBOARD_MAP.

    For every value stored in ``LEADERBOARD_MAP`` the item at ``value[0][0]``
    is taken as the leaderboard name and compared against ``group_name``.
    The scan stops at the first match.

    :param group_name: name to look up.
    :return: ``True`` if any leaderboard name equals ``group_name``, else ``False``.
    """
    return any(
        group_name == entries[0][0]
        for entries in LEADERBOARD_MAP.values()
    )
def get_closed_dataset():
    """Collect the ``dataset_name`` of every result flagged as closed.

    Reads ``results`` from the ``data_engine`` object stored in the Streamlit
    session state and keeps the entries whose ``is_closed`` value is truthy.

    :return: list of ``dataset_name`` values, in the order of ``results``.
    """
    engine = st.session_state["data_engine"]
    return [
        entry.get("dataset_name")
        for entry in engine.results
        if entry.get("is_closed")
    ]
def convert_df_to_csv(df):
    """Serialise ``df`` to CSV text, leaving out the index column.

    :param df: object exposing ``to_csv(buffer, index=...)`` (e.g. a pandas DataFrame).
    :return: the CSV content as a string.
    """
    with io.StringIO() as buffer:
        df.to_csv(buffer, index=False)
        # Read the text before the buffer is closed on leaving the block.
        return buffer.getvalue()
def get_column_state():
    """
    Copy grid and sidebar state from the URL query parameters into the session.

    * ``grid_state``: when present and non-empty, it is decoded with
      ``decompress_msgpack`` and stored as ``st.session_state.grid_state``.
    * ``sider_bar_hidden``: defaults to ``"False"``; when its value is
      "false" (case-insensitive), ``st.session_state.sider_bar_hidden`` is set
      to ``False``. Any other value leaves the session flag untouched.

    Always returns ``None``.
    """
    encoded_state = st.query_params.get("grid_state", None)
    sidebar_flag = st.query_params.get("sider_bar_hidden", "False")

    if encoded_state:
        st.session_state.grid_state = decompress_msgpack(encoded_state)

    if sidebar_flag.upper() == 'FALSE':
        st.session_state.sider_bar_hidden = False
# Header renderer for a single dataset column: shows the column name as a link
# to https://huggingface.co/datasets/embedding-benchmark/<colId>. The click
# handler stops propagation so the click is not passed on to parent elements.
# The JS text is kept verbatim; only the needless f-string escaping was removed.
_DATASET_LINK_HEADER_JS = """
class DatasetHeaderRenderer {
init(params) {
this.eGui = document.createElement('div');
const columnName = params.displayName;
const fieldName = params.column.colId;
const link = document.createElement('a');
link.href = 'https://huggingface.co/datasets/embedding-benchmark/' + fieldName;
link.target = '_blank';
link.style.color = 'white';
link.style.textDecoration = 'underline';
link.style.cursor = 'pointer';
link.textContent = columnName;
link.addEventListener('click', function(e) {
e.stopPropagation();
});
this.eGui.appendChild(link);
}
getGui() {
return this.eGui;
}
}
"""

# Header renderer for section pages: columns whose id contains 'Average' are
# rendered as plain text, every other column as a dataset link (as above).
_SECTION_HEADER_JS = """
class DatasetHeaderRenderer {
init(params) {
this.eGui = document.createElement('div');
const columnName = params.displayName;
const fieldName = params.column.colId;
if (fieldName.includes('Average')) {
this.eGui.textContent = columnName;
} else {
const link = document.createElement('a');
link.href = 'https://huggingface.co/datasets/embedding-benchmark/' + fieldName;
link.target = '_blank';
link.style.color = 'white';
link.style.textDecoration = 'underline';
link.style.cursor = 'pointer';
link.textContent = columnName;
link.addEventListener('click', function(e) {
e.stopPropagation();
});
this.eGui.appendChild(link);
}
}
getGui() {
return this.eGui;
}
}
"""

# Aggregate columns that are never treated as dataset columns.
_AGGREGATE_COLUMNS = ("Closed average", "Open average")


def _dataset_column_def(column):
    """Build the column definition of one dataset column on a dataset page."""
    return {
        'headerName': column,
        'field': column,
        'headerStyle': HEADER_STYLE,
        'cellStyle': CELL_STYLE,
        "headerTooltip": column,
        # A fresh JsCode per column, exactly as the original inline code did.
        'headerComponent': JsCode(_DATASET_LINK_HEADER_JS),
    }


def _dataset_group_def(header_name, columns):
    """Build a column-group definition titled ``header_name`` wrapping ``columns``."""
    return {
        'headerName': header_name,
        'headerStyle': LARGER_HEADER_STYLE,
        'headerClass': 'group-header',
        'marryChildren': True,
        'openByDefault': True,
        'children': [_dataset_column_def(column) for column in columns],
    }


def _section_column_def(column):
    """Build the ungrouped column definition used on section pages."""
    # "<Name> Average" columns are displayed as "<name>" with only the first
    # letter capitalised; other columns keep their own name.
    if "Average" in column:
        label = column.replace("Average", "").strip().capitalize()
    else:
        label = column
    return {
        'headerName': label,
        'field': column,
        'headerStyle': HEADER_STYLE,
        'cellStyle': CELL_STYLE,
        "headerTooltip": label,
        'headerComponent': JsCode(_SECTION_HEADER_JS),
    }


def _get_dataset_columns(group_name, column_list, avg_column):
    """Generate the dataset column definitions for the grid.

    ``avg_column``, "Closed average" and "Open average" are removed from
    ``column_list`` first; what remains are the dataset columns.

    * Dataset page (``is_section(group_name)`` is false) with at least one
      dataset column: the columns are split into an "Open Datasets" group
      (names not starting with ``_``) and a "Closed Datasets" group (names
      starting with ``_``). A group is emitted only when it has children,
      open group first.
    * Section page, or no dataset columns at all: a flat list with one
      definition per dataset column (empty when there are none).

    :param group_name: name of the page/group being rendered.
    :param column_list: iterable of column names (strings).
    :param avg_column: name of the page's average column, to be excluded.
    :return: list of column (or column-group) definition dicts.
    """
    excluded = (avg_column, *_AGGREGATE_COLUMNS)
    dataset_columns = [col for col in column_list if col not in excluded]

    # is_section() is evaluated first, as in the original condition.
    if is_section(group_name) or not dataset_columns:
        return [_section_column_def(column) for column in dataset_columns]

    # A leading underscore marks a closed dataset.
    open_datasets = [d for d in dataset_columns if not d.startswith('_')]
    closed_datasets = [d for d in dataset_columns if d.startswith('_')]

    grouped_columns = []
    if open_datasets:
        grouped_columns.append(_dataset_group_def('Open Datasets', open_datasets))
    if closed_datasets:
        grouped_columns.append(_dataset_group_def('Closed Datasets', closed_datasets))
    return grouped_columns
def render_page(group_name):
grid_state = st.session_state.get("grid_state", {})
st.session_state.sider_bar_hidden = True
get_column_state()
if st.session_state.sider_bar_hidden:
st.markdown("""
""", unsafe_allow_html=True)
# Add theme color and grid styles
st.title("Retrieval Embedding Benchmark (RTEB)")
st.markdown("""
""", unsafe_allow_html=True)
# logo
# st.markdown('', unsafe_allow_html=True)
title = f'