import streamlit as st
import pandas as pd

# Set page configuration
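# (st.set_page_config must be the first Streamlit command executed in the script)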
st.set_page_config(page_title="Cyber Benchmark Hub: SECQA Leaderboard", layout="wide")

# Main Title (ensures it's displayed on the main page)
st.title("Cyber Benchmark Hub: SECQA Leaderboard")
st.markdown("## Powered by **Priam Cyber AI**")
st.markdown("#### [View the SECQA Dataset](https://huggingface.co/datasets/zefang-liu/secqa)")

# Function to load and clean CSV data
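# st.cache_data memoizes the return value keyed on the arguments, so the CSV
# is parsed once per session rather than on every widget interaction.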
@st.cache_data
def load_data(file_path):
    df = pd.read_csv(file_path)

    # Remove any unnamed columns (caused by trailing commas)
    df = df.loc[:, ~df.columns.str.contains('Unnamed', na=False)]

    # Standardize column names
    df.columns = df.columns.str.strip()
    df.rename(columns={
        "model name": "Model",
        "source": "Type",
        "v1 metric": "V1 Accuracy",
        "v2 metric": "V2 Accuracy"
    }, inplace=True)

    # Convert percentage strings to floats (e.g., "100%" → 1.0)
    for col in ["V1 Accuracy", "V2 Accuracy"]:
        df[col] = df[col].astype(str).str.replace("%", "").str.strip()
        df[col] = pd.to_numeric(df[col], errors='coerce') / 100

    return df
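
# Expected CSV layout (hypothetical sample; the real Benchmark.csv may differ):
#   model name,source,v1 metric,v2 metric
#   example-model,open source,85%,80%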

# Load dataset
file_path = "Benchmark.csv"  # Ensure this file is uploaded in your Hugging Face Space
df = load_data(file_path)

# Sidebar: Logo, then Filters and Options
with st.sidebar:
    st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg", use_container_width=True)
    st.divider()
    st.header("Filters & Options")
    dataset_version = st.radio("Select Dataset Version", ["v1", "v2"])
    source_filter = st.multiselect(
        "Select Model Type",
        options=df["Type"].unique().tolist(),
        default=df["Type"].unique().tolist()
    )
    st.markdown("---")
    st.header("Test Parameters")
    test_params = pd.DataFrame({
        "Value": [0, 1, 0, 1, 0]
    }, index=["Temperature", "n", "Presence Penalty", "Top_p", "Frequency Penalty"])
    st.table(test_params)

# Apply filtering based on the sidebar selections.
# Copy the result so we never mutate the cached DataFrame returned by load_data.
df_filtered = df[df["Type"].isin(source_filter)].copy() if source_filter else df.copy()

# Choose the correct metric version and compute Accuracy
df_filtered["Accuracy"] = df_filtered["V1 Accuracy"] if dataset_version == "v1" else df_filtered["V2 Accuracy"]
df_filtered = df_filtered[["Model", "Type", "Accuracy"]].dropna()  # Drop rows where the metric failed to parse

# Sort by Accuracy descending and add a Rank column starting from 1
df_filtered = df_filtered.sort_values("Accuracy", ascending=False).reset_index(drop=True)
df_filtered.insert(0, "Rank", range(1, len(df_filtered) + 1))

# Use columns to display leaderboard and model details side-by-side
col1, col2 = st.columns([2, 1])

with col1:
    st.subheader(f"Leaderboard for SECQA Version {dataset_version}")
    st.dataframe(df_filtered, hide_index=True)  # The Rank column stands in for the index

with col2:
    st.subheader("Model Details")
    if df_filtered.empty:
        st.warning("No models match the current filters.")
    else:
        selected_model = st.selectbox("Select a Model", df_filtered["Model"].tolist())
        model_details = df_filtered[df_filtered["Model"] == selected_model].iloc[0]
        st.write(f"**Model:** {model_details['Model']}")
        st.write(f"**Type:** {model_details['Type']}")
        st.write(f"**Accuracy:** {model_details['Accuracy']:.2%}")
        st.write(f"**Rank:** {model_details['Rank']}")

# Footer
st.markdown("---")
st.info("More dataset benchmarks will be added to this hub in the future.")