"""Streamlit app that renders the MSNP leaderboard.

Benchmark rows from ``leaderboard_data.csv`` are left-joined with the rows of
``server_specs.csv`` on the "Server Name" column. The merged table is shown
with sidebar multiselect filters, sorted by "Tokens/sec" in descending order.
"""

import streamlit as st
import pandas as pd

# Column used to join the two CSV files.
SERVER_KEY = "Server Name"

# Column the displayed table is sorted by (descending).
SORT_COLUMN = "Tokens/sec"

# (sidebar label, column name) pairs, in the order the widgets are rendered
# and the filters are applied.
FILTERS = (
    ("Select model:", "Model"),
    ("Select GPU:", "GPU(s)"),
    ("Quantization:", "Quantization"),
    ("Context length:", "Context Length"),
    ("Select server:", SERVER_KEY),
)

# Intro text shown under the title. Kept at module level as plain
# concatenated literals so the value passed to st.markdown is unchanged.
INTRO_MARKDOWN = (
    " [GitHub Repository](https://github.com/EvilFreelancer/llm-msnp-tests)"
    " This leaderboard shows the performance of quantized GGUF models"
    " (via Ollama) on various CPU and GPU combinations in a single-node"
    " setup. \n"
)


def _merge_server_specs(benchmark, servers):
    """Left-join *servers* onto *benchmark* using the SERVER_KEY column.

    Columns present in both frames (other than SERVER_KEY) are dropped from
    *servers* before merging, so the benchmark values win and no
    ``_x``/``_y`` suffixed columns are produced.
    """
    overlapping = [
        col
        for col in servers.columns
        if col != SERVER_KEY and col in benchmark.columns
    ]
    return pd.merge(
        benchmark,
        servers.drop(columns=overlapping),
        on=SERVER_KEY,
        how="left",
    )


def _apply_filters(frame, selections):
    """Return the rows of *frame* matching every non-empty selection.

    *selections* maps a column name to the list of accepted values; an empty
    list means "no filter on this column".
    """
    result = frame.copy()
    for column, chosen in selections.items():
        if chosen:
            result = result[result[column].isin(chosen)]
    return result


# Load data
df_benchmark = pd.read_csv("leaderboard_data.csv")
df_server = pd.read_csv("server_specs.csv")

# Normalize column names: strip surrounding whitespace only (case is kept).
df_benchmark.columns = df_benchmark.columns.str.strip()
df_server.columns = df_server.columns.str.strip()

# Both files must carry the join column. st.stop() ends this script run so
# nothing below executes without a merged frame.
if SERVER_KEY not in df_benchmark.columns:
    st.error('"Server Name" column not found in leaderboard_data.csv')
    st.stop()
if SERVER_KEY not in df_server.columns:
    st.error('"Server Name" column not found in server_specs.csv')
    st.stop()

# Merge datasets
df = _merge_server_specs(df_benchmark, df_server)

st.title("MSNP Leaderboard")
st.markdown(INTRO_MARKDOWN)

# Every filter column and the sort column are indexed below; report missing
# ones to the user instead of failing with a KeyError.
required_columns = [column for _, column in FILTERS] + [SORT_COLUMN]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    st.error("Missing required column(s): " + ", ".join(missing_columns))
    st.stop()

# Filters
with st.sidebar:
    st.header("Filters")
    selections = {
        column: st.multiselect(
            label, options=sorted(df[column].dropna().unique())
        )
        for label, column in FILTERS
    }

# Apply filters
filtered_df = _apply_filters(df, selections)

# Display filtered table
st.dataframe(
    filtered_df.sort_values(by=SORT_COLUMN, ascending=False).reset_index(
        drop=True
    )
)