import streamlit as st
import pandas as pd
import numpy as np
from PIL import Image
import base64
from io import BytesIO
# --- Page config ---
# Browser-tab title plus the wide page layout for the whole app.
st.set_page_config(
    page_title="VeriFact Leaderboard",
    layout="wide",
)
# --- Load images ---
@st.cache_data
def load_image(path):
    """Open the image file at ``path`` with PIL and return the image object.

    The function is wrapped in ``st.cache_data``, so Streamlit caches its
    result.
    """
    image = Image.open(path)
    return image
# logo = load_image("factrbench.png")
# chart = load_image("test.png")
# Display logo
# The logo is optional: when the file is missing the banner is skipped rather
# than aborting the whole page.
try:
    logo = load_image("factrbench.png")
except FileNotFoundError:
    logo = None

if logo is not None:
    # Re-encode as PNG and inline it as a base64 data URI so the image can be
    # positioned with raw HTML.
    buf = BytesIO()
    logo.save(buf, format="PNG")
    logo_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    # NOTE(review): centering and sizing are presentation assumptions —
    # adjust to the intended layout.
    st.markdown(f"""
<div style="text-align:center;">
<img src="data:image/png;base64,{logo_b64}" alt="VeriFact logo" style="max-width:100%;">
</div>
""", unsafe_allow_html=True)
# Header
# Title line, resource links and benchmark metadata, sent to Streamlit as a
# single markdown block.
# NOTE(review): the Paper / GitHub / HuggingFace entries carry no URLs here —
# TODO confirm whether link targets are expected.
header_text = """
VERIFACT: Enhancing Long-Form Factuality Evaluation...
# 📑 Paper | 💻 GitHub | 🤗 HuggingFace |
⚙️ Version: V1 | # Models: 11 | Updated: April 2025
"""
st.markdown(header_text, unsafe_allow_html=True)
# --- Load data ---
@st.cache_data
def load_data(path="models.json"):
    """Load the leaderboard scores and derive the average and per-column ranks.

    Reads a JSON Lines file, adds an ``Avg`` column (row mean of ``T1``..``T11``
    rounded to one decimal) and, for each of those columns plus ``Avg``, a
    ``<column>_rank`` column in which rank 1 is the highest value and tied
    values share the lowest rank number of their group.

    Args:
        path: Location of the JSON Lines file to read.

    Returns:
        The DataFrame with the ``Avg`` and ``*_rank`` columns added.

    Raises:
        KeyError: If any of the ``T1``..``T11`` columns is missing.
    """
    task_cols = [f"T{i}" for i in range(1, 12)]
    df = pd.read_json(path, lines=True)
    df["Avg"] = df[task_cols].mean(axis=1).round(1)
    # Compute rank per column
    for col in [*task_cols, "Avg"]:
        # na_option="bottom" ranks missing scores last; the default would
        # leave their rank as NaN, which cannot be cast to int.
        ranks = df[col].rank(ascending=False, method="min", na_option="bottom")
        df[f"{col}_rank"] = ranks.astype(int)
    return df
def _rank_color(rank, max_rank):
    """Return the ``(r, g, b)`` cell background for ``rank`` out of ``max_rank``.

    Rank 1 maps to ``(182, 243, 255)`` and ``max_rank`` maps to white
    ``(255, 255, 255)``, with a linear blend in between.
    """
    # "or 1" avoids a division by zero when the worst rank is also rank 1.
    norm = 1 - (rank - 1) / (max_rank - 1 or 1)
    # NOTE(review): blue stays at 255, so the strongest shade is a pale
    # blue-cyan rather than green — confirm the intended colour.
    red = int(255 - norm * (255 - 182))
    green = int(255 - norm * (255 - 243))
    return red, green, 255


def _leaderboard_html(df, cols):
    """Render ``df`` as an HTML table with one column per entry in ``cols``.

    The ``Model`` column is emitted as plain text. Every other column is
    shaded according to its ``<column>_rank`` value, and rank-1 cells are
    bolded.
    """
    max_ranks = {c: df[f"{c}_rank"].max() for c in cols if c != "Model"}
    # NOTE(review): table-level styling is a presentation assumption.
    parts = [
        '<table style="border-collapse:collapse; width:100%;">',
        "<thead><tr>" + "".join(f"<th>{c}</th>" for c in cols) + "</tr></thead>",
        "<tbody>",
    ]
    for _, row in df.iterrows():
        cells = []
        for c in cols:
            val = row[c]
            if c == "Model":
                cells.append(f'<td style="padding:4px;">{val}</td>')
                continue
            rank = row[f"{c}_rank"]
            r, g, b = _rank_color(rank, max_ranks[c])
            style = f"background-color:rgb({r},{g},{b}); padding:4px;"
            bold = "font-weight:bold;" if rank == 1 else ""
            cells.append(f'<td style="{style}{bold}">{val}</td>')
        parts.append("<tr>" + "".join(cells) + "</tr>")
    parts.append("</tbody></table>")
    # No blank lines between parts: a blank line would end the HTML block in
    # the markdown renderer.
    return "\n".join(parts)


df = load_data()

# --- Tabs ---
tab1, tab2 = st.tabs(["Leaderboard", "Benchmark Details"])

with tab1:
    st.markdown("**Leaderboard:** Higher scores shaded green; best models bolded.")
    # Build HTML table
    cols = ["Model"] + [f"T{i}" for i in range(1, 12)] + ["Avg"]
    html = _leaderboard_html(df, cols)
    st.markdown(html, unsafe_allow_html=True)
with tab2:
    # The chart is optional: show a notice instead of failing the page when
    # the image file is missing.
    try:
        chart = load_image("test.png")
    except FileNotFoundError:
        chart = None

    if chart is None:
        st.info("Benchmark details chart is not available.")
    else:
        # Re-encode as PNG and inline it as a base64 data URI so the image
        # can be positioned with raw HTML.
        buf2 = BytesIO()
        chart.save(buf2, format="PNG")
        chart_b64 = base64.b64encode(buf2.getvalue()).decode("utf-8")
        # NOTE(review): centering and sizing are presentation assumptions —
        # adjust to the intended layout.
        st.markdown(f"""
<div style="text-align:center;">
<img src="data:image/png;base64,{chart_b64}" alt="Benchmark chart" style="max-width:100%;">
</div>
""", unsafe_allow_html=True)