Spaces:
Running
Running
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from PIL import Image | |
| import base64 | |
| from io import BytesIO | |
# --- Page config ---
# Set the browser-tab title and use the full page width for the wide score table.
st.set_page_config(page_title="VeriFact Leaderboard", layout="wide")
# --- Load images ---
def load_image(path):
    """Open the image file at ``path`` with Pillow and return the image object.

    Args:
        path: Location of the image file, passed straight to ``Image.open``.

    Returns:
        Whatever ``Image.open(path)`` returns.
    """
    return Image.open(path)
# The logo is decorative, so it is loaded on a best-effort basis: a missing or
# unreadable asset skips the banner instead of aborting the whole page.
# (Previously `logo` was used below while its only assignment was commented
# out, which raised NameError as soon as the script ran.)
try:
    logo = load_image("factrbench.png")
    buf = BytesIO()
    # Re-encode as PNG in memory so the image can be inlined as a data URI.
    logo.save(buf, format="PNG")
except OSError:
    # Covers a missing file as well as a file Pillow cannot read.
    logo = None
# chart = load_image("test.png")

# Display logo
if logo is not None:
    logo_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    st.markdown(f"""
<div style="text-align:center; margin-bottom:20px;">
<img src="data:image/png;base64,{logo_b64}" style="width:50%; max-width:700px;"/>
</div>
""", unsafe_allow_html=True)
# Header
# The emoji below were restored from mis-decoded UTF-8 byte sequences.
# NOTE(review): the first glyph (before "Paper") had lost its trailing bytes;
# 📑 is a best guess - confirm against the intended icon.
# NOTE(review): the leading "# " on the links line is inside the string and is
# therefore rendered on the page, and all three hrefs are empty - confirm.
st.markdown("""
<div style="text-align:center;">
<p style="font-size:22px;">
VERIFACT: Enhancing Long-Form Factuality Evaluation...
</p>
<p style="font-size:20px;">
# 📑 <a href="">Paper</a> | 💻 <a href="">GitHub</a> | 🤗 <a href="">HuggingFace</a>
⚙️ <strong>Version</strong>: <strong>V1</strong> | <strong># Models</strong>: 11 | Updated: <strong>April 2025</strong>
</p>
</div>
""", unsafe_allow_html=True)
# --- Load data ---
def load_data(path: str = "models.json") -> pd.DataFrame:
    """Load the leaderboard table and add average and per-column rank columns.

    Args:
        path: JSON Lines file (one record per line) containing the score
            columns ``T1`` through ``T11``.

    Returns:
        The loaded frame with two kinds of columns added:

        * ``Avg`` - row mean of ``T1``..``T11``, rounded to one decimal.
        * ``<col>_rank`` for each task column and for ``Avg`` - integer rank
          where 1 is the highest score and ties share the smallest rank.
          Missing scores are ranked last.
    """
    task_cols = [f"T{i}" for i in range(1, 12)]

    df = pd.read_json(path, lines=True)
    df["Avg"] = df[task_cols].mean(axis=1).round(1)

    # Compute rank per column
    for col in [*task_cols, "Avg"]:
        # na_option="bottom" gives missing scores the worst rank; the default
        # ("keep") would leave NaN ranks, which cannot be cast to int.
        ranks = df[col].rank(ascending=False, method="min", na_option="bottom")
        df[f"{col}_rank"] = ranks.astype(int)
    return df
df = load_data()

# --- Tabs ---
tab1, tab2 = st.tabs(["Leaderboard", "Benchmark Details"])

with tab1:
    st.markdown("**Leaderboard:** Higher scores shaded green; best models bolded.")

    # Build HTML table
    score_cols = [f"T{i}" for i in range(1, 12)] + ["Avg"]
    cols = ["Model"] + score_cols
    # Largest rank value per score column; used to scale ranks into [0, 1].
    max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}

    # Collect fragments and join once rather than growing a string with `+=`.
    parts = ["<table style='border-collapse:collapse; width:100%;'>"]
    # header
    parts.append(
        "<tr>" + "".join(f"<th style='padding:4px;'>{c}</th>" for c in cols) + "</tr>"
    )
    # rows
    for _, row in df.iterrows():
        parts.append("<tr>")
        model = row["Model"]
        parts.append(f"<td style='padding:4px;text-align:left;'>{model}</td>")
        for c in score_cols:
            val = row[c]
            # color gradient
            rank = row[f"{c}_rank"]
            # 1.0 for rank 1, 0.0 for the largest rank; `or 1` avoids a zero
            # denominator when the column's largest rank is 1.
            norm = 1 - (rank - 1) / (max_ranks[c] - 1 or 1)
            # Interpolate from white (norm=0) towards rgb(182,243,255) (norm=1).
            # NOTE(review): with b fixed at 255 the strongest shade is a light
            # blue, while the caption above says "green" - confirm the intended
            # target colour.
            r = int(255 - norm * (255 - 182))
            g = int(255 - norm * (255 - 243))
            b = 255
            style = f"background-color:rgb({r},{g},{b}); padding:4px;"
            bold = "font-weight:bold;" if rank == 1 else ""
            parts.append(f"<td style='{style}{bold}'>{val}</td>")
        parts.append("</tr>")
    parts.append("</table>")

    html = "".join(parts)
    st.markdown(html, unsafe_allow_html=True)
with tab2:
    # `chart` was never assigned anywhere (its load is commented out near the
    # top of the file), so referencing it raised NameError. Load it here, where
    # it is used, and degrade gracefully if the asset is missing or unreadable.
    try:
        chart = load_image("test.png")
        buf2 = BytesIO()
        # Re-encode as PNG in memory so the image can be inlined as a data URI.
        chart.save(buf2, format="PNG")
    except OSError:
        # Covers a missing file as well as a file Pillow cannot read.
        chart = None

    if chart is None:
        st.info("Benchmark details chart is unavailable.")
    else:
        chart_b64 = base64.b64encode(buf2.getvalue()).decode("utf-8")
        st.markdown(f"""
<div style="text-align:center;">
<img src="data:image/png;base64,{chart_b64}" style="width:65%;"/>
</div>
""", unsafe_allow_html=True)