Spaces:

PKU-Alignment
/

EvalAnything-LeaderBoard

Running

File size: 10,456 Bytes

import os
import json
import gradio as gr
import pandas as pd
import numpy as np

from pathlib import Path
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download


from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
    ABOUT_TEXT
)
from src.display.css_html_js import custom_css
# from src.display.utils import (
#     BENCHMARK_COLS,
#     COLS,
#     EVAL_COLS,
#     EVAL_TYPES,
#     NUMERIC_INTERVALS,
#     TYPES,
#     AutoEvalColumn,
#     ModelType,
#     fields,
#     WeightType,
#     Precision
# )
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN

# Mirror the results dataset into the local results directory at startup.
print(EVAL_RESULTS_PATH)
try:
    snapshot_download(
        repo_id=RESULTS_REPO,
        local_dir=EVAL_RESULTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception as exc:
    # Best-effort: startup continues without the snapshot, but the failure is
    # reported instead of being swallowed silently.
    print(f"Could not download results from {RESULTS_REPO}: {exc!r}")
    # restart_space()

# Count recorded for each "<Perspective>-<...>" subset.
SUBSET_COUNTS = {
    # Alignment
    "Alignment-Object": 250,
    "Alignment-Attribute": 229,
    "Alignment-Action": 115,
    "Alignment-Count": 55,
    "Alignment-Location": 75,
    # Safety
    "Safety-Toxicity-Crime": 29,
    "Safety-Toxicity-Shocking": 31,
    "Safety-Toxicity-Disgust": 42,
    "Safety-Nsfw-Evident": 197,
    "Safety-Nsfw-Evasive": 177,
    "Safety-Nsfw-Subtle": 98,
    # Quality
    "Quality-Distortion-Human_face": 169,
    "Quality-Distortion-Human_limb": 152,
    "Quality-Distortion-Object": 100,
    "Quality-Blurry-Defocused": 350,
    "Quality-Blurry-Motion": 350,
    # Bias
    "Bias-Age": 80,
    "Bias-Gender": 140,
    "Bias-Race": 140,
    "Bias-Nationality": 120,
    "Bias-Religion": 60,
}


def _count_by_perspective(subset_counts):
    """Sum subset counts under the text before the first '-' of each key."""
    totals = {}
    for subset_name, count in subset_counts.items():
        perspective = subset_name.partition("-")[0]
        totals[perspective] = totals.get(perspective, 0) + count
    return totals


# Per-perspective totals: Alignment 724, Safety 574, Quality 1121, Bias 540.
PERSPECTIVE_COUNTS = _count_by_perspective(SUBSET_COUNTS)

# Columns carried through unchanged as metadata (not scores).
META_DATA = ["Model"]



def restart_space():
    """Restart the Space identified by ``REPO_ID`` through ``API.restart_space``."""
    space_id = REPO_ID
    API.restart_space(repo_id=space_id)


# color_map = {
#     "Score Model": "#7497db",
#     "Opensource VLM": "#E8ECF2",
#     "Closesource VLM": "#ffcd75",
#     "Others": "#75809c",

#     # #7497db #E8ECF2 #ffcd75 #75809c
# }
# def color_model_type_column(df, color_map):
#     """
#     Apply color to the 'Modality' column of the DataFrame based on a given color mapping.

#     Parameters:
#     df (pd.DataFrame): The DataFrame containing the 'Modality' column.
#     color_map (dict): A dictionary mapping model types to colors.

#     Returns:
#     pd.Styler: The styled DataFrame.
#     """
#     # Function to apply color based on the model type
#     def apply_color(val):
#         color = color_map.get(val, "default")  # Default color if not specified in color_map
#         return f'background-color: {color}'
    
#     # Format for different columns
#     format_dict = {col: "{:.1f}" for col in df.columns if col not in META_DATA}
#     format_dict['Overall Score'] = "{:.2f}"
#     format_dict[''] = "{:d}"

#     return df.style.applymap(apply_color, subset=['Modality']).format(format_dict, na_rep='')

def regex_table(dataframe, regex, filter_button, style=True):
    """
    Filter the leaderboard by modality and by a comma-separated model search.

    Parameters:
    dataframe (pd.DataFrame): Table with at least "Model" and "Modality" columns.
    regex (str): Comma-separated search terms. Each term is a case-insensitive
        regular expression; a row is kept when its "Model" matches any term.
        Blank terms are ignored, and an empty search keeps every row whose
        "Model" is not null. If the combined pattern is not a valid regular
        expression, the terms are matched as literal text instead.
    filter_button (list | str): Selected modalities. Rows whose "Modality"
        contains "Image-Text-to-Text" or "Video-Text-to-Text" are dropped when
        that modality is not in the selection. Any other type disables the
        modality filter.
    style (bool): Unused; kept so existing callers keep working.

    Returns:
    pd.DataFrame: A new frame (the input is not modified) with a fresh 0..n-1
    index and a leading '' column holding the 1-based row rank.
    """
    import re  # local import: only needed to validate the user-supplied pattern

    # Split on commas and drop blank terms: an empty alternative such as
    # "foo|" would otherwise match every row.
    patterns = [term.strip() for term in (regex or "").split(",")]
    patterns = [term for term in patterns if term]
    combined_regex = "|".join(patterns)
    try:
        re.compile(combined_regex)
    except re.error:
        # Half-typed or malformed pattern (e.g. "("): search for it literally
        # rather than raising out of the UI callback.
        combined_regex = "|".join(re.escape(term) for term in patterns)

    # Drop the rows of every modality that is not selected.
    if isinstance(filter_button, (list, str)):
        for modality in ("Image-Text-to-Text", "Video-Text-to-Text"):
            if modality not in filter_button:
                is_hidden = dataframe["Modality"].str.contains(modality, case=False, na=False)
                dataframe = dataframe[~is_hidden]

    # Keep rows whose model name matches any of the search terms.
    is_match = dataframe["Model"].str.contains(combined_regex, case=False, na=False)
    data = dataframe[is_match].reset_index(drop=True)

    # Replace column '' with the count/rank (insert alone raises if it exists).
    if "" in data.columns:
        data = data.drop(columns="")
    data.insert(0, "", range(1, 1 + len(data)))

    # if style:
    #     # apply color
    #     data = color_model_type_column(data, color_map)

    return data

def get_leaderboard_results(results_path):
    """
    Load every ``.json`` file directly inside ``results_path`` into one DataFrame.

    Parameters:
    results_path (str | os.PathLike): Directory holding the result files.

    Returns:
    pd.DataFrame: The rows of all files concatenated in file-name order, with a
    fresh 0..n-1 index. An empty DataFrame when the directory has no ``.json``
    files.

    Raises:
    OSError: If the directory or one of its files cannot be read.
    json.JSONDecodeError: If a file does not contain valid JSON.
    """
    data_dir = Path(results_path)

    # Sorted so the row order does not depend on directory listing order;
    # sub-directories are skipped even if their name ends in ".json".
    json_files = sorted(
        entry
        for entry in data_dir.iterdir()
        if entry.name.endswith(".json") and entry.is_file()
    )

    frames = []
    for json_file in json_files:
        with open(json_file, encoding="utf-8") as rf:
            frames.append(pd.DataFrame(json.load(rf)))

    if not frames:
        # pd.concat raises on an empty list.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def avg_all_perspective(orig_df: pd.DataFrame, columns_name: list, meta_data=META_DATA, perspective_counts=PERSPECTIVE_COUNTS):
    """
    Add a count-weighted "Overall Score" column and sort by it.

    Parameters:
    orig_df (pd.DataFrame): Table holding the ``meta_data`` columns and one
        score column per entry of ``columns_name``. It is not modified.
    columns_name (list): Score columns to average; each must be a key of
        ``perspective_counts``.
    meta_data (list): Columns carried over unchanged, placed first.
    perspective_counts (dict): Count per perspective, used as its weight.

    Returns:
    pd.DataFrame: Columns ``meta_data + ["Overall Score"] + columns_name``,
    sorted by "Overall Score" descending, with a fresh 0..n-1 index.

    Raises:
    KeyError: If a requested column is missing from ``orig_df`` or from
        ``perspective_counts``.
    """
    # Copy so that adding a column never writes into a slice of the caller's frame.
    new_df = orig_df[meta_data + columns_name].copy()

    # Normalise over the selected columns only, so the weights sum to 1 even
    # when columns_name is a subset of perspective_counts.
    selected_counts = {col: perspective_counts[col] for col in columns_name}
    total_count = sum(selected_counts.values())
    weights = {col: count / total_count for col, count in selected_counts.items()}

    # Column-wise weighted sum, accumulated in columns_name order.
    new_df["Overall Score"] = sum(new_df[col] * weights[col] for col in columns_name)

    cols = meta_data + ["Overall Score"] + columns_name
    new_df = new_df[cols].sort_values(by="Overall Score", ascending=False).reset_index(drop=True)
    return new_df
    
# Hard-coded leaderboard content: twenty identical rows built from one template.
# NOTE(review): these look like placeholder values; nothing visible here loads
# real results into `df` — confirm.
_PLACEHOLDER_ROW = {
    "Model": "Beaver-Vision-11B",
    "Modality": "Image-Text-to-Text",
    "Correctness of Information": 100.00,
    "Detail Orientation": 100.00,
    "Safety": 100.00,
    "AVG": 100.00,
}
data = {column: [value] * 20 for column, value in _PLACEHOLDER_ROW.items()}
df = pd.DataFrame(data)
# Row count; formatted into INTRODUCTION_TEXT by the UI below.
total_models = df.shape[0]

# Page layout: introduction row, leaderboard/about tabs, citation box, and the
# callbacks that re-filter the visible table.
with gr.Blocks(css=custom_css) as app:
    with gr.Row():
        with gr.Column(scale=6):
            gr.Markdown(INTRODUCTION_TEXT.format(str(total_models)))
        with gr.Column(scale=4):
            # "/resolve/" is the raw-file URL; "/blob/" is the Hub's HTML file
            # viewer, which cannot be embedded as an image.
            gr.Markdown("![](https://huggingface.co/spaces/Align-Anything/Leaderboard/resolve/main/src/overview.jpeg)")
            # gr.HTML(BGB_LOGO, elem_classes="logo")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 Align-Anything Leaderboard"):
            with gr.Row():
                search_overall = gr.Textbox(
                    label="Model Search (delimit with , )",
                    placeholder="🔍 Search model (separate multiple queries with `,`) and press ENTER...",
                    show_label=False,
                )
                model_type_overall = gr.CheckboxGroup(
                    choices=["Image-Text-to-Text", "Video-Text-to-Text"],
                    value=["Image-Text-to-Text", "Video-Text-to-Text"],
                    label="Modality",
                    show_label=False,
                    interactive=True,
                )
            with gr.Row():
                # Hidden, unfiltered copy of the data: the callbacks below
                # always filter from this one, never from the visible table.
                Align_Anything_table_overall_hidden = gr.Dataframe(
                    df,
                    headers=df.columns.tolist(),
                    elem_id="Align_Anything_leadboard_overall_hidden",
                    wrap=True,
                    visible=False,
                )
                # Initial view: empty search, every modality selected.
                initial_overall = regex_table(
                    df.copy(),
                    "",
                    ["Video-Text-to-Text", "Image-Text-to-Text"],
                )
                Align_Anything_table_overall = gr.Dataframe(
                    initial_overall,
                    # Take the headers from the filtered frame so they include
                    # the leading '' rank column that regex_table adds.
                    headers=initial_overall.columns.tolist(),
                    elem_id="Align_Anything_leadboard_overall",
                    wrap=True,
                )
        with gr.TabItem("About"):
            with gr.Row():
                gr.Markdown(ABOUT_TEXT)

    with gr.Accordion("📚 Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            lines=7,
            label="Copy the following to cite these results.",
            elem_id="citation-button",
            show_copy_button=True,
        )

    # Re-filter the visible table whenever the search text or the modality
    # selection changes.
    search_overall.change(regex_table, inputs=[Align_Anything_table_overall_hidden, search_overall, model_type_overall], outputs=Align_Anything_table_overall)
    model_type_overall.change(regex_table, inputs=[Align_Anything_table_overall_hidden, search_overall, model_type_overall], outputs=Align_Anything_table_overall)
    
# Restart the Space on a fixed interval from a background scheduler.
# NOTE(review): 18000 seconds is 5 hours, not the "3h" this line was previously
# annotated with — confirm which period is intended.
RESTART_INTERVAL_SECONDS = 5 * 60 * 60
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=RESTART_INTERVAL_SECONDS)
scheduler.start()
# app.queue(default_concurrency_limit=40).launch()
app.launch(allowed_paths=["./", "./src", "./evals"])