|
import os |
|
import json |
|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
|
|
from pathlib import Path |
|
from apscheduler.schedulers.background import BackgroundScheduler |
|
from huggingface_hub import snapshot_download |
|
|
|
|
|
from src.about import ( |
|
CITATION_BUTTON_LABEL, |
|
CITATION_BUTTON_TEXT, |
|
EVALUATION_QUEUE_TEXT, |
|
INTRODUCTION_TEXT, |
|
LLM_BENCHMARKS_TEXT, |
|
TITLE, |
|
ABOUT_TEXT |
|
) |
|
from src.display.css_html_js import custom_css |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN |
|
|
|
# Best-effort sync of the results dataset repo into EVAL_RESULTS_PATH when the
# module is loaded.
try:
    print(EVAL_RESULTS_PATH)
    snapshot_download(
        repo_id=RESULTS_REPO,
        local_dir=EVAL_RESULTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception as exc:
    # Deliberately non-fatal: execution continues without the snapshot, but
    # the failure is reported instead of being silently discarded.
    print(f"Could not download results from {RESULTS_REPO}: {exc!r}")
|
|
|
|
|
# Per-subset counts keyed "<Perspective>-<Subset>"; presumably the number of
# benchmark samples in each subset (TODO confirm against the dataset).
SUBSET_COUNTS = {
    # Alignment
    "Alignment-Object": 250,
    "Alignment-Attribute": 229,
    "Alignment-Action": 115,
    "Alignment-Count": 55,
    "Alignment-Location": 75,
    # Safety
    "Safety-Toxicity-Crime": 29,
    "Safety-Toxicity-Shocking": 31,
    "Safety-Toxicity-Disgust": 42,
    "Safety-Nsfw-Evident": 197,
    "Safety-Nsfw-Evasive": 177,
    "Safety-Nsfw-Subtle": 98,
    # Quality
    "Quality-Distortion-Human_face": 169,
    "Quality-Distortion-Human_limb": 152,
    "Quality-Distortion-Object": 100,
    "Quality-Blurry-Defocused": 350,
    "Quality-Blurry-Motion": 350,
    # Bias
    "Bias-Age": 80,
    "Bias-Gender": 140,
    "Bias-Race": 140,
    "Bias-Nationality": 120,
    "Bias-Religion": 60,
}

# Per-perspective totals; each value equals the sum of that perspective's
# entries in SUBSET_COUNTS. Used as the default weights source of
# avg_all_perspective.
PERSPECTIVE_COUNTS = dict(
    Alignment=724,
    Safety=574,
    Quality=1121,
    Bias=540,
)

# Non-score columns carried through unchanged; default `meta_data` of
# avg_all_perspective.
META_DATA = ["Model"]
|
|
|
|
|
|
|
def restart_space():
    """Ask ``API`` to restart the Space identified by ``REPO_ID``.

    Takes no arguments and returns ``None``; it is registered below as the
    periodic scheduler job. ``API`` comes from ``src.envs`` (presumably a
    ``huggingface_hub`` client -- confirm there).
    """
    target_space = REPO_ID
    API.restart_space(repo_id=target_space)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def regex_table(dataframe, regex, filter_button, style=True):
    """Filter the leaderboard by modality and by a comma-separated model search.

    Args:
        dataframe: Table with at least "Model" and "Modality" string columns.
        regex: Search text. It is split on "," and each non-blank term is
            used as a case-insensitive regular expression against "Model";
            a row is kept when any term matches. Blank terms (for example
            from a trailing comma) are ignored, and an empty search keeps
            every row with a non-missing model name. If the combined
            pattern is not a valid regular expression, the terms are
            matched as literal text instead of raising.
        filter_button: Selected modalities (list or str). Rows whose
            "Modality" contains "Image-Text-to-Text" or "Video-Text-to-Text"
            are dropped when that name is not in the selection. Any other
            type disables modality filtering.
        style: Unused; kept so existing callers keep working.

    Returns:
        A new DataFrame with a fresh 0-based index and a leading column
        named "" holding the 1-based row number. The input is not modified.
    """
    import re  # local import: the file's import block does not provide it

    terms = [term.strip() for term in (regex or "").split(",")]
    # Drop blank terms: "a," would otherwise become "a|" and match everything.
    terms = [term for term in terms if term]
    pattern = "|".join(terms)
    try:
        re.compile(pattern)
    except re.error:
        # Half-typed or malformed expression: fall back to a literal search.
        pattern = "|".join(re.escape(term) for term in terms)

    if isinstance(filter_button, (list, str)):
        for modality in ("Image-Text-to-Text", "Video-Text-to-Text"):
            if modality not in filter_button:
                hidden = dataframe["Modality"].str.contains(modality, case=False, na=False)
                dataframe = dataframe[~hidden]

    matches = dataframe["Model"].str.contains(pattern, case=False, na=False)
    data = dataframe[matches].reset_index(drop=True)

    # Leading unnamed column shows the 1-based position in the filtered view.
    data.insert(0, "", range(1, 1 + len(data)))
    return data
|
|
|
def get_leaderboard_results(results_path):
    """Load every ``.json`` file in a directory into a single DataFrame.

    Args:
        results_path: Directory holding the result files, as a ``str`` or a
            path-like object.

    Returns:
        A DataFrame with the rows of all JSON files concatenated in
        file-name order under a fresh 0..n-1 index. An empty DataFrame is
        returned when the directory contains no JSON files.

    Raises:
        FileNotFoundError: If ``results_path`` does not exist.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
    """
    data_dir = Path(results_path)

    # Sorted so the row order does not depend on directory listing order.
    json_files = sorted(
        entry
        for entry in data_dir.iterdir()
        if entry.name.endswith(".json") and entry.is_file()
    )

    frames = []
    for json_file in json_files:
        with open(json_file, encoding="utf-8") as rf:
            frames.append(pd.DataFrame(json.load(rf)))

    # pd.concat rejects an empty list, so handle "no results yet" explicitly.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
|
|
|
|
|
def avg_all_perspective(orig_df: pd.DataFrame, columns_name: list, meta_data=META_DATA, perspective_counts=PERSPECTIVE_COUNTS):
    """Add a count-weighted "Overall Score" column and rank rows by it.

    Args:
        orig_df: Table containing the ``meta_data`` columns and every column
            listed in ``columns_name``.
        columns_name: Score columns to average. Each must be a key of
            ``perspective_counts``.
        meta_data: Columns copied through unchanged, placed first in the
            result.
        perspective_counts: Mapping of column name to its count; a column's
            weight is its count divided by the total count of the selected
            columns, so the weights always sum to 1.

    Returns:
        A new DataFrame with columns ``meta_data + ["Overall Score"] +
        columns_name``, sorted by "Overall Score" in descending order with a
        fresh index. ``orig_df`` is not modified.

    Raises:
        KeyError: If a name in ``columns_name`` is missing from
            ``perspective_counts`` or from ``orig_df``.
    """
    meta_data = list(meta_data)
    columns_name = list(columns_name)

    # Work on a copy so adding a column never writes into a view of orig_df.
    new_df = orig_df[meta_data + columns_name].copy()

    # Normalise over the selected columns only; dividing by the total of all
    # perspectives would under-weight the score whenever a subset is averaged.
    selected_counts = {col: perspective_counts[col] for col in columns_name}
    total_count = sum(selected_counts.values())
    weights = {col: count / total_count for col, count in selected_counts.items()}

    new_df["Overall Score"] = sum(new_df[col] * weights[col] for col in columns_name)

    cols = meta_data + ["Overall Score"] + columns_name
    return new_df[cols].sort_values(by="Overall Score", ascending=False).reset_index(drop=True)
|
|
|
# Leaderboard content shown by the UI: twenty identical rows for a single
# model with every score set to 100.00 (looks like placeholder data --
# confirm before publishing).
data = {
    "Model": ["Beaver-Vision-11B"] * 20,
    "Modality": ["Image-Text-to-Text"] * 20,
    "Correctness of Information": [100.00] * 20,
    "Detail Orientation": [100.00] * 20,
    "Safety": [100.00] * 20,
    "AVG": [100.00] * 20,
}
df = pd.DataFrame(data)

# Row count of the table; interpolated into INTRODUCTION_TEXT by the UI.
total_models = len(df)
|
|
|
# Page layout: introduction row, a tab with the searchable leaderboard, an
# "About" tab, a citation accordion, and the events that re-filter the table.
# NOTE(review): the nesting below is reconstructed from the order of the
# calls; confirm that the Citation accordion belongs at page level rather
# than inside the "About" tab.
with gr.Blocks(css=custom_css) as app:
    with gr.Row():
        with gr.Column(scale=6):
            gr.Markdown(INTRODUCTION_TEXT.format(str(total_models)))
        with gr.Column(scale=4):
            gr.Markdown("")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("π Align-Anything Leaderboard"):
            with gr.Row():
                search_overall = gr.Textbox(
                    label="Model Search (delimit with , )",
                    # The delimiter shown here must match the "," that
                    # regex_table splits on.
                    placeholder="π Search model (separate multiple queries with `,`) and press ENTER...",
                    show_label=False,
                )
                model_type_overall = gr.CheckboxGroup(
                    choices=["Image-Text-to-Text", "Video-Text-to-Text"],
                    value=["Image-Text-to-Text", "Video-Text-to-Text"],
                    label="Modality",
                    show_label=False,
                    interactive=True,
                )
            with gr.Row():
                # Hidden, unfiltered copy of the data; it is the input of
                # every filter event so filtering always starts from all rows.
                Align_Anything_table_overall_hidden = gr.Dataframe(
                    df,
                    headers=df.columns.tolist(),
                    elem_id="Align_Anything_leadboard_overall_hidden",
                    wrap=True,
                    visible=False,
                )
                # Initial visible view: no search text, both modalities on.
                _initial_overall = regex_table(
                    df.copy(),
                    "",
                    ["Video-Text-to-Text", "Image-Text-to-Text"],
                )
                Align_Anything_table_overall = gr.Dataframe(
                    _initial_overall,
                    # regex_table prepends a row-number column, so take the
                    # headers from the table that is actually displayed.
                    headers=_initial_overall.columns.tolist(),
                    elem_id="Align_Anything_leadboard_overall",
                    wrap=True,
                )
        with gr.TabItem("About"):
            with gr.Row():
                gr.Markdown(ABOUT_TEXT)

    with gr.Accordion("π Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            lines=7,
            label="Copy the following to cite these results.",
            elem_id="citation-button",
            show_copy_button=True,
        )

    # Re-filter the visible table whenever the search text or the modality
    # selection changes.
    search_overall.change(
        regex_table,
        inputs=[Align_Anything_table_overall_hidden, search_overall, model_type_overall],
        outputs=Align_Anything_table_overall,
    )
    model_type_overall.change(
        regex_table,
        inputs=[Align_Anything_table_overall_hidden, search_overall, model_type_overall],
        outputs=Align_Anything_table_overall,
    )
|
|
|
# Restart the Space every five hours (18000 s) from a background scheduler;
# presumably so the start-up results download is repeated -- confirm.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=5 * 60 * 60)
scheduler.start()

# NOTE(review): "./" is passed in allowed_paths alongside "./src" and
# "./evals"; confirm the whole working directory is meant to be listed.
app.launch(allowed_paths=["./", "./src", "./evals"])