ColinYK commited on
Commit
50ebe61
·
verified ·
1 Parent(s): 42f44bf

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -0
app.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sys
3
+ import pandas as pd
4
+ import os
5
+ import json
6
+ import shutil
7
+ import zipfile
8
+ import uuid
9
+ import requests
10
+
# Scratch directory for uploaded files, created next to this script.
TEMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
# Per-metric weights used when averaging metrics into the comprehensive score
# (consumed by ModelResult.to_dict). Consistency metrics are weighted above the
# plain quality metrics.
# NOTE(review): clip_score's tiny 0.02 factor presumably compensates for a
# larger raw scale than the other metrics — confirm against the eval backend.
metric_scale = {
    'human_face_similarity': 1.5,
    'clip_score': 0.02,
    'env_clip': 1.5,
    'other_subject_clip': 1.5,
    'image_quality': 1,
    'dynamic_degree': 1,
    'aesthetic_quality': 1,
    'motion_smoothness': 1,
}
class ModelResult:
    """One leaderboard entry: model name, optional project link, and raw metric results."""

    def __init__(self, data):
        # `data` is one JSON object from the leaderboard endpoint.
        self.name = data['model_name']
        self.project_link = data.get('project_link', None)
        self.result = data['result']

    def to_dict(self):
        """Flatten this entry into a leaderboard row, adding a weighted 'comprehensive score'."""
        if self.project_link is None:
            row = {
                'model_name': self.name,
            }
        else:
            # Render the name as a link so the leaderboard cell is clickable.
            row = {
                'model_name': f'<a href="{self.project_link}" target="_blank">{self.name}</a>',
            }
        weighted = []
        for metric, value in self.result.items():
            # Tiny offset before rounding mirrors the original display behavior.
            row[metric] = round(float(value) - 1e-3, 4)
            weighted.append(value * metric_scale[metric])
        # NOTE(review): divides by the metric count, not the weight sum — so this
        # is a scaled mean rather than a true weighted average; kept as-is.
        row['comprehensive score'] = round(sum(weighted) / len(weighted), 4)
        return row
def eval_request(model_name, org_link, huggingface_data_set_name):
    """Submit an evaluation job to the remote A2-Bench service.

    Args:
        model_name: Display name of the model to evaluate.
        org_link: Project page URL, or None when not provided.
        huggingface_data_set_name: HF dataset name holding the model's outputs.

    Returns:
        Parsed JSON response from the evaluation endpoint.

    Raises:
        requests.HTTPError: If the server responds with a non-2xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    params = {
        "model_name": model_name,
        "org_link": org_link,
        "huggingface_data_set_name": huggingface_data_set_name,
    }
    # `json=` already sets Content-Type: application/json, so no explicit header
    # is needed. A timeout prevents the UI thread from hanging forever if the
    # evaluation server is unreachable; evaluation appears to block server-side
    # until finished, hence the generous value.
    response = requests.post(
        "http://47.239.99.255/A2Bench_evaluation/eval",
        json=params,
        timeout=600,
    )
    # Surface HTTP errors explicitly instead of failing obscurely in .json().
    response.raise_for_status()
    return response.json()
def evaluation(model_name, org_link, huggingface_data_set_name):
    """Trigger a remote evaluation and report the outcome to the Gradio UI."""
    try:
        # An empty project-page field means "not provided".
        eval_request(model_name, org_link or None, huggingface_data_set_name)
        return "Evaluation completed successfully!"
    except Exception as e:
        # Re-raise as gr.Error so the message shows up in the web UI.
        raise gr.Error(f"Evaluation failed: {str(e)}")
def load_leaderboard():
    """Fetch all published results from the remote service.

    Returns:
        List of ModelResult objects, one per leaderboard entry.

    Raises:
        requests.HTTPError: If the server responds with a non-2xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    # Timeout added so a dead server fails fast instead of freezing the UI.
    resp = requests.get(
        "http://47.239.99.255/A2Bench_evaluation/load_leaderboard",
        timeout=30,
    )
    # A non-2xx body would not parse as the expected JSON list.
    resp.raise_for_status()
    return [ModelResult(entry) for entry in resp.json()]
# Leaderboard column order: model name first, then the aggregate score,
# then the individual metrics.
HEADER = [
    'model_name',
    'comprehensive score',
    'clip_score',
    'human_face_similarity',
    'env_clip',
    'other_subject_clip',
    'image_quality',
    'dynamic_degree',
    'aesthetic_quality',
    'motion_smoothness',
]
def display_table():
    """Build the leaderboard DataFrame, sorted by comprehensive score (descending)."""
    rows = [model.to_dict() for model in load_leaderboard()]
    # Column-major layout in HEADER order, as pd.DataFrame expects.
    columns = {metric: [row[metric] for row in rows] for metric in HEADER}
    df = pd.DataFrame(columns)
    return df.sort_values(by='comprehensive score', ascending=False)
# Page header: title, paper/code links, and leaderboard introduction
# (markdown with embedded HTML). Typos fixed: "HugginceFace" -> "HuggingFace",
# "suck as" -> "such as"; stray closing </p> removed.
_HEADER_1 = '''
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
<h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">A2-Bench Leaderboard</h1>
<p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://arxiv.org/pdf/2504.02436' target='_blank'>SkyReels-A2 </a> | Codes: <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'>GitHub</a> | <a href='https://huggingface.co/Skywork/SkyReels-A2' target='_blank'>HuggingFace</a></p>
</div>

❗️❗️❗️**LEADERBOARD INTRODUCTION:** ❗️❗️❗️
This is the A2-Bench leaderboard, which is used to evaluate the performance of elements-to-video (E2V) generation models.
We provide an evaluation set containing 50 paired multiple elements (character, object, and background). You can check [evaluation set introduction]() for more details. Each evaluation case includes:
<ul style="font-size: 0.9rem; margin-top: -0.5rem;">
<li>Human subject (characters): Includes both male and female subjects, covering celebrities and ordinary people; additionally, we provide several Flux-generated human images</li>
<li>Non-human subject: Various objects including different types of animals, guitars, racing cars, balls, etc.</li>
<li>Background image: Diverse environmental settings including ordinary indoor and outdoor scenes and famous backgrounds such as The Great Wall and Yellow Wind Ridge (from Black Myth: Wukong)</li>
<li>Prompt: "A realistic scene where [human] interacts with [object] in [environment], following physical laws and spatial logic".</li>
</ul>
'''
# Example-test-case showcase: three element images plus the prompt (raw HTML).
img = '''
<div style="text-align: center; margin: 1rem 0;">
<h3 style="font-size: 1.2rem; margin-bottom: 0.5rem;">Example Test Case</h3>
<div style="display: flex; justify-content: center; gap: 1rem; margin: 1rem 0;">
<img src="https://www.helloimg.com/i/2025/04/07/67f386a7f3717.png" alt="Human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
<img src="https://www.helloimg.com/i/2025/04/07/67f38681d9c24.jpg" alt="Non-human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
<img src="https://www.helloimg.com/i/2025/04/07/67f38684117d0.jpg" alt="Background Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);">
</div>
<p style="font-size: 0.9rem; margin-top: 0.5rem;">Prompt: A man feeding a bird in the park.</p>
</div>
'''
# Second intro section: metric descriptions and submission instructions
# (markdown). Grammar fixes: "metric include" -> "metrics include",
# "At meantime" -> "Meanwhile".
__HEADER__2 = '''

We provide a set of evaluation metrics for elements-to-video models and a leaderboard to show the performance of different models.
Evaluation metrics include:
- Elements Consistency: Measures character id consistency using the arcface human recognition model, and measures object and background consistency using the CLIP model.
- Video Quality: Measures video quality on image quality, dynamic degree, aesthetic quality and motion smoothness.
- T2V Metrics: Measures text-video consistency using CLIP

You can check [Metric Introduction](https://skyworkai.github.io/skyreels-a2.github.io/static/images/bench.png) for more details.

The leaderboard ranks the models based on the comprehensive score, which is the weighted average of all the metrics. We give T2V metrics and object consistency metrics higher weights.
You can click a model name to visit its project page. Meanwhile, you can upload your model result as a huggingface dataset like [this](https://huggingface.co/datasets/ColinYK/pika_dataset).
'''  # noqa: E501
# Footer: star-the-repo request and contact information (markdown).
# Grammar fix: "feedbacks" -> "feedback".
_CITE_ = r"""
If A2-Bench is helpful, please help to ⭐ the <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'> Github Repo</a>. Thanks!
---

📧 **Contact**
If you have any questions or feedback, feel free to open a discussion or contact <b>[email protected]</b>.
"""  # noqa: E501
def upload_file(files):
    """Move an uploaded file into TEMP_DIR and return its destination directory.

    Args:
        files: Gradio file object; `.name` is the (absolute) path of the
            temporary upload on disk.

    Returns:
        Path of the per-upload directory the file was moved into.
    """
    # BUG FIX: os.path.join(TEMP_DIR, files.name) silently discards TEMP_DIR
    # when files.name is an absolute path (os.path.join semantics), so the
    # upload never landed under TEMP_DIR. Join the base name only.
    target_dir = os.path.join(TEMP_DIR, os.path.basename(files.name))
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(files.name, target_dir)
    return target_dir
# ---- UI layout -------------------------------------------------------------
# CSS strips link styling inside the leaderboard DataFrame cells.
with gr.Blocks(css=".gr-dataframe a {text-decoration: none; color: inherit;}") as demo:
    gr.Markdown(_HEADER_1)
    gr.HTML(img)
    gr.Markdown(__HEADER__2)

    with gr.Group():
        # Leaderboard table; interactive=False keeps it read-only in the browser.
        table = gr.DataFrame(
            value=display_table(),
            datatype=['markdown', 'str'],
            interactive=False,
            headers=HEADER,
        )
        refresh_btn = gr.Button("Refresh")
        refresh_btn.click(display_table, outputs=table)

    with gr.Group():
        # Submission form: model name + optional project page + results dataset.
        with gr.Row():
            model_name_input = gr.Textbox(label="Model Name", placeholder="Required:Enter your model name")
            org_link_input = gr.Textbox(label="Project Page", placeholder="Optional:Enter project page, will show on leaderboard")
            huggingface_data_set_name = gr.Textbox(label="Huggingface Data Set Name", placeholder="Required :Enter huggingface dataset set name, will show on leaderboard")

        evaluation_btn = gr.Button("Evaluation")
        output_message = gr.Textbox(label="Evaluation Status", interactive=False)
        evaluation_btn.click(
            evaluation,
            inputs=[model_name_input, org_link_input, huggingface_data_set_name],
            outputs=output_message,
            api_name="evaluate",
        )

    gr.Markdown(_CITE_)
# Launch the Gradio app only when executed as a script.
if __name__ == "__main__":
    demo.launch()