File size: 9,748 Bytes
3903870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a320a20
3903870
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import gradio as gr
import sys
import pandas as pd
import os
import json
import shutil
import zipfile
import uuid
import requests

# Scratch directory named 'tmp', located next to this source file; upload_file()
# builds its target path underneath it.
TEMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')

# Per-metric multipliers. ModelResult.to_dict() multiplies each raw metric value
# by its entry here before averaging the products into 'comprehensive score'.
# A metric name missing from this table raises KeyError there.
metric_scale = {
    'human_face_similarity': 1.5,
    'clip_score': 0.02,
    'env_clip': 1.5,
    'other_subject_clip': 1.5,
    'image_quality': 1,
    'dynamic_degree': 1,
    'aesthetic_quality': 1,
    'motion_smoothness': 1,
}

class ModelResult:
    """One leaderboard entry: model name, optional project link and metric scores.

    Attributes:
        name: Value of ``data['model_name']``.
        project_link: Value of ``data['project_link']`` or ``None`` when absent.
        result: Mapping of metric name to raw score, taken from ``data['result']``.
    """

    def __init__(self, data):
        """Build the entry from one decoded leaderboard record.

        Raises:
            KeyError: If ``'model_name'`` or ``'result'`` is missing from *data*.
        """
        self.name = data['model_name']
        self.project_link = data.get('project_link', None)
        self.result = data['result']

    def to_dict(self):
        """Return one table row for this entry.

        The row holds ``'model_name'`` (wrapped in an HTML link when a project
        link exists), one rounded value per metric, and ``'comprehensive score'``
        which is the mean of ``value * metric_scale[metric]`` over all metrics
        (``0.0`` when the entry has no metrics at all).

        Raises:
            KeyError: If a metric has no entry in ``metric_scale``.
            ValueError: If a metric value cannot be converted to ``float``.
        """
        # Local import: `html` is not among this module's top-level imports.
        import html

        if self.project_link is not None:
            # Name and link originate from submitters, so escape them before
            # embedding them in markup that the table renders.
            link = html.escape(str(self.project_link), quote=True)
            label = html.escape(str(self.name))
            res = {
                'model_name': f'<a href="{link}" target="_blank">{label}</a>',
            }
        else:
            res = {
                'model_name': self.name,
            }

        weighted_scores = []
        for metric, raw_value in self.result.items():
            # Convert once so both the displayed and the weighted value accept
            # numeric strings as well as numbers.
            value = float(raw_value)
            # NOTE(review): the displayed value is shifted by -1e-3 before
            # rounding; the reason is not visible here, so it is preserved as-is.
            res[metric] = round(value - 1e-3, 4)
            weighted_scores.append(value * metric_scale[metric])

        # Guard against an entry without metrics instead of dividing by zero.
        if weighted_scores:
            total_score = sum(weighted_scores) / len(weighted_scores)
        else:
            total_score = 0.0
        res['comprehensive score'] = round(total_score, 4)
        return res

def upload_large_zip_stream(zip_path, model_name, org_link, chunk_size=10*1024*1024):
    """Stream upload a large zip file in chunks.

    The file is read ``chunk_size`` bytes at a time and each piece is POSTed to
    the ``update_zip`` endpoint together with its index, the total chunk count,
    a freshly generated upload id and the model name. The reply to the last
    chunk is expected to carry ``unzip_video_path`` and ``unzip_status``.

    Args:
        zip_path: Path of the zip file to send.
        model_name: Model name sent along with every chunk.
        org_link: Accepted for signature compatibility; not used by the upload.
        chunk_size: Number of bytes per request (default 10 MiB).

    Returns:
        The ``unzip_video_path`` value reported by the server.

    Raises:
        ValueError: If ``chunk_size`` is not positive or the file is empty.
        requests.HTTPError: If the server answers a chunk with a 4xx/5xx status.
        gr.Error: If the final reply lacks the expected keys or reports a
            status other than ``"success"``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")

    total_size = os.path.getsize(zip_path)
    if total_size == 0:
        # Without this guard no request would be sent and there would be no
        # final reply to inspect.
        raise ValueError(f"Cannot upload empty file: {zip_path}")

    file_id = str(uuid.uuid4())
    total_chunks = (total_size + chunk_size - 1) // chunk_size  # ceiling division
    file_name = os.path.basename(zip_path)

    status = {}
    with open(zip_path, 'rb') as f:
        for chunk_num in range(total_chunks):
            chunk = f.read(chunk_size)
            files = {
                'file': (file_name, chunk, 'application/zip'),
            }
            params = {
                'chunk_number': int(chunk_num),
                'total_chunks': int(total_chunks),
                'file_id': file_id,
                'model_name': model_name,
            }

            response = requests.post(
                "http://47.239.99.255/A2Bench_evaluation/update_zip/",
                files=files,
                params=params
            )
            # Stop at the first rejected chunk rather than sending the rest.
            response.raise_for_status()
            # Decode the reply once and reuse it below.
            status = response.json()
            print(f"Uploaded chunk {chunk_num+1}/{total_chunks} - {status.get('progress')}")

    print("Final upload status:", status)
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if 'unzip_video_path' not in status or 'unzip_status' not in status:
        raise gr.Error("Upload failed")
    if status['unzip_status'] != "success":
        raise gr.Error("Upload failed")
    return status['unzip_video_path']
    
def eval_request(model_name, org_link, result_path):
    """Ask the evaluation service to score an uploaded result.

    Args:
        model_name: Name under which the result is submitted.
        org_link: Project page link, or ``None`` when there is none.
        result_path: Server-side path of the unzipped videos.

    Returns:
        The decoded JSON body of the service's reply.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
    """
    params = {
        "model_name": model_name,
        "org_link": org_link,
        "result_path": result_path
    }
    response = requests.post("http://47.239.99.255/A2Bench_evaluation/eval", params=params)
    # Report a rejected request as such instead of trying to decode an error page.
    response.raise_for_status()
    return response.json()

def evaluation(zip_path, model_name, org_link):
    """Upload a result archive and trigger its evaluation.

    Args:
        zip_path: Path of the uploaded zip file.
        model_name: Model name entered by the user (required).
        org_link: Project page entered by the user; empty means "none".

    Returns:
        A fixed success message once the upload and the evaluation request
        have both gone through.

    Raises:
        gr.Error: If an input is missing or any step of the upload/evaluation
            fails; the original exception is attached as the cause.
    """
    # Reject incomplete input up front so the user gets a clear message
    # instead of a low-level error from the upload code.
    if not zip_path:
        raise gr.Error("Evaluation failed: please upload a zip file first")
    if not model_name or not model_name.strip():
        raise gr.Error("Evaluation failed: model name is required")

    # An empty project link is sent as "no link".
    org_link = org_link or None

    try:
        unzip_video_path = upload_large_zip_stream(zip_path, model_name, org_link)
        eval_request(model_name, org_link, unzip_video_path)
    except Exception as e:
        raise gr.Error(f"Evaluation failed: {str(e)}") from e

    return "Evaluation completed successfully!"

        

def load_leaderboard():
    """Fetch the leaderboard records and wrap each one in a ModelResult.

    Returns:
        A list of ModelResult objects, in the order the service returned them.
    """
    reply = requests.get("http://47.239.99.255/A2Bench_evaluation/load_leaderboard")
    return [ModelResult(record) for record in reply.json()]

# Column names, in display order. display_table() builds one DataFrame column
# per entry and the Gradio table uses the same list as its headers.
HEADER = ['model_name', 'comprehensive score', 'clip_score', 'human_face_similarity', 'env_clip', 'other_subject_clip', 'image_quality', 'dynamic_degree', 'aesthetic_quality', 'motion_smoothness']

def display_table():
    """Build the leaderboard table, best comprehensive score first.

    Returns:
        A DataFrame with one column per HEADER entry and one row per model,
        sorted by 'comprehensive score' in descending order.

    Raises:
        KeyError: If a model's row lacks one of the HEADER columns.
    """
    rows = [entry.to_dict() for entry in load_leaderboard()]
    # Column-oriented layout: every HEADER name maps to that column's values.
    columns = {column: [row[column] for row in rows] for column in HEADER}
    table = pd.DataFrame(columns)
    return table.sort_values(by='comprehensive score', ascending=False)

_HEADER_ = '''
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
    <h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">A2-Bench Leaderboard</h1>
    <p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://arxiv.org/pdf/2504.02436' target='_blank'>SkyReels-A2 </a> | Codes: <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'>GitHub</a> | <a href='https://huggingface.co/Skywork/SkyReels-A2' target='_blank'>HugginceFace</a></p> 
</div>

❗️❗️❗️**LEADERBOARD INTRODUCTION:** ❗️❗️❗️  
This is A2-Bench leaderboard which is used to evaluate the performance of elements-to-video (E2V) generation models.  
We provide an evaluation set containing 50 paired multiple elements (character, object, and background). You can check [evaluation set introduction]() for more details. Each evaluation case includes:
<ul style="font-size: 0.9rem; margin-top: -0.5rem;">
    <li>Human subject (characters): Includes both male and female subjects, covering generated by Flux, celebrities and ordinary people, additionally, we provide several generated human images</li>
    <li>Non-human subject: Various objects including different types of animals, guitars, racing cars, balls, etc.</li>
    <li>Background image: Diverse environmental settings including ordinary indoor and outdoor scenes and famous background suck as The Great Wall and Yellow Wind Ridge (from Black Myth: Wukong)</li>
    <li>Prompt: "A realistic scene where [human] interacts with [object] in [environment], following physical laws and spatial logic".</li>
</ul>
</p>
<div style="text-align: center; margin: 1rem 0;">
    <h3 style="font-size: 1.2rem; margin-bottom: 0.5rem;">Example Test Case</h3>
    <div style="display: flex; justify-content: center; gap: 1rem; margin: 1rem 0;">
        <img src="https://www.helloimg.com/i/2025/04/07/67f386a7f3717.png" alt="Human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
        <img src="https://www.helloimg.com/i/2025/04/07/67f38681d9c24.jpg" alt="Non-human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
        <img src="https://www.helloimg.com/i/2025/04/07/67f38684117d0.jpg" alt="Background Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);">
    </div>
    <p style="font-size: 0.9rem; margin-top: 0.5rem;">Prompt: A man feeding a bird in the park.</p>
</div>

We provide a set of evaluation metric of elements-to-video models and a leaderboard to show the performance of different models. 
Evaluation metric include:
- Elements Consistency: Measures character id consistency using arcface human recognition model, and measures object and background consistency using CLIP model.
- Video Quality: Measures video quality on image quality, dynamic degree, aesthetic quality and motion smoothness.
- T2V Metrics: Measures text-video consistency using CLIP 

You can check [Metric Introduction](https://skyworkai.github.io/skyreels-a2.github.io/static/images/bench.png) for more details.

The leaderboard ranks the models based on the comprehensive score, which is the weighted average of all the metrics. We give T2V metrics and object consistency metrics higher weights. 
You can click the model name to visit the project page, At meantime, you can upload your model result **zip file** to the leaderboard by clicking the "Evaluation" button. the zip file should include the video result named as "0.mp4", "1.mp4", "2.mp4", etc.
'''  # noqa E501

# Footer text rendered with gr.Markdown at the bottom of the page.
# NOTE(review): the `---` line directly follows a text line, which Markdown
# treats as a heading underline rather than a horizontal rule — confirm intent.
# NOTE(review): "[email protected]" looks like an obfuscation placeholder rather
# than a real address — confirm and restore the contact address.
_CITE_ = r"""
If A2-Bench is helpful, please help to ⭐ the <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'> Github Repo</a>. Thanks!
---

📧 **Contact**
If you have any questions or feedbacks, feel free to open a discussion or contact <b>[email protected]</b>.
"""  # noqa E501

def upload_file(files):
    """Move an uploaded file into its own folder under TEMP_DIR.

    The original joined TEMP_DIR with the full upload path; when that path is
    absolute the join discards TEMP_DIR, so the directory creation collided
    with the uploaded file itself. It also returned the folder rather than the
    file. This version creates a unique folder and returns the moved file's
    path, which is what the downstream upload code opens.

    Args:
        files: The uploaded file, either as a path or as an object exposing
            the path through its ``name`` attribute.

    Returns:
        Path of the file at its new location inside TEMP_DIR.
    """
    # Accept a plain path as well as a wrapper object carrying `.name`.
    if isinstance(files, (str, os.PathLike)):
        source_path = os.fspath(files)
    else:
        source_path = os.fspath(files.name)

    # A unique folder per upload keeps same-named archives from clashing.
    target_dir = os.path.join(TEMP_DIR, uuid.uuid4().hex)
    os.makedirs(target_dir, exist_ok=True)

    # Moving into an existing directory keeps the original file name;
    # shutil.move returns the resulting file path.
    return shutil.move(source_path, target_dir)

# Page layout: intro header, leaderboard table with a refresh button, the
# upload/evaluation form, and the citation footer.
with gr.Blocks(css=".gr-dataframe a {text-decoration: none; color: inherit;}")  as demo:
    gr.HTML(_HEADER_)
    with gr.Group():
        # display_table() is called here, while the layout is being built, so
        # the leaderboard is fetched once at start-up.
        # NOTE(review): only two datatypes are listed for the ten HEADER
        # columns — confirm how Gradio treats the remaining columns.
        table = gr.DataFrame(
            value=display_table(),
            datatype=['markdown', 'str'],
            interactive=False,  # read-only: editing the table is disabled
            headers=HEADER,
        )
        Refresh = gr.Button("Refresh")
        # Re-fetch the leaderboard and replace the table contents on click.
        Refresh.click(display_table, outputs=table)

    with gr.Group():

        upload_btn = gr.UploadButton("Click to Upload a File", file_types=[".zip"], file_count="single")
        with gr.Row():
            model_name_input = gr.Textbox(label="Model Name", placeholder="Required:Enter your model name")
            org_link_input = gr.Textbox(label="Project Page", placeholder="Optional:Enter project page, will show on leaderboard")
        file_output = gr.File()
        # The value returned by upload_file becomes the content of file_output.
        upload_btn.upload(upload_file, upload_btn, file_output)

        evaluation_btn = gr.Button("Evaluation")
        output_message = gr.Textbox(label="Evaluation Status", interactive=False)
        # Run evaluation() on the stored file plus the two text fields and show
        # its return value in the status box.
        evaluation_btn.click(
            evaluation, 
            inputs=[file_output, model_name_input, org_link_input], 
            outputs=output_message,
            api_name="evaluate",
        )
        
        

    gr.Markdown(_CITE_)



# Start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
    # Alternative kept from the original: demo.launch(share=True)