Spaces:
Running
Running
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
import sys
import pandas as pd
import os
import json
import shutil
import zipfile
import uuid
import requests

# Scratch directory (a 'tmp' folder next to this file) used by
# upload_file() to stage uploaded result files.
TEMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')

# Per-metric weights applied when computing the leaderboard's
# "comprehensive score" (see ModelResult.to_dict): each raw metric value
# is multiplied by its factor before averaging.
# NOTE(review): the factors look hand-tuned (clip_score is on a much
# larger raw scale, hence 0.02) — confirm against the A2-Bench paper.
metric_scale = {
    'human_face_similarity': 1.5,
    'clip_score': 0.02,
    'env_clip': 1.5,
    'other_subject_clip': 1.5,
    'image_quality': 1,
    'dynamic_degree': 1,
    'aesthetic_quality': 1,
    'motion_smoothness': 1,
}
|
23 |
+
|
24 |
+
class ModelResult:
    """One model's leaderboard entry as returned by the eval backend.

    Wraps the raw JSON record (model name, optional project link, and the
    per-metric result mapping) and flattens it into a table row.
    """

    def __init__(self, data):
        self.name = data['model_name']
        self.project_link = data.get('project_link', None)
        self.result = data['result']

    def to_dict(self):
        """Flatten this entry into a single leaderboard row dict.

        The model name is rendered as an HTML link when a project link is
        known; each metric is rounded for display; a weighted average
        'comprehensive score' (weights from module-level metric_scale) is
        appended.
        """
        if self.project_link is None:
            row = {'model_name': self.name}
        else:
            row = {
                'model_name': f'<a href="{self.project_link}" target="_blank">{self.name}</a>',
            }
        weighted = []
        for metric, value in self.result.items():
            # Tiny offset nudges the displayed value just below the raw score.
            row[metric] = round(float(value) - 1e-3, 4)
            weighted.append(value * metric_scale[metric])
        row['comprehensive score'] = round(sum(weighted) / len(weighted), 4)
        return row
|
46 |
+
|
47 |
+
|
48 |
+
def eval_request(model_name, org_link, huggingface_data_set_name, timeout=600):
    """Submit an evaluation job to the A2-Bench backend service.

    Args:
        model_name: Display name for the leaderboard entry.
        org_link: Project page URL, or None to omit the link.
        huggingface_data_set_name: HF dataset containing the model's results.
        timeout: Seconds to wait for the backend before failing. Remote
            evaluation is slow, so the default is generous; previously no
            timeout was set at all, which could hang the UI forever.

    Returns:
        The backend's JSON response decoded into Python objects.

    Raises:
        requests.RequestException: On connection errors or timeout.
    """
    params = {
        "model_name": model_name,
        "org_link": org_link,
        "huggingface_data_set_name": huggingface_data_set_name
    }
    response = requests.post(
        "http://47.239.99.255/A2Bench_evaluation/eval",
        # json= serialises the body and sets Content-Type automatically;
        # the explicit header is kept for clarity.
        json=params,
        headers={"Content-Type": "application/json"},
        timeout=timeout,
    )
    return response.json()
|
60 |
+
|
61 |
+
def evaluation(model_name, org_link, huggingface_data_set_name):
    """Gradio click handler: run a backend evaluation, report status.

    An empty project-page textbox is normalised to None so that no link
    is rendered on the leaderboard. Any failure is surfaced in the UI as
    a gr.Error popup instead of a raw traceback.
    """
    try:
        eval_request(
            model_name,
            org_link if org_link != "" else None,
            huggingface_data_set_name,
        )
        return "Evaluation completed successfully!"
    except Exception as e:
        raise gr.Error(f"Evaluation failed: {str(e)}")
|
70 |
+
|
71 |
+
|
72 |
+
|
73 |
+
def load_leaderboard():
    """Fetch every leaderboard record from the A2-Bench backend.

    Returns:
        list[ModelResult]: One entry per JSON record returned by the service.

    Raises:
        requests.RequestException: On connection errors or timeout.
    """
    # Explicit timeout so a dead backend fails fast instead of hanging
    # the Gradio app (previously no timeout was set).
    response = requests.get(
        "http://47.239.99.255/A2Bench_evaluation/load_leaderboard",
        timeout=30,
    )
    return [ModelResult(record) for record in response.json()]
|
79 |
+
|
80 |
+
# Leaderboard column order: name, aggregate score, then individual metrics.
HEADER = ['model_name', 'comprehensive score', 'clip_score', 'human_face_similarity', 'env_clip', 'other_subject_clip', 'image_quality', 'dynamic_degree', 'aesthetic_quality', 'motion_smoothness']

def display_table():
    """Build the leaderboard DataFrame, best comprehensive score first."""
    rows = [entry.to_dict() for entry in load_leaderboard()]
    # Column-major dict keeps the DataFrame columns in HEADER order.
    columns = {metric: [row[metric] for row in rows] for metric in HEADER}
    df = pd.DataFrame(columns)
    return df.sort_values(by='comprehensive score', ascending=False)
|
94 |
+
|
95 |
+
_HEADER_1 = '''
|
96 |
+
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
|
97 |
+
<h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">A2-Bench Leaderboard</h1>
|
98 |
+
<p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://arxiv.org/pdf/2504.02436' target='_blank'>SkyReels-A2 </a> | Codes: <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'>GitHub</a> | <a href='https://huggingface.co/Skywork/SkyReels-A2' target='_blank'>HugginceFace</a></p>
|
99 |
+
</div>
|
100 |
+
|
101 |
+
❗️❗️❗️**LEADERBOARD INTRODUCTION:** ❗️❗️❗️
|
102 |
+
This is A2-Bench leaderboard which is used to evaluate the performance of elements-to-video (E2V) generation models.
|
103 |
+
We provide an evaluation set containing 50 paired multiple elements (character, object, and background). You can check [evaluation set introduction]() for more details. Each evaluation case includes:
|
104 |
+
<ul style="font-size: 0.9rem; margin-top: -0.5rem;">
|
105 |
+
<li>Human subject (characters): Includes both male and female subjects, covering generated by Flux, celebrities and ordinary people, additionally, we provide several generated human images</li>
|
106 |
+
<li>Non-human subject: Various objects including different types of animals, guitars, racing cars, balls, etc.</li>
|
107 |
+
<li>Background image: Diverse environmental settings including ordinary indoor and outdoor scenes and famous background suck as The Great Wall and Yellow Wind Ridge (from Black Myth: Wukong)</li>
|
108 |
+
<li>Prompt: "A realistic scene where [human] interacts with [object] in [environment], following physical laws and spatial logic".</li>
|
109 |
+
</ul>
|
110 |
+
</p>
|
111 |
+
'''
|
112 |
+
|
113 |
+
img = '''
|
114 |
+
<div style="text-align: center; margin: 1rem 0;">
|
115 |
+
<h3 style="font-size: 1.2rem; margin-bottom: 0.5rem;">Example Test Case</h3>
|
116 |
+
<div style="display: flex; justify-content: center; gap: 1rem; margin: 1rem 0;">
|
117 |
+
<img src="https://www.helloimg.com/i/2025/04/07/67f386a7f3717.png" alt="Human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
|
118 |
+
<img src="https://www.helloimg.com/i/2025/04/07/67f38681d9c24.jpg" alt="Non-human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
|
119 |
+
<img src="https://www.helloimg.com/i/2025/04/07/67f38684117d0.jpg" alt="Background Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);">
|
120 |
+
</div>
|
121 |
+
<p style="font-size: 0.9rem; margin-top: 0.5rem;">Prompt: A man feeding a bird in the park.</p>
|
122 |
+
</div>
|
123 |
+
'''
|
124 |
+
__HEADER__2 = '''
|
125 |
+
|
126 |
+
We provide a set of evaluation metric of elements-to-video models and a leaderboard to show the performance of different models.
|
127 |
+
Evaluation metric include:
|
128 |
+
- Elements Consistency: Measures character id consistency using arcface human recognition model, and measures object and background consistency using CLIP model.
|
129 |
+
- Video Quality: Measures video quality on image quality, dynamic degree, aesthetic quality and motion smoothness.
|
130 |
+
- T2V Metrics: Measures text-video consistency using CLIP
|
131 |
+
|
132 |
+
You can check [Metric Introduction](https://skyworkai.github.io/skyreels-a2.github.io/static/images/bench.png) for more details.
|
133 |
+
|
134 |
+
The leaderboard ranks the models based on the comprehensive score, which is the weighted average of all the metrics. We give T2V metrics and object consistency metrics higher weights.
|
135 |
+
You can click the model name to visit the project page, At meantime, you can upload your model result as a huggingface dataset like [this](https://huggingface.co/datasets/ColinYK/pika_dataset).
|
136 |
+
''' # noqa E501
|
137 |
+
|
138 |
+
_CITE_ = r"""
|
139 |
+
If A2-Bench is helpful, please help to ⭐ the <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'> Github Repo</a>. Thanks!
|
140 |
+
---
|
141 |
+
|
142 |
+
📧 **Contact**
|
143 |
+
If you have any questions or feedbacks, feel free to open a discussion or contact <b>[email protected]</b>.
|
144 |
+
""" # noqa E501
|
145 |
+
|
146 |
+
def upload_file(files):
    """Stage an uploaded file under TEMP_DIR and return its directory.

    A per-upload subdirectory (named after the uploaded file) is created
    under TEMP_DIR and the temp file Gradio produced is moved into it.

    Args:
        files: A Gradio file object; its .name is the temp file's path.

    Returns:
        The directory the file was moved into.
    """
    # basename() is required: files.name is an absolute temp path, and
    # os.path.join discards all earlier components when a later one is
    # absolute — the old join(TEMP_DIR, files.name) silently ignored
    # TEMP_DIR and used the raw temp path as the target directory.
    target_dir = os.path.join(TEMP_DIR, os.path.basename(files.name))
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(files.name, target_dir)
    return target_dir
|
151 |
+
|
152 |
+
# --- Gradio UI ---------------------------------------------------------
# The CSS strips default link styling inside the leaderboard DataFrame so
# the HTML model-name links render like plain table text.
with gr.Blocks(css=".gr-dataframe a {text-decoration: none; color: inherit;}") as demo:
    gr.Markdown(_HEADER_1)
    gr.HTML(img)
    gr.Markdown(__HEADER__2)
    with gr.Group():
        table = gr.DataFrame(
            value=display_table(),  # leaderboard fetched from the backend at app start
            datatype=['markdown', 'str'],
            interactive=False,  # read-only table; cells cannot be edited
            headers=HEADER,
        )
        Refresh = gr.Button("Refresh")
        # Re-query the backend and redraw the table on demand.
        Refresh.click(display_table, outputs=table)

    with gr.Group():

        with gr.Row():
            model_name_input = gr.Textbox(label="Model Name", placeholder="Required:Enter your model name")
            org_link_input = gr.Textbox(label="Project Page", placeholder="Optional:Enter project page, will show on leaderboard")
            huggingface_data_set_name = gr.Textbox(label="Huggingface Data Set Name", placeholder="Required :Enter huggingface dataset set name, will show on leaderboard")

        evaluation_btn = gr.Button("Evaluation")
        output_message = gr.Textbox(label="Evaluation Status", interactive=False)
        # Kick off a remote evaluation; the status string (or a gr.Error
        # popup on failure) lands in output_message.
        evaluation_btn.click(
            evaluation,
            inputs=[model_name_input, org_link_input, huggingface_data_set_name],
            outputs=output_message,
            api_name="evaluate",
        )

    gr.Markdown(_CITE_)

if __name__ == "__main__":
    demo.launch()
|