ColinYK commited on
Commit
50ebe61
·
verified ·
1 Parent(s): 42f44bf

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -0
app.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sys
3
+ import pandas as pd
4
+ import os
5
+ import json
6
+ import shutil
7
+ import zipfile
8
+ import uuid
9
+ import requests
10
+
# Scratch directory for uploaded files, created next to this script.
TEMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
# Per-metric weights used when averaging metrics into the comprehensive score
# (consumed by ModelResult.to_dict). Consistency metrics are weighted above the
# plain quality metrics.
# NOTE(review): clip_score's tiny 0.02 factor presumably compensates for a
# larger raw scale than the other metrics — confirm against the eval backend.
metric_scale = {
    'human_face_similarity': 1.5,
    'clip_score': 0.02,
    'env_clip': 1.5,
    'other_subject_clip': 1.5,
    'image_quality': 1,
    'dynamic_degree': 1,
    'aesthetic_quality': 1,
    'motion_smoothness': 1,
}
class ModelResult:
    """One leaderboard entry: model name, optional project link, and raw metric results."""

    def __init__(self, data):
        # `data` is one JSON object from the leaderboard endpoint.
        self.name = data['model_name']
        self.project_link = data.get('project_link', None)
        self.result = data['result']

    def to_dict(self):
        """Flatten this entry into a leaderboard row, adding a weighted 'comprehensive score'."""
        if self.project_link is None:
            row = {
                'model_name': self.name,
            }
        else:
            # Render the name as a link so the leaderboard cell is clickable.
            row = {
                'model_name': f'<a href="{self.project_link}" target="_blank">{self.name}</a>',
            }
        weighted = []
        for metric, value in self.result.items():
            # Tiny offset before rounding mirrors the original display behavior.
            row[metric] = round(float(value) - 1e-3, 4)
            weighted.append(value * metric_scale[metric])
        # NOTE(review): divides by the metric count, not the weight sum — so this
        # is a scaled mean rather than a true weighted average; kept as-is.
        row['comprehensive score'] = round(sum(weighted) / len(weighted), 4)
        return row
def eval_request(model_name, org_link, huggingface_data_set_name):
    """Submit an evaluation job to the remote A2-Bench service.

    Args:
        model_name: Display name of the model to evaluate.
        org_link: Project page URL, or None when not provided.
        huggingface_data_set_name: HF dataset name holding the model's outputs.

    Returns:
        Parsed JSON response from the evaluation endpoint.

    Raises:
        requests.HTTPError: If the server responds with a non-2xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    params = {
        "model_name": model_name,
        "org_link": org_link,
        "huggingface_data_set_name": huggingface_data_set_name,
    }
    # `json=` already sets Content-Type: application/json, so no explicit header
    # is needed. A timeout prevents the UI thread from hanging forever if the
    # evaluation server is unreachable; evaluation appears to block server-side
    # until finished, hence the generous value.
    response = requests.post(
        "http://47.239.99.255/A2Bench_evaluation/eval",
        json=params,
        timeout=600,
    )
    # Surface HTTP errors explicitly instead of failing obscurely in .json().
    response.raise_for_status()
    return response.json()
def evaluation(model_name, org_link, huggingface_data_set_name):
    """Trigger a remote evaluation and report the outcome to the Gradio UI."""
    try:
        # An empty project-page field means "not provided".
        eval_request(model_name, org_link or None, huggingface_data_set_name)
        return "Evaluation completed successfully!"
    except Exception as e:
        # Re-raise as gr.Error so the message shows up in the web UI.
        raise gr.Error(f"Evaluation failed: {str(e)}")
def load_leaderboard():
    """Fetch all published results from the remote service.

    Returns:
        List of ModelResult objects, one per leaderboard entry.

    Raises:
        requests.HTTPError: If the server responds with a non-2xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    # Timeout added so a dead server fails fast instead of freezing the UI.
    resp = requests.get(
        "http://47.239.99.255/A2Bench_evaluation/load_leaderboard",
        timeout=30,
    )
    # A non-2xx body would not parse as the expected JSON list.
    resp.raise_for_status()
    return [ModelResult(entry) for entry in resp.json()]
# Leaderboard column order: model name first, then the aggregate score,
# then the individual metrics.
HEADER = [
    'model_name',
    'comprehensive score',
    'clip_score',
    'human_face_similarity',
    'env_clip',
    'other_subject_clip',
    'image_quality',
    'dynamic_degree',
    'aesthetic_quality',
    'motion_smoothness',
]
def display_table():
    """Build the leaderboard DataFrame, sorted by comprehensive score (descending)."""
    rows = [model.to_dict() for model in load_leaderboard()]
    # Column-major layout in HEADER order, as pd.DataFrame expects.
    columns = {metric: [row[metric] for row in rows] for metric in HEADER}
    df = pd.DataFrame(columns)
    return df.sort_values(by='comprehensive score', ascending=False)
# Page header: title, paper/code links, and leaderboard introduction
# (markdown with embedded HTML). Typos fixed: "HugginceFace" -> "HuggingFace",
# "suck as" -> "such as"; stray closing </p> removed.
_HEADER_1 = '''
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
<h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">A2-Bench Leaderboard</h1>
<p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://arxiv.org/pdf/2504.02436' target='_blank'>SkyReels-A2 </a> | Codes: <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'>GitHub</a> | <a href='https://huggingface.co/Skywork/SkyReels-A2' target='_blank'>HuggingFace</a></p>
</div>

❗️❗️❗️**LEADERBOARD INTRODUCTION:** ❗️❗️❗️
This is the A2-Bench leaderboard, which is used to evaluate the performance of elements-to-video (E2V) generation models.
We provide an evaluation set containing 50 paired multiple elements (character, object, and background). You can check [evaluation set introduction]() for more details. Each evaluation case includes:
<ul style="font-size: 0.9rem; margin-top: -0.5rem;">
<li>Human subject (characters): Includes both male and female subjects, covering celebrities and ordinary people; additionally, we provide several Flux-generated human images</li>
<li>Non-human subject: Various objects including different types of animals, guitars, racing cars, balls, etc.</li>
<li>Background image: Diverse environmental settings including ordinary indoor and outdoor scenes and famous backgrounds such as The Great Wall and Yellow Wind Ridge (from Black Myth: Wukong)</li>
<li>Prompt: "A realistic scene where [human] interacts with [object] in [environment], following physical laws and spatial logic".</li>
</ul>
'''
# Example-test-case showcase: three element images plus the prompt (raw HTML).
img = '''
<div style="text-align: center; margin: 1rem 0;">
<h3 style="font-size: 1.2rem; margin-bottom: 0.5rem;">Example Test Case</h3>
<div style="display: flex; justify-content: center; gap: 1rem; margin: 1rem 0;">
<img src="https://www.helloimg.com/i/2025/04/07/67f386a7f3717.png" alt="Human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
<img src="https://www.helloimg.com/i/2025/04/07/67f38681d9c24.jpg" alt="Non-human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
<img src="https://www.helloimg.com/i/2025/04/07/67f38684117d0.jpg" alt="Background Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);">
</div>
<p style="font-size: 0.9rem; margin-top: 0.5rem;">Prompt: A man feeding a bird in the park.</p>
</div>
'''
# Second intro section: metric descriptions and submission instructions
# (markdown). Grammar fixes: "metric include" -> "metrics include",
# "At meantime" -> "Meanwhile".
__HEADER__2 = '''

We provide a set of evaluation metrics for elements-to-video models and a leaderboard to show the performance of different models.
Evaluation metrics include:
- Elements Consistency: Measures character id consistency using the arcface human recognition model, and measures object and background consistency using the CLIP model.
- Video Quality: Measures video quality on image quality, dynamic degree, aesthetic quality and motion smoothness.
- T2V Metrics: Measures text-video consistency using CLIP

You can check [Metric Introduction](https://skyworkai.github.io/skyreels-a2.github.io/static/images/bench.png) for more details.

The leaderboard ranks the models based on the comprehensive score, which is the weighted average of all the metrics. We give T2V metrics and object consistency metrics higher weights.
You can click a model name to visit its project page. Meanwhile, you can upload your model result as a huggingface dataset like [this](https://huggingface.co/datasets/ColinYK/pika_dataset).
'''  # noqa: E501
# Footer: star-the-repo request and contact information (markdown).
# Grammar fix: "feedbacks" -> "feedback".
_CITE_ = r"""
If A2-Bench is helpful, please help to ⭐ the <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'> Github Repo</a>. Thanks!
---

📧 **Contact**
If you have any questions or feedback, feel free to open a discussion or contact <b>[email protected]</b>.
"""  # noqa: E501
def upload_file(files):
    """Move an uploaded file into TEMP_DIR and return its destination directory.

    Args:
        files: Gradio file object; `.name` is the (absolute) path of the
            temporary upload on disk.

    Returns:
        Path of the per-upload directory the file was moved into.
    """
    # BUG FIX: os.path.join(TEMP_DIR, files.name) silently discards TEMP_DIR
    # when files.name is an absolute path (os.path.join semantics), so the
    # upload never landed under TEMP_DIR. Join the base name only.
    target_dir = os.path.join(TEMP_DIR, os.path.basename(files.name))
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(files.name, target_dir)
    return target_dir
# ---- UI layout -------------------------------------------------------------
# CSS strips link styling inside the leaderboard DataFrame cells.
with gr.Blocks(css=".gr-dataframe a {text-decoration: none; color: inherit;}") as demo:
    gr.Markdown(_HEADER_1)
    gr.HTML(img)
    gr.Markdown(__HEADER__2)

    with gr.Group():
        # Leaderboard table; interactive=False keeps it read-only in the browser.
        table = gr.DataFrame(
            value=display_table(),
            datatype=['markdown', 'str'],
            interactive=False,
            headers=HEADER,
        )
        refresh_btn = gr.Button("Refresh")
        refresh_btn.click(display_table, outputs=table)

    with gr.Group():
        # Submission form: model name + optional project page + results dataset.
        with gr.Row():
            model_name_input = gr.Textbox(label="Model Name", placeholder="Required:Enter your model name")
            org_link_input = gr.Textbox(label="Project Page", placeholder="Optional:Enter project page, will show on leaderboard")
            huggingface_data_set_name = gr.Textbox(label="Huggingface Data Set Name", placeholder="Required :Enter huggingface dataset set name, will show on leaderboard")

        evaluation_btn = gr.Button("Evaluation")
        output_message = gr.Textbox(label="Evaluation Status", interactive=False)
        evaluation_btn.click(
            evaluation,
            inputs=[model_name_input, org_link_input, huggingface_data_set_name],
            outputs=output_message,
            api_name="evaluate",
        )

    gr.Markdown(_CITE_)
# Launch the Gradio app only when executed as a script.
if __name__ == "__main__":
    demo.launch()