Spaces:

xianbao
/

SkyReels_A2_Bench

Runtime error

App Files Files Community

ColinYK committed on Apr 7

Commit

3903870

verified ·

1 Parent(s): 15c1895

Upload app.py

Browse files

Files changed (1) hide show

app.py +218 -0

app.py ADDED Viewed

	@@ -0,0 +1,218 @@

+import gradio as gr
+import sys
+import pandas as pd
+import os
+import json
+import shutil
+import zipfile
+import uuid
+import requests
+TEMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')  # 'tmp' folder next to this file; upload_file() builds its target path from it
+metric_scale = {  # per-metric multiplier applied in ModelResult.to_dict() before the values are averaged
+    'human_face_similarity': 1.5,
+    'clip_score': 0.02,  # NOTE(review): presumably clip_score arrives on a larger raw scale than the other metrics - confirm
+    'env_clip': 1.5,
+    'other_subject_clip': 1.5,
+    'image_quality': 1,
+    'dynamic_degree': 1,
+    'aesthetic_quality': 1,
+    'motion_smoothness': 1,
+}
+class ModelResult:
+    def __init__(self, data):
+        self.name = data['model_name']
+        self.project_link = data.get('project_link', None)
+        self.result = data['result']
+    def to_dict(self):
+        if self.project_link is not None:
+            res = {
+                'model_name': f'<a href="{self.project_link}" target="_blank">{self.name}</a>',
+            }
+        else:
+            res = {
+                'model_name': self.name,
+            }
+        total_score = []
+        for metric in self.result.keys():
+            res[metric] = round(float(self.result[metric]) - 1e-3, 4)
+            total_score.append(self.result[metric] * metric_scale[metric])
+        total_score = sum(total_score) / len(total_score)
+        res['comprehensive score'] = round(total_score, 4)
+        return res
+def upload_large_zip_stream(zip_path, model_name, org_link, chunk_size=10*1024*1024):
+    """Stream upload a large zip file in chunks"""
+    file_id = str(uuid.uuid4())
+    total_size = os.path.getsize(zip_path)
+    total_chunks = (total_size + chunk_size - 1) // chunk_size
+    with open(zip_path, 'rb') as f:
+        for chunk_num in range(total_chunks):
+            chunk = f.read(chunk_size)
+            files = {
+                'file': (os.path.basename(zip_path), chunk, 'application/zip'),
+            }
+            params = {
+                'chunk_number': int(chunk_num),
+                'total_chunks': int(total_chunks),
+                'file_id': file_id,
+                'model_name': model_name,
+            }
+            response = requests.post(
+                "http://47.239.99.255/A2Bench_evaluation/update_zip/",
+                files=files,
+                params=params
+            )
+            data = response.json()
+            print(f"Uploaded chunk {chunk_num+1}/{total_chunks} - {data['progress']}")
+    print("Final upload status:", response.json())
+    assert 'unzip_video_path' in response.json(), "Upload failed"
+    assert 'unzip_status' in response.json(), "Upload failed"
+    unzip_video_path = response.json()['unzip_video_path']
+    unzip_status = response.json()['unzip_status']
+    if unzip_status == "success":
+        return unzip_video_path
+    else:
+        raise gr.Error("Upload failed")
+def eval_request(model_name, org_link, result_path):
+    params = {
+        "model_name": model_name,
+        "org_link": org_link,
+        "result_path": result_path
+    }
+    response = requests.post("http://47.239.99.255/A2Bench_evaluation/eval", params=params)
+    return response.json()
+def evaluation(zip_path, model_name, org_link):
+    try:
+        unzip_video_path = upload_large_zip_stream(zip_path, model_name, org_link)
+        if org_link=="":
+            org_link = None
+        eval_request(model_name, org_link, unzip_video_path)
+        return "Evaluation completed successfully!"
+    except Exception as e:
+        raise gr.Error(f"Evaluation failed: {str(e)}")
+def load_leaderboard():
+    leaderboard_list = []
+    file_list = requests.get("http://47.239.99.255/A2Bench_evaluation/load_leaderboard")
+    for file in file_list.json():
+        leaderboard_list.append(ModelResult(file))
+    return leaderboard_list
+HEADER = ['model_name', 'comprehensive score', 'clip_score', 'human_face_similarity', 'env_clip', 'other_subject_clip', 'image_quality', 'dynamic_degree', 'aesthetic_quality', 'motion_smoothness']  # leaderboard column order; display_table() looks up each name in ModelResult.to_dict()
+def display_table():
+    leaderboard_list = load_leaderboard()
+    data = {}
+    for metric in HEADER:
+        data[metric] = []
+    for model_result in leaderboard_list:
+        result_dict = model_result.to_dict()
+        for metric in HEADER:
+            data[metric].append(result_dict[metric])
+    df = pd.DataFrame(data)
+    df = df.sort_values(by='comprehensive score', ascending=False)
+    return df
+_HEADER_ = '''
+<div style="text-align: center; max-width: 650px; margin: 0 auto;">
+    <h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">A2-Bench Leaderboard</h1>
+    <p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://arxiv.org/pdf/2504.02436' target='_blank'>SkyReels-A2 </a> | Code: <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'>GitHub</a> | <a href='https://huggingface.co/Skywork/SkyReels-A2' target='_blank'>HuggingFace</a></p>
+</div>
+❗️❗️❗️**LEADERBOARD INTRODUCTION:** ❗️❗️❗️
+This is the A2-Bench leaderboard, which is used to evaluate the performance of elements-to-video (E2V) generation models.
+We provide an evaluation set containing 50 paired multiple elements (character, object, and background). You can check [evaluation set introduction]() for more details. Each evaluation case includes:
+<ul style="font-size: 0.9rem; margin-top: -0.5rem;">
+    <li>Human subject (characters): Includes both male and female subjects, covering subjects generated by Flux, celebrities and ordinary people; additionally, we provide several generated human images</li>
+    <li>Non-human subject: Various objects including different types of animals, guitars, racing cars, balls, etc.</li>
+    <li>Background image: Diverse environmental settings including ordinary indoor and outdoor scenes and famous backgrounds such as The Great Wall and Yellow Wind Ridge (from Black Myth: Wukong)</li>
+    <li>Prompt: "A realistic scene where [human] interacts with [object] in [environment], following physical laws and spatial logic".</li>
+</ul>
+</p>
+<div style="text-align: center; margin: 1rem 0;">
+    <h3 style="font-size: 1.2rem; margin-bottom: 0.5rem;">Example Test Case</h3>
+    <div style="display: flex; justify-content: center; gap: 1rem; margin: 1rem 0;">
+        <img src="https://www.helloimg.com/i/2025/04/07/67f386a7f3717.png" alt="Human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
+        <img src="https://www.helloimg.com/i/2025/04/07/67f38681d9c24.jpg" alt="Non-human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
+        <img src="https://www.helloimg.com/i/2025/04/07/67f38684117d0.jpg" alt="Background Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);">
+    </div>
+    <p style="font-size: 0.9rem; margin-top: 0.5rem;">Prompt: A man feeding a bird in the park.</p>
+</div>
+We provide a set of evaluation metrics for elements-to-video models and a leaderboard to show the performance of different models.
+Evaluation metrics include:
+- Elements Consistency: Measures character id consistency using arcface human recognition model, and measures object and background consistency using CLIP model.
+- Video Quality: Measures video quality on image quality, dynamic degree, aesthetic quality and motion smoothness.
+- T2V Metrics: Measures text-video consistency using CLIP
+You can check [Metric Introduction](https://skyworkai.github.io/skyreels-a2.github.io/static/images/bench.png) for more details.
+The leaderboard ranks the models based on the comprehensive score, which is the weighted average of all the metrics. We give T2V metrics and object consistency metrics higher weights.
+You can click a model name to visit its project page. In the meantime, you can upload your model result **zip file** to the leaderboard by clicking the "Evaluation" button. The zip file should include the video results named as "0.mp4", "1.mp4", "2.mp4", etc.
+'''  # noqa E501
+_CITE_ = r"""
+If A2-Bench is helpful, please help to ⭐ the <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'> Github Repo</a>. Thanks!
+---
+📧 **Contact**
+If you have any questions or feedback, feel free to open a discussion or contact <b>[email protected]</b>.
+"""  # noqa E501
+def upload_file(files):
+    target_dir = os.path.join(TEMP_DIR, f'{files.name}')
+    os.makedirs(target_dir, exist_ok=True)
+    shutil.move(files.name, target_dir)
+    return target_dir
+with gr.Blocks(css=".gr-dataframe a {text-decoration: none; color: inherit;}")  as demo:
+    gr.Markdown(_HEADER_)
+    with gr.Group():
+        table = gr.DataFrame(
+            value=display_table(),
+            datatype=['markdown', 'str'],
+            interactive=False,  # 允许编辑
+            headers=HEADER,
+        )
+        Refresh = gr.Button("Refresh")
+        Refresh.click(display_table, outputs=table)
+    with gr.Group():
+        upload_btn = gr.UploadButton("Click to Upload a File", file_types=[".zip"], file_count="single")
+        with gr.Row():
+            model_name_input = gr.Textbox(label="Model Name", placeholder="Required:Enter your model name")
+            org_link_input = gr.Textbox(label="Project Page", placeholder="Optional:Enter project page, will show on leaderboard")
+        file_output = gr.File()
+        upload_btn.upload(upload_file, upload_btn, file_output)
+        evaluation_btn = gr.Button("Evaluation")
+        output_message = gr.Textbox(label="Evaluation Status", interactive=False)
+        evaluation_btn.click(
+            evaluation,
+            inputs=[file_output, model_name_input, org_link_input],
+            outputs=output_message,
+            api_name="evaluate",
+        )
+    gr.Markdown(_CITE_)
+if __name__ == "__main__":
+    demo.launch()
+    # demo.launch(share=True)