Commit c97bae1
Jae-Won Chung committed
Parent(s): abd945c

Updated diffusion benchmark and data
This view is limited to 50 files because it contains too many changes.
- .gitignore +1 -1
- benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml +4 -4
- benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml +4 -4
- benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py +1 -1
- benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py +26 -15
- benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py +41 -35
- benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py +1 -1
- benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml +1 -1
- benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py +2 -1
- benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py +49 -8
- benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py +20 -18
- benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml +1 -1
- benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml +1 -1
- benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py +2 -1
- benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py +11 -11
- benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py +37 -35
- data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json +2 -2
- data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json +2 -2
- data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json +2 -2
- data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json +9 -0
- data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json +9 -0
- data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json +2 -2
- data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json +9 -0
- data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json +9 -0
- data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json +2 -2
- data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json +2 -2
- data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json +2 -2
- data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json +9 -0
- data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json +9 -0
- data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json +2 -2
- data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json +9 -0
- data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json +9 -0
- data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json +8 -0
- data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs32+steps25.json +8 -0
- data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs64+steps25.json +8 -0
- data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json +2 -2
- data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json +2 -2
.gitignore
CHANGED
@@ -18,4 +18,4 @@ build/
 
 # Data files
 *.log
-
+figures/
benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml
CHANGED
@@ -1,6 +1,6 @@
 - command:
-  - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes
+  - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 4 3 2 1 --power-limits 400 --num-inference-steps 1 2 4 8 16 25 30 40 50"
   model:
-  - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt'
-  - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14'
-  - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25'
+  - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt --width 1280 --height 720'
+  - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14 --width 1024 --height 576'
+  - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25 --width 1024 --height 576'
benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml
CHANGED
@@ -1,6 +1,6 @@
 - command:
-  - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/
+  - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 4 3 2 1 --power-limits 700 --num-inference-steps 1 2 4 8 16 25 30 40 50"
   model:
-
-
-
+  - "--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt --width 1280 --height 720"
+  - "--model stabilityai/stable-video-diffusion-img2vid --num-frames 14 --width 1024 --height 576"
+  - "--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25 --width 1024 --height 576"
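Taken together, the two image-to-video queue files above now sweep batch size and denoising steps jointly at a fixed power limit. A quick count of the datapoints this implies per GPU type, assuming every combination completes (larger batch sizes can OOM and are skipped by the launcher shown further below):

models = 3        # i2vgen-xl, svd-img2vid, svd-img2vid-xt
batch_sizes = 4   # 4 3 2 1
power_limits = 1  # 400 W on the A100 queue, 700 W on the H100 queue
step_counts = 9   # 1 2 4 8 16 25 30 40 50
print(models * batch_sizes * power_limits * step_counts)  # 108 image-to-video datapoints per GPU type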
benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py
CHANGED
@@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None:
     for model_dir in sorted(glob(f"{results_dir}/*/*")):
         model_name = "/".join(model_dir.split("/")[-2:])
         print(f" {model_name}")
-        result_file_cand = glob(f"{model_dir}/bs1+*+results.json")
+        result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json")
         assert len(result_file_cand) == 1, model_name
         results_data = json.load(open(result_file_cand[0]))
         denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer"
benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py
CHANGED
@@ -27,10 +27,10 @@ class Results:
     model: str
     num_parameters: dict[str, int]
     gpu_model: str
-    num_infernece_steps: int
-    num_frames: int
     power_limit: int
     batch_size: int
+    num_inference_steps: int
+    num_frames: int
     num_prompts: int
     total_runtime: float = 0.0
     total_energy: float = 0.0
@@ -80,6 +80,7 @@ def load_text_image_prompts(
     path: str,
     batch_size: int,
     num_batches: int | None = None,
+    image_resize: tuple[int, int] | None = None,
 ) -> tuple[int, list[tuple[list[str], list[Image.Image]]]]:
     """Load the dataset to feed the model and return it as a list of batches of prompts.
 
@@ -93,6 +94,9 @@ def load_text_image_prompts(
     dataset = json.load(open(path))
     assert len(dataset["caption"]) == len(dataset["video_id"])
 
+    dataset["caption"] *= 10
+    dataset["video_id"] *= 10
+
     if num_batches is not None:
         if len(dataset["caption"]) < num_batches * batch_size:
             raise ValueError("Not enough data for the requested number of batches.")
@@ -103,6 +107,8 @@ def load_text_image_prompts(
     dataset["first_frame"] = [
         load_image(str(image_path / f"{video_id}.jpg")) for video_id in dataset["video_id"]
     ]
+    if image_resize is not None:
+        dataset["first_frame"] = [image.resize(image_resize) for image in dataset["first_frame"]]
 
     batched = [
         (dataset["caption"][i : i + batch_size], dataset["first_frame"][i : i + batch_size])
@@ -135,8 +141,8 @@ def benchmark(args: argparse.Namespace) -> None:
 
     results_dir = Path(args.result_root) / args.model
     results_dir.mkdir(parents=True, exist_ok=True)
-    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}")
-    video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated"
+    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}")
+    video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated"
     video_dir.mkdir(exist_ok=True)
 
     arg_out_filename = f"{benchmark_name}+args.json"
@@ -150,11 +156,16 @@ def benchmark(args: argparse.Namespace) -> None:
     pynvml.nvmlInit()
     handle = pynvml.nvmlDeviceGetHandleByIndex(0)
     gpu_model = pynvml.nvmlDeviceGetName(handle)
-    pynvml.nvmlDeviceSetPersistenceMode(handle, pynvml.NVML_FEATURE_ENABLED)
-    pynvml.nvmlDeviceSetPowerManagementLimit(handle, args.power_limit * 1000)
+    # pynvml.nvmlDeviceSetPersistenceMode(handle, pynvml.NVML_FEATURE_ENABLED)
+    # pynvml.nvmlDeviceSetPowerManagementLimit(handle, args.power_limit * 1000)
     pynvml.nvmlShutdown()
 
-    num_prompts, batched_prompts = load_text_image_prompts(
+    num_prompts, batched_prompts = load_text_image_prompts(
+        args.dataset_path,
+        args.batch_size,
+        args.num_batches,
+        (args.width, args.height),
+    )
 
     pipeline = get_pipeline(args.model)
@@ -189,7 +200,7 @@ def benchmark(args: argparse.Namespace) -> None:
     fps_param_name = fps_param_name_candidates[0]
 
     torch.cuda.reset_peak_memory_stats(device="cuda:0")
-    zeus_monitor.begin_window("benchmark",
+    zeus_monitor.begin_window("benchmark", sync_execution=False)
 
     # Build common parameter dict for all batches
     params: dict[str, Any] = dict(
@@ -210,15 +221,15 @@ def benchmark(args: argparse.Namespace) -> None:
         if args.add_text_prompt:
             params["prompt"] = intermediate.prompts
 
-        zeus_monitor.begin_window("batch",
+        zeus_monitor.begin_window("batch", sync_execution=False)
         frames = pipeline(**params).frames
-        batch_measurements = zeus_monitor.end_window("batch",
+        batch_measurements = zeus_monitor.end_window("batch", sync_execution=False)
 
         intermediate.frames = frames
         intermediate.batch_latency = batch_measurements.time
         intermediate.batch_energy = batch_measurements.total_energy
 
-    measurements = zeus_monitor.end_window("benchmark",
+    measurements = zeus_monitor.end_window("benchmark", sync_execution=False)
     peak_memory = torch.cuda.max_memory_allocated(device="cuda:0")
 
     results: list[Result] = []
@@ -255,10 +266,10 @@ def benchmark(args: argparse.Namespace) -> None:
         model=args.model,
         num_parameters=count_parameters(pipeline),
         gpu_model=gpu_model,
-        num_infernece_steps=args.num_inference_steps,
-        num_frames=args.num_frames,
         power_limit=args.power_limit,
         batch_size=args.batch_size,
+        num_inference_steps=args.num_inference_steps,
+        num_frames=args.num_frames,
         num_prompts=num_prompts,
         total_runtime=measurements.time,
         total_energy=measurements.total_energy,
@@ -289,8 +300,8 @@ if __name__ == "__main__":
     parser.add_argument("--num-inference-steps", type=int, default=50, help="The number of denoising steps.")
    parser.add_argument("--num-frames", type=int, default=1, help="The number of frames to generate.")
    parser.add_argument("--fps", type=int, default=16, help="Frames per second for micro-conditioning.")
-    parser.add_argument("--height", type=int, help="Height of the generated video.")
-    parser.add_argument("--width", type=int, help="Width of the generated video.")
+    parser.add_argument("--height", type=int, required=True, help="Height of the generated video.")
+    parser.add_argument("--width", type=int, required=True, help="Width of the generated video.")
     parser.add_argument("--num-batches", type=int, default=None, help="The number of batches to use from the dataset.")
     parser.add_argument("--save-every", type=int, default=10, help="Save generations to file every N prompts.")
     parser.add_argument("--seed", type=int, default=0, help="The seed to use for the RNG.")
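For orientation, here is a minimal sketch of what the new image_resize path in load_text_image_prompts amounts to: when --width and --height are given, every conditioning frame is resized before batching. The helper name and the frames directory below are hypothetical; only the resize call and the dataset keys come from the diff above.

import json
from pathlib import Path

from PIL import Image


def load_first_frames(dataset_path: str, frames_dir: str, size: tuple[int, int] | None) -> list[Image.Image]:
    """Load the first frame of each listed video and optionally resize it to (width, height)."""
    dataset = json.load(open(dataset_path))
    frames = [Image.open(Path(frames_dir) / f"{video_id}.jpg") for video_id in dataset["video_id"]]
    if size is not None:
        frames = [frame.resize(size) for frame in frames]
    return frames


# Hypothetical usage, mirroring the A100 queue entry for i2vgen-xl (--width 1280 --height 720):
# frames = load_first_frames("sharegpt4video/sharegpt4video_100.json", "sharegpt4video/frames", (1280, 720))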
benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py
CHANGED
@@ -28,44 +28,48 @@ def main(args: argparse.Namespace) -> None:
     print_and_write(outfile, f"Benchmarking {args.model}\n")
     print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n")
     print_and_write(outfile, f"Power limits: {args.power_limits}\n")
+    print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n")
 
     for batch_size in args.batch_sizes:
         for power_limit in args.power_limits:
-            # (removed launch body; the original lines are not shown in this truncated view)
+            for num_inference_steps in args.num_inference_steps:
+                print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True)
+                with subprocess.Popen(
+                    args=[
+                        "docker", "run",
+                        "--gpus", '"device=' + ','.join(args.gpu_ids) + '"',
+                        "--cap-add", "SYS_ADMIN",
+                        "--name", f"leaderboard-i2v-{''.join(args.gpu_ids)}",
+                        "--rm",
+                        "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface",
+                        "-v", f"{os.getcwd()}:/workspace/image-to-video",
+                        "mlenergy/leaderboard:diffusion-i2v",
+                        "--dataset-path", args.dataset_path,
+                        "--result-root", args.result_root,
+                        "--batch-size", batch_size,
+                        "--num-batches", "8",
+                        "--power-limit", power_limit,
+                        "--model", args.model,
+                        "--huggingface-token", hf_token,
+                        "--num-frames", args.num_frames,
+                        "--num-inference-steps", num_inference_steps,
+                        "--width", str(args.width),
+                        "--height", str(args.height),
+                    ] + (["--add-text-prompt"] if args.add_text_prompt else []),
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                    text=True,
+                ) as proc:
+                    if proc.stdout:
+                        i = 0
+                        for line in proc.stdout:
+                            print_and_write(outfile, line, flush=i % 50 == 0)
+                            i += 1
 
+                # If proc exited with non-zero status, it's probably an OOM.
+                # Move on to the next batch size.
+                if proc.returncode != 0:
+                    break
 
 
@@ -77,8 +81,10 @@ if __name__ == "__main__":
     parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark")
     parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark")
     parser.add_argument("--num-frames", type=str, help="Number of frames to generate")
-    parser.add_argument("--num-inference-steps", type=str, help="Number of
+    parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "30", "40", "50"], help="Number of inference steps to run")
     parser.add_argument("--add-text-prompt", action="store_true", help="Input text prompt alongside image.")
+    parser.add_argument("--height", type=int, required=True, help="Height of the generated video.")
+    parser.add_argument("--width", type=int, required=True, help="Width of the generated video.")
     parser.add_argument("--dataset-path", type=str, help="Path to the dataset JSON file.")
     args = parser.parse_args()
     main(args)
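The launcher above gained a third sweep dimension (denoising steps) on top of batch size and power limit, and it still bails out of a batch size when the containerized run fails. A condensed sketch of that control flow, with the docker command abbreviated into a hypothetical launch_cmd builder:

import subprocess


def sweep(batch_sizes, power_limits, step_counts, launch_cmd) -> None:
    """Run one containerized datapoint per (batch size, power limit, steps) combination."""
    for batch_size in batch_sizes:
        for power_limit in power_limits:
            for steps in step_counts:
                # launch_cmd returns the full `docker run ...` argument list for one datapoint.
                proc = subprocess.run(launch_cmd(batch_size, power_limit, steps))
                # A non-zero exit is most likely an OOM, so stop sweeping step counts at
                # this batch size and move on (mirrors the `break` in the diff above).
                if proc.returncode != 0:
                    break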
benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py
CHANGED
@@ -3,7 +3,7 @@ import json
 
 import cv2
 
-DATASET_PATH = "
+DATASET_PATH = "sharegpt4video_100.json"
 
 
 def main() -> None:
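For reference, the helper above reads sharegpt4video_100.json and saves each video's first frame as {video_id}.jpg, which the image-to-video benchmark later loads as the conditioning image. A rough sketch under those assumptions (the video directory, file extension, and output directory are guesses; only the dataset path and the use of cv2 come from the file itself):

import json

import cv2

DATASET_PATH = "sharegpt4video_100.json"


def extract_first_frames(video_dir: str = "videos", out_dir: str = "frames") -> None:
    dataset = json.load(open(DATASET_PATH))
    for video_id in dataset["video_id"]:
        cap = cv2.VideoCapture(f"{video_dir}/{video_id}.mp4")  # hypothetical directory layout
        ok, frame = cap.read()  # grab only the first frame
        cap.release()
        if ok:
            cv2.imwrite(f"{out_dir}/{video_id}.jpg", frame)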
benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml
CHANGED
@@ -1,5 +1,5 @@
 - command:
-  - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --gpu-ids {{ gpu }} --batch-sizes 16 8 4 2 1 --power-limits 400"
+  - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --gpu-ids {{ gpu }} --batch-sizes 64 32 16 8 4 2 1 --num-inference-steps 1 2 4 8 16 25 30 40 50 --power-limits 400"
   model:
   - stabilityai/stable-diffusion-2-1
   - stabilityai/stable-diffusion-xl-base-1.0
benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py
CHANGED
@@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None:
     for model_dir in sorted(glob(f"{results_dir}/*/*")):
         model_name = "/".join(model_dir.split("/")[-2:])
         print(f" {model_name}")
-        result_file_cand = glob(f"{model_dir}/bs1+*+results.json")
+        result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json")
         assert len(result_file_cand) == 1, model_name
         results_data = json.load(open(result_file_cand[0]))
         denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer"
@@ -24,6 +24,7 @@ def main(results_dir: Path, output_file: Path) -> None:
             nickname=model_name.split("/")[-1].replace("-", " ").title(),
             total_params=raw_params_to_readable(sum(results_data["num_parameters"].values())),
             denoising_params=raw_params_to_readable(results_data["num_parameters"][denosing_module_name]),
+            resolution="NA",
         )
         assert model_name not in models
         models[model_name] = model_info
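The glob change reflects the new result-file naming scheme, bs{batch}+pl{power}+steps{steps}+results.json, so the aggregator now pins each leaderboard entry to the 25-step, batch-size-1 run. A small sketch of that lookup (field names follow the Results dataclasses in the datapoint scripts; treat it as an illustration rather than the exact aggregator):

import json
from glob import glob


def pick_bs1_steps25(model_dir: str) -> dict:
    """Return a summary of the batch-size-1, 25-step result file for one model directory."""
    candidates = glob(f"{model_dir}/bs1+*+steps25+results.json")
    assert len(candidates) == 1, model_dir
    data = json.load(open(candidates[0]))
    # The denoising module is either a UNet or a DiT-style transformer.
    module = "unet" if "unet" in data["num_parameters"] else "transformer"
    return {"model": data["model"], "denoising_params": data["num_parameters"][module]}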
benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py
CHANGED
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
 import os
+import time
 import json
 import argparse
+import multiprocessing as mp
 from pprint import pprint
 from pathlib import Path
 from contextlib import suppress
@@ -11,6 +13,7 @@ from dataclasses import dataclass, field, asdict
 import torch
 import pynvml
 import numpy as np
+import pandas as pd
 from PIL import Image
 from datasets import load_dataset, Dataset
 from transformers.trainer_utils import set_seed
@@ -35,9 +38,9 @@ class Results:
     model: str
     num_parameters: dict[str, int]
     gpu_model: str
-    num_inference_steps: int
     power_limit: int
     batch_size: int
+    num_inference_steps: int
     num_prompts: int
     average_clip_score: float = 0.0
     total_runtime: float = 0.0
@@ -118,6 +121,28 @@ def load_partiprompts(
     return len(batched) * batch_size, batched
 
 
+def power_monitor(csv_path: str, gpu_indices: list[int], chan: mp.SimpleQueue) -> None:
+    pynvml.nvmlInit()
+    handles = [pynvml.nvmlDeviceGetHandleByIndex(i) for i in gpu_indices]
+
+    fields = [
+        (pynvml.NVML_FI_DEV_POWER_AVERAGE, pynvml.NVML_POWER_SCOPE_GPU),
+        (pynvml.NVML_FI_DEV_POWER_AVERAGE, pynvml.NVML_POWER_SCOPE_MEMORY),
+    ]
+
+    columns = ["timestamp"] + sum([[f"gpu{i}", f"vram{i}"] for i in gpu_indices], [])
+    power: list[list] = []
+    while chan.empty():
+        row = [time.monotonic()]
+        values = [pynvml.nvmlDeviceGetFieldValues(h, fields) for h in handles]
+        for value in values:
+            row.extend((value[0].value.uiVal, value[1].value.uiVal))
+        power.append(row)
+        time.sleep(max(0.0, 0.1 - (time.monotonic() - row[0])))
+
+    pd.DataFrame(power, columns=columns).to_csv(csv_path, index=False)
+
+
 def calculate_clip_score(
     model: CLIPModel,
     processor: CLIPProcessor,
@@ -183,8 +208,8 @@ def benchmark(args: argparse.Namespace) -> None:
 
     results_dir = Path(args.result_root) / args.model
     results_dir.mkdir(parents=True, exist_ok=True)
-    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}")
-    image_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated"
+    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}")
+    image_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated"
     image_dir.mkdir(exist_ok=True)
 
     arg_out_filename = f"{benchmark_name}+args.json"
@@ -222,27 +247,42 @@ def benchmark(args: argparse.Namespace) -> None:
         ResultIntermediateBatched(prompts=batch) for batch in batched_prompts
     ]
 
+    pmon = None
+    pmon_chan = None
+    if args.monitor_power:
+        pmon_chan = mp.SimpleQueue()
+        pmon = mp.get_context("spawn").Process(
+            target=power_monitor,
+            args=(f"{benchmark_name}+power.csv", [g.gpu_index for g in zeus_monitor.gpus.gpus], pmon_chan),
+        )
+        pmon.start()
+
     torch.cuda.reset_peak_memory_stats(device="cuda:0")
-    zeus_monitor.begin_window("benchmark",
+    zeus_monitor.begin_window("benchmark", sync_execution=False)
 
     for ind, intermediate in enumerate(intermediates):
         print(f"Batch {ind + 1}/{len(intermediates)}")
-        zeus_monitor.begin_window("batch",
+        zeus_monitor.begin_window("batch", sync_execution=False)
         images = pipeline(
             intermediate.prompts,
             generator=rng,
             num_inference_steps=args.num_inference_steps,
            output_type="np",
        ).images
-        batch_measurements = zeus_monitor.end_window("batch",
+        batch_measurements = zeus_monitor.end_window("batch", sync_execution=False)
 
         intermediate.images = images
         intermediate.batch_latency = batch_measurements.time
         intermediate.batch_energy = batch_measurements.total_energy
 
-    measurements = zeus_monitor.end_window("benchmark",
+    measurements = zeus_monitor.end_window("benchmark", sync_execution=False)
     peak_memory = torch.cuda.max_memory_allocated(device="cuda:0")
 
+    if pmon is not None and pmon_chan is not None:
+        pmon_chan.put("stop")
+        pmon.join(timeout=5.0)
+        pmon.terminate()
+
     # Scale images to [0, 256] and convert to uint8
     for intermediate in intermediates:
         intermediate.images = (intermediate.images * 255).astype("uint8")
@@ -292,9 +332,9 @@ def benchmark(args: argparse.Namespace) -> None:
         model=args.model,
         num_parameters=count_parameters(pipeline),
         gpu_model=gpu_model,
-        num_inference_steps=args.num_inference_steps,
         power_limit=args.power_limit,
         batch_size=args.batch_size,
+        num_inference_steps=args.num_inference_steps,
         num_prompts=num_prompts,
         average_clip_score=sum(r.clip_score for r in results) / len(results),
         total_runtime=measurements.time,
@@ -326,6 +366,7 @@ if __name__ == "__main__":
     parser.add_argument("--image-save-every", type=int, default=10, help="Save images to file every N prompts.")
     parser.add_argument("--seed", type=int, default=0, help="The seed to use for the RNG.")
     parser.add_argument("--huggingface-token", type=str, help="The HuggingFace token to use.")
+    parser.add_argument("--monitor-power", default=False, action="store_true", help="Whether to monitor power over time.")
     args = parser.parse_args()
 
     benchmark(args)
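The new power_monitor runs in a separate process, samples NVML power for each GPU and its memory at roughly 10 Hz, and stops as soon as anything is put on its queue, at which point it flushes a CSV. A minimal sketch of that start/stop protocol around an arbitrary workload (the sampler itself is the one from the diff above; the wrapper function is hypothetical):

import multiprocessing as mp


def run_with_power_log(workload, power_monitor, csv_path: str, gpu_indices: list[int]) -> None:
    """Start the sampler in a spawned process, run the workload, then signal the sampler to stop."""
    chan = mp.SimpleQueue()
    pmon = mp.get_context("spawn").Process(target=power_monitor, args=(csv_path, gpu_indices, chan))
    pmon.start()
    try:
        workload()
    finally:
        chan.put("stop")        # any object works; the sampler only checks chan.empty()
        pmon.join(timeout=5.0)  # give it a moment to write the CSV
        pmon.terminate()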
benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py
CHANGED
@@ -28,12 +28,13 @@ def main(args: argparse.Namespace) -> None:
     print_and_write(outfile, f"Benchmarking {args.model}\n")
     print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n")
     print_and_write(outfile, f"Power limits: {args.power_limits}\n")
+    print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n")
 
     for batch_size in args.batch_sizes:
         for power_limit in args.power_limits:
-            # (removed launch preamble; the original lines are not shown in this truncated view)
+            for num_inference_steps in args.num_inference_steps:
+                print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True)
+                cmd=[
                     "docker", "run",
                     "--gpus", '"device=' + ','.join(args.gpu_ids) + '"',
                     "--cap-add", "SYS_ADMIN",
@@ -48,22 +49,21 @@ def main(args: argparse.Namespace) -> None:
                     "--power-limit", power_limit,
                     "--model", args.model,
                     "--huggingface-token", hf_token,
-                    "--num-inference-steps",
-                ]
-                # (removed subprocess handling; the original lines are not shown in this truncated view)
-                            i += 1
+                    "--num-inference-steps", num_inference_steps,
+                ]
+                if args.monitor_power:
+                    cmd.append("--monitor-power")
+                with subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
+                    if proc.stdout:
+                        i = 0
+                        for line in proc.stdout:
+                            print_and_write(outfile, line, flush=i % 50 == 0)
+                            i += 1
 
-                # (removed OOM handling; the original lines are not shown in this truncated view)
+                # If proc exited with non-zero status, it's probably an OOM.
+                # Move on to the next batch size.
+                if proc.returncode != 0:
+                    break
 
 
@@ -74,5 +74,7 @@ if __name__ == "__main__":
     parser.add_argument("--gpu-ids", type=str, nargs="+", help="GPU IDs to use")
     parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark")
     parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark")
+    parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "25", "30", "40", "50"], help="Number of inference steps to run")
+    parser.add_argument("--monitor-power", default=False, action="store_true", help="Whether to monitor power over time.")
     args = parser.parse_args()
     main(args)
benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml
CHANGED
@@ -1,5 +1,5 @@
 - command:
-  - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 16 8 4 2 1 --power-limits 400 --num-inference-steps 25 --num-frames 16"
+  - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 32 16 8 4 2 1 --power-limits 400 --num-inference-steps 1 2 4 8 16 25 30 40 50 --num-frames 16"
   model:
   - ali-vilab/text-to-video-ms-1.7b
   - guoyww/animatediff-motion-adapter-v1-5-3
benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml
CHANGED
@@ -1,5 +1,5 @@
 - command:
-  - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/
+  - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 32 16 8 4 2 1 --power-limits 700 --num-inference-steps 1 2 4 8 16 25 30 40 50 --num-frames 16"
   model:
   - ali-vilab/text-to-video-ms-1.7b
   - guoyww/animatediff-motion-adapter-v1-5-3
benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py
CHANGED
@@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None:
     for model_dir in sorted(glob(f"{results_dir}/*/*")):
         model_name = "/".join(model_dir.split("/")[-2:])
         print(f" {model_name}")
-        result_file_cand = glob(f"{model_dir}/bs1+*+results.json")
+        result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json")
         assert len(result_file_cand) == 1, model_name
         results_data = json.load(open(result_file_cand[0]))
         denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer"
@@ -24,6 +24,7 @@ def main(results_dir: Path, output_file: Path) -> None:
             nickname=model_name.split("/")[-1].replace("-", " ").title(),
             total_params=raw_params_to_readable(sum(results_data["num_parameters"].values())),
             denoising_params=raw_params_to_readable(results_data["num_parameters"][denosing_module_name]),
+            resolution="NA",
         )
         assert model_name not in models
         models[model_name] = model_info
benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py
CHANGED
@@ -32,10 +32,10 @@ class Results:
     model: str
     num_parameters: dict[str, int]
     gpu_model: str
-    num_inference_steps: int
-    num_frames: int
     power_limit: int
     batch_size: int
+    num_inference_steps: int
+    num_frames: int
     num_prompts: int
     total_runtime: float = 0.0
     total_energy: float = 0.0
@@ -119,7 +119,7 @@ def load_text_prompts(
     Returns:
         Total number of prompts and a list of batches of prompts.
     """
-    dataset = json.load(open(path))["caption"]
+    dataset = json.load(open(path))["caption"] * 10
     if num_batches is not None:
         if len(dataset) < num_batches * batch_size:
             raise ValueError("Dataset is too small for the given number of batches.")
@@ -151,8 +151,8 @@ def benchmark(args: argparse.Namespace) -> None:
 
     results_dir = Path(args.result_root) / args.model
     results_dir.mkdir(parents=True, exist_ok=True)
-    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}")
-    video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated"
+    benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}")
+    video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated"
     video_dir.mkdir(exist_ok=True)
 
     arg_out_filename = f"{benchmark_name}+args.json"
@@ -190,7 +190,7 @@ def benchmark(args: argparse.Namespace) -> None:
     ]
 
     torch.cuda.reset_peak_memory_stats(device="cuda:0")
-    zeus_monitor.begin_window("benchmark",
+    zeus_monitor.begin_window("benchmark", sync_execution=False)
 
     # Build common parameter dict for all batches
     params: dict[str, Any] = dict(
@@ -208,15 +208,15 @@ def benchmark(args: argparse.Namespace) -> None:
 
         params["prompt"] = intermediate.prompts
 
-        zeus_monitor.begin_window("batch",
+        zeus_monitor.begin_window("batch", sync_execution=False)
         frames = pipeline(**params).frames
-        batch_measurements = zeus_monitor.end_window("batch",
+        batch_measurements = zeus_monitor.end_window("batch", sync_execution=False)
 
         intermediate.frames = frames
         intermediate.batch_latency = batch_measurements.time
         intermediate.batch_energy = batch_measurements.total_energy
 
-    measurements = zeus_monitor.end_window("benchmark",
+    measurements = zeus_monitor.end_window("benchmark", sync_execution=False)
     peak_memory = torch.cuda.max_memory_allocated(device="cuda:0")
 
     results: list[Result] = []
@@ -253,10 +253,10 @@ def benchmark(args: argparse.Namespace) -> None:
         model=args.model,
         num_parameters=count_parameters(pipeline),
         gpu_model=gpu_model,
-        num_inference_steps=args.num_inference_steps,
-        num_frames=args.num_frames,
         power_limit=args.power_limit,
         batch_size=args.batch_size,
+        num_inference_steps=args.num_inference_steps,
+        num_frames=args.num_frames,
         num_prompts=num_prompts,
         total_runtime=measurements.time,
         total_energy=measurements.total_energy,
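The text-to-video prompt loader now replicates the 100-caption ShareGPT4Video subset tenfold so that the larger batch-size and step sweeps do not run out of prompts. A sketch of the loading and batching this implies (the slicing is shown explicitly here; the truncation to num_batches is an assumption about the elided part of the function):

import json


def load_text_prompts(path: str, batch_size: int, num_batches: int | None = None):
    """Load captions, replicate them tenfold, and slice them into batches."""
    captions = json.load(open(path))["caption"] * 10
    if num_batches is not None:
        if len(captions) < num_batches * batch_size:
            raise ValueError("Dataset is too small for the given number of batches.")
        captions = captions[: num_batches * batch_size]
    batched = [captions[i : i + batch_size] for i in range(0, len(captions), batch_size)]
    return len(batched) * batch_size, batched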
benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py
CHANGED
@@ -28,44 +28,46 @@ def main(args: argparse.Namespace) -> None:
     print_and_write(outfile, f"Benchmarking {args.model}\n")
     print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n")
     print_and_write(outfile, f"Power limits: {args.power_limits}\n")
+    print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n")
 
     for batch_size in args.batch_sizes:
         for power_limit in args.power_limits:
-            # (removed launch body; the original lines are not shown in this truncated view)
+            for num_inference_steps in args.num_inference_steps:
+                print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True)
+                with subprocess.Popen(
+                    args=[
+                        "docker", "run",
+                        "--gpus", '"device=' + ','.join(args.gpu_ids) + '"',
+                        "--cap-add", "SYS_ADMIN",
+                        "--name", f"leaderboard-t2v-{''.join(args.gpu_ids)}",
+                        "--rm",
+                        "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface",
+                        "-v", f"{os.getcwd()}:/workspace/text-to-video",
+                        "mlenergy/leaderboard:diffusion-t2v",
+                        "--result-root", args.result_root,
+                        "--batch-size", batch_size,
+                        "--num-batches", "10",
+                        "--power-limit", power_limit,
+                        "--model", args.model,
+                        "--dataset-path", args.dataset_path,
+                        "--huggingface-token", hf_token,
+                        "--num-inference-steps", num_inference_steps,
+                        "--num-frames", args.num_frames,
+                    ],
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                    text=True,
+                ) as proc:
+                    if proc.stdout:
+                        i = 0
+                        for line in proc.stdout:
+                            print_and_write(outfile, line, flush=i % 50 == 0)
+                            i += 1
 
+                # If proc exited with non-zero status, it's probably an OOM.
+                # Move on to the next batch size.
+                if proc.returncode != 0:
+                    break
 
 
@@ -76,7 +78,7 @@ if __name__ == "__main__":
     parser.add_argument("--gpu-ids", type=str, nargs="+", help="GPU IDs to use")
     parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark")
     parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark")
-    parser.add_argument("--num-inference-steps", type=str,
+    parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "25", "30", "40", "50"], help="Number of denoising steps")
     parser.add_argument("--num-frames", type=str, required=True, help="Number of frames to generate")
     parser.add_argument("--dataset-path", type=str, help="Path to the dataset JSON file.")
     args = parser.parse_args()
data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"Model": "ali-vilab/i2vgen-xl",
|
| 3 |
"GPU": "NVIDIA A100-SXM4-40GB",
|
| 4 |
-
"Energy/video (J)":
|
| 5 |
-
"Batch latency (s)":
|
| 6 |
"Batch size": 1,
|
| 7 |
"Denoising steps": 25,
|
| 8 |
"Frames": 16
|
|
|
|
| 1 |
{
|
| 2 |
"Model": "ali-vilab/i2vgen-xl",
|
| 3 |
"GPU": "NVIDIA A100-SXM4-40GB",
|
| 4 |
+
"Energy/video (J)": 16915.850124999997,
|
| 5 |
+
"Batch latency (s)": 46.14208295941353,
|
| 6 |
"Batch size": 1,
|
| 7 |
"Denoising steps": 25,
|
| 8 |
"Frames": 16
|
data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"Model": "ali-vilab/i2vgen-xl",
|
| 3 |
"GPU": "NVIDIA A100-SXM4-40GB",
|
| 4 |
-
"Energy/video (J)":
|
| 5 |
-
"Batch latency (s)":
|
| 6 |
"Batch size": 2,
|
| 7 |
"Denoising steps": 25,
|
| 8 |
"Frames": 16
|
|
|
|
| 1 |
{
|
| 2 |
"Model": "ali-vilab/i2vgen-xl",
|
| 3 |
"GPU": "NVIDIA A100-SXM4-40GB",
|
| 4 |
+
"Energy/video (J)": 16496.045437499997,
|
| 5 |
+
"Batch latency (s)": 89.03019031882286,
|
| 6 |
"Batch size": 2,
|
| 7 |
"Denoising steps": 25,
|
| 8 |
"Frames": 16
|
data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"Model": "stabilityai/stable-video-diffusion-img2vid-xt",
|
| 3 |
"GPU": "NVIDIA A100-SXM4-40GB",
|
| 4 |
-
"Energy/video (J)":
|
| 5 |
-
"Batch latency (s)": 42.
|
| 6 |
"Batch size": 1,
|
| 7 |
"Denoising steps": 25,
|
| 8 |
"Frames": 25
|
|
|
|
| 1 |
{
|
| 2 |
"Model": "stabilityai/stable-video-diffusion-img2vid-xt",
|
| 3 |
"GPU": "NVIDIA A100-SXM4-40GB",
|
| 4 |
+
"Energy/video (J)": 15709.767625000095,
|
| 5 |
+
"Batch latency (s)": 42.397395104169846,
|
| 6 |
"Batch size": 1,
|
| 7 |
"Denoising steps": 25,
|
| 8 |
"Frames": 25
|
data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Model": "stabilityai/stable-video-diffusion-img2vid-xt",
|
| 3 |
+
"GPU": "NVIDIA A100-SXM4-40GB",
|
| 4 |
+
"Energy/video (J)": 15291.016625000047,
|
| 5 |
+
"Batch latency (s)": 82.90474811196327,
|
| 6 |
+
"Batch size": 2,
|
| 7 |
+
"Denoising steps": 25,
|
| 8 |
+
"Frames": 25
|
| 9 |
+
}
|
data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Model": "stabilityai/stable-video-diffusion-img2vid-xt",
|
| 3 |
+
"GPU": "NVIDIA A100-SXM4-40GB",
|
| 4 |
+
"Energy/video (J)": 14761.389999999976,
|
| 5 |
+
"Batch latency (s)": 120.65004900523594,
|
| 6 |
+
"Batch size": 3,
|
| 7 |
+
"Denoising steps": 25,
|
| 8 |
+
"Frames": 25
|
| 9 |
+
}
|
data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "stabilityai/stable-video-diffusion-img2vid",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/video (J)":
-  "Batch latency (s)": 24.
+  "Energy/video (J)": 9066.434124999912,
+  "Batch latency (s)": 24.369865357875824,
   "Batch size": 1,
   "Denoising steps": 25,
   "Frames": 14

data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json
ADDED
@@ -0,0 +1,9 @@
+{
+  "Model": "stabilityai/stable-video-diffusion-img2vid",
+  "GPU": "NVIDIA A100-SXM4-40GB",
+  "Energy/video (J)": 8835.22312499996,
+  "Batch latency (s)": 47.65615049004555,
+  "Batch size": 2,
+  "Denoising steps": 25,
+  "Frames": 14
+}

data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json
ADDED
@@ -0,0 +1,9 @@
+{
+  "Model": "stabilityai/stable-video-diffusion-img2vid",
+  "GPU": "NVIDIA A100-SXM4-40GB",
+  "Energy/video (J)": 8683.536285714292,
+  "Batch latency (s)": 70.55723374230521,
+  "Batch size": 3,
+  "Denoising steps": 25,
+  "Frames": 14
+}
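Each of the image-to-video entries above is a small standalone JSON file: the configuration is encoded in the path (`data/diffusion/image-to-video/<GPU>/<org>/<model>/bs{B}+steps{S}+frames{F}.json`) and the measurements live in the body. As a rough, unofficial sketch (assuming the script is run from the repository root; the glob pattern and field names simply mirror the files shown here), the results can be collected and compared across batch sizes like this:

```python
import json
from pathlib import Path

# Assumed location of the data tree, relative to the repository root.
DATA_ROOT = Path("data/diffusion/image-to-video")

records = []
# Layout: <GPU>/<org>/<model>/bs{B}+steps{S}+frames{F}.json
for path in sorted(DATA_ROOT.glob("*/*/*/bs*+steps*+frames*.json")):
    records.append(json.loads(path.read_text()))

# Example: energy per video vs. batch size for SVD-XT on the A100.
for r in records:
    if r["GPU"] == "NVIDIA A100-SXM4-40GB" and r["Model"].endswith("img2vid-xt"):
        print(r["Batch size"], r["Energy/video (J)"], r["Batch latency (s)"])
```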
data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "ali-vilab/i2vgen-xl",
   "GPU": "NVIDIA H100 80GB HBM3",
-  "Energy/video (J)":
-  "Batch latency (s)":
+  "Energy/video (J)": 14867.419125000015,
+  "Batch latency (s)": 23.717748790979385,
   "Batch size": 1,
   "Denoising steps": 25,
   "Frames": 16

data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "ali-vilab/i2vgen-xl",
   "GPU": "NVIDIA H100 80GB HBM3",
-  "Energy/video (J)":
-  "Batch latency (s)":
+  "Energy/video (J)": 14348.508499999996,
+  "Batch latency (s)": 44.71498331427574,
   "Batch size": 2,
   "Denoising steps": 25,
   "Frames": 16
data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "stabilityai/stable-video-diffusion-img2vid-xt",
   "GPU": "NVIDIA H100 80GB HBM3",
-  "Energy/video (J)":
-  "Batch latency (s)": 20.
+  "Energy/video (J)": 13392.813624999952,
+  "Batch latency (s)": 20.788252592086792,
   "Batch size": 1,
   "Denoising steps": 25,
   "Frames": 25

data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json
ADDED
@@ -0,0 +1,9 @@
+{
+  "Model": "stabilityai/stable-video-diffusion-img2vid-xt",
+  "GPU": "NVIDIA H100 80GB HBM3",
+  "Energy/video (J)": 12901.83275000006,
+  "Batch latency (s)": 39.99498334527016,
+  "Batch size": 2,
+  "Denoising steps": 25,
+  "Frames": 25
+}

data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json
ADDED
@@ -0,0 +1,9 @@
+{
+  "Model": "stabilityai/stable-video-diffusion-img2vid-xt",
+  "GPU": "NVIDIA H100 80GB HBM3",
+  "Energy/video (J)": 12790.552809523862,
+  "Batch latency (s)": 59.380911929266794,
+  "Batch size": 3,
+  "Denoising steps": 25,
+  "Frames": 25
+}
data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "stabilityai/stable-video-diffusion-img2vid",
   "GPU": "NVIDIA H100 80GB HBM3",
-  "Energy/video (J)":
-  "Batch latency (s)": 12.
+  "Energy/video (J)": 7623.074500000104,
+  "Batch latency (s)": 12.191031396389008,
   "Batch size": 1,
   "Denoising steps": 25,
   "Frames": 14

data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json
ADDED
@@ -0,0 +1,9 @@
+{
+  "Model": "stabilityai/stable-video-diffusion-img2vid",
+  "GPU": "NVIDIA H100 80GB HBM3",
+  "Energy/video (J)": 7416.721437499975,
+  "Batch latency (s)": 23.368041068315506,
+  "Batch size": 2,
+  "Denoising steps": 25,
+  "Frames": 14
+}

data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json
ADDED
@@ -0,0 +1,9 @@
+{
+  "Model": "stabilityai/stable-video-diffusion-img2vid",
+  "GPU": "NVIDIA H100 80GB HBM3",
+  "Energy/video (J)": 7354.00133333333,
+  "Batch latency (s)": 34.5100462777274,
+  "Batch size": 3,
+  "Denoising steps": 25,
+  "Frames": 14
+}
data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "kandinsky-community/kandinsky-2-2-decoder",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 1.
+  "Energy/image (J)": 324.06850000005215,
+  "Batch latency (s)": 1.6537675857543945,
   "Batch size": 1,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "kandinsky-community/kandinsky-2-2-decoder",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 7.
+  "Energy/image (J)": 172.51030000000029,
+  "Batch latency (s)": 7.375234842300415,
   "Batch size": 16,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "kandinsky-community/kandinsky-2-2-decoder",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 1.
+  "Energy/image (J)": 230.3378000000026,
+  "Batch latency (s)": 1.5861663103103638,
   "Batch size": 2,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "Model": "kandinsky-community/kandinsky-2-2-decoder",
+  "GPU": "NVIDIA A100-SXM4-40GB",
+  "Energy/image (J)": 163.0797656249997,
+  "Batch latency (s)": 13.998618459701538,
+  "Batch size": 32,
+  "Denoising steps": 25
+}

data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "kandinsky-community/kandinsky-2-2-decoder",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 2.
+  "Energy/image (J)": 200.16462499999906,
+  "Batch latency (s)": 2.299217462539673,
   "Batch size": 4,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "kandinsky-community/kandinsky-2-2-decoder",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 4.
+  "Energy/image (J)": 184.9021625000052,
+  "Batch latency (s)": 4.0124232292175295,
   "Batch size": 8,
   "Denoising steps": 25
 }
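The text-to-image results follow the same convention, with `bs{B}+steps{S}.json` file names and an `Energy/image (J)` field instead of `Energy/video (J)`. A minimal sketch (again assuming the `data/` layout shown here rather than any official utility) that reads one model's files and prints how per-image energy changes with batch size:

```python
import json
from pathlib import Path

# Assumed path to one model's results; adjust the GPU and model as needed.
model_dir = Path(
    "data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder"
)

rows = []
for path in model_dir.glob("bs*+steps25.json"):
    r = json.loads(path.read_text())
    rows.append((r["Batch size"], r["Energy/image (J)"], r["Batch latency (s)"]))

# Sorted by batch size; per-image energy tends to drop as the batch grows.
for bs, energy, latency in sorted(rows):
    print(f"bs={bs:<3d}  energy/image={energy:7.1f} J  batch latency={latency:6.2f} s")
```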
data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "kandinsky-community/kandinsky-3",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 3.
+  "Energy/image (J)": 930.2532999999821,
+  "Batch latency (s)": 3.0359585523605346,
   "Batch size": 1,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "kandinsky-community/kandinsky-3",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 5.
+  "Energy/image (J)": 895.7575500000036,
+  "Batch latency (s)": 5.261959171295166,
   "Batch size": 2,
   "Denoising steps": 25
 }
data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "prompthero/openjourney-v4",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)":
+  "Energy/image (J)": 227.21699999999254,
+  "Batch latency (s)": 0.9210062503814698,
   "Batch size": 1,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "prompthero/openjourney-v4",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 6.
+  "Energy/image (J)": 156.51368749999673,
+  "Batch latency (s)": 6.559858226776123,
   "Batch size": 16,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "prompthero/openjourney-v4",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 1.
+  "Energy/image (J)": 188.78500000000932,
+  "Batch latency (s)": 1.1187455892562865,
   "Batch size": 2,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs32+steps25.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "Model": "prompthero/openjourney-v4",
+  "GPU": "NVIDIA A100-SXM4-40GB",
+  "Energy/image (J)": 154.23499999999768,
+  "Batch latency (s)": 12.850126147270203,
+  "Batch size": 32,
+  "Denoising steps": 25
+}

data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "prompthero/openjourney-v4",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 1.
+  "Energy/image (J)": 175.33082500000017,
+  "Batch latency (s)": 1.8664743423461914,
   "Batch size": 4,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs64+steps25.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "Model": "prompthero/openjourney-v4",
+  "GPU": "NVIDIA A100-SXM4-40GB",
+  "Energy/image (J)": 150.57691875000017,
+  "Batch latency (s)": 25.000647592544556,
+  "Batch size": 64,
+  "Denoising steps": 25
+}

data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "prompthero/openjourney-v4",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 3.
+  "Energy/image (J)": 163.7534500000067,
+  "Batch latency (s)": 3.423132634162903,
   "Batch size": 8,
   "Denoising steps": 25
 }
data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "segmind/SSD-1B",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 1.
+  "Energy/image (J)": 745.7899999999441,
+  "Batch latency (s)": 1.9644724607467652,
   "Batch size": 1,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "segmind/SSD-1B",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 3.
+  "Energy/image (J)": 700.4580500000156,
+  "Batch latency (s)": 3.6897377252578734,
   "Batch size": 2,
   "Denoising steps": 25
 }

data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "Model": "segmind/SSD-1B",
   "GPU": "NVIDIA A100-SXM4-40GB",
-  "Energy/image (J)":
-  "Batch latency (s)": 7.
+  "Energy/image (J)": 688.6121250000084,
+  "Batch latency (s)": 7.168970584869385,
   "Batch size": 4,
   "Denoising steps": 25
 }