# app.py: ByteMorpher image-editing demo (Hugging Face Space, running on ZeroGPU).
import numpy as np
import torch
from PIL import Image
from omegaconf import OmegaConf

import gradio as gr
import spaces

from image_datasets.dataset import image_resize

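# Inference settings (sample size, step count, CFG scale, seed, and the
# IP-adapter / spatial-condition switches) are loaded once at import time.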
args = OmegaConf.load("inference_configs/inference.yaml")
device = torch.device("cuda")
dtype = torch.bfloat16
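
# On a ZeroGPU Space, @spaces.GPU attaches a GPU to the process only for the
# duration of the decorated call, so the sampler is constructed inside
# generate() rather than at module level.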
@spaces.GPU
def generate(image: Image.Image, edit_prompt: str):
    # Imported lazily so CUDA initialization happens inside the GPU call.
    from src.flux.xflux_pipeline import XFluxSampler

    sampler = XFluxSampler(
        device=device,
        ip_loaded=False,
        spatial_condition=True,
        clip_image_processor=None,
        image_encoder=None,
        improj=None,
        share_position_embedding=True,
    )
    # Resize the condition image toward 512 px, snap both sides to multiples
    # of 32, and map pixel values from [0, 255] to [-1, 1].
    img = image_resize(image, 512)
    w, h = img.size
    img = img.resize(((w // 32) * 32, (h // 32) * 32))
    img = torch.from_numpy((np.array(img) / 127.5) - 1)
    img = img.permute(2, 0, 1).unsqueeze(0).to(device, dtype=dtype)
    result = sampler(
        prompt=edit_prompt,
        width=args.sample_width,
        height=args.sample_height,
        num_steps=args.sample_steps,
        image_prompt=None,
        true_gs=args.cfg_scale,
        seed=args.seed,
        ip_scale=args.ip_scale if args.use_ip else 1.0,
        source_image=img if args.use_spatial_condition else None,
    )
    return result
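

# Curated (condition image, edit prompt) pairs from the repo's assets folder,
# shown under the demo as clickable Gradio examples.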
def get_samples():
    sample_list = [
        {
            "image": "assets/0_camera_zoom/20486354.png",
            "edit_prompt": "Zoom in on the coral and add a small blue fish in the background.",
        },
        {
            "image": "assets/0_camera_zoom/168836781.png",
            "edit_prompt": "The camera moves slightly closer to the person in the red raincoat.",
        },
        {
            "image": "assets/0_camera_zoom/195278796.png",
            "edit_prompt": "A blue sign with white text and a white sign with green text appear at the bottom of the frame, and the camera zooms out.",
        },
        {
            "image": "assets/0_camera_zoom/242167914.png",
            "edit_prompt": "The person in the foreground moves further away from the camera.",
        },
        {
            "image": "assets/1_camera_motion/205012085.png",
            "edit_prompt": "The camera moves slightly downward.",
        },
        {
            "image": "assets/1_camera_motion/238430441.png",
            "edit_prompt": "The camera angle changes, tilting slightly to the left and downward.",
        },
        {
            "image": "assets/2_object_motion/34440751.png",
            "edit_prompt": "The train moves forward, and a station building appears on the left side of the frame.",
        },
        {
            "image": "assets/2_object_motion/47140330.png",
            "edit_prompt": "The train on the bridge disappears.",
        },
        {
            "image": "assets/2_object_motion/65531461.png",
            "edit_prompt": "The jet bridge retracts from the airplane.",
        },
        {
            "image": "assets/2_object_motion/236575633.png",
            "edit_prompt": "The puppy on the left moves its head to face forward.",
        },
        {
            "image": "assets/3_human_motion/473660.png",
            "edit_prompt": "The person's arms are raised higher in the second frame.",
        },
        {
            "image": "assets/3_human_motion/114875262.png",
            "edit_prompt": "The person moves from a prone position with arms extended forward to a kneeling position on the mat.",
        },
        {
            "image": "assets/3_human_motion/133541209.png",
            "edit_prompt": "The person's right arm changes from being bent with their hand near their head to giving a thumbs-up gesture.",
        },
        {
            "image": "assets/3_human_motion/152522070.png",
            "edit_prompt": "The person tilts their head downwards.",
        },
        {
            "image": "assets/3_human_motion/158685768.png",
            "edit_prompt": "The person turns their head to the right.",
        },
        {
            "image": "assets/4_interaction/142739045.png",
            "edit_prompt": "Milk is poured into the bowl of cereal, and the glass is lowered and partially emptied.",
        },
        {
            "image": "assets/4_interaction/146371498.png",
            "edit_prompt": "The hand with the glove moves closer to the black and wooden object, lifting it off the surface.",
        },
        {
            "image": "assets/4_interaction/148905535.png",
            "edit_prompt": "The hand holding the pen moves downwards, and the pen is no longer visible.",
        },
        {
            "image": "assets/4_interaction/151416962.png",
            "edit_prompt": "The person lowers the phone from their ear and looks at it.",
        },
        {
            "image": "assets/4_interaction/165994252.png",
            "edit_prompt": "The person lifts the box off the table.",
        },
        {
            "image": "assets/4_interaction/220356955.png",
            "edit_prompt": "The person lowers the cup and places it on the table.",
        },
        {
            "image": "assets/4_interaction/231403861.png",
            "edit_prompt": "The person tilts their head to the right and raises the pineapple closer to their face.",
        },
        {
            "image": "assets/4_interaction/234177339.png",
            "edit_prompt": "The person changes their hand position from holding their face to holding a phone.",
        },
    ]
    # Thumbnails are squashed to 512x512 purely for display in the gallery.
    return [
        [
            Image.open(sample["image"]).resize((512, 512)),
            sample["edit_prompt"],
        ]
        for sample in sample_list
    ]
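

# UI layout: condition image and edit prompt on the left, the edited result on
# the right, and the example gallery underneath.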
def create_app():
    with gr.Blocks() as app:
        gr.HTML(
            """
            <div style="text-align: center;">
                <h2>ByteMorpher</h2>
                <a href="https://arxiv.org/abs/2506.03107" target="_blank"><img src="https://img.shields.io/badge/arXiv-Paper-red" style="display:inline-block;"></a>
                <a href="https://boese0601.github.io/bytemorph/" target="_blank"><img src="https://img.shields.io/badge/Project-Website-blue" style="display:inline-block;"></a>
                <a href="https://github.com/ByteDance-Seed/BM-code" target="_blank"><img src="https://img.shields.io/github/stars/Boese0601/ByteMorph?label=GitHub%20%E2%98%85&logo=github&color=green" style="display:inline-block;"></a>
                <a href="https://huggingface.co/datasets/ByteDance-Seed/BM-6M" target="_blank"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Dataset-yellow" style="display:inline-block;"></a>
                <a href="https://huggingface.co/datasets/ByteDance-Seed/BM-6M-Demo" target="_blank"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Dataset_Demo-yellow" style="display:inline-block;"></a>
                <a href="https://huggingface.co/datasets/ByteDance-Seed/BM-Bench" target="_blank"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace%20-Benchmark-yellow" style="display:inline-block;"></a>
                <a href="https://huggingface.co/ByteDance-Seed/BM-Model" target="_blank"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face%20-Model-yellow" style="display:inline-block;"></a>
            </div>
            """
        )
        with gr.Row(equal_height=False):
            with gr.Column(variant="panel", elem_classes="inputPanel"):
                original_image = gr.Image(
                    type="pil", label="Condition Image", width=300, elem_id="input"
                )
                edit_prompt = gr.Textbox(lines=2, label="Edit Prompt", elem_id="edit_prompt")
                submit_btn = gr.Button("Run", elem_id="submit_btn")
            with gr.Column(variant="panel", elem_classes="outputPanel"):
                output_image = gr.Image(type="pil", elem_id="output")
        with gr.Row():
            examples = gr.Examples(
                examples=get_samples(),
                inputs=[original_image, edit_prompt],
                label="Examples",
            )
        submit_btn.click(
            fn=generate,
            inputs=[original_image, edit_prompt],
            outputs=output_image,
        )
        gr.HTML(
            """
            <div style="text-align: center;">
                * This demo's template was adapted from <a href="https://arxiv.org/abs/2411.15098" target="_blank">OminiControl</a>.
            </div>
            """
        )
    return app
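

# Launch flags: share=False keeps the demo on the Space's own URL, and
# ssr_mode=False turns off Gradio's server-side rendering.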
if __name__ == "__main__":
    create_app().launch(debug=False, share=False, ssr_mode=False)