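"""Gradio demo for ByteMorph: instruction-guided image editing.

Takes a source image and a text edit prompt and returns the edited image,
generated by an XFlux sampler conditioned on the source image.
"""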
import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image
from omegaconf import OmegaConf

from image_datasets.dataset import image_resize

# Inference settings (resolution, sampling steps, guidance scale, seed, ...)
# are loaded once at import time.
args = OmegaConf.load("inference_configs/inference.yaml")
device = torch.device("cuda")
dtype = torch.bfloat16

@spaces.GPU
def generate(image: Image.Image, edit_prompt: str):
    # Deferred import: keep the CUDA-dependent pipeline import inside the
    # @spaces.GPU context so the app can start up without a GPU attached.
    from src.flux.xflux_pipeline import XFluxSampler

    sampler = XFluxSampler(
        device=device,
        ip_loaded=False,
        spatial_condition=True,
        clip_image_processor=None,
        image_encoder=None,
        improj=None,
        share_position_embedding=True,
    )
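    # Note: the sampler (and its weights) are rebuilt on every request,
    # which keeps model loading inside the GPU context; caching it across
    # calls is a possible optimization if memory allows.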
    
    # Preprocess the source image: resize with the repo's image_resize
    # helper, round width/height down to multiples of 32, rescale pixels
    # to [-1, 1], and lay the result out as a (1, C, H, W) bfloat16
    # tensor on the GPU.
    img = image_resize(image, 512)
    w, h = img.size
    img = img.resize(((w // 32) * 32, (h // 32) * 32))
    img = torch.from_numpy((np.array(img) / 127.5) - 1)
    img = img.permute(2, 0, 1).unsqueeze(0).to(device, dtype=dtype)

    # Sample the edited image; the preprocessed source image is passed as
    # the spatial condition when enabled in the config.
    result = sampler(
        prompt=edit_prompt,
        width=args.sample_width,
        height=args.sample_height,
        num_steps=args.sample_steps,
        image_prompt=None,
        true_gs=args.cfg_scale,
        seed=args.seed,
        ip_scale=args.ip_scale if args.use_ip else 1.0,
        source_image=img if args.use_spatial_condition else None,
    )
    return result
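
# Example invocation (hypothetical; assumes the weights and
# inference_configs/inference.yaml are in place, and that the sampler
# returns a PIL image, as the gr.Image(type="pil") output suggests):
#
#   edited = generate(Image.open("assets/0_camera_zoom/20486354.png"),
#                     "Zoom in on the coral.")
#   edited.save("edited.png")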

def get_samples():
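    """Return [image, edit_prompt] pairs for the gr.Examples gallery.

    The assets are grouped by edit type: camera zoom, camera motion,
    object motion, human motion, and interaction.
    """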
    sample_list = [
        {
            "image": "assets/0_camera_zoom/20486354.png",
            "edit_prompt": "Zoom in on the coral and add a small blue fish in the background.",
        },
        {
            "image": "assets/0_camera_zoom/168836781.png",
            "edit_prompt": "The camera moves slightly closer to the person in the red raincoat.",
        },
        {
            "image": "assets/0_camera_zoom/195278796.png",
            "edit_prompt": "A blue sign with white text and a white sign with green text appear at the bottom of the frame, and the camera zooms out.",
        },
        {
            "image": "assets/0_camera_zoom/242167914.png",
            "edit_prompt": "The person in the foreground moves further away from the camera.",
        },
        {
            "image": "assets/1_camera_motion/205012085.png",
            "edit_prompt": "The camera moves slightly downward.",
        },
        {
            "image": "assets/1_camera_motion/238430441.png",
            "edit_prompt": "The camera angle changes, tilting slightly to the left and downward.",
        },
        {
            "image": "assets/2_object_motion/34440751.png",
            "edit_prompt": "The train moves forward, and a station building appears on the left side of the frame.",
        },
        {
            "image": "assets/2_object_motion/47140330.png",
            "edit_prompt": "The train on the bridge disappears.",
        },
        {
            "image": "assets/2_object_motion/65531461.png",
            "edit_prompt": "The jet bridge retracts from the airplane.",
        },
        {
            "image": "assets/2_object_motion/236575633.png",
            "edit_prompt": "The puppy on the left moves its head to face forward.",
        },
        {
            "image": "assets/3_human_motion/473660.png",
            "edit_prompt": "The person's arms are raised higher in the second frame.",
        },
        {
            "image": "assets/3_human_motion/114875262.png",
            "edit_prompt": "The person moves from a prone position with arms extended forward to a kneeling position on the mat.",
        },
        {
            "image": "assets/3_human_motion/133541209.png",
            "edit_prompt": "The person's right arm changes from being bent with their hand near their head to giving a thumbs-up gesture.",
        },
        {
            "image": "assets/3_human_motion/152522070.png",
            "edit_prompt": "The person tilts their head downwards.",
        },
        {
            "image": "assets/3_human_motion/158685768.png",
            "edit_prompt": "The person turns their head to the right.",
        },
        {
            "image": "assets/4_interaction/142739045.png",
            "edit_prompt": "Milk is poured into the bowl of cereal, and the glass is lowered and partially emptied.",
        },
        {
            "image": "assets/4_interaction/146371498.png",
            "edit_prompt": "The hand with the glove moves closer to the black and wooden object, lifting it off the surface.",
        },
        {
            "image": "assets/4_interaction/148905535.png",
            "edit_prompt": "The hand holding the pen moves downwards, and the pen is no longer visible.",
        },
        {
            "image": "assets/4_interaction/151416962.png",
            "edit_prompt": "The person lowers the phone from their ear and looks at it.",
        },
        {
            "image": "assets/4_interaction/165994252.png",
            "edit_prompt": "The person lifts the box off the table.",
        },
        {
            "image": "assets/4_interaction/220356955.png",
            "edit_prompt": "The person lowers the cup and places it on the table.",
        },
        {
            "image": "assets/4_interaction/231403861.png",
            "edit_prompt": "The person tilts their head to the right and raises the pineapple closer to their face.",
        },
        {
            "image": "assets/4_interaction/234177339.png",
            "edit_prompt": "The person changes their hand position from holding their face to holding a phone.",
        },
    ]
    return [
        [
            Image.open(sample["image"]).resize((512, 512)),
            sample["edit_prompt"],
        ]
        for sample in sample_list
    ]


def create_app():
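    """Build the Gradio Blocks UI: header badges, an input panel (condition
    image + edit prompt), an output panel, and clickable examples."""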
    with gr.Blocks() as app:
        gr.HTML(
            """
            <div style="text-align: center;">
                <h2>ByteMorpher</h2>
                <a href="https://arxiv.org/abs/2506.03107" target="_blank"><img src="https://img.shields.io/badge/arXiv-Paper-red" style="display:inline-block;"></a>
                <a href="https://boese0601.github.io/bytemorph/" target="_blank"><img src="https://img.shields.io/badge/Project-Website-blue" style="display:inline-block;"></a>
                <a href="https://github.com/ByteDance-Seed/BM-code" target="_blank"><img src="https://img.shields.io/github/stars/Boese0601/ByteMorph?label=GitHub%20%E2%98%85&logo=github&color=green" style="display:inline-block;"></a>
                <a href="https://huggingface.co/datasets/ByteDance-Seed/BM-6M" target="_blank"><img src="https://img.shields.io/badge/πŸ€—%20Hugging%20Face-Dataset-yellow" style="display:inline-block;"></a>
                <a href="https://huggingface.co/datasets/ByteDance-Seed/BM-6M-Demo" target="_blank"><img src="https://img.shields.io/badge/πŸ€—%20Hugging%20Face-Dataset_Demo-yellow" style="display:inline-block;"></a>
                <a href="https://huggingface.co/datasets/ByteDance-Seed/BM-Bench" target="_blank"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace%20-Benchmark-yellow" style="display:inline-block;"></a>
                <a href="https://huggingface.co/ByteDance-Seed/BM-Model" target="_blank"><img src="https://img.shields.io/badge/πŸ€—%20Hugging%20Face%20-Model-yellow" style="display:inline-block;"></a>
            </div>
            """
        )
        with gr.Row(equal_height=False):
            with gr.Column(variant="panel", elem_classes="inputPanel"):
                original_image = gr.Image(
                    type="pil", label="Condition Image", width=300, elem_id="input"
                )
                edit_prompt = gr.Textbox(lines=2, label="Edit Prompt", elem_id="edit_prompt")
                submit_btn = gr.Button("Run", elem_id="submit_btn")

            with gr.Column(variant="panel", elem_classes="outputPanel"):
                output_image = gr.Image(type="pil", label="Edited Image", elem_id="output")

        with gr.Row():
            examples = gr.Examples(
                examples=get_samples(),
                inputs=[original_image, edit_prompt],
                label="Examples",
            )

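        # Wire the Run button: generate(image, prompt) -> edited image.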
        submit_btn.click(
            fn=generate,
            inputs=[original_image, edit_prompt],
            outputs=output_image,
        )
        gr.HTML(
            """
            <div style="text-align: center;">
                * This demo's template was modified from <a href="https://arxiv.org/abs/2411.15098" target="_blank">OminiControl</a>.
            </div>
            """
        )
    return app

if __name__ == "__main__":
    create_app().launch(debug=False, share=False, ssr_mode=False)
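
# launch() notes: share=True would create a temporary public gradio.live
# link, and ssr_mode=False disables Gradio's server-side rendering.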