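"""Gradio demo for ByteMorph: instruction-guided image editing.

Takes a source image and a text edit prompt and returns the edited image,
generated by an XFlux sampler conditioned on the source image.
"""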
import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image
from omegaconf import OmegaConf

from image_datasets.dataset import image_resize

# Inference settings (resolution, sampling steps, guidance scale, seed, ...)
# are loaded once at import time.
args = OmegaConf.load("inference_configs/inference.yaml")
device = torch.device("cuda")
dtype = torch.bfloat16

@spaces.GPU
def generate(image: Image.Image, edit_prompt: str):
    # Deferred import: keep the CUDA-dependent pipeline import inside the
    # @spaces.GPU context so the app can start up without a GPU attached.
    from src.flux.xflux_pipeline import XFluxSampler

    sampler = XFluxSampler(
        device=device,
        ip_loaded=False,
        spatial_condition=True,
        clip_image_processor=None,
        image_encoder=None,
        improj=None,
        share_position_embedding=True,
    )
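    # Note: the sampler (and its weights) are rebuilt on every request,
    # which keeps model loading inside the GPU context; caching it across
    # calls is a possible optimization if memory allows.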
    
    # Preprocess the source image: resize with the repo's image_resize
    # helper, round width/height down to multiples of 32, rescale pixels
    # to [-1, 1], and lay the result out as a (1, C, H, W) bfloat16
    # tensor on the GPU.
    img = image_resize(image, 512)
    w, h = img.size
    img = img.resize(((w // 32) * 32, (h // 32) * 32))
    img = torch.from_numpy((np.array(img) / 127.5) - 1)
    img = img.permute(2, 0, 1).unsqueeze(0).to(device, dtype=dtype)

    # Sample the edited image; the preprocessed source image is passed as
    # the spatial condition when enabled in the config.
    result = sampler(
        prompt=edit_prompt,
        width=args.sample_width,
        height=args.sample_height,
        num_steps=args.sample_steps,
        image_prompt=None,
        true_gs=args.cfg_scale,
        seed=args.seed,
        ip_scale=args.ip_scale if args.use_ip else 1.0,
        source_image=img if args.use_spatial_condition else None,
    )
    return result
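
# Example invocation (hypothetical; assumes the weights and
# inference_configs/inference.yaml are in place, and that the sampler
# returns a PIL image, as the gr.Image(type="pil") output suggests):
#
#   edited = generate(Image.open("assets/0_camera_zoom/20486354.png"),
#                     "Zoom in on the coral.")
#   edited.save("edited.png")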

def get_samples():
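    """Return [image, edit_prompt] pairs for the gr.Examples gallery.

    The assets are grouped by edit type: camera zoom, camera motion,
    object motion, human motion, and interaction.
    """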
    sample_list = [
        {
            "image": "assets/0_camera_zoom/20486354.png",
            "edit_prompt": "Zoom in on the coral and add a small blue fish in the background.",
        },
        {
            "image": "assets/0_camera_zoom/168836781.png",
            "edit_prompt": "The camera moves slightly closer to the person in the red raincoat.",
        },
        {
            "image": "assets/0_camera_zoom/195278796.png",
            "edit_prompt": "A blue sign with white text and a white sign with green text appear at the bottom of the frame, and the camera zooms out.",
        },
        {
            "image": "assets/0_camera_zoom/242167914.png",
            "edit_prompt": "The person in the foreground moves further away from the camera.",
        },
        {
            "image": "assets/1_camera_motion/205012085.png",
            "edit_prompt": "The camera moves slightly downward.",
        },
        {
            "image": "assets/1_camera_motion/238430441.png",
            "edit_prompt": "The camera angle changes, tilting slightly to the left and downward.",
        },
        {
            "image": "assets/2_object_motion/34440751.png",
            "edit_prompt": "The train moves forward, and a station building appears on the left side of the frame.",
        },
        {
            "image": "assets/2_object_motion/47140330.png",
            "edit_prompt": "The train on the bridge disappears.",
        },
        {
            "image": "assets/2_object_motion/65531461.png",
            "edit_prompt": "The jet bridge retracts from the airplane.",
        },
        {
            "image": "assets/2_object_motion/236575633.png",
            "edit_prompt": "The puppy on the left moves its head to face forward.",
        },
        {
            "image": "assets/3_human_motion/473660.png",
            "edit_prompt": "The person's arms are raised higher in the second frame.",
        },
        {
            "image": "assets/3_human_motion/114875262.png",
            "edit_prompt": "The person moves from a prone position with arms extended forward to a kneeling position on the mat.",
        },
        {
            "image": "assets/3_human_motion/133541209.png",
            "edit_prompt": "The person's right arm changes from being bent with their hand near their head to giving a thumbs-up gesture.",
        },
        {
            "image": "assets/3_human_motion/152522070.png",
            "edit_prompt": "The person tilts their head downwards.",
        },
        {
            "image": "assets/3_human_motion/158685768.png",
            "edit_prompt": "The person turns their head to the right.",
        },
        {
            "image": "assets/4_interaction/142739045.png",
            "edit_prompt": "Milk is poured into the bowl of cereal, and the glass is lowered and partially emptied.",
        },
        {
            "image": "assets/4_interaction/146371498.png",
            "edit_prompt": "The hand with the glove moves closer to the black and wooden object, lifting it off the surface.",
        },
        {
            "image": "assets/4_interaction/148905535.png",
            "edit_prompt": "The hand holding the pen moves downwards, and the pen is no longer visible.",
        },
        {
            "image": "assets/4_interaction/151416962.png",
            "edit_prompt": "The person lowers the phone from their ear and looks at it.",
        },
        {
            "image": "assets/4_interaction/165994252.png",
            "edit_prompt": "The person lifts the box off the table.",
        },
        {
            "image": "assets/4_interaction/220356955.png",
            "edit_prompt": "The person lowers the cup and places it on the table.",
        },
        {
            "image": "assets/4_interaction/231403861.png",
            "edit_prompt": "The person tilts their head to the right and raises the pineapple closer to their face.",
        },
        {
            "image": "assets/4_interaction/234177339.png",
            "edit_prompt": "The person changes their hand position from holding their face to holding a phone.",
        },
    ]
    return [
        [
            Image.open(sample["image"]).resize((512, 512)),
            sample["edit_prompt"],
        ]
        for sample in sample_list
    ]


def create_app():
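    """Build the Gradio Blocks UI: header badges, an input panel (condition
    image + edit prompt), an output panel, and clickable examples."""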
    with gr.Blocks() as app:
        gr.HTML(
            """
            <div style="text-align: center;">
                <h2>ByteMorpher</h2>
                <a href="https://arxiv.org/abs/2506.03107" target="_blank"><img src="https://img.shields.io/badge/arXiv-Paper-red" style="display:inline-block;"></a>
                <a href="https://boese0601.github.io/bytemorph/" target="_blank"><img src="https://img.shields.io/badge/Project-Website-blue" style="display:inline-block;"></a>
                <a href="https://github.com/ByteDance-Seed/BM-code" target="_blank"><img src="https://img.shields.io/github/stars/Boese0601/ByteMorph?label=GitHub%20%E2%98%85&logo=github&color=green" style="display:inline-block;"></a>
                <a href="https://huggingface.co/datasets/ByteDance-Seed/BM-6M" target="_blank"><img src="https://img.shields.io/badge/πŸ€—%20Hugging%20Face-Dataset-yellow" style="display:inline-block;"></a>
                <a href="https://huggingface.co/datasets/ByteDance-Seed/BM-6M-Demo" target="_blank"><img src="https://img.shields.io/badge/πŸ€—%20Hugging%20Face-Dataset_Demo-yellow" style="display:inline-block;"></a>
                <a href="https://huggingface.co/datasets/ByteDance-Seed/BM-Bench" target="_blank"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace%20-Benchmark-yellow" style="display:inline-block;"></a>
                <a href="https://huggingface.co/ByteDance-Seed/BM-Model" target="_blank"><img src="https://img.shields.io/badge/πŸ€—%20Hugging%20Face%20-Model-yellow" style="display:inline-block;"></a>
            </div>
            """
        )
        with gr.Row(equal_height=False):
            with gr.Column(variant="panel", elem_classes="inputPanel"):
                original_image = gr.Image(
                    type="pil", label="Condition Image", width=300, elem_id="input"
                )
                edit_prompt = gr.Textbox(lines=2, label="Edit Prompt", elem_id="edit_prompt")
                submit_btn = gr.Button("Run", elem_id="submit_btn")

            with gr.Column(variant="panel", elem_classes="outputPanel"):
                output_image = gr.Image(type="pil", label="Edited Image", elem_id="output")

        with gr.Row():
            examples = gr.Examples(
                examples=get_samples(),
                inputs=[original_image, edit_prompt],
                label="Examples",
            )

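        # Wire the Run button: generate(image, prompt) -> edited image.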
        submit_btn.click(
            fn=generate,
            inputs=[original_image, edit_prompt],
            outputs=output_image,
        )
        gr.HTML(
            """
            <div style="text-align: center;">
                * This demo's template was modified from <a href="https://arxiv.org/abs/2411.15098" target="_blank">OminiControl</a>.
            </div>
            """
        )
    return app

if __name__ == "__main__":
    create_app().launch(debug=False, share=False, ssr_mode=False)
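
# launch() notes: share=True would create a temporary public gradio.live
# link, and ssr_mode=False disables Gradio's server-side rendering.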