import gradio as gr import torch import spaces import os import numpy as np from PIL import Image from omegaconf import OmegaConf from image_datasets.dataset import image_resize args = OmegaConf.load("inference_configs/inference.yaml") device = torch.device("cuda") dtype = torch.bfloat16 @spaces.GPU def generate(image: Image.Image, edit_prompt: str): from src.flux.xflux_pipeline import XFluxSampler sampler = XFluxSampler( device = device, ip_loaded=False, spatial_condition=True, clip_image_processor=None, image_encoder=None, improj=None, share_position_embedding = True, ) img = image_resize(image, 512) w, h = img.size img = img.resize(((w // 32) * 32, (h // 32) * 32)) img = torch.from_numpy((np.array(img) / 127.5) - 1) img = img.permute(2, 0, 1).unsqueeze(0).to(device, dtype=dtype) result = sampler( prompt=edit_prompt, width=args.sample_width, height=args.sample_height, num_steps=args.sample_steps, image_prompt=None, true_gs=args.cfg_scale, seed=args.seed, ip_scale=args.ip_scale if args.use_ip else 1.0, source_image=img if args.use_spatial_condition else None, ) return result def get_samples(): sample_list = [ { "image": "assets/0_camera_zoom/20486354.png", "edit_prompt": "Zoom in on the coral and add a small blue fish in the background.", }, { "image": "assets/0_camera_zoom/168836781.png", "edit_prompt": "The camera moves slightly closer to the person in the red raincoat.", }, { "image": "assets/0_camera_zoom/195278796.png", "edit_prompt": "A blue sign with white text and a white sign with green text appear at the bottom of the frame, and the camera zooms out.", }, { "image": "assets/0_camera_zoom/242167914.png", "edit_prompt": "The person in the foreground moves further away from the camera.", }, { "image": "assets/1_camera_motion/205012085.png", "edit_prompt": "The camera moves slightly downward.", }, { "image": "assets/1_camera_motion/238430441.png", "edit_prompt": "The camera angle changes, tilting slightly to the left and downward.", }, { "image": "assets/2_object_motion/34440751.png", "edit_prompt": "The train moves forward, and a station building appears on the left side of the frame.", }, { "image": "assets/2_object_motion/47140330.png", "edit_prompt": "The train on the bridge disappears.", }, { "image": "assets/2_object_motion/65531461.png", "edit_prompt": "The jet bridge retracts from the airplane.", }, { "image": "assets/2_object_motion/236575633.png", "edit_prompt": "The puppy on the left moves its head to face forward.", }, { "image": "assets/3_human_motion/473660.png", "edit_prompt": "The person's arms are raised higher in the second frame.", }, { "image": "assets/3_human_motion/114875262.png", "edit_prompt": "The person moves from a prone position with arms extended forward to a kneeling position on the mat.", }, { "image": "assets/3_human_motion/133541209.png", "edit_prompt": "The person's right arm changes from being bent with their hand near their head to giving a thumbs-up gesture.", }, { "image": "assets/3_human_motion/152522070.png", "edit_prompt": "The person tilts their head downwards.", }, { "image": "assets/3_human_motion/158685768.png", "edit_prompt": "The person turns their head to the right.", }, { "image": "assets/4_interaction/142739045.png", "edit_prompt": "Milk is poured into the bowl of cereal, and the glass is lowered and partially emptied.", }, { "image": "assets/4_interaction/146371498.png", "edit_prompt": "The hand with the glove moves closer to the black and wooden object, lifting it off the surface.", }, { "image": "assets/4_interaction/148905535.png", "edit_prompt": "The hand holding the pen moves downwards, and the pen is no longer visible.", }, { "image": "assets/4_interaction/151416962.png", "edit_prompt": "The person lowers the phone from their ear and looks at it.", }, { "image": "assets/4_interaction/165994252.png", "edit_prompt": "The person lifts the box off the table.", }, { "image": "assets/4_interaction/220356955.png", "edit_prompt": "The person lowers the cup and places it on the table.", }, { "image": "assets/4_interaction/231403861.png", "edit_prompt": "The person tilts their head to the right and raises the pineapple closer to their face.", }, { "image": "assets/4_interaction/234177339.png", "edit_prompt": "The person changes their hand position from holding their face to holding a phone.", }, ] return [ [ Image.open(sample["image"]).resize((512, 512)), sample["edit_prompt"], ] for sample in sample_list ] def create_app(): with gr.Blocks() as app: gr.HTML( """

ByteMorpher

""" ) # gr.Markdown(header, elem_id="header") with gr.Row(equal_height=False): with gr.Column(variant="panel", elem_classes="inputPanel"): original_image = gr.Image( type="pil", label="Condition Image", width=300, elem_id="input" ) edit_prompt = gr.Textbox(lines=2, label="Edit Prompt", elem_id="edit_prompt") submit_btn = gr.Button("Run", elem_id="submit_btn") with gr.Column(variant="panel", elem_classes="outputPanel"): output_image = gr.Image(type="pil", elem_id="output") with gr.Row(): examples = gr.Examples( examples=get_samples(), inputs=[original_image, edit_prompt], label="Examples", ) submit_btn.click( fn=generate, inputs=[original_image, edit_prompt], outputs=output_image, ) gr.HTML( """
* This demo's template was modified from OminiControl.
""" ) return app if __name__ == "__main__": create_app().launch(debug=False, share=False, ssr_mode=False)