# File size: 4,831 Bytes
# 2bbf6b0 (short hash shown by the file viewer this listing was copied from)
import torch
from pipeline_flux_regional_pulid import RegionalFluxPipeline_PULID, RegionalFluxAttnProcessor2_0
def build_regional_inputs(regional_prompt_mask_pairs, background_prompt, image_height, image_width):
    """Turn box-style region specs into parallel prompt and mask lists.

    Args:
        regional_prompt_mask_pairs: Mapping whose values are dicts holding a
            "description" string and a "mask" box ``[x1, y1, x2, y2]``
            (x indexes columns, y indexes rows, end-exclusive).
        background_prompt: Prompt attached to the area no region box covers.
        image_height: Number of rows in every mask.
        image_width: Number of columns in every mask.

    Returns:
        ``(regional_prompts, regional_masks)``: two lists of equal length.
        Each mask is a ``(image_height, image_width)`` float tensor that is
        1.0 inside its box and 0.0 elsewhere. If some pixels are not covered
        by any box, ``background_prompt`` and the mask of the uncovered area
        are appended as the last entry.
    """
    regional_prompts = []
    regional_masks = []
    background_mask = torch.ones((image_height, image_width))
    for region in regional_prompt_mask_pairs.values():
        x1, y1, x2, y2 = region['mask']
        region_mask = torch.zeros((image_height, image_width))
        region_mask[y1:y2, x1:x2] = 1.0
        # Clear the covered area instead of subtracting the region mask:
        # subtraction would push overlapping boxes to -1 and corrupt both the
        # background mask and the coverage test below.
        background_mask[y1:y2, x1:x2] = 0.0
        regional_prompts.append(region['description'])
        regional_masks.append(region_mask)

    # if regional masks don't cover the whole image, append background prompt and mask
    if background_mask.sum() > 0:
        regional_prompts.append(background_prompt)
        regional_masks.append(background_mask)
    return regional_prompts, regional_masks


if __name__ == "__main__":
    model_path = "black-forest-labs/FLUX.1-dev"
    pipeline = RegionalFluxPipeline_PULID.from_pretrained(model_path, torch_dtype=torch.bfloat16).to("cuda")

    # Swap in the regional attention processor for the matching attention
    # layers and keep every other processor as it is. The processor mapping is
    # read once up front rather than once per name. The name test is a plain
    # substring match, so any processor name containing 'transformer_blocks'
    # qualifies.
    existing_procs = pipeline.transformer.attn_processors
    attn_procs = {}
    for name, proc in existing_procs.items():
        if 'transformer_blocks' in name and name.endswith("attn.processor"):
            attn_procs[name] = RegionalFluxAttnProcessor2_0()
        else:
            attn_procs[name] = proc
    pipeline.transformer.set_attn_processor(attn_procs)

    # load pulid
    pipeline.load_pulid_models()
    pipeline.load_pretrain()

    # single-person example

    # generation settings
    image_width = 1280
    image_height = 1280
    num_samples = 1
    num_inference_steps = 24
    guidance_scale = 3.5
    seed = 124

    # regional prompting settings
    mask_inject_steps = 10
    double_inject_blocks_interval = 1
    single_inject_blocks_interval = 1
    base_ratio = 0.2

    # prompts and region boxes; each "mask" is [x1, y1, x2, y2] in pixels
    base_prompt = "In a classroom during the afternoon, a man is practicing guitar by himself, with sunlight beautifully illuminating the room"
    background_prompt = "empty classroom"
    regional_prompt_mask_pairs = {
        "0": {
            "description": "A man in a blue shirt and jeans, playing guitar",
            "mask": [64, 320, 448, 1280]
        }
    }

    # pulid input
    id_image_paths = ["./assets/lecun.jpeg"]
    id_weights = [1.0]  # scale for pulid embedding injection

    # multi-person example (uncomment to replace the single-person settings above)

    # generation settings
    # image_width = 1280
    # image_height = 968
    # num_samples = 1
    # num_inference_steps = 24
    # guidance_scale = 3.5
    # seed = 124

    # regional prompting settings
    # mask_inject_steps = 8
    # double_inject_blocks_interval = 1
    # single_inject_blocks_interval = 2
    # base_ratio = 0.1

    # base_prompt = "In an elegant dining room, two men are having dinner at opposite ends of a long formal table, with warm lighting creating an atmospheric ambiance"
    # background_prompt = "a dining room"
    # regional_prompt_mask_pairs = {
    #     "0": {
    #         "description": "A man in a suit sitting at the table, with a plate of food and wine glass in front of him",
    #         "mask": [64, 128, 320, 968]
    #     },
    #     "1": {
    #         "description": "A man in a suit sitting at the table, with a plate of food and wine glass in front of him",
    #         "mask": [960, 128, 1216, 968]
    #     }
    # }
    # # pulid input
    # id_image_paths = ["./assets/trump.jpg", "./assets/musk.jpg"]
    # id_weights = [0.8, 0.8]  # scale for pulid embedding injection

    # prepare regional prompts and masks
    # ensure image width and height are divisible by the vae scale factor
    image_width = (image_width // pipeline.vae_scale_factor) * pipeline.vae_scale_factor
    image_height = (image_height // pipeline.vae_scale_factor) * pipeline.vae_scale_factor

    regional_prompts, regional_masks = build_regional_inputs(
        regional_prompt_mask_pairs,
        background_prompt,
        image_height=image_height,
        image_width=image_width,
    )

    # setup regional kwargs that pass to the pipeline
    joint_attention_kwargs = {
        'regional_prompts': regional_prompts,
        'regional_masks': regional_masks,
        'double_inject_blocks_interval': double_inject_blocks_interval,
        'single_inject_blocks_interval': single_inject_blocks_interval,
        'base_ratio': base_ratio,
        'id_image_paths': id_image_paths,
        'id_weights': id_weights,
        # use foreground mask as id mask: the first len(id_image_paths) masks
        # are region masks, because the background mask is appended last
        'id_masks': regional_masks[:len(id_image_paths)],
    }

    images = pipeline(
        prompt=base_prompt,
        num_samples=num_samples,
        width=image_width, height=image_height,
        mask_inject_steps=mask_inject_steps,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=torch.Generator("cuda").manual_seed(seed),
        joint_attention_kwargs=joint_attention_kwargs,
    ).images

    for idx, image in enumerate(images):
        image.save(f"output_{idx}.jpg")
|