import gradio as gr
from diffusers import StableDiffusionXLPipeline, DDIMScheduler
import torch
import sa_handler
import inversion
import numpy as np
from PIL import Image
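# Note: sa_handler and inversion are local modules that come with the StyleAligned
# reference implementation (github.com/google/style-aligned); they need to be
# available next to this script.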
# Model Load
scheduler = DDIMScheduler(
    beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear",
    clip_sample=False, set_alpha_to_one=False)
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16",
    use_safetensors=True,
    scheduler=scheduler
).to("cuda")
# Run DDIM inversion on the input image and generate a style-aligned image
# for the user's prompt
def process_image(image, prompt, style):
    src_prompt = f'Man laying in a bed, {style}.'
    num_inference_steps = 50
    # Resize the uploaded image to SDXL's native resolution before inversion
    x0 = np.array(Image.fromarray(image).resize((1024, 1024)))
    # DDIM inversion: recover the noise trajectory of the source image
    zts = inversion.ddim_inversion(pipeline, x0, src_prompt, num_inference_steps, 2)
    prompts = [
        src_prompt,
        f"{prompt}, {style}."
    ]
    shared_score_shift = np.log(2)
    shared_score_scale = 1.0
    # Register the StyleAligned shared-attention handler on the pipeline
    handler = sa_handler.Handler(pipeline)
    sa_args = sa_handler.StyleAlignedArgs(
        share_group_norm=True, share_layer_norm=True, share_attention=True,
        adain_queries=True, adain_keys=True, adain_values=False,
        shared_score_shift=shared_score_shift, shared_score_scale=shared_score_scale,)
    handler.register(sa_args)
    zT, inversion_callback = inversion.make_inversion_callback(zts, offset=5)
    # Seeded CPU generator for reproducible latents; the first latent is
    # replaced by the inverted noise of the reference image
    g_cpu = torch.Generator(device='cpu')
    g_cpu.manual_seed(10)
    latents = torch.randn(len(prompts), 4, 128, 128, device='cpu', generator=g_cpu,
                          dtype=pipeline.unet.dtype,).to('cuda:0')
    latents[0] = zT
    images_a = pipeline(prompts, latents=latents,
                        callback_on_step_end=inversion_callback,
                        num_inference_steps=num_inference_steps, guidance_scale=10.0).images
    handler.remove()
    # The pipeline already returns PIL images, so the generated (second) image
    # can be returned as-is
    return images_a[1]
# Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="numpy"),
        gr.Textbox(label="Enter your prompt"),
        gr.Textbox(label="Enter your style", value="medieval painting")
    ],
    outputs="image",
    title="Stable Diffusion XL with Style Alignment",
    description="Generate images in the style of your choice."
)

iface.launch()
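For a quick sanity check without the web UI, process_image can also be called directly; a minimal sketch, assuming the script above has already run (the image path below is a placeholder):

result = process_image(
    np.array(Image.open("reference.jpg").convert("RGB")),
    "A man riding a horse",
    "medieval painting",
)
result.save("styled_output.png")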