File size: 3,043 Bytes
e29a7a0
 
13e41c1
 
 
7ad3690
 
13e41c1
 
 
 
 
 
 
 
 
 
fb70850
13e41c1
 
 
fb70850
13e41c1
e29a7a0
 
7ad3690
13e41c1
 
 
 
 
 
 
 
fb70850
 
 
13e41c1
 
fb70850
e29a7a0
13e41c1
 
 
 
 
 
 
 
 
 
 
 
 
 
e29a7a0
 
 
 
 
 
7ad3690
 
e29a7a0
13e41c1
 
e29a7a0
 
 
 
 
13e41c1
e29a7a0
 
13e41c1
 
 
e29a7a0
 
 
 
 
13e41c1
e29a7a0
 
 
 
13e41c1
 
 
 
 
 
 
 
 
 
e29a7a0
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import gradio as gr
import torch
import numpy as np
import cv2
from diffusers import StableDiffusionPipeline
from model import UNet2DConditionModelEx
from pipeline import StableDiffusionControlLoraV3Pipeline 
from PIL import Image
import os
from huggingface_hub import login

# Authenticate with the Hugging Face Hub only when HF_TOKEN is configured.
# login(token=None) falls back to an interactive prompt, which cannot be
# answered when the app is started unattended, so skip the call instead.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Base checkpoint identifier and the tensor precision shared by every
# component loaded below
base_model = "runwayml/stable-diffusion-v1-5"
dtype = torch.float32

# Load the custom UNet from the checkpoint's "unet" subfolder and attach the
# "ow-gbi-control-lora" extra condition in a single chained expression
unet = UNet2DConditionModelEx.from_pretrained(
    base_model, subfolder="unet", torch_dtype=dtype
).add_extra_conditions("ow-gbi-control-lora")

# Build the pipeline around the customized UNet
pipe = StableDiffusionControlLoraV3Pipeline.from_pretrained(
    base_model, unet=unet, torch_dtype=dtype
)

# Load the ControlLoRA weights file "40kHalf.safetensors" from "models"
pipe.load_lora_weights("models", weight_name="40kHalf.safetensors")

def get_canny_image(image, low_threshold=100, high_threshold=200):
    """Run Canny edge detection and return the edges as a 3-channel image.

    Args:
        image: A ``PIL.Image.Image`` or a NumPy array. Arrays may be 2-D
            (single channel) or have the channel axis last; when that axis
            has four entries, only the first three are kept.
        low_threshold: First hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Second hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        A ``PIL.Image.Image`` built from the edge map repeated across three
        channels.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)

    # Only inspect the channel axis when it exists: a 2-D grayscale array has
    # no shape[2], and indexing it unconditionally raised IndexError.
    if image.ndim > 2 and image.shape[2] == 4:
        # Keep the first three channels (the fourth is presumably alpha).
        image = image[..., :3]

    canny_image = cv2.Canny(image, low_threshold, high_threshold)
    # Replicate the single-channel edge map into three identical channels.
    canny_image = np.stack([canny_image] * 3, axis=-1)
    return Image.fromarray(canny_image)

def generate_image(input_image, prompt, negative_prompt, guidance_scale, steps, low_threshold, high_threshold):
    """Generate an image conditioned on the Canny edges of ``input_image``.

    Args:
        input_image: Source image handed to ``get_canny_image``.
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative text prompt forwarded to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps; truncated to an integer before use.
        low_threshold: Lower Canny threshold.
        high_threshold: Upper Canny threshold.

    Returns:
        A tuple ``(canny_image, image)``: the edge map used as conditioning
        and the first image produced by the pipeline.

    Raises:
        gr.Error: If no input image was supplied.
    """
    # Report a missing image as a readable UI error instead of letting it
    # fail later with an opaque AttributeError on None.
    if input_image is None:
        raise gr.Error("Please provide an input image.")

    canny_image = get_canny_image(input_image, low_threshold, high_threshold)

    with torch.no_grad():
        image = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            # UI sliders can deliver floats; the step count must be integral.
            num_inference_steps=int(steps),
            guidance_scale=guidance_scale,
            image=canny_image,
            extra_condition_scale=1.0,
        ).images[0]

    return canny_image, image

# Build the Gradio interface: inputs in the left column, outputs in the right
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type="numpy")
            prompt = gr.Textbox(label="Prompt")
            negative_prompt = gr.Textbox(label="Negative Prompt")
            with gr.Row():
                low_threshold = gr.Slider(minimum=1, maximum=255, value=100, label="Canny Low Threshold")
                high_threshold = gr.Slider(minimum=1, maximum=255, value=200, label="Canny High Threshold")
            guidance_scale = gr.Slider(minimum=1, maximum=20, value=7.5, label="Guidance Scale")
            # step=1 keeps the step count integral; without an explicit step
            # Gradio derives a fractional increment from the 1-100 range.
            steps = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Steps")
            generate = gr.Button("Generate")

        with gr.Column():
            canny_output = gr.Image(label="Canny Edge Detection")
            result = gr.Image(label="Generated Image")

    # The order of `inputs` must match generate_image's positional parameters.
    generate.click(
        fn=generate_image,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            guidance_scale,
            steps,
            low_threshold,
            high_threshold,
        ],
        outputs=[canny_output, result],
    )

demo.launch()