File size: 6,586 Bytes
d101062
9c5d561
327dc47
6b4e294
d101062
e05e986
 
 
 
d101062
 
8bd5dc7
327dc47
e05e986
8bd5dc7
6b4e294
e05e986
 
 
 
 
 
 
 
 
 
 
 
 
 
ce193d2
e05e986
 
 
 
d101062
e05e986
8bd5dc7
d101062
8bd5dc7
e05e986
d101062
 
 
 
 
 
 
 
 
 
 
 
e05e986
 
 
 
 
 
 
 
 
 
 
8bd5dc7
ce193d2
e05e986
d101062
e05e986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d101062
8bd5dc7
 
e05e986
 
8bd5dc7
e05e986
327dc47
 
 
 
 
9c5d561
 
 
d101062
 
 
 
 
 
e05e986
d101062
 
 
 
 
e05e986
d101062
 
 
9c5d561
 
d101062
e05e986
d101062
 
327dc47
e05e986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d101062
e05e986
 
 
 
 
 
 
 
 
 
 
d101062
 
 
e05e986
 
 
9c5d561
 
 
e05e986
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import spaces
import gradio as gr
import re
from PIL import Image
import os

# Set memory optimization flags
# NOTE(review): this is assigned before `import torch` below, presumably so the
# CUDA allocator reads it when it initialises — confirm before reordering imports.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import numpy as np
import torch
from diffusers import FluxImg2ImgPipeline

# Global pipe variable for lazy loading: stays None until get_pipe() builds
# the pipeline on its first call, then holds the shared instance.
pipe = None

# Use float16 instead of bfloat16 for T4 compatibility
# NOTE(review): `dtype` is not referenced by the code visible in this file;
# get_pipe() passes torch.float16 directly — confirm whether it is still needed.
dtype = torch.float16
# Target device: "cuda" when torch reports CUDA as available, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"

def get_pipe():
    """Return the shared FLUX img2img pipeline, building it on first use.

    The pipeline is cached in the module-level ``pipe`` variable, so
    ``from_pretrained`` runs at most once per process; every later call
    returns the cached object unchanged.
    """
    global pipe
    if pipe is not None:
        return pipe

    # First call: load the checkpoint in half precision, then move it to
    # the module-level ``device`` before caching it.
    loaded = FluxImg2ImgPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell",
        torch_dtype=torch.float16,
        variant="fp16",
    )
    pipe = loaded.to(device)
    return pipe

def sanitize_prompt(prompt):
    """Return ``prompt`` with every disallowed character removed.

    Kept characters are ASCII letters, digits, whitespace and the
    punctuation marks ``. , ! ? -``; everything else is dropped (not
    replaced), so the result may be shorter than the input.
    """
    # The pattern is a negated allow-list: whatever it matches is deleted.
    return re.sub(r"[^a-zA-Z0-9\s.,!?-]", "", prompt)

def convert_to_fit_size(original_width_and_height, maximum_size = 1024):
    """Shrink a ``(width, height)`` pair so its longer side is ``maximum_size``.

    Sizes that already fit (both sides <= ``maximum_size``) are returned
    unchanged. Otherwise both sides are multiplied by the same factor and
    truncated to ``int``, so the aspect ratio is kept up to rounding.

    Returns:
        A ``(width, height)`` tuple.
    """
    width, height = original_width_and_height
    longest_side = max(width, height)

    # Already within bounds: never upscale.
    if longest_side <= maximum_size:
        return width, height

    scaling_factor = maximum_size / longest_side
    return int(width * scaling_factor), int(height * scaling_factor)

def adjust_to_multiple_of_32(width: int, height: int):
    """Round both dimensions down to a multiple of 32, but never below 32.

    The result is used in this file both as an ``Image.resize`` target and
    as the ``width``/``height`` passed to the pipeline. Plain rounding down
    turns any side smaller than 32 into 0, which is not a usable image
    size, so such sides are raised to 32 instead.

    Args:
        width: Source width in pixels.
        height: Source height in pixels.

    Returns:
        A ``(width, height)`` tuple whose members are multiples of 32 and
        at least 32.
    """
    def _snap(value: int) -> int:
        # Drop the remainder, then enforce the one-block minimum.
        return max(32, value - (value % 32))

    return _snap(width), _snap(height)

def resize_image(image: Image.Image, max_dim: int = 512) -> Image.Image:
    """Downscale ``image`` to fit within ``max_dim`` while preserving aspect ratio.

    Images that already fit are returned as-is (the same object, never
    upscaled). Larger images are resampled with LANCZOS.

    Args:
        image: Source PIL image.
        max_dim: Maximum allowed size, in pixels, for either side.

    Returns:
        The original image if no shrinking is needed, otherwise a resized copy.
    """
    w, h = image.size
    ratio = min(max_dim / w, max_dim / h)
    if ratio >= 1.0:
        return image

    # int() truncates, so a very elongated image (e.g. 5000x1) would get a
    # 0-pixel side; keep every side at least 1 pixel.
    new_w = max(1, int(w * ratio))
    new_h = max(1, int(h * ratio))
    return image.resize((new_w, new_h), Image.LANCZOS)

@spaces.GPU(duration=120)
def process_images(image, prompt="a girl", strength=0.75, seed=0, inference_step=4, progress=gr.Progress(track_tqdm=True)):
    """Run FLUX img2img on ``image`` and return the generated PIL image.

    Args:
        image: Input PIL image, or ``None`` when nothing was uploaded.
        prompt: Text prompt passed to the pipeline.
        strength: Img2img strength passed to the pipeline.
        seed: Seed for the random generator; coerced to ``int``.
        inference_step: Number of inference steps; coerced to ``int``.
        progress: Gradio progress tracker.

    Returns:
        The generated image at the fitted input size (at most 512 px per
        side), or ``None`` when ``image`` is ``None``.
    """
    progress(0, desc="Starting")

    # Check the input first so an empty request does not trigger a model load.
    if image is None:
        print("empty input image returned")
        return None

    # The UI number fields may deliver floats; manual_seed() and the step
    # count need real integers.
    seed = int(seed)
    num_inference_steps = int(inference_step)

    # Get the model using lazy loading
    model = get_pipe()

    # Resize image to reduce memory usage
    image = resize_image(image, max_dim=512)

    # fit_* is the size handed back to the caller; width/height is the
    # 32-aligned size the pipeline actually runs at.
    fit_width, fit_height = convert_to_fit_size(image.size, maximum_size=512)
    width, height = adjust_to_multiple_of_32(fit_width, fit_height)
    image = image.resize((width, height), Image.LANCZOS)

    generator = torch.Generator(device).manual_seed(seed)

    # torch.autocast replaces the deprecated torch.cuda.amp.autocast; it is
    # only enabled when we are actually running on CUDA.
    with torch.autocast(device_type="cuda", dtype=torch.float16, enabled=(device == "cuda")), torch.no_grad():
        output = model(
            prompt=prompt,
            image=image,
            generator=generator,
            strength=strength,
            width=width,
            height=height,
            guidance_scale=0,
            num_inference_steps=num_inference_steps,
            max_sequence_length=256,
        )

    pil_image = output.images[0]

    # Undo the 32-alignment so the result matches the fitted input size.
    if pil_image.size != (fit_width, fit_height):
        return pil_image.resize((fit_width, fit_height), Image.LANCZOS)
    return pil_image

def read_file(path: str) -> str:
    """Return the full text of the file at ``path``, decoded as UTF-8."""
    with open(path, encoding="utf-8") as handle:
        return handle.read()

# Custom stylesheet handed to gr.Blocks(css=...) below.
# It centres #col-left / #col-right with a 640px width cap and defines the
# .grid-container, .image and .text helper classes.
# NOTE(review): none of these selectors are used by components in this file;
# presumably the HTML fragments loaded via read_file() rely on them — confirm.
css="""
#col-left {
    margin: 0 auto;
    max-width: 640px;
}
#col-right {
    margin: 0 auto;
    max-width: 640px;
}
.grid-container {
  display: flex;
  align-items: center;
  justify-content: center;
  gap:10px
}

.image {
  width: 128px; 
  height: 128px; 
  object-fit: cover; 
}

.text {
  font-size: 16px;
}
"""

# Build the Gradio UI: header HTML, an input column (image, prompt, run
# button, advanced settings), an output column, examples and a footer.
with gr.Blocks(css=css, elem_id="demo-container") as demo:
    with gr.Column():
        gr.HTML(read_file("demo_header.html"))
        gr.HTML(read_file("demo_tools.html"))
    with gr.Row():
        with gr.Column():
            image = gr.Image(height=800, sources=['upload', 'clipboard'], image_mode='RGB', elem_id="image_upload", type="pil", label="Upload")
            with gr.Row(elem_id="prompt-container", equal_height=False):
                with gr.Row():
                    prompt = gr.Textbox(label="Prompt", value="a women", placeholder="Your prompt (what you want in place of what is erased)", elem_id="prompt")

            btn = gr.Button("Img2Img", elem_id="run_button", variant="primary")

            with gr.Accordion(label="Advanced Settings", open=False):
                with gr.Row(equal_height=True):
                    strength = gr.Number(value=0.75, minimum=0, maximum=0.75, step=0.01, label="strength")
                    # precision=0 makes Gradio hand integers to the handler;
                    # the seed and the step count must both be whole numbers.
                    seed = gr.Number(value=100, minimum=0, step=1, precision=0, label="seed")
                    # step=1 (was 4): with minimum=1 a step of 4 only allowed
                    # 1, 5, 9, ... and put the default of 4 off the grid.
                    inference_step = gr.Number(value=4, minimum=1, step=1, precision=0, label="inference_step")
                id_input = gr.Text(label="Name", visible=False)

        with gr.Column():
            image_out = gr.Image(height=800, sources=[], label="Output", elem_id="output-img", format="jpg")

    # Each example row is (input image, pre-rendered output image, prompt);
    # selecting one fills the three components listed in `inputs`.
    gr.Examples(
        examples=[
            ["examples/draw_input.jpg", "examples/draw_output.jpg", "a women ,eyes closed,mouth opened"],
            ["examples/draw-gimp_input.jpg", "examples/draw-gimp_output.jpg", "a women ,eyes closed,mouth opened"],
            ["examples/gimp_input.jpg", "examples/gimp_output.jpg", "a women ,hand on neck"],
            ["examples/inpaint_input.jpg", "examples/inpaint_output.jpg", "a women ,hand on neck"],
        ],
        inputs=[image, image_out, prompt],
    )
    # A single HTML component for the footer (previously one gr.HTML was
    # passed as the value of another).
    gr.HTML(read_file("demo_footer.html"))
    # Run the generation on button click or when Enter is pressed in the prompt.
    gr.on(
        triggers=[btn.click, prompt.submit],
        fn=process_images,
        inputs=[image, prompt, strength, seed, inference_step],
        outputs=[image_out],
    )

# Start the Gradio app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # NOTE(review): share=True presumably requests a public share link and
    # show_error=True surfaces handler errors in the UI — confirm against the
    # installed Gradio version.
    demo.launch(share=True, show_error=True)