Tanut committed
Commit e8943d1 · Parent(s): ee61c84

Testing img2img

Files changed (1): app.py (+128 -28)
app.py CHANGED
@@ -7,8 +7,8 @@ import qrcode
 from qrcode.constants import ERROR_CORRECT_H
 from diffusers import (
     StableDiffusionPipeline,
-    StableDiffusionControlNetPipeline,
-    StableDiffusionControlNetImg2ImgPipeline,  # NEW: img2img pipeline
+    StableDiffusionControlNetPipeline,          # TXT2IMG (Method 1)
+    StableDiffusionControlNetImg2ImgPipeline,   # two-stage img2img
     ControlNetModel,
     DPMSolverMultistepScheduler,
 )
@@ -17,7 +17,7 @@ from diffusers import (
 os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl")

 MODEL_ID = "runwayml/stable-diffusion-v1-5"
-CN_QRMON = "monster-labs/control_v1p_sd15_qrcode_monster"
+CN_QRMON = "monster-labs/control_v1p_sd15_qrcode_monster"
 DTYPE = torch.float16

 # ---------- helpers ----------
@@ -90,7 +90,7 @@ def get_sd_pipe():
     return _SD

 def get_qrmon_txt2img_pipe():
-    """(kept for completeness; not used in the two-stage flow)"""
+    """Method 1 (TXT2IMG): SD + ControlNet QR-Monster; no init image, only a conditioning image."""
     global _CN_TXT2IMG
     if _CN_TXT2IMG is None:
         cn = ControlNetModel.from_pretrained(CN_QRMON, torch_dtype=DTYPE, use_safetensors=True)
@@ -106,7 +106,7 @@ def get_qrmon_txt2img_pipe():
     return _CN_TXT2IMG

 def get_qrmon_img2img_pipe():
-    """This is the pipeline we want for stage B."""
+    """Two-stage, Stage B: SD img2img with ControlNet QR-Monster (kept for comparison)."""
     global _CN_IMG2IMG
     if _CN_IMG2IMG is None:
         cn = ControlNetModel.from_pretrained(CN_QRMON, torch_dtype=DTYPE, use_safetensors=True)
@@ -142,6 +142,56 @@ def txt2img(prompt: str, negative: str, steps: int, cfg: float, width: int, heig
     )
     return out.images[0]

+# ---- Method 1: TXT2IMG ControlNet (no init image; QR as conditioning only) ----
+@spaces.GPU(duration=120)
+def qr_txt2img(url: str, style_prompt: str, negative: str, steps: int, cfg: float,
+               size: int, border: int, back_color: str, blur: float,
+               qr_weight: float, start: float, end: float, seed: int,
+               repair_strength: float, feather: float):
+    s = snap8(size)
+    qr_img = make_qr(url=url, size=s, border=int(border), back_color=back_color, blur_radius=float(blur))
+
+    if int(seed) < 0:
+        seed = random.randint(0, 2**31 - 1)
+    gen = torch.Generator(device="cuda").manual_seed(int(seed))
+
+    pipe = get_qrmon_txt2img_pipe()
+    if torch.cuda.is_available(): torch.cuda.empty_cache()
+    gc.collect()
+    with torch.autocast(device_type="cuda", dtype=DTYPE):
+        try:
+            out = pipe(
+                prompt=str(style_prompt),
+                negative_prompt=str(negative or ""),
+                image=qr_img,  # ControlNet conditioning image
+                controlnet_conditioning_scale=float(qr_weight),
+                control_guidance_start=float(start),
+                control_guidance_end=float(end),
+                num_inference_steps=int(steps),
+                guidance_scale=float(cfg),
+                width=s, height=s,
+                generator=gen,
+            )
+        except TypeError:
+            # Fallback for older diffusers param names
+            out = pipe(
+                prompt=str(style_prompt),
+                negative_prompt=str(negative or ""),
+                control_image=qr_img,
+                controlnet_conditioning_scale=float(qr_weight),
+                controlnet_start=float(start),
+                controlnet_end=float(end),
+                num_inference_steps=int(steps),
+                guidance_scale=float(cfg),
+                width=s, height=s,
+                generator=gen,
+            )
+
+    img = out.images[0]
+    img = enforce_qr_contrast(img, qr_img, strength=float(repair_strength), feather=float(feather))
+    return img, qr_img
+
+# ---- Two-stage variant (Stage A txt2img base, Stage B ControlNet img2img) ----
 @spaces.GPU(duration=120)
 def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: float,
                size: int, border: int, back_color: str, blur: float,
@@ -149,7 +199,7 @@ def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: floa
                denoise: float = 0.45):
     s = snap8(size)

-    # --- Stage A: base art (txt2img) ---
+    # Stage A: base art (txt2img)
     sd = get_sd_pipe()
     if int(seed) < 0:
         seed = random.randint(0, 2**31 - 1)
@@ -159,7 +209,7 @@ def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: floa
     gc.collect()
     with torch.autocast(device_type="cuda", dtype=DTYPE):
         base = sd(
-            prompt=str(style_prompt),  # don't include "QR code" here
+            prompt=str(style_prompt),
             negative_prompt=str(negative or ""),
             num_inference_steps=max(int(steps)//2, 12),
             guidance_scale=float(cfg),
@@ -167,29 +217,46 @@ def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: floa
             generator=gen,
         ).images[0]

-    # Control image (QR)
+    # control image (QR)
     qr_img = make_qr(url=url, size=s, border=int(border),
                      back_color=back_color, blur_radius=float(blur))

-    # --- Stage B: ControlNet img2img (QR Monster) ---
+    # Stage B: img2img with ControlNet QR
     pipe = get_qrmon_img2img_pipe()
     if torch.cuda.is_available(): torch.cuda.empty_cache()
     gc.collect()
     with torch.autocast(device_type="cuda", dtype=DTYPE):
-        out = pipe(
-            prompt=str(style_prompt),
-            negative_prompt=str(negative or ""),
-            image=base,               # init image (img2img)
-            control_image=qr_img,     # control image (QR)
-            strength=float(denoise),  # 0.3–0.6 keeps composition
-            controlnet_conditioning_scale=float(qr_weight),
-            control_guidance_start=0.05,
-            control_guidance_end=0.95,
-            num_inference_steps=int(steps),
-            guidance_scale=float(cfg),
-            width=s, height=s,
-            generator=gen,
-        )
+        try:
+            out = pipe(
+                prompt=str(style_prompt),
+                negative_prompt=str(negative or ""),
+                image=base,            # init image
+                image_guidance_scale=None,
+                control_image=qr_img,  # QR conditioning
+                strength=float(denoise),
+                controlnet_conditioning_scale=float(qr_weight),
+                control_guidance_start=0.05,
+                control_guidance_end=0.95,
+                num_inference_steps=int(steps),
+                guidance_scale=float(cfg),
+                width=s, height=s,
+                generator=gen,
+            )
+        except TypeError:
+            out = pipe(
+                prompt=str(style_prompt),
+                negative_prompt=str(negative or ""),
+                image=base,
+                control_image=qr_img,
+                strength=float(denoise),
+                controlnet_conditioning_scale=float(qr_weight),
+                controlnet_start=0.05,
+                controlnet_end=0.95,
+                num_inference_steps=int(steps),
+                guidance_scale=float(cfg),
+                width=s, height=s,
+                generator=gen,
+            )

     img = out.images[0]
     img = enforce_qr_contrast(img, qr_img, strength=float(repair_strength), feather=float(feather))
@@ -197,7 +264,7 @@ def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: floa

 # ---------- UI ----------
 with gr.Blocks() as demo:
-    gr.Markdown("# ZeroGPU Stable Diffusion + AI QR Codes (Monster v2)")
+    gr.Markdown("# ZeroGPU Stable Diffusion + AI QR Codes")

     with gr.Tab("Text → Image"):
         prompt = gr.Textbox(label="Prompt", value="a cozy reading nook, warm sunlight, cinematic lighting, highly detailed")
@@ -210,10 +277,43 @@ with gr.Blocks() as demo:
         out_img = gr.Image(label="Image", interactive=False)
         gr.Button("Generate").click(txt2img, [prompt, negative, steps, cfg, width, height, seed], out_img)

-    with gr.Tab("QR Code Stylizer (ControlNet Monster — two-stage)"):
+    # ---- Method 1: TXT2IMG ControlNet ----
+    with gr.Tab("QR (Method 1 — TXT2IMG)"):
+        url_m1 = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com")
+        prompt_m1 = gr.Textbox(label="Style prompt (no 'QR code' needed)",
+                               value="epic phoenix in flames, dramatic lighting, detailed, 8k")
+        neg_m1 = gr.Textbox(label="Negative prompt",
+                            value="lowres, low contrast, blurry, jpeg artifacts, worst quality, bad anatomy, extra digits")
+        size_m1 = gr.Slider(384, 1024, value=768, step=64, label="Canvas (px)")
+        steps_m1 = gr.Slider(10, 60, value=28, step=1, label="Steps")
+        cfg_m1 = gr.Slider(1.0, 12.0, value=6.5, step=0.1, label="CFG")
+        border_m1 = gr.Slider(4, 20, value=12, step=1, label="QR border (quiet zone)")
+        back_m1 = gr.ColorPicker(value="#808080", label="QR background")
+        blur_m1 = gr.Slider(0.0, 3.0, value=1.2, step=0.1, label="Soften control (blur)")
+        weight_m1 = gr.Slider(0.6, 1.6, value=1.2, step=0.05, label="QR control weight")
+        start_m1 = gr.Slider(0.0, 1.0, value=0.05, step=0.01, label="Control start")
+        end_m1 = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="Control end")
+        seed_m1 = gr.Number(value=-1, precision=0, label="Seed (-1 random)")
+        repair_m1 = gr.Slider(0.0, 1.0, value=0.6, step=0.05, label="Post repair strength")
+        feather_m1 = gr.Slider(0.0, 3.0, value=1.0, step=0.1, label="Repair feather (px)")
+
+        final_m1 = gr.Image(label="Final QR (TXT2IMG)")
+        ctrl_m1 = gr.Image(label="Control QR used")
+
+        gr.Button("Generate (Method 1)").click(
+            qr_txt2img,
+            [url_m1, prompt_m1, neg_m1, steps_m1, cfg_m1, size_m1, border_m1, back_m1, blur_m1,
+             weight_m1, start_m1, end_m1, seed_m1, repair_m1, feather_m1],
+            [final_m1, ctrl_m1]
+        )
+
+    # ---- Two-stage variant (IMG2IMG) ----
+    with gr.Tab("QR (Two-stage IMG2IMG)"):
         url = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com")
-        s_prompt = gr.Textbox(label="Style prompt (no 'QR code' needed)", value="baroque palace interior, intricate roots, dramatic lighting, ultra detailed")
-        s_negative= gr.Textbox(label="Negative prompt", value="lowres, low contrast, blurry, jpeg artifacts, worst quality, watermark, text")
+        s_prompt = gr.Textbox(label="Style prompt (no 'QR code' needed)",
+                              value="epic phoenix in flames, dramatic lighting, detailed, 8k")
+        s_negative = gr.Textbox(label="Negative prompt",
+                                value="lowres, low contrast, blurry, jpeg artifacts, worst quality, bad anatomy, extra digits")
         size = gr.Slider(384, 1024, value=768, step=64, label="Canvas (px)")
         steps2 = gr.Slider(10, 60, value=28, step=1, label="Total steps")
         cfg2 = gr.Slider(1.0, 12.0, value=6.5, step=0.1, label="CFG")
@@ -228,7 +328,7 @@ with gr.Blocks() as demo:
         final_img = gr.Image(label="Final stylized QR")
         ctrl_img = gr.Image(label="Control QR used")
         base_img = gr.Image(label="Base art (Stage A)")
-        gr.Button("Stylize QR").click(
+        gr.Button("Stylize QR (Two-stage)").click(
             qr_stylize,
             [url, s_prompt, s_negative, steps2, cfg2, size, border, back_col, blur, qr_w, repair, feather, seed2, denoise],
            [final_img, ctrl_img, base_img]
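Note: the new code paths lean on three helpers (snap8, make_qr, enforce_qr_contrast) that are defined earlier in app.py and untouched by this commit, so they never appear in the hunks. Below is a minimal sketch of what such helpers plausibly look like, for orientation only; everything beyond the call signatures visible above is an assumption.

# Hypothetical reconstructions of helpers not shown in the diff;
# the real app.py versions may differ. Signatures match the call sites above.
import qrcode
from PIL import Image, ImageFilter
from qrcode.constants import ERROR_CORRECT_H

def snap8(n: int) -> int:
    """SD v1.5 latents need width/height divisible by 8; snap down (assumed behavior)."""
    return max((int(n) // 8) * 8, 64)

def make_qr(url: str, size: int, border: int, back_color: str, blur_radius: float) -> Image.Image:
    """Render a high-error-correction QR, resize to the canvas, optionally soften."""
    q = qrcode.QRCode(error_correction=ERROR_CORRECT_H, border=border)
    q.add_data(url)
    q.make(fit=True)
    img = q.make_image(fill_color="black", back_color=back_color).convert("RGB")
    img = img.resize((size, size), Image.NEAREST)
    if blur_radius > 0:
        # Blurring weakens the control signal so the art can breathe.
        img = img.filter(ImageFilter.GaussianBlur(blur_radius))
    return img

def enforce_qr_contrast(img: Image.Image, qr_img: Image.Image, strength: float, feather: float) -> Image.Image:
    """Pull the stylized output back toward the clean QR where modules must stay readable."""
    g = qr_img.convert("L")
    if feather > 0:
        g = g.filter(ImageFilter.GaussianBlur(feather))
    # Strong dark/light module areas get re-asserted; midtones are left alone.
    mask = g.point(lambda p: min(255, int(abs(p - 128) * 2 * strength)))
    return Image.composite(qr_img.convert("RGB"), img, mask)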
7
  from qrcode.constants import ERROR_CORRECT_H
8
  from diffusers import (
9
  StableDiffusionPipeline,
10
+ StableDiffusionControlNetPipeline, # TXT2IMG (Method 1)
11
+ StableDiffusionControlNetImg2ImgPipeline, # two-stage img2img
12
  ControlNetModel,
13
  DPMSolverMultistepScheduler,
14
  )
 
17
  os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl")
18
 
19
  MODEL_ID = "runwayml/stable-diffusion-v1-5"
20
+ CN_QRMON = "monster-labs/control_v1p_sd15_qrcode_monster"
21
  DTYPE = torch.float16
22
 
23
  # ---------- helpers ----------
 
90
  return _SD
91
 
92
  def get_qrmon_txt2img_pipe():
93
+ """Method 1 (TXT2IMG): SD + ControlNet QR-Monster, no init image, only conditioning image."""
94
  global _CN_TXT2IMG
95
  if _CN_TXT2IMG is None:
96
  cn = ControlNetModel.from_pretrained(CN_QRMON, torch_dtype=DTYPE, use_safetensors=True)
 
106
  return _CN_TXT2IMG
107
 
108
  def get_qrmon_img2img_pipe():
109
+ """Two-stage B: SD img2img with ControlNet QR-Monster (kept so you can compare)."""
110
  global _CN_IMG2IMG
111
  if _CN_IMG2IMG is None:
112
  cn = ControlNetModel.from_pretrained(CN_QRMON, torch_dtype=DTYPE, use_safetensors=True)
 
142
  )
143
  return out.images[0]
144
 
145
+ # ---- Method 1: TXT2IMG ControlNet (no init image; QR as conditioning only) ----
146
+ @spaces.GPU(duration=120)
147
+ def qr_txt2img(url: str, style_prompt: str, negative: str, steps: int, cfg: float,
148
+ size: int, border: int, back_color: str, blur: float,
149
+ qr_weight: float, start: float, end: float, seed: int,
150
+ repair_strength: float, feather: float):
151
+ s = snap8(size)
152
+ qr_img = make_qr(url=url, size=s, border=int(border), back_color=back_color, blur_radius=float(blur))
153
+
154
+ if int(seed) < 0:
155
+ seed = random.randint(0, 2**31 - 1)
156
+ gen = torch.Generator(device="cuda").manual_seed(int(seed))
157
+
158
+ pipe = get_qrmon_txt2img_pipe()
159
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
160
+ gc.collect()
161
+ with torch.autocast(device_type="cuda", dtype=DTYPE):
162
+ try:
163
+ out = pipe(
164
+ prompt=str(style_prompt),
165
+ negative_prompt=str(negative or ""),
166
+ image=qr_img, # ControlNet conditioning
167
+ controlnet_conditioning_scale=float(qr_weight),
168
+ control_guidance_start=float(start),
169
+ control_guidance_end=float(end),
170
+ num_inference_steps=int(steps),
171
+ guidance_scale=float(cfg),
172
+ width=s, height=s,
173
+ generator=gen,
174
+ )
175
+ except TypeError:
176
+ # Fallback for older diffusers param names
177
+ out = pipe(
178
+ prompt=str(style_prompt),
179
+ negative_prompt=str(negative or ""),
180
+ control_image=qr_img,
181
+ controlnet_conditioning_scale=float(qr_weight),
182
+ controlnet_start=float(start),
183
+ controlnet_end=float(end),
184
+ num_inference_steps=int(steps),
185
+ guidance_scale=float(cfg),
186
+ width=s, height=s,
187
+ generator=gen,
188
+ )
189
+
190
+ img = out.images[0]
191
+ img = enforce_qr_contrast(img, qr_img, strength=float(repair_strength), feather=float(feather))
192
+ return img, qr_img
193
+
194
+ # ---- Two-stage (your previous Method-1 variant using IMG2IMG) ----
195
  @spaces.GPU(duration=120)
196
  def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: float,
197
  size: int, border: int, back_color: str, blur: float,
 
199
  denoise: float = 0.45):
200
  s = snap8(size)
201
 
202
+ # Stage A: base art (txt2img)
203
  sd = get_sd_pipe()
204
  if int(seed) < 0:
205
  seed = random.randint(0, 2**31 - 1)
 
209
  gc.collect()
210
  with torch.autocast(device_type="cuda", dtype=DTYPE):
211
  base = sd(
212
+ prompt=str(style_prompt),
213
  negative_prompt=str(negative or ""),
214
  num_inference_steps=max(int(steps)//2, 12),
215
  guidance_scale=float(cfg),
 
217
  generator=gen,
218
  ).images[0]
219
 
220
+ # control image (QR)
221
  qr_img = make_qr(url=url, size=s, border=int(border),
222
  back_color=back_color, blur_radius=float(blur))
223
 
224
+ # Stage B: img2img with ControlNet QR
225
  pipe = get_qrmon_img2img_pipe()
226
  if torch.cuda.is_available(): torch.cuda.empty_cache()
227
  gc.collect()
228
  with torch.autocast(device_type="cuda", dtype=DTYPE):
229
+ try:
230
+ out = pipe(
231
+ prompt=str(style_prompt),
232
+ negative_prompt=str(negative or ""),
233
+ image=base, # init image
234
+ image_guidance_scale=None,
235
+ control_image=qr_img, # QR conditioning
236
+ strength=float(denoise),
237
+ controlnet_conditioning_scale=float(qr_weight),
238
+ control_guidance_start=0.05,
239
+ control_guidance_end=0.95,
240
+ num_inference_steps=int(steps),
241
+ guidance_scale=float(cfg),
242
+ width=s, height=s,
243
+ generator=gen,
244
+ )
245
+ except TypeError:
246
+ out = pipe(
247
+ prompt=str(style_prompt),
248
+ negative_prompt=str(negative or ""),
249
+ image=base,
250
+ control_image=qr_img,
251
+ strength=float(denoise),
252
+ controlnet_conditioning_scale=float(qr_weight),
253
+ controlnet_start=0.05,
254
+ controlnet_end=0.95,
255
+ num_inference_steps=int(steps),
256
+ guidance_scale=float(cfg),
257
+ width=s, height=s,
258
+ generator=gen,
259
+ )
260
 
261
  img = out.images[0]
262
  img = enforce_qr_contrast(img, qr_img, strength=float(repair_strength), feather=float(feather))
 
264
 
265
  # ---------- UI ----------
266
  with gr.Blocks() as demo:
267
+ gr.Markdown("# ZeroGPU Stable Diffusion + AI QR Codes")
268
 
269
  with gr.Tab("Text → Image"):
270
  prompt = gr.Textbox(label="Prompt", value="a cozy reading nook, warm sunlight, cinematic lighting, highly detailed")
 
277
  out_img = gr.Image(label="Image", interactive=False)
278
  gr.Button("Generate").click(txt2img, [prompt, negative, steps, cfg, width, height, seed], out_img)
279
 
280
+ # ---- Method 1: TXT2IMG ControlNet ----
281
+ with gr.Tab("QR (Method 1 — TXT2IMG)"):
282
+ url_m1 = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com")
283
+ prompt_m1 = gr.Textbox(label="Style prompt (no 'QR code' needed)",
284
+ value="epic phoenix in flames, dramatic lighting, detailed, 8k")
285
+ neg_m1 = gr.Textbox(label="Negative prompt",
286
+ value="lowres, low contrast, blurry, jpeg artifacts, worst quality, bad anatomy, extra digits")
287
+ size_m1 = gr.Slider(384, 1024, value=768, step=64, label="Canvas (px)")
288
+ steps_m1 = gr.Slider(10, 60, value=28, step=1, label="Steps")
289
+ cfg_m1 = gr.Slider(1.0, 12.0, value=6.5, step=0.1, label="CFG")
290
+ border_m1 = gr.Slider(4, 20, value=12, step=1, label="QR border (quiet zone)")
291
+ back_m1 = gr.ColorPicker(value="#808080", label="QR background")
292
+ blur_m1 = gr.Slider(0.0, 3.0, value=1.2, step=0.1, label="Soften control (blur)")
293
+ weight_m1 = gr.Slider(0.6, 1.6, value=1.2, step=0.05, label="QR control weight")
294
+ start_m1 = gr.Slider(0.0, 1.0, value=0.05, step=0.01, label="Control start")
295
+ end_m1 = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="Control end")
296
+ seed_m1 = gr.Number(value=-1, precision=0, label="Seed (-1 random)")
297
+ repair_m1 = gr.Slider(0.0, 1.0, value=0.6, step=0.05, label="Post repair strength")
298
+ feather_m1 = gr.Slider(0.0, 3.0, value=1.0, step=0.1, label="Repair feather (px)")
299
+
300
+ final_m1 = gr.Image(label="Final QR (TXT2IMG)")
301
+ ctrl_m1 = gr.Image(label="Control QR used")
302
+
303
+ gr.Button("Generate (Method 1)").click(
304
+ qr_txt2img,
305
+ [url_m1, prompt_m1, neg_m1, steps_m1, cfg_m1, size_m1, border_m1, back_m1, blur_m1,
306
+ weight_m1, start_m1, end_m1, seed_m1, repair_m1, feather_m1],
307
+ [final_m1, ctrl_m1]
308
+ )
309
+
310
+ # ---- Two-stage (Method-1 variant, IMG2IMG) ----
311
+ with gr.Tab("QR (Two-stage IMG2IMG)"):
312
  url = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com")
313
+ s_prompt = gr.Textbox(label="Style prompt (no 'QR code' needed)",
314
+ value="epic phoenix in flames, dramatic lighting, detailed, 8k")
315
+ s_negative= gr.Textbox(label="Negative prompt",
316
+ value="lowres, low contrast, blurry, jpeg artifacts, worst quality, bad anatomy, extra digits")
317
  size = gr.Slider(384, 1024, value=768, step=64, label="Canvas (px)")
318
  steps2 = gr.Slider(10, 60, value=28, step=1, label="Total steps")
319
  cfg2 = gr.Slider(1.0, 12.0, value=6.5, step=0.1, label="CFG")
 
328
  final_img = gr.Image(label="Final stylized QR")
329
  ctrl_img = gr.Image(label="Control QR used")
330
  base_img = gr.Image(label="Base art (Stage A)")
331
+ gr.Button("Stylize QR (Two-stage)").click(
332
  qr_stylize,
333
  [url, s_prompt, s_negative, steps2, cfg2, size, border, back_col, blur, qr_w, repair, feather, seed2, denoise],
334
  [final_img, ctrl_img, base_img]
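Neither method guarantees the result still scans: control weight, blur, and repair strength all trade scannability for aesthetics. A quick decode check on the final image is worth running after either tab, sketched here with OpenCV (an assumed extra dependency, not part of this commit):

import cv2
import numpy as np
from PIL import Image

def still_scans(img: Image.Image, expected: str) -> bool:
    """True if OpenCV decodes the stylized image back to the original payload."""
    bgr = cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)
    data, points, _ = cv2.QRCodeDetector().detectAndDecode(bgr)
    return points is not None and data == expected

# Example: still_scans(final, "http://www.mybirdfire.com")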