Tanut committed
Commit e8943d1 · Parent(s): ee61c84

Testing img2img

Files changed (1): app.py (+128 -28)
app.py CHANGED
@@ -7,8 +7,8 @@ import qrcode
 from qrcode.constants import ERROR_CORRECT_H
 from diffusers import (
     StableDiffusionPipeline,
-    StableDiffusionControlNetPipeline,
-    StableDiffusionControlNetImg2ImgPipeline,  # NEW: img2img pipeline
+    StableDiffusionControlNetPipeline,          # TXT2IMG (Method 1)
+    StableDiffusionControlNetImg2ImgPipeline,   # two-stage img2img
     ControlNetModel,
     DPMSolverMultistepScheduler,
 )
@@ -17,7 +17,7 @@ from diffusers import (
 os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl")

 MODEL_ID = "runwayml/stable-diffusion-v1-5"
-CN_QRMON = "monster-labs/control_v1p_sd15_qrcode_monster"
+CN_QRMON = "monster-labs/control_v1p_sd15_qrcode_monster"
 DTYPE = torch.float16

 # ---------- helpers ----------
@@ -90,7 +90,7 @@ def get_sd_pipe():
     return _SD

 def get_qrmon_txt2img_pipe():
-    """(kept for completeness; not used in the two-stage flow)"""
+    """Method 1 (TXT2IMG): SD + ControlNet QR-Monster; no init image, only a conditioning image."""
     global _CN_TXT2IMG
     if _CN_TXT2IMG is None:
         cn = ControlNetModel.from_pretrained(CN_QRMON, torch_dtype=DTYPE, use_safetensors=True)
@@ -106,7 +106,7 @@ def get_qrmon_txt2img_pipe():
     return _CN_TXT2IMG

 def get_qrmon_img2img_pipe():
-    """This is the pipeline we want for stage B."""
+    """Two-stage, Stage B: SD img2img with ControlNet QR-Monster (kept for comparison)."""
     global _CN_IMG2IMG
     if _CN_IMG2IMG is None:
         cn = ControlNetModel.from_pretrained(CN_QRMON, torch_dtype=DTYPE, use_safetensors=True)
@@ -142,6 +142,56 @@ def txt2img(prompt: str, negative: str, steps: int, cfg: float, width: int, heig
     )
     return out.images[0]

+# ---- Method 1: TXT2IMG ControlNet (no init image; QR as conditioning only) ----
+@spaces.GPU(duration=120)
+def qr_txt2img(url: str, style_prompt: str, negative: str, steps: int, cfg: float,
+               size: int, border: int, back_color: str, blur: float,
+               qr_weight: float, start: float, end: float, seed: int,
+               repair_strength: float, feather: float):
+    s = snap8(size)
+    qr_img = make_qr(url=url, size=s, border=int(border), back_color=back_color, blur_radius=float(blur))
+
+    if int(seed) < 0:
+        seed = random.randint(0, 2**31 - 1)
+    gen = torch.Generator(device="cuda").manual_seed(int(seed))
+
+    pipe = get_qrmon_txt2img_pipe()
+    if torch.cuda.is_available(): torch.cuda.empty_cache()
+    gc.collect()
+    with torch.autocast(device_type="cuda", dtype=DTYPE):
+        try:
+            out = pipe(
+                prompt=str(style_prompt),
+                negative_prompt=str(negative or ""),
+                image=qr_img,  # ControlNet conditioning image
+                controlnet_conditioning_scale=float(qr_weight),
+                control_guidance_start=float(start),
+                control_guidance_end=float(end),
+                num_inference_steps=int(steps),
+                guidance_scale=float(cfg),
+                width=s, height=s,
+                generator=gen,
+            )
+        except TypeError:
+            # Fallback for older diffusers param names
+            out = pipe(
+                prompt=str(style_prompt),
+                negative_prompt=str(negative or ""),
+                control_image=qr_img,
+                controlnet_conditioning_scale=float(qr_weight),
+                controlnet_start=float(start),
+                controlnet_end=float(end),
+                num_inference_steps=int(steps),
+                guidance_scale=float(cfg),
+                width=s, height=s,
+                generator=gen,
+            )
+
+    img = out.images[0]
+    img = enforce_qr_contrast(img, qr_img, strength=float(repair_strength), feather=float(feather))
+    return img, qr_img
+
+# ---- Two-stage variant (Stage A txt2img base, Stage B ControlNet img2img) ----
 @spaces.GPU(duration=120)
 def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: float,
                size: int, border: int, back_color: str, blur: float,
@@ -149,7 +199,7 @@ def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: floa
                denoise: float = 0.45):
     s = snap8(size)

-    # --- Stage A: base art (txt2img) ---
+    # Stage A: base art (txt2img)
     sd = get_sd_pipe()
     if int(seed) < 0:
         seed = random.randint(0, 2**31 - 1)
@@ -159,7 +209,7 @@ def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: floa
     gc.collect()
     with torch.autocast(device_type="cuda", dtype=DTYPE):
         base = sd(
-            prompt=str(style_prompt),  # don't include "QR code" here
+            prompt=str(style_prompt),
             negative_prompt=str(negative or ""),
             num_inference_steps=max(int(steps)//2, 12),
             guidance_scale=float(cfg),
@@ -167,29 +217,46 @@ def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: floa
             generator=gen,
         ).images[0]

-    # Control image (QR)
+    # control image (QR)
     qr_img = make_qr(url=url, size=s, border=int(border),
                      back_color=back_color, blur_radius=float(blur))

-    # --- Stage B: ControlNet img2img (QR Monster) ---
+    # Stage B: img2img with ControlNet QR
     pipe = get_qrmon_img2img_pipe()
     if torch.cuda.is_available(): torch.cuda.empty_cache()
     gc.collect()
     with torch.autocast(device_type="cuda", dtype=DTYPE):
-        out = pipe(
-            prompt=str(style_prompt),
-            negative_prompt=str(negative or ""),
-            image=base,               # init image (img2img)
-            control_image=qr_img,     # control image (QR)
-            strength=float(denoise),  # 0.3–0.6 keeps composition
-            controlnet_conditioning_scale=float(qr_weight),
-            control_guidance_start=0.05,
-            control_guidance_end=0.95,
-            num_inference_steps=int(steps),
-            guidance_scale=float(cfg),
-            width=s, height=s,
-            generator=gen,
-        )
+        try:
+            out = pipe(
+                prompt=str(style_prompt),
+                negative_prompt=str(negative or ""),
+                image=base,            # init image
+                image_guidance_scale=None,
+                control_image=qr_img,  # QR conditioning
+                strength=float(denoise),
+                controlnet_conditioning_scale=float(qr_weight),
+                control_guidance_start=0.05,
+                control_guidance_end=0.95,
+                num_inference_steps=int(steps),
+                guidance_scale=float(cfg),
+                width=s, height=s,
+                generator=gen,
+            )
+        except TypeError:
+            out = pipe(
+                prompt=str(style_prompt),
+                negative_prompt=str(negative or ""),
+                image=base,
+                control_image=qr_img,
+                strength=float(denoise),
+                controlnet_conditioning_scale=float(qr_weight),
+                controlnet_start=0.05,
+                controlnet_end=0.95,
+                num_inference_steps=int(steps),
+                guidance_scale=float(cfg),
+                width=s, height=s,
+                generator=gen,
+            )

     img = out.images[0]
     img = enforce_qr_contrast(img, qr_img, strength=float(repair_strength), feather=float(feather))
@@ -197,7 +264,7 @@ def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: floa

 # ---------- UI ----------
 with gr.Blocks() as demo:
-    gr.Markdown("# ZeroGPU Stable Diffusion + AI QR Codes (Monster v2)")
+    gr.Markdown("# ZeroGPU Stable Diffusion + AI QR Codes")

     with gr.Tab("Text → Image"):
         prompt = gr.Textbox(label="Prompt", value="a cozy reading nook, warm sunlight, cinematic lighting, highly detailed")
@@ -210,10 +277,43 @@ with gr.Blocks() as demo:
         out_img = gr.Image(label="Image", interactive=False)
         gr.Button("Generate").click(txt2img, [prompt, negative, steps, cfg, width, height, seed], out_img)

-    with gr.Tab("QR Code Stylizer (ControlNet Monster — two-stage)"):
+    # ---- Method 1: TXT2IMG ControlNet ----
+    with gr.Tab("QR (Method 1 — TXT2IMG)"):
+        url_m1 = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com")
+        prompt_m1 = gr.Textbox(label="Style prompt (no 'QR code' needed)",
+                               value="epic phoenix in flames, dramatic lighting, detailed, 8k")
+        neg_m1 = gr.Textbox(label="Negative prompt",
+                            value="lowres, low contrast, blurry, jpeg artifacts, worst quality, bad anatomy, extra digits")
+        size_m1 = gr.Slider(384, 1024, value=768, step=64, label="Canvas (px)")
+        steps_m1 = gr.Slider(10, 60, value=28, step=1, label="Steps")
+        cfg_m1 = gr.Slider(1.0, 12.0, value=6.5, step=0.1, label="CFG")
+        border_m1 = gr.Slider(4, 20, value=12, step=1, label="QR border (quiet zone)")
+        back_m1 = gr.ColorPicker(value="#808080", label="QR background")
+        blur_m1 = gr.Slider(0.0, 3.0, value=1.2, step=0.1, label="Soften control (blur)")
+        weight_m1 = gr.Slider(0.6, 1.6, value=1.2, step=0.05, label="QR control weight")
+        start_m1 = gr.Slider(0.0, 1.0, value=0.05, step=0.01, label="Control start")
+        end_m1 = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="Control end")
+        seed_m1 = gr.Number(value=-1, precision=0, label="Seed (-1 random)")
+        repair_m1 = gr.Slider(0.0, 1.0, value=0.6, step=0.05, label="Post repair strength")
+        feather_m1 = gr.Slider(0.0, 3.0, value=1.0, step=0.1, label="Repair feather (px)")
+
+        final_m1 = gr.Image(label="Final QR (TXT2IMG)")
+        ctrl_m1 = gr.Image(label="Control QR used")
+
+        gr.Button("Generate (Method 1)").click(
+            qr_txt2img,
+            [url_m1, prompt_m1, neg_m1, steps_m1, cfg_m1, size_m1, border_m1, back_m1, blur_m1,
+             weight_m1, start_m1, end_m1, seed_m1, repair_m1, feather_m1],
+            [final_m1, ctrl_m1]
+        )
+
+    # ---- Two-stage variant (IMG2IMG) ----
+    with gr.Tab("QR (Two-stage IMG2IMG)"):
         url = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com")
-        s_prompt = gr.Textbox(label="Style prompt (no 'QR code' needed)", value="baroque palace interior, intricate roots, dramatic lighting, ultra detailed")
-        s_negative= gr.Textbox(label="Negative prompt", value="lowres, low contrast, blurry, jpeg artifacts, worst quality, watermark, text")
+        s_prompt = gr.Textbox(label="Style prompt (no 'QR code' needed)",
+                              value="epic phoenix in flames, dramatic lighting, detailed, 8k")
+        s_negative = gr.Textbox(label="Negative prompt",
+                                value="lowres, low contrast, blurry, jpeg artifacts, worst quality, bad anatomy, extra digits")
         size = gr.Slider(384, 1024, value=768, step=64, label="Canvas (px)")
         steps2 = gr.Slider(10, 60, value=28, step=1, label="Total steps")
         cfg2 = gr.Slider(1.0, 12.0, value=6.5, step=0.1, label="CFG")
@@ -228,7 +328,7 @@ with gr.Blocks() as demo:
         final_img = gr.Image(label="Final stylized QR")
         ctrl_img = gr.Image(label="Control QR used")
         base_img = gr.Image(label="Base art (Stage A)")
-        gr.Button("Stylize QR").click(
+        gr.Button("Stylize QR (Two-stage)").click(
             qr_stylize,
             [url, s_prompt, s_negative, steps2, cfg2, size, border, back_col, blur, qr_w, repair, feather, seed2, denoise],
            [final_img, ctrl_img, base_img]
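Note: the new code paths lean on three helpers (snap8, make_qr, enforce_qr_contrast) that are defined earlier in app.py and untouched by this commit, so they never appear in the hunks. Below is a minimal sketch of what such helpers plausibly look like, for orientation only; everything beyond the call signatures visible above is an assumption.

# Hypothetical reconstructions of helpers not shown in the diff;
# the real app.py versions may differ. Signatures match the call sites above.
import qrcode
from PIL import Image, ImageFilter
from qrcode.constants import ERROR_CORRECT_H

def snap8(n: int) -> int:
    """SD v1.5 latents need width/height divisible by 8; snap down (assumed behavior)."""
    return max((int(n) // 8) * 8, 64)

def make_qr(url: str, size: int, border: int, back_color: str, blur_radius: float) -> Image.Image:
    """Render a high-error-correction QR, resize to the canvas, optionally soften."""
    q = qrcode.QRCode(error_correction=ERROR_CORRECT_H, border=border)
    q.add_data(url)
    q.make(fit=True)
    img = q.make_image(fill_color="black", back_color=back_color).convert("RGB")
    img = img.resize((size, size), Image.NEAREST)
    if blur_radius > 0:
        # Blurring weakens the control signal so the art can breathe.
        img = img.filter(ImageFilter.GaussianBlur(blur_radius))
    return img

def enforce_qr_contrast(img: Image.Image, qr_img: Image.Image, strength: float, feather: float) -> Image.Image:
    """Pull the stylized output back toward the clean QR where modules must stay readable."""
    g = qr_img.convert("L")
    if feather > 0:
        g = g.filter(ImageFilter.GaussianBlur(feather))
    # Strong dark/light module areas get re-asserted; midtones are left alone.
    mask = g.point(lambda p: min(255, int(abs(p - 128) * 2 * strength)))
    return Image.composite(qr_img.convert("RGB"), img, mask)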
7
  from qrcode.constants import ERROR_CORRECT_H
8
  from diffusers import (
9
  StableDiffusionPipeline,
10
+ StableDiffusionControlNetPipeline, # TXT2IMG (Method 1)
11
+ StableDiffusionControlNetImg2ImgPipeline, # two-stage img2img
12
  ControlNetModel,
13
  DPMSolverMultistepScheduler,
14
  )
 
17
  os.environ.setdefault("MPLCONFIGDIR", "/tmp/mpl")
18
 
19
  MODEL_ID = "runwayml/stable-diffusion-v1-5"
20
+ CN_QRMON = "monster-labs/control_v1p_sd15_qrcode_monster"
21
  DTYPE = torch.float16
22
 
23
  # ---------- helpers ----------
 
90
  return _SD
91
 
92
  def get_qrmon_txt2img_pipe():
93
+ """Method 1 (TXT2IMG): SD + ControlNet QR-Monster, no init image, only conditioning image."""
94
  global _CN_TXT2IMG
95
  if _CN_TXT2IMG is None:
96
  cn = ControlNetModel.from_pretrained(CN_QRMON, torch_dtype=DTYPE, use_safetensors=True)
 
106
  return _CN_TXT2IMG
107
 
108
  def get_qrmon_img2img_pipe():
109
+ """Two-stage B: SD img2img with ControlNet QR-Monster (kept so you can compare)."""
110
  global _CN_IMG2IMG
111
  if _CN_IMG2IMG is None:
112
  cn = ControlNetModel.from_pretrained(CN_QRMON, torch_dtype=DTYPE, use_safetensors=True)
 
142
  )
143
  return out.images[0]
144
 
145
+ # ---- Method 1: TXT2IMG ControlNet (no init image; QR as conditioning only) ----
146
+ @spaces.GPU(duration=120)
147
+ def qr_txt2img(url: str, style_prompt: str, negative: str, steps: int, cfg: float,
148
+ size: int, border: int, back_color: str, blur: float,
149
+ qr_weight: float, start: float, end: float, seed: int,
150
+ repair_strength: float, feather: float):
151
+ s = snap8(size)
152
+ qr_img = make_qr(url=url, size=s, border=int(border), back_color=back_color, blur_radius=float(blur))
153
+
154
+ if int(seed) < 0:
155
+ seed = random.randint(0, 2**31 - 1)
156
+ gen = torch.Generator(device="cuda").manual_seed(int(seed))
157
+
158
+ pipe = get_qrmon_txt2img_pipe()
159
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
160
+ gc.collect()
161
+ with torch.autocast(device_type="cuda", dtype=DTYPE):
162
+ try:
163
+ out = pipe(
164
+ prompt=str(style_prompt),
165
+ negative_prompt=str(negative or ""),
166
+ image=qr_img, # ControlNet conditioning
167
+ controlnet_conditioning_scale=float(qr_weight),
168
+ control_guidance_start=float(start),
169
+ control_guidance_end=float(end),
170
+ num_inference_steps=int(steps),
171
+ guidance_scale=float(cfg),
172
+ width=s, height=s,
173
+ generator=gen,
174
+ )
175
+ except TypeError:
176
+ # Fallback for older diffusers param names
177
+ out = pipe(
178
+ prompt=str(style_prompt),
179
+ negative_prompt=str(negative or ""),
180
+ control_image=qr_img,
181
+ controlnet_conditioning_scale=float(qr_weight),
182
+ controlnet_start=float(start),
183
+ controlnet_end=float(end),
184
+ num_inference_steps=int(steps),
185
+ guidance_scale=float(cfg),
186
+ width=s, height=s,
187
+ generator=gen,
188
+ )
189
+
190
+ img = out.images[0]
191
+ img = enforce_qr_contrast(img, qr_img, strength=float(repair_strength), feather=float(feather))
192
+ return img, qr_img
193
+
194
+ # ---- Two-stage (your previous Method-1 variant using IMG2IMG) ----
195
  @spaces.GPU(duration=120)
196
  def qr_stylize(url: str, style_prompt: str, negative: str, steps: int, cfg: float,
197
  size: int, border: int, back_color: str, blur: float,
 
199
  denoise: float = 0.45):
200
  s = snap8(size)
201
 
202
+ # Stage A: base art (txt2img)
203
  sd = get_sd_pipe()
204
  if int(seed) < 0:
205
  seed = random.randint(0, 2**31 - 1)
 
209
  gc.collect()
210
  with torch.autocast(device_type="cuda", dtype=DTYPE):
211
  base = sd(
212
+ prompt=str(style_prompt),
213
  negative_prompt=str(negative or ""),
214
  num_inference_steps=max(int(steps)//2, 12),
215
  guidance_scale=float(cfg),
 
217
  generator=gen,
218
  ).images[0]
219
 
220
+ # control image (QR)
221
  qr_img = make_qr(url=url, size=s, border=int(border),
222
  back_color=back_color, blur_radius=float(blur))
223
 
224
+ # Stage B: img2img with ControlNet QR
225
  pipe = get_qrmon_img2img_pipe()
226
  if torch.cuda.is_available(): torch.cuda.empty_cache()
227
  gc.collect()
228
  with torch.autocast(device_type="cuda", dtype=DTYPE):
229
+ try:
230
+ out = pipe(
231
+ prompt=str(style_prompt),
232
+ negative_prompt=str(negative or ""),
233
+ image=base, # init image
234
+ image_guidance_scale=None,
235
+ control_image=qr_img, # QR conditioning
236
+ strength=float(denoise),
237
+ controlnet_conditioning_scale=float(qr_weight),
238
+ control_guidance_start=0.05,
239
+ control_guidance_end=0.95,
240
+ num_inference_steps=int(steps),
241
+ guidance_scale=float(cfg),
242
+ width=s, height=s,
243
+ generator=gen,
244
+ )
245
+ except TypeError:
246
+ out = pipe(
247
+ prompt=str(style_prompt),
248
+ negative_prompt=str(negative or ""),
249
+ image=base,
250
+ control_image=qr_img,
251
+ strength=float(denoise),
252
+ controlnet_conditioning_scale=float(qr_weight),
253
+ controlnet_start=0.05,
254
+ controlnet_end=0.95,
255
+ num_inference_steps=int(steps),
256
+ guidance_scale=float(cfg),
257
+ width=s, height=s,
258
+ generator=gen,
259
+ )
260
 
261
  img = out.images[0]
262
  img = enforce_qr_contrast(img, qr_img, strength=float(repair_strength), feather=float(feather))
 
264
 
265
  # ---------- UI ----------
266
  with gr.Blocks() as demo:
267
+ gr.Markdown("# ZeroGPU Stable Diffusion + AI QR Codes")
268
 
269
  with gr.Tab("Text → Image"):
270
  prompt = gr.Textbox(label="Prompt", value="a cozy reading nook, warm sunlight, cinematic lighting, highly detailed")
 
277
  out_img = gr.Image(label="Image", interactive=False)
278
  gr.Button("Generate").click(txt2img, [prompt, negative, steps, cfg, width, height, seed], out_img)
279
 
280
+ # ---- Method 1: TXT2IMG ControlNet ----
281
+ with gr.Tab("QR (Method 1 — TXT2IMG)"):
282
+ url_m1 = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com")
283
+ prompt_m1 = gr.Textbox(label="Style prompt (no 'QR code' needed)",
284
+ value="epic phoenix in flames, dramatic lighting, detailed, 8k")
285
+ neg_m1 = gr.Textbox(label="Negative prompt",
286
+ value="lowres, low contrast, blurry, jpeg artifacts, worst quality, bad anatomy, extra digits")
287
+ size_m1 = gr.Slider(384, 1024, value=768, step=64, label="Canvas (px)")
288
+ steps_m1 = gr.Slider(10, 60, value=28, step=1, label="Steps")
289
+ cfg_m1 = gr.Slider(1.0, 12.0, value=6.5, step=0.1, label="CFG")
290
+ border_m1 = gr.Slider(4, 20, value=12, step=1, label="QR border (quiet zone)")
291
+ back_m1 = gr.ColorPicker(value="#808080", label="QR background")
292
+ blur_m1 = gr.Slider(0.0, 3.0, value=1.2, step=0.1, label="Soften control (blur)")
293
+ weight_m1 = gr.Slider(0.6, 1.6, value=1.2, step=0.05, label="QR control weight")
294
+ start_m1 = gr.Slider(0.0, 1.0, value=0.05, step=0.01, label="Control start")
295
+ end_m1 = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="Control end")
296
+ seed_m1 = gr.Number(value=-1, precision=0, label="Seed (-1 random)")
297
+ repair_m1 = gr.Slider(0.0, 1.0, value=0.6, step=0.05, label="Post repair strength")
298
+ feather_m1 = gr.Slider(0.0, 3.0, value=1.0, step=0.1, label="Repair feather (px)")
299
+
300
+ final_m1 = gr.Image(label="Final QR (TXT2IMG)")
301
+ ctrl_m1 = gr.Image(label="Control QR used")
302
+
303
+ gr.Button("Generate (Method 1)").click(
304
+ qr_txt2img,
305
+ [url_m1, prompt_m1, neg_m1, steps_m1, cfg_m1, size_m1, border_m1, back_m1, blur_m1,
306
+ weight_m1, start_m1, end_m1, seed_m1, repair_m1, feather_m1],
307
+ [final_m1, ctrl_m1]
308
+ )
309
+
310
+ # ---- Two-stage (Method-1 variant, IMG2IMG) ----
311
+ with gr.Tab("QR (Two-stage IMG2IMG)"):
312
  url = gr.Textbox(label="URL/Text", value="http://www.mybirdfire.com")
313
+ s_prompt = gr.Textbox(label="Style prompt (no 'QR code' needed)",
314
+ value="epic phoenix in flames, dramatic lighting, detailed, 8k")
315
+ s_negative= gr.Textbox(label="Negative prompt",
316
+ value="lowres, low contrast, blurry, jpeg artifacts, worst quality, bad anatomy, extra digits")
317
  size = gr.Slider(384, 1024, value=768, step=64, label="Canvas (px)")
318
  steps2 = gr.Slider(10, 60, value=28, step=1, label="Total steps")
319
  cfg2 = gr.Slider(1.0, 12.0, value=6.5, step=0.1, label="CFG")
 
328
  final_img = gr.Image(label="Final stylized QR")
329
  ctrl_img = gr.Image(label="Control QR used")
330
  base_img = gr.Image(label="Base art (Stage A)")
331
+ gr.Button("Stylize QR (Two-stage)").click(
332
  qr_stylize,
333
  [url, s_prompt, s_negative, steps2, cfg2, size, border, back_col, blur, qr_w, repair, feather, seed2, denoise],
334
  [final_img, ctrl_img, base_img]
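Neither method guarantees the result still scans: control weight, blur, and repair strength all trade scannability for aesthetics. A quick decode check on the final image is worth running after either tab, sketched here with OpenCV (an assumed extra dependency, not part of this commit):

import cv2
import numpy as np
from PIL import Image

def still_scans(img: Image.Image, expected: str) -> bool:
    """True if OpenCV decodes the stylized image back to the original payload."""
    bgr = cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)
    data, points, _ = cv2.QRCodeDetector().detectAndDecode(bgr)
    return points is not None and data == expected

# Example: still_scans(final, "http://www.mybirdfire.com")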