Gemini899 committed on
Commit 83d1db7 · verified · 1 Parent(s): 193cf00

Update app.py

Files changed (1):
  app.py (+109 -191)
app.py CHANGED
Old version (removed lines are prefixed with "-"):

@@ -1,118 +1,108 @@
-import spaces
 import gradio as gr
 import re
 from PIL import Image
 
-import os
-import numpy as np
-import torch
-from diffusers import StableDiffusionImg2ImgPipeline
 
-# Choose a higher-quality or specialized model.
-model_id = "SG161222/Realistic_Vision_V2.0"  # e.g. "runwayml/stable-diffusion-v1-5"
 
-# Typically use float16 to reduce memory usage if on GPU
 dtype = torch.float16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id, torch_dtype=dtype).to(device)
 
-def sanitize_prompt(prompt):
-    # Allow only alphanumeric characters, spaces, and basic punctuation
     allowed_chars = re.compile(r"[^a-zA-Z0-9\s.,!?-]")
-    sanitized_prompt = allowed_chars.sub("", prompt)
-    return sanitized_prompt
-
-def convert_to_fit_size(original_width_and_height, maximum_size=2048):
-    width, height = original_width_and_height
-    # If within maximum size on both sides, no need to shrink
-    if width <= maximum_size and height <= maximum_size:
-        return width, height
-
-    # Otherwise, scale down so the largest dimension = maximum_size
-    if width > height:
-        scaling_factor = maximum_size / width
-    else:
-        scaling_factor = maximum_size / height
-
-    new_width = int(width * scaling_factor)
-    new_height = int(height * scaling_factor)
-    return new_width, new_height
-
-def adjust_to_multiple_of_32(width: int, height: int):
-    # Stable Diffusion pipelines typically work best with dims multiple-of-32
-    width = width - (width % 32)
-    height = height - (height % 32)
-    # Ensure not to drop to zero
-    width = max(width, 32)
-    height = max(height, 32)
-    return width, height
-
-@spaces.GPU(duration=120)
-def process_images(
-    image,
-    prompt="a girl",
-    strength=0.75,
-    seed=0,
-    inference_step=30,
-    progress=gr.Progress(track_tqdm=True)
 ):
-    # Provide feedback in the Gradio UI
-    progress(0, desc="Starting")
 
-    def process_img2img(img, prompt="a person", strength=0.75, seed=0, num_inference_steps=30):
-        if img is None:
-            print("empty input image returned")
-            return None
-
-        # Make results reproducible
-        generator = torch.Generator(device).manual_seed(seed)
-
-        # 1) Resize the input image to fit within a maximum dimension
-        fit_width, fit_height = convert_to_fit_size(img.size)
-        # 2) Adjust final dimensions to multiples of 32
-        width, height = adjust_to_multiple_of_32(fit_width, fit_height)
-
-        # Use high-quality Lanczos downsampling
-        img = img.resize((width, height), Image.LANCZOS)
-
-        # For better quality, let's set guidance_scale ~7 and steps ~30
-        output = pipe(
-            prompt=prompt,
-            image=img,
-            generator=generator,
-            strength=strength,
-            guidance_scale=7.0,  # typical, can tune to 5-10
-            num_inference_steps=num_inference_steps,
-        )
-
-        pil_image = output.images[0]
-
-        # If we forcibly down/up scaled to multiple-of-32, let's restore to the "fit" size
-        # (not strictly necessary, but can preserve original aspect ratio exactly)
-        new_width, new_height = pil_image.size
-        if (new_width != fit_width) or (new_height != fit_height):
-            resized_image = pil_image.resize((fit_width, fit_height), Image.LANCZOS)
-            return resized_image
-
-        return pil_image
 
-    # Actually run the process
-    output = process_img2img(
-        img=image,
         prompt=prompt,
         strength=strength,
-        seed=seed,
-        num_inference_steps=inference_step
     )
 
-    return output
 
-def read_file(path: str) -> str:
-    with open(path, 'r', encoding='utf-8') as f:
-        content = f.read()
-    return content
 
 css = """
 #col-left {
     margin: 0 auto;
@@ -122,111 +112,39 @@ css = """
     margin: 0 auto;
     max-width: 640px;
 }
-.grid-container {
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    gap:10px
-}
-
-.image {
-    width: 128px;
-    height: 128px;
-    object-fit: cover;
-}
-
-.text {
-    font-size: 16px;
-}
 """
 
-with gr.Blocks(css=css, elem_id="demo-container") as demo:
-    with gr.Column():
-        # Replace "demo_header.html" and "demo_tools.html" with your actual files or remove if not needed
-        try:
-            gr.HTML(read_file("demo_header.html"))
-        except:
-            pass
-        try:
-            gr.HTML(read_file("demo_tools.html"))
-        except:
-            pass
 
     with gr.Row():
         with gr.Column():
-            image = gr.Image(
-                height=800,
-                sources=['upload','clipboard'],
-                image_mode='RGB',
-                elem_id="image_upload",
                 type="pil",
-                label="Upload"
             )
-            with gr.Row(elem_id="prompt-container", equal_height=False):
-                with gr.Row():
-                    prompt = gr.Textbox(
-                        label="Prompt",
-                        value="a portrait of a beautiful woman",
-                        placeholder="Your prompt",
-                        elem_id="prompt"
-                    )
-                    btn = gr.Button("Img2Img", elem_id="run_button", variant="primary")
-            with gr.Accordion(label="Advanced Settings", open=False):
-                with gr.Row(equal_height=True):
-                    strength = gr.Slider(
-                        value=0.75,
-                        minimum=0.0,
-                        maximum=1.0,
-                        step=0.01,
-                        label="strength"
-                    )
-                    seed = gr.Number(
-                        value=100,
-                        minimum=0,
-                        step=1,
-                        label="seed"
-                    )
-                    inference_step = gr.Number(
-                        value=30,
-                        minimum=1,
-                        step=1,
-                        label="num_inference_steps"
-                    )
-                    id_input = gr.Text(label="Name", visible=False)
-
-        with gr.Column():
-            image_out = gr.Image(
-                height=800,
-                sources=[],
-                label="Output",
-                elem_id="output-img",
-                format="jpg"
             )
 
-    # Optional examples. Replace with your own images or remove.
-    gr.Examples(
-        examples=[
-            ["examples/draw_input.jpg", None, "a woman, eyes closed, mouth opened"],
-            ["examples/gimp_input.jpg", None, "a woman, hand on neck"]
-        ],
-        inputs=[image, image_out, prompt]
-    )
-
-    # Maybe a footer file or custom HTML. If not present, remove.
-    try:
-        gr.HTML(gr.HTML(read_file("demo_footer.html")))
-    except:
-        pass
-
-    # When the "Img2Img" button is clicked or the prompt is submitted, run `process_images`.
-    gr.on(
-        triggers=[btn.click, prompt.submit],
-        fn=process_images,
-        inputs=[image, prompt, strength, seed, inference_step],
-        outputs=[image_out]
     )
 
 if __name__ == "__main__":
-    # Launch the Gradio app.
-    # If you set share=True, you'll get a public link.
-    demo.launch(share=True, show_error=True)
 
New version (added lines are prefixed with "+"):

@@ -1,118 +1,108 @@
 import gradio as gr
 import re
+import torch
 from PIL import Image
 
+import spaces
+from diffusers import StableDiffusionXLImg2ImgPipeline
 
+#
+# Load the two SDXL pipelines (base + refiner) globally, so they only load once.
+#
+BASE_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"
+REFINER_MODEL_ID = "stabilityai/stable-diffusion-xl-refiner-1.0"
 
 dtype = torch.float16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
+pipe_base = StableDiffusionXLImg2ImgPipeline.from_pretrained(BASE_MODEL_ID, torch_dtype=dtype).to(device)
+pipe_refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(REFINER_MODEL_ID, torch_dtype=dtype).to(device)
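Note: loading two full SDXL pipelines keeps two copies of components the refiner actually shares with the base model (the second text encoder and the VAE). A minimal sketch of the usual trim, assuming a recent diffusers release and the variable names defined above:

    # Possible alternative to the pipe_refiner line above: reuse the components
    # that the refiner shares with the base pipeline instead of loading them twice.
    pipe_refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
        REFINER_MODEL_ID,
        text_encoder_2=pipe_base.text_encoder_2,  # shared between base and refiner
        vae=pipe_base.vae,                        # shared between base and refiner
        torch_dtype=dtype,
    ).to(device)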
 
+#
+# Helper functions
+#
+def sanitize_prompt(prompt: str) -> str:
+    # Simple sanitization: remove suspicious characters
     allowed_chars = re.compile(r"[^a-zA-Z0-9\s.,!?-]")
+    return allowed_chars.sub("", prompt)
+
+def resize_to_multiple_of_64(image: Image.Image, max_dim: int = 1024):
+    """
+    Resizes the image so that width and height are both <= max_dim
+    and each dimension is a multiple of 64.
+    (SDXL often uses 1024x1024. You can use multiples of 128 if you prefer.)
+    """
+    w, h = image.size
+
+    # If the image is bigger than max_dim in either dimension, scale it down
+    ratio = min(max_dim / w, max_dim / h, 1.0)
+    new_w = int(w * ratio)
+    new_h = int(h * ratio)
+
+    # Round down to multiples of 64 for best results with SDXL
+    new_w = new_w - (new_w % 64)
+    new_h = new_h - (new_h % 64)
+
+    new_w = max(new_w, 64)
+    new_h = max(new_h, 64)
+    return image.resize((new_w, new_h), Image.LANCZOS)
+
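Note: a quick worked example of resize_to_multiple_of_64 as defined above. A 2048x1536 input gives a scale factor of exactly 0.5, so it lands on 1024x768, already multiples of 64. Be aware that int() truncates, so ratios that are not exactly representable in floating point can undershoot by a pixel before the floor-to-64 step; round() is the safer choice if hitting an exact target matters. A hypothetical check, not part of the commit:

    # Hypothetical smoke test for resize_to_multiple_of_64
    from PIL import Image
    img = Image.new("RGB", (2048, 1536))
    print(resize_to_multiple_of_64(img, max_dim=1024).size)  # (1024, 768)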
+@spaces.GPU(duration=240)  # Increase the duration if needed (SDXL can be slow)
+def run_img2img_sdxl(
+    init_image,
+    prompt: str,
+    strength: float,
+    seed: int,
+    steps_base: int,
+    steps_refiner: int,
 ):
+    """
+    Runs a two-step SDXL (base + refiner) pass for high-quality img2img.
+    """
+    if init_image is None:
+        print("No input image provided.")
+        return None
 
+    # Clean up the prompt
+    prompt = sanitize_prompt(prompt)
 
+    # Ensure reproducibility
+    generator = torch.Generator(device).manual_seed(seed)
+
+    # Possibly resize the input to a smaller multiple-of-64 dimension
+    # (1024x1024 or smaller is typical for SDXL)
+    init_image = resize_to_multiple_of_64(init_image, max_dim=1024)
+
+    # 1) Base pass
+    base_output = pipe_base(
         prompt=prompt,
+        image=init_image,
         strength=strength,
+        guidance_scale=8.0,  # Adjust for more or less adherence to the prompt
+        num_inference_steps=steps_base,
+        generator=generator
+    )
+    base_image = base_output.images[0]
+
+    # 2) Refiner pass, intended as final detailing at a slightly higher
+    # guidance scale (see the note after this function on the strength value).
+    refiner_output = pipe_refiner(
+        prompt=prompt,
+        image=base_image,
+        strength=0.0,  # caution: 0.0 schedules zero denoising steps
+        guidance_scale=9.0,
+        num_inference_steps=steps_refiner,
+        generator=generator
     )
+    final_image = refiner_output.images[0]
 
+    return final_image
 
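Note: in diffusers img2img pipelines, the number of denoising steps actually executed is roughly strength * num_inference_steps, so strength=0.0 schedules zero steps and the refiner pass above is close to an encode/decode round trip rather than a detailing pass. A small nonzero strength (around 0.2 to 0.3) behaves as the original comment intends. A sketch of the other common wiring, assuming the denoising_end / denoising_start parameters of the SDXL pipelines, where the base hands latents directly to the refiner:

    # Possible alternative: split one denoising schedule between base and refiner.
    base_latents = pipe_base(
        prompt=prompt,
        image=init_image,
        strength=strength,
        num_inference_steps=steps_base,
        denoising_end=0.8,        # base covers the first ~80% of the schedule
        output_type="latent",     # hand latents straight to the refiner
        generator=generator,
    ).images
    final_image = pipe_refiner(
        prompt=prompt,
        image=base_latents,
        num_inference_steps=steps_base,  # keep the schedule length consistent
        denoising_start=0.8,             # refiner finishes the last ~20%
        generator=generator,
    ).images[0]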
 
+#
+# Gradio UI
+#
 css = """
 #col-left {
     margin: 0 auto;
@@ -122,111 +112,39 @@ css = """
     margin: 0 auto;
     max-width: 640px;
 }
 """
 
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("## SDXL Img2Img (Base + Refiner) — High Quality Demo")
 
    with gr.Row():
         with gr.Column():
+            init_image = gr.Image(
+                label="Init Image (Img2Img)",
                 type="pil",
+                image_mode="RGB",
+                height=512
             )
+            prompt = gr.Textbox(
+                label="Prompt",
+                placeholder="Describe what you want to see"
             )
+            run_button = gr.Button("Generate")
+            with gr.Accordion("Advanced Options", open=False):
+                strength = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Strength (img2img)")
+                seed = gr.Number(value=42, label="Seed", precision=0)
+                steps_base = gr.Slider(1, 100, value=50, step=1, label="Steps (Base)")
+                steps_refiner = gr.Slider(1, 100, value=30, step=1, label="Steps (Refiner)")
+
+        with gr.Column():
+            result_image = gr.Image(label="Result", height=512)
 
+    # Link the button to our function
+    run_button.click(
+        fn=run_img2img_sdxl,
+        inputs=[init_image, prompt, strength, seed, steps_base, steps_refiner],
+        outputs=[result_image]
     )
 
 if __name__ == "__main__":
+    demo.launch(share=True)
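Note: for a quick check outside Spaces, the handler can be called directly, since the spaces.GPU decorator is documented to be a no-op when not running on ZeroGPU hardware. A hypothetical local smoke test (file names invented):

    # Hypothetical local smoke test, not part of the commit
    from PIL import Image
    img = Image.open("input.jpg").convert("RGB")
    result = run_img2img_sdxl(img, "a castle at sunset, highly detailed",
                              strength=0.7, seed=42, steps_base=50, steps_refiner=30)
    if result is not None:
        result.save("output.jpg")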