Update app.py

app.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
 import numpy as np
 import spaces
 import torch
-from diffusers import FluxControlNetModel, FluxControlNetPipeline
+from diffusers import FluxImg2ImgPipeline
 from transformers import AutoProcessor, AutoModelForCausalLM
 from gradio_imageslider import ImageSlider
 from PIL import Image
@@ -40,7 +40,7 @@ print("📥 Downloading FLUX model...")
 model_path = snapshot_download(
     repo_id="black-forest-labs/FLUX.1-dev",
     repo_type="model",
-    ignore_patterns=["*.md", "
+    ignore_patterns=["*.md", "*.gitattributes"],
     local_dir="FLUX.1-dev",
     token=huggingface_token,
 )
@@ -58,16 +58,10 @@ florence_processor = AutoProcessor.from_pretrained(
     trust_remote_code=True
 )
 
-# Load FLUX ControlNet
-print("📥 Loading FLUX ControlNet...")
-controlnet = FluxControlNetModel.from_pretrained(
-    "jasperai/Flux.1-dev-Controlnet-Upscaler",
-    torch_dtype=torch.bfloat16
-).to(device)
-
-pipe = FluxControlNetPipeline.from_pretrained(
+# Load FLUX Img2Img pipeline
+print("📥 Loading FLUX Img2Img...")
+pipe = FluxImg2ImgPipeline.from_pretrained(
     model_path,
-    controlnet=controlnet,
     torch_dtype=torch.bfloat16
 )
 pipe.to(device)
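For context, a minimal sketch of the upscaling flow this pipeline swap enables: resize conventionally first, then let Flux img2img re-add detail at low strength. This is an illustrative helper under assumed usage of diffusers' `FluxImg2ImgPipeline`, not the app's exact code; the prompt string is a placeholder.

```python
from PIL import Image

def img2img_upscale(pipe, image, factor=2, strength=0.3, steps=28):
    # Round target dims down to multiples of 16, as Flux latents require.
    w = image.width * factor // 16 * 16
    h = image.height * factor // 16 * 16
    control_image = image.resize((w, h), Image.LANCZOS)
    return pipe(
        prompt="a high quality, detailed photograph",  # placeholder prompt
        image=control_image,
        strength=strength,  # low strength keeps the original structure
        num_inference_steps=steps,
        guidance_scale=3.5,
        height=h,
        width=w,
    ).images[0]
```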
@@ -75,7 +69,7 @@ pipe.to(device)
 print("✅ All models loaded successfully!")
 
 MAX_SEED = 1000000
-MAX_PIXEL_BUDGET = 1024 * 1024
+MAX_PIXEL_BUDGET = 4096 * 4096
 
 
 def generate_caption(image):
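The constant only caps total output pixels; the app's actual resizing code is outside this diff, but a clamp along these lines (hypothetical helper) is the usual way to keep width × height under the budget:

```python
import math

MAX_PIXEL_BUDGET = 4096 * 4096

def clamp_to_budget(width, height, upscale_factor):
    # Hypothetical helper: shrink the requested output uniformly so that
    # width * height never exceeds MAX_PIXEL_BUDGET.
    w, h = width * upscale_factor, height * upscale_factor
    if w * h > MAX_PIXEL_BUDGET:
        scale = math.sqrt(MAX_PIXEL_BUDGET / (w * h))
        w, h = int(w * scale), int(h * scale)
    # Keep dimensions divisible by 16 for the Flux VAE/latents.
    return w // 16 * 16, h // 16 * 16
```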
@@ -86,9 +80,6 @@ def generate_caption(image):
 
     inputs = florence_processor(text=prompt, images=image, return_tensors="pt").to(device)
 
-    # Cast floating-point inputs to match model's dtype (float16)
-    inputs["pixel_values"] = inputs["pixel_values"].to(torch.float16)
-
     generated_ids = florence_model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
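The removed cast only matters when the processor's float32 pixel values disagree with the model's dtype, so dropping it presumably means Florence-2 is no longer loaded in float16 here. For reference, the full captioning round-trip per the Florence-2 model card looks roughly like this; the `<DETAILED_CAPTION>` task token is an assumption, since the `prompt` variable is set outside this hunk:

```python
task = "<DETAILED_CAPTION>"  # assumed task token
inputs = florence_processor(text=task, images=image, return_tensors="pt").to(device)
generated_ids = florence_model.generate(
    input_ids=inputs["input_ids"],
    pixel_values=inputs["pixel_values"],
    max_new_tokens=256,
    num_beams=3,
)
raw_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
caption = florence_processor.post_process_generation(
    raw_text, task=task, image_size=(image.width, image.height)
)[task]
```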
@@ -156,8 +147,8 @@ def enhance_image(
     randomize_seed,
     num_inference_steps,
     upscale_factor,
-    controlnet_conditioning_scale,
     guidance_scale,
+    denoising_strength,
     use_generated_caption,
     custom_prompt,
     progress=gr.Progress(track_tqdm=True),
@@ -200,8 +191,8 @@ def enhance_image(
     # Generate upscaled image
     image = pipe(
         prompt=prompt,
-        control_image=control_image,
-        controlnet_conditioning_scale=controlnet_conditioning_scale,
+        image=control_image,
+        strength=denoising_strength,
         num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
         height=control_image.size[1],
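Worth noting when picking defaults: in diffusers img2img pipelines, `strength` controls both how much noise is added to the input and how many of the scheduled steps actually run, roughly `int(num_inference_steps * strength)`:

```python
# Effective denoising steps in img2img are roughly steps * strength,
# so the new slider default of 0.3 with 28 steps runs about 8 real steps.
num_inference_steps = 28
denoising_strength = 0.3
effective_steps = int(num_inference_steps * denoising_strength)  # -> 8
```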
@@ -281,15 +272,6 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
                     info="More steps = better quality but slower"
                 )
 
-                controlnet_conditioning_scale = gr.Slider(
-                    label="ControlNet Conditioning Scale",
-                    minimum=0.1,
-                    maximum=1.5,
-                    step=0.1,
-                    value=0.6,
-                    info="How much to preserve original structure"
-                )
-
                 guidance_scale = gr.Slider(
                     label="Guidance Scale",
                     minimum=1.0,
@@ -299,6 +281,15 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
                     info="How closely to follow the prompt"
                 )
 
+                denoising_strength = gr.Slider(
+                    label="Denoising Strength",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.3,
+                    info="Controls how much the image is transformed (from Ultimate SD Upscaler concept)"
+                )
+
                 with gr.Row():
                     randomize_seed = gr.Checkbox(
                         label="Randomize seed",
@@ -346,8 +337,8 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
     # Examples
     gr.Examples(
         examples=[
-            [None, "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg", 42, False, 28, 2,
-            [None, "https://picsum.photos/512/512", 123, False, 25, 3, 0
+            [None, "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg", 42, False, 28, 2, 3.5, 0.3, True, ""],
+            [None, "https://picsum.photos/512/512", 123, False, 25, 3, 4.0, 0.4, True, ""],
         ],
         inputs=[
             input_image,
@@ -356,8 +347,8 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
             randomize_seed,
             num_inference_steps,
             upscale_factor,
-            controlnet_conditioning_scale,
             guidance_scale,
+            denoising_strength,
             use_generated_caption,
             custom_prompt,
         ]
@@ -373,8 +364,8 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
             randomize_seed,
             num_inference_steps,
             upscale_factor,
-            controlnet_conditioning_scale,
             guidance_scale,
+            denoising_strength,
             use_generated_caption,
             custom_prompt,
         ],
@@ -386,7 +377,7 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
     <h4>💡 How it works:</h4>
     <ol>
         <li><strong>Florence-2</strong> analyzes your image and generates a detailed caption</li>
-        <li><strong>FLUX
+        <li><strong>FLUX Img2Img</strong> uses this caption to guide the upscaling process with denoising</li>
         <li>The result is an enhanced, higher-resolution image with improved details</li>
     </ol>
     <p><strong>Note:</strong> Due to memory constraints, output is limited to 1024x1024 pixels total budget.</p>