Spaces:

comrender
/

fluxhdupscaler

Running on Zero

App Files Community

comrender committed 9 days ago

Commit

da3febd

verified ·

1 Parent(s): a1ef78c

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -51

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ from PIL import Image
 from huggingface_hub import snapshot_download
 import requests
 import io
 # For ESRGAN (requires pip install basicsr gfpgan)
 try:
@@ -61,7 +62,7 @@ florence_model = AutoModelForCausalLM.from_pretrained(
     "microsoft/Florence-2-large",
     torch_dtype=torch.float16,
     trust_remote_code=True,
-    attn_implementation="eager"  # Fix for SDPA compatibility issue
 ).to(device)
 florence_processor = AutoProcessor.from_pretrained(
     "microsoft/Florence-2-large",
@@ -94,17 +95,15 @@ if USE_ESRGAN:
     esrgan_model.to(device)
 MAX_SEED = 1000000
-MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
 def generate_caption(image):
     """Generate detailed caption using Florence-2"""
     try:
         task_prompt = "<MORE_DETAILED_CAPTION>"
         prompt = task_prompt
         inputs = florence_processor(text=prompt, images=image, return_tensors="pt").to(device)
-        inputs["pixel_values"] = inputs["pixel_values"].to(torch.float16)  # Match model dtype
         generated_ids = florence_model.generate(
             input_ids=inputs["input_ids"],
@@ -123,13 +122,10 @@ def generate_caption(image):
         print(f"Caption generation failed: {e}")
         return "a high quality detailed image"
 def process_input(input_image, upscale_factor):
     """Process input image and handle size constraints"""
     w, h = input_image.size
     w_original, h_original = w, h
-    aspect_ratio = w / h
     was_resized = False
     if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
@@ -148,17 +144,19 @@ def process_input(input_image, upscale_factor):
     return input_image, w_original, h_original, was_resized
 def load_image_from_url(url):
-    """Load image from URL"""
     try:
         response = requests.get(url, stream=True)
         response.raise_for_status()
-        return Image.open(response.raw)
     except Exception as e:
         raise gr.Error(f"Failed to load image from URL: {e}")
 def esrgan_upscale(image, scale=4):
     if not USE_ESRGAN:
         return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
@@ -168,14 +166,12 @@ def esrgan_upscale(image, scale=4):
     output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
     return Image.fromarray(output_img)
 def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
     """Tiled Img2Img to mimic Ultimate SD Upscaler tiling"""
     w, h = image.size
-    output = image.copy()  # Start with the control image
-    # For handling long prompts: truncate for CLIP, full for T5
-    max_clip_tokens = pipe.tokenizer.model_max_length  # Typically 77
     input_ids = pipe.tokenizer.encode(prompt, return_tensors="pt")
     if input_ids.shape[1] > max_clip_tokens:
         input_ids = input_ids[:, :max_clip_tokens]
@@ -189,7 +185,6 @@ def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator
             tile_h = min(tile_size, h - y)
             tile = image.crop((x, y, x + tile_w, y + tile_h))
-            # Run Flux on tile
             gen_tile = pipe(
                 prompt=prompt_clip,
                 prompt_2=prompt,
@@ -202,14 +197,11 @@ def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator
                 generator=generator,
             ).images[0]
-            # Resize back to exact tile size if pipeline adjusted it
             gen_tile = gen_tile.resize((tile_w, tile_h), resample=Image.LANCZOS)
-            # Paste with blending if overlap
             if overlap > 0:
                 paste_box = (x, y, x + tile_w, y + tile_h)
                 if x > 0 or y > 0:
-                    # Simple linear blend on overlaps
                     mask = Image.new('L', (tile_w, tile_h), 255)
                     if x > 0:
                         blend_width = min(overlap, tile_w)
@@ -229,6 +221,14 @@ def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator
     return output
 @spaces.GPU(duration=120)
 def enhance_image(
@@ -243,20 +243,16 @@ def enhance_image(
     progress=gr.Progress(track_tqdm=True),
 ):
     """Main enhancement function"""
-    # Handle image input
     if image_input is not None:
-        input_image = image_input
     elif image_url:
         input_image = load_image_from_url(image_url)
     else:
         raise gr.Error("Please provide an image (upload or URL)")
-    # Convert input image to PNG in backend
-    buffer = io.BytesIO()
-    input_image.save(buffer, format="PNG")
-    buffer.seek(0)
-    input_image = Image.open(buffer)
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     else:
@@ -264,12 +260,10 @@ def enhance_image(
     true_input_image = input_image
-    # Process input image
     input_image, w_original, h_original, was_resized = process_input(
         input_image, upscale_factor
     )
-    # Generate caption if requested
     if use_generated_caption:
         gr.Info("🔍 Generating image caption...")
         generated_caption = generate_caption(input_image)
@@ -281,21 +275,19 @@ def enhance_image(
     gr.Info("🚀 Upscaling image...")
-    # Initial upscale
     if USE_ESRGAN and upscale_factor == 4:
         control_image = esrgan_upscale(input_image, upscale_factor)
     else:
         w, h = input_image.size
         control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
-    # Tiled Flux Img2Img for refinement
     image = tiled_flux_img2img(
         pipe,
         prompt,
         control_image,
         denoising_strength,
         num_inference_steps,
-        1.0,  # Hardcoded guidance_scale to 1
         generator,
         tile_size=1024,
         overlap=32
@@ -305,12 +297,10 @@ def enhance_image(
         gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
         image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
-    # Resize input image to match output size for slider alignment
     resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
     return [resized_input, image], image
 # Create Gradio interface
 with gr.Blocks(css=css, title="🎨 Flux dev Creative Upscaler - Florence-2 + FLUX") as demo:
     gr.HTML("""
@@ -330,7 +320,7 @@ with gr.Blocks(css=css, title="🎨 Flux dev Creative Upscaler - Florence-2 + FL
                     input_image = gr.Image(
                         label="Upload Image",
                         type="pil",
-                        height=200  # Made smaller
                     )
                 with gr.TabItem("🔗 Image URL"):
@@ -395,26 +385,27 @@ with gr.Blocks(css=css, title="🎨 Flux dev Creative Upscaler - Florence-2 + FL
                 size="lg"
             )
-        with gr.Column(scale=2):  # Larger scale for results
             gr.HTML("<h3>📊 Results</h3>")
             result_slider = ImageSlider(
-                type="pil",
-                interactive=False,  # Disable interactivity to prevent uploads
-                height=600,  # Made larger
-                elem_id="result_slider",
-                label=None  # Remove default label
-            )
-            upscaled_output = gr.Image(
-                label="Upscaled Image (Download as PNG)",
                 type="pil",
                 interactive=False,
-                show_download_button=True,
                 height=600,
             )
-    # Event handler
     enhance_btn.click(
         fn=enhance_image,
         inputs=[
@@ -427,7 +418,13 @@ with gr.Blocks(css=css, title="🎨 Flux dev Creative Upscaler - Florence-2 + FL
             use_generated_caption,
             custom_prompt,
         ],
-        outputs=[result_slider, upscaled_output]
     )
     gr.HTML("""
@@ -436,7 +433,6 @@ with gr.Blocks(css=css, title="🎨 Flux dev Creative Upscaler - Florence-2 + FL
     </div>
     """)
-    # Custom CSS for slider
     gr.HTML("""
     <style>
         #result_slider .slider {
@@ -490,7 +486,6 @@ with gr.Blocks(css=css, title="🎨 Flux dev Creative Upscaler - Florence-2 + FL
     </style>
     """)
-    # JS to set slider default position to middle
     gr.HTML("""
     <script>
         document.addEventListener('DOMContentLoaded', function() {

 from huggingface_hub import snapshot_download
 import requests
 import io
+import base64
 # For ESRGAN (requires pip install basicsr gfpgan)
 try:
     "microsoft/Florence-2-large",
     torch_dtype=torch.float16,
     trust_remote_code=True,
+    attn_implementation="eager"
 ).to(device)
 florence_processor = AutoProcessor.from_pretrained(
     "microsoft/Florence-2-large",
     esrgan_model.to(device)
 MAX_SEED = 1000000
+MAX_PIXEL_BUDGET = 8192 * 8192
 def generate_caption(image):
     """Generate detailed caption using Florence-2"""
     try:
         task_prompt = "<MORE_DETAILED_CAPTION>"
         prompt = task_prompt
         inputs = florence_processor(text=prompt, images=image, return_tensors="pt").to(device)
+        inputs["pixel_values"] = inputs["pixel_values"].to(torch.float16)
         generated_ids = florence_model.generate(
             input_ids=inputs["input_ids"],
         print(f"Caption generation failed: {e}")
         return "a high quality detailed image"
 def process_input(input_image, upscale_factor):
     """Process input image and handle size constraints"""
     w, h = input_image.size
     w_original, h_original = w, h
     was_resized = False
     if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
     return input_image, w_original, h_original, was_resized
 def load_image_from_url(url):
     """Download an image from ``url`` and return it re-encoded as a PNG.

     Args:
         url: HTTP(S) address of the image to fetch.

     Returns:
         A PIL image opened from an in-memory PNG encoding of the download.

     Raises:
         gr.Error: If the request fails, times out, returns an error status,
             or the payload cannot be decoded/re-encoded as an image.
     """
     try:
         # A timeout keeps a stalled host from hanging the request forever.
         response = requests.get(url, timeout=30)
         response.raise_for_status()
         # ``response.content`` is the fully read, transfer-decoded body, so
         # gzip/deflate responses decode correctly and the connection is
         # released (unlike reading the undecoded ``response.raw`` stream).
         img = Image.open(io.BytesIO(response.content))
         buffer = io.BytesIO()
         img.save(buffer, format="PNG")
         buffer.seek(0)
         return Image.open(buffer)
     except Exception as e:
         # Surface every failure as a user-visible Gradio error, keeping the
         # original exception chained for server-side debugging.
         raise gr.Error(f"Failed to load image from URL: {e}") from e
 def esrgan_upscale(image, scale=4):
     if not USE_ESRGAN:
         return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
     output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
     return Image.fromarray(output_img)
 def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
     """Tiled Img2Img to mimic Ultimate SD Upscaler tiling"""
     w, h = image.size
+    output = image.copy()
+    max_clip_tokens = pipe.tokenizer.model_max_length
     input_ids = pipe.tokenizer.encode(prompt, return_tensors="pt")
     if input_ids.shape[1] > max_clip_tokens:
         input_ids = input_ids[:, :max_clip_tokens]
             tile_h = min(tile_size, h - y)
             tile = image.crop((x, y, x + tile_w, y + tile_h))
             gen_tile = pipe(
                 prompt=prompt_clip,
                 prompt_2=prompt,
                 generator=generator,
             ).images[0]
             gen_tile = gen_tile.resize((tile_w, tile_h), resample=Image.LANCZOS)
             if overlap > 0:
                 paste_box = (x, y, x + tile_w, y + tile_h)
                 if x > 0 or y > 0:
                     mask = Image.new('L', (tile_w, tile_h), 255)
                     if x > 0:
                         blend_width = min(overlap, tile_w)
     return output
def download_png(image):
    """Write ``image`` to a temporary PNG file and return that file's path.

    The result feeds a ``gr.File`` output, which takes a file path rather
    than an in-memory buffer, so the PNG is written to disk.

    Args:
        image: The upscaled image (any object with a PIL-style
            ``save(fp, format=...)`` method), or ``None`` if nothing has
            been generated yet.

    Returns:
        Path (``str``) of the PNG file that was written.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    import tempfile

    if image is None:
        raise gr.Error("No upscaled image available to download")
    # delete=False: the file must outlive this function so the caller can
    # read it from the returned path.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        image.save(tmp, format="PNG")
    return tmp.name
 @spaces.GPU(duration=120)
 def enhance_image(
     progress=gr.Progress(track_tqdm=True),
 ):
     """Main enhancement function"""
     if image_input is not None:
+        buffer = io.BytesIO()
+        image_input.save(buffer, format="PNG")
+        buffer.seek(0)
+        input_image = Image.open(buffer)
     elif image_url:
         input_image = load_image_from_url(image_url)
     else:
         raise gr.Error("Please provide an image (upload or URL)")
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     else:
     true_input_image = input_image
     input_image, w_original, h_original, was_resized = process_input(
         input_image, upscale_factor
     )
     if use_generated_caption:
         gr.Info("🔍 Generating image caption...")
         generated_caption = generate_caption(input_image)
     gr.Info("🚀 Upscaling image...")
     if USE_ESRGAN and upscale_factor == 4:
         control_image = esrgan_upscale(input_image, upscale_factor)
     else:
         w, h = input_image.size
         control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
     image = tiled_flux_img2img(
         pipe,
         prompt,
         control_image,
         denoising_strength,
         num_inference_steps,
+        1.0,
         generator,
         tile_size=1024,
         overlap=32
         gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
         image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
     resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
     return [resized_input, image], image
 # Create Gradio interface
 with gr.Blocks(css=css, title="🎨 Flux dev Creative Upscaler - Florence-2 + FLUX") as demo:
     gr.HTML("""
                     input_image = gr.Image(
                         label="Upload Image",
                         type="pil",
+                        height=200
                     )
                 with gr.TabItem("🔗 Image URL"):
                 size="lg"
             )
+        with gr.Column(scale=2):
             gr.HTML("<h3>📊 Results</h3>")
             result_slider = ImageSlider(
                 type="pil",
                 interactive=False,
                 height=600,
+                elem_id="result_slider",
+                label=None
+            )
+            download_btn = gr.Button(
+                "📥 Download as PNG",
+                variant="secondary",
+                size="lg"
             )
+    # State to store the upscaled image
+    upscaled_image_state = gr.State()
+    # Event handlers
     enhance_btn.click(
         fn=enhance_image,
         inputs=[
             use_generated_caption,
             custom_prompt,
         ],
+        outputs=[result_slider, upscaled_image_state]
+    )
+    download_btn.click(
+        fn=download_png,
+        inputs=[upscaled_image_state],
+        outputs=gr.File(label="Download Upscaled Image as PNG")
     )
     gr.HTML("""
     </div>
     """)
     gr.HTML("""
     <style>
         #result_slider .slider {
     </style>
     """)
     gr.HTML("""
     <script>
         document.addEventListener('DOMContentLoaded', function() {