Spaces:

comrender
/

fluxhdupscaler

Running on Zero

App Files Files Community

comrender committed 17 days ago

Commit

082dbe6

verified ·

1 Parent(s): 656f4ee

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -39

app.py CHANGED Viewed

@@ -13,6 +13,15 @@ from PIL import Image
 from huggingface_hub import snapshot_download
 import requests
 css = """
 #col-container {
     margin: 0 auto;
@@ -70,8 +79,21 @@ pipe.enable_vae_slicing()
 print("✅ All models loaded successfully!")
 MAX_SEED = 1000000
-MAX_PIXEL_BUDGET = 4096 * 4096
 def generate_caption(image):
@@ -115,33 +137,82 @@ def process_input(input_image, upscale_factor):
         gr.Info(
             f"Requested output image is too large. Resizing input to fit within pixel budget."
         )
-        input_image = input_image.resize(
-            (
-                int(aspect_ratio * MAX_PIXEL_BUDGET**0.5 // upscale_factor),
-                int(MAX_PIXEL_BUDGET**0.5 // aspect_ratio // upscale_factor),
-            ),
-            resample=Image.LANCZOS
-        )
         was_resized = True
-    # Resize to multiple of 8
-    w, h = input_image.size
-    w = w - w % 8
-    h = h - h % 8
-    return input_image.resize((w, h), resample=Image.LANCZOS), w_original, h_original, was_resized
 def load_image_from_url(url):
     """Load image from URL"""
     try:
-        response = requests.get(url)
         response.raise_for_status()
-        return Image.open(requests.get(url, stream=True).raw)
     except Exception as e:
         raise gr.Error(f"Failed to load image from URL: {e}")
 @spaces.GPU(duration=120)
 def enhance_image(
     image_input,
@@ -183,33 +254,35 @@ def enhance_image(
     else:
         prompt = custom_prompt if custom_prompt.strip() else ""
-    # Rescale with upscale factor using LANCZOS
-    w, h = input_image.size
-    control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
     generator = torch.Generator().manual_seed(seed)
     gr.Info("🚀 Upscaling image...")
-    # Generate upscaled image
-    image = pipe(
-        prompt=prompt,
-        image=control_image,
-        strength=denoising_strength,
-        num_inference_steps=num_inference_steps,
-        guidance_scale=guidance_scale,
-        height=control_image.size[1],
-        width=control_image.size[0],
-        generator=generator,
-    ).images[0]
     if was_resized:
         gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
-    # Resize to target desired size
-    final_image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
-    return [true_input_image, final_image], seed, generated_caption if use_generated_caption else ""
 # Create Gradio interface
@@ -380,10 +453,10 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as d
         <h4>💡 How it works:</h4>
         <ol>
             <li><strong>Florence-2</strong> analyzes your image and generates a detailed caption</li>
-            <li>Initial upscale with LANCZOS interpolation</li>
-            <li><strong>FLUX Img2Img</strong> enhances the upscaled image with AI diffusion guided by the caption</li>
         </ol>
-        <p><strong>Note:</strong> Output limited to 4096x4096 pixels total budget to prevent memory issues.</p>
     </div>
     """)

 from huggingface_hub import snapshot_download
 import requests
+# For ESRGAN (requires pip install basicsr gfpgan)
+try:
+    from basicsr.archs.rrdbnet_arch import RRDBNet
+    from basicsr.utils import img2tensor, tensor2img
+    USE_ESRGAN = True
+except ImportError:
+    USE_ESRGAN = False
+    warnings.warn("basicsr not installed; falling back to LANCZOS interpolation.")
 css = """
 #col-container {
     margin: 0 auto;
 print("✅ All models loaded successfully!")
+# Download ESRGAN model if using
+if USE_ESRGAN:
+    esrgan_path = "4x-UltraSharp.pth"
+    if not os.path.exists(esrgan_path):
+        url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
+        with open(esrgan_path, "wb") as f:
+            f.write(requests.get(url).content)
+    esrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+    state_dict = torch.load(esrgan_path)['params_ema']
+    esrgan_model.load_state_dict(state_dict)
+    esrgan_model.eval()
+    esrgan_model.to(device)
 MAX_SEED = 1000000
+MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
 def generate_caption(image):
         gr.Info(
             f"Requested output image is too large. Resizing input to fit within pixel budget."
         )
+        target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)
+        scale = (target_input_pixels / (w * h)) ** 0.5
+        new_w = int(w * scale) - int(w * scale) % 8
+        new_h = int(h * scale) - int(h * scale) % 8
+        input_image = input_image.resize((new_w, new_h), resample=Image.LANCZOS)
         was_resized = True
+    return input_image, w_original, h_original, was_resized
 def load_image_from_url(url):
     """Load image from URL"""
     try:
+        response = requests.get(url, stream=True)
         response.raise_for_status()
+        return Image.open(response.raw)
     except Exception as e:
         raise gr.Error(f"Failed to load image from URL: {e}")
+def esrgan_upscale(image, scale=4):
+    if not USE_ESRGAN:
+        return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
+    img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
+    with torch.no_grad():
+        output = esrgan_model(img.unsqueeze(0)).squeeze()
+    output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
+    return Image.fromarray(output_img)
+def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
+    """Tiled Img2Img to mimic Ultimate SD Upscaler tiling"""
+    w, h = image.size
+    output = image.copy()  # Start with the control image
+    for x in range(0, w, tile_size - overlap):
+        for y in range(0, h, tile_size - overlap):
+            tile_w = min(tile_size, w - x)
+            tile_h = min(tile_size, h - y)
+            tile = image.crop((x, y, x + tile_w, y + tile_h))
+            # Run Flux on tile
+            gen_tile = pipe(
+                prompt=prompt,
+                image=tile,
+                strength=strength,
+                num_inference_steps=steps,
+                guidance_scale=guidance,
+                height=tile_h,
+                width=tile_w,
+                generator=generator,
+            ).images[0]
+            # Paste with blending if overlap
+            if overlap > 0:
+                paste_box = (x, y, x + tile_w, y + tile_h)
+                if x > 0 or y > 0:
+                    # Simple linear blend on overlaps
+                    mask = Image.new('L', (tile_w, tile_h), 255)
+                    if x > 0:
+                        for i in range(overlap):
+                            for j in range(tile_h):
+                                mask.putpixel((i, j), int(255 * (i / overlap)))
+                    if y > 0:
+                        for i in range(tile_w):
+                            for j in range(overlap):
+                                mask.putpixel((i, j), int(255 * (j / overlap)))
+                    output.paste(gen_tile, paste_box, mask)
+                else:
+                    output.paste(gen_tile, paste_box)
+            else:
+                output.paste(gen_tile, (x, y))
+    return output
 @spaces.GPU(duration=120)
 def enhance_image(
     image_input,
     else:
         prompt = custom_prompt if custom_prompt.strip() else ""
     generator = torch.Generator().manual_seed(seed)
     gr.Info("🚀 Upscaling image...")
+    # Initial upscale
+    if USE_ESRGAN and upscale_factor == 4:
+        control_image = esrgan_upscale(input_image, upscale_factor)
+    else:
+        w, h = input_image.size
+        control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
+    # Tiled Flux Img2Img for refinement
+    image = tiled_flux_img2img(
+        pipe,
+        prompt,
+        control_image,
+        denoising_strength,
+        num_inference_steps,
+        guidance_scale,
+        generator,
+        tile_size=1024,
+        overlap=32
+    )
     if was_resized:
         gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
+        image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
+    return [true_input_image, image], seed, generated_caption if use_generated_caption else ""
 # Create Gradio interface
         <h4>💡 How it works:</h4>
         <ol>
             <li><strong>Florence-2</strong> analyzes your image and generates a detailed caption</li>
+            <li>Initial upscale with LANCZOS interpolation (or ESRGAN if installed)</li>
+            <li><strong>FLUX Img2Img</strong> enhances the upscaled image with tiled AI diffusion guided by the caption</li>
         </ol>
+        <p><strong>Note:</strong> Output limited to 8192x8192 pixels total budget. Tiling enables larger sizes.</p>
     </div>
     """)