Spaces:

comrender
/

fluxhdupscaler

Running on Zero

App Files Files Community

comrender committed 2 days ago

Commit

1a431a3

verified ·

1 Parent(s): 3bb8a2e

Update app.py

Browse files

Files changed (1) hide show

app.py +372 -303

app.py CHANGED Viewed

@@ -1,75 +1,26 @@
-import os
 import random
 import warnings
-import gc
 import gradio as gr
 import numpy as np
 import spaces
 import torch
-import torch.nn as nn
 from diffusers import FluxImg2ImgPipeline
 from gradio_imageslider import ImageSlider
 from PIL import Image
 from huggingface_hub import snapshot_download
 import requests
-# Minimal ESRGAN implementation (without basicsr dependency)
-class ResidualDenseBlock(nn.Module):
-    def __init__(self, num_feat=64, num_grow_ch=32):
-        super(ResidualDenseBlock, self).__init__()
-        self.conv1 = nn.Conv2d(num_feat, num_grow_ch, 3, 1, 1)
-        self.conv2 = nn.Conv2d(num_feat + num_grow_ch, num_grow_ch, 3, 1, 1)
-        self.conv3 = nn.Conv2d(num_feat + 2 * num_grow_ch, num_grow_ch, 3, 1, 1)
-        self.conv4 = nn.Conv2d(num_feat + 3 * num_grow_ch, num_grow_ch, 3, 1, 1)
-        self.conv5 = nn.Conv2d(num_feat + 4 * num_grow_ch, num_feat, 3, 1, 1)
-        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
-    def forward(self, x):
-        x1 = self.lrelu(self.conv1(x))
-        x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
-        x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
-        x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
-        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
-        return x5 * 0.2 + x
-class RRDB(nn.Module):
-    def __init__(self, num_feat, num_grow_ch=32):
-        super(RRDB, self).__init__()
-        self.rdb1 = ResidualDenseBlock(num_feat, num_grow_ch)
-        self.rdb2 = ResidualDenseBlock(num_feat, num_grow_ch)
-        self.rdb3 = ResidualDenseBlock(num_feat, num_grow_ch)
-    def forward(self, x):
-        out = self.rdb1(x)
-        out = self.rdb2(out)
-        out = self.rdb3(out)
-        return out * 0.2 + x
-class RRDBNet(nn.Module):
-    def __init__(self, num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4):
-        super(RRDBNet, self).__init__()
-        self.scale = scale
-        self.conv_first = nn.Conv2d(num_in_ch, num_feat, 3, 1, 1)
-        self.body = nn.Sequential(*[RRDB(num_feat, num_grow_ch) for _ in range(num_block)])
-        self.conv_body = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
-        # Upsampling
-        self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
-        self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
-        self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
-        self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
-        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
-    def forward(self, x):
-        fea = self.conv_first(x)
-        trunk = self.conv_body(self.body(fea))
-        fea = fea + trunk
-        fea = self.lrelu(self.conv_up1(nn.functional.interpolate(fea, scale_factor=2, mode='nearest')))
-        fea = self.lrelu(self.conv_up2(nn.functional.interpolate(fea, scale_factor=2, mode='nearest')))
-        out = self.conv_last(self.lrelu(self.conv_hr(fea)))
-        return out
 css = """
 #col-container {
@@ -82,10 +33,14 @@ css = """
 }
 """
 # Get HuggingFace token
 huggingface_token = os.getenv("HF_TOKEN")
-# Download FLUX model if not already cached
 print("📥 Downloading FLUX model...")
 model_path = snapshot_download(
     repo_id="black-forest-labs/FLUX.1-dev",
@@ -95,276 +50,324 @@ model_path = snapshot_download(
     token=huggingface_token,
 )
-# Load FLUX pipeline on CPU initially
-print("📥 Loading FLUX Img2Img pipeline...")
 pipe = FluxImg2ImgPipeline.from_pretrained(
     model_path,
-    torch_dtype=torch.bfloat16,
-    use_safetensors=True
 )
-# Enable memory optimizations
 pipe.enable_vae_tiling()
 pipe.enable_vae_slicing()
-pipe.vae.enable_tiling()
-pipe.vae.enable_slicing()
-# Download and load ESRGAN 4x-UltraSharp model
-print("📥 Loading ESRGAN 4x-UltraSharp...")
-esrgan_path = "4x-UltraSharp.pth"
-if not os.path.exists(esrgan_path):
-    print("Downloading ESRGAN model...")
-    url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
-    response = requests.get(url)
-    with open(esrgan_path, "wb") as f:
-        f.write(response.content)
-# Initialize ESRGAN model
-esrgan_model = RRDBNet(
-    num_in_ch=3,
-    num_out_ch=3,
-    num_feat=64,
-    num_block=23,
-    num_grow_ch=32,
-    scale=4
-)
-# Load state dict
-state_dict = torch.load(esrgan_path, map_location='cpu')
-if 'params_ema' in state_dict:
-    state_dict = state_dict['params_ema']
-elif 'params' in state_dict:
-    state_dict = state_dict['params']
-# Clean state dict keys if needed
-cleaned_state_dict = {}
-for k, v in state_dict.items():
-    if k.startswith('module.'):
-        cleaned_state_dict[k[7:]] = v
-    else:
-        cleaned_state_dict[k] = v
-esrgan_model.load_state_dict(cleaned_state_dict, strict=False)
-esrgan_model.eval()
 print("✅ All models loaded successfully!")
 MAX_SEED = 1000000
-MAX_INPUT_SIZE = 512  # Max input size before upscaling
-def make_multiple_16(n):
-    """Round to nearest multiple of 16 for FLUX requirements"""
-    return ((n + 15) // 16) * 16
-def truncate_prompt(prompt, max_tokens=75):
-    """Truncate prompt to avoid CLIP token limit (77 tokens)"""
-    if not prompt:
-        return ""
-    # Simple truncation by character count (rough approximation)
-    if len(prompt) > 250:  # ~75 tokens
-        return prompt[:250] + "..."
-    return prompt
-def prepare_image(image, max_size=MAX_INPUT_SIZE):
-    """Prepare image for processing"""
-    w, h = image.size
-    # Limit input size
-    if w > max_size or h > max_size:
-        image.thumbnail((max_size, max_size), Image.LANCZOS)
-    return image
-def esrgan_upscale(image, model, device='cuda', upscale_factor=4):
-    """Upscale image using ESRGAN with variable factor support"""
-    orig_w, orig_h = image.size
-    pre_resize_factor = upscale_factor / 4.0
-    low_res_w = int(orig_w * pre_resize_factor)
-    low_res_h = int(orig_h * pre_resize_factor)
-    if low_res_w < 1 or low_res_h < 1:
-        raise ValueError("Upscale factor too small for image size")
-    low_res_image = image.resize((low_res_w, low_res_h), Image.BICUBIC)  # Changed to BICUBIC for better match to training degradation
-    # Prepare image
-    img_np = np.array(low_res_image).astype(np.float32) / 255.
-    img_np = np.transpose(img_np, (2, 0, 1))  # HWC to CHW
-    img_tensor = torch.from_numpy(img_np).unsqueeze(0).to(device)
-    # Upscale
     with torch.no_grad():
-        output = model(img_tensor)
-        output = output.squeeze(0).cpu().clamp(0, 1)
-        output_np = output.numpy()
-        output_np = np.transpose(output_np, (1, 2, 0))  # CHW to HWC
-        output_np = (output_np * 255).astype(np.uint8)
-    upscaled = Image.fromarray(output_np)
-    # Resize back to exact target size if needed (due to rounding)
-    target_w = int(orig_w * upscale_factor)
-    target_h = int(orig_h * upscale_factor)
-    if upscaled.size != (target_w, target_h):
-        upscaled = upscaled.resize((target_w, target_h), Image.BICUBIC)  # Changed to BICUBIC
-    return upscaled
-@spaces.GPU(duration=120)  # Increased to 120 seconds
 def enhance_image(
-    input_image,
-    prompt,
     seed,
     randomize_seed,
     num_inference_steps,
-    denoising_strength,
     upscale_factor,
     progress=gr.Progress(track_tqdm=True),
 ):
     """Main enhancement function"""
-    if input_image is None:
-        raise gr.Error("Please upload an image")
-    # Clear memory
-    torch.cuda.empty_cache()
-    gc.collect()
     try:
-        # Randomize seed if needed
-        if randomize_seed:
-            seed = random.randint(0, MAX_SEED)
-        # Prepare and validate prompt
-        prompt = truncate_prompt(prompt.strip() if prompt else "high quality, detailed")
-        # Prepare input image
-        input_image = prepare_image(input_image)
-        original_size = input_image.size
-        # Step 1: ESRGAN upscale on GPU
-        gr.Info(f"🔍 Upscaling with ESRGAN x{upscale_factor}...")
-        # Move ESRGAN to GPU for faster processing
-        esrgan_model.to("cuda")
-        upscaled_image = esrgan_upscale(input_image, esrgan_model, device="cuda", upscale_factor=upscale_factor)
-        # Move ESRGAN back to CPU to free memory
-        esrgan_model.to("cpu")
-        torch.cuda.empty_cache()
-        # Ensure dimensions are multiples of 16 for FLUX
-        w, h = upscaled_image.size
-        new_w = make_multiple_16(w)
-        new_h = make_multiple_16(h)
-        if new_w != w or new_h != h:
-            # Pad image to meet requirements
-            padded = Image.new('RGB', (new_w, new_h))
-            padded.paste(upscaled_image, (0, 0))
-            upscaled_image = padded
-        # Step 2: FLUX enhancement
-        gr.Info("🎨 Enhancing with FLUX...")
-        # Move pipeline to GPU
-        pipe.to("cuda")
-        # Generate with FLUX
-        generator = torch.Generator(device="cuda").manual_seed(seed)
-        with torch.inference_mode():
-            result = pipe(
-                prompt=prompt,
-                image=upscaled_image,
-                strength=denoising_strength,
-                num_inference_steps=num_inference_steps,
-                guidance_scale=3.5,  # Recommended for FLUX.1-dev to reduce artifacts
-                height=new_h,
-                width=new_w,
-                generator=generator,
-            ).images[0]
-        # Crop back if we padded
-        if new_w != w or new_h != h:
-            result = result.crop((0, 0, w, h))
-        # Move pipeline back to CPU
-        pipe.to("cpu")
-        torch.cuda.empty_cache()
-        gc.collect()
-        # Prepare images for slider (before/after)
-        input_resized = input_image.resize(result.size, Image.LANCZOS)
-        gr.Info("✅ Enhancement complete!")
-        return [input_resized, result], seed
     except Exception as e:
-        # Cleanup on error
-        pipe.to("cpu")
         esrgan_model.to("cpu")
-        torch.cuda.empty_cache()
-        gc.collect()
-        raise gr.Error(f"Enhancement failed: {str(e)}")
 # Create Gradio interface
-with gr.Blocks(css=css) as demo:
     gr.HTML("""
     <div class="main-header">
-        <h1>🚀 Flux Dev Ultimate Upscaler</h1>
-        <p>Upload an image to upscale 2-4x with ESRGAN and enhance with FLUX</p>
-        <p>Optimized for <strong>ZeroGPU</strong> | Max input: 512x512 → Output: up to 2048x2048</p>
     </div>
-    """)
     with gr.Row():
         with gr.Column(scale=1):
-            # Input section
-            input_image = gr.Image(
-                label="Input Image",
-                type="pil",
-                height=256
             )
-            prompt = gr.Textbox(
-                label="Describe image with prompt",
-                placeholder="Describe the desired enhancement (e.g., 'high quality, sharp details, vibrant colors')",
-                value="high quality, ultra detailed, sharp",
                 lines=2
             )
-            # Advanced Settings (always open)
             upscale_factor = gr.Slider(
-                label="Upscale Ratio",
-                minimum=2,
                 maximum=4,
                 step=1,
-                value=4,
-                info="Choose upscale factor (2x, 3x, 4x). Use 4x for best results; lower may cause color artifacts."
             )
             num_inference_steps = gr.Slider(
-                label="Enhancement Steps",
-                minimum=10,
-                maximum=25,
                 step=1,
-                value=20,  # Increased default for better denoising
                 info="More steps = better quality but slower"
             )
             denoising_strength = gr.Slider(
-                label="Creativity (Denoising)",
-                minimum=0.1,
-                maximum=0.6,
                 step=0.05,
-                value=0.35,
-                info="Higher = more changes to the image"
             )
             with gr.Row():
@@ -372,58 +375,124 @@ with gr.Blocks(css=css) as demo:
                     label="Randomize seed",
                     value=True
                 )
-                seed = gr.Number(
                     label="Seed",
-                    value=42
                 )
             enhance_btn = gr.Button(
-                "Upscale",
                 variant="primary",
                 size="lg"
             )
-        with gr.Column(scale=2):
-            # Output section
             result_slider = ImageSlider(
                 type="pil",
-                label="Before / After",
-                interactive=False,
-                height=512
-            )
-            used_seed = gr.Number(
-                label="Seed Used",
-                interactive=False,
-                visible=False
             )
     # Event handler
     enhance_btn.click(
         fn=enhance_image,
         inputs=[
             input_image,
-            prompt,
             seed,
             randomize_seed,
             num_inference_steps,
-            denoising_strength,
             upscale_factor,
         ],
-        outputs=[result_slider, used_seed]
     )
     gr.HTML("""
-    <div style="margin-top: 2rem; text-align: center; color: #666;">
-        <p>📌 Pipeline: ESRGAN 2-4x-UltraSharp → FLUX Dev Enhancement</p>
-        <p>⚡ Optimized for ZeroGPU with automatic memory management</p>
-        <p>📌 Note: User is responsible for obtaining commercial license from Flux Dev if using image commercially under their license.</p>
     </div>
     """)
 if __name__ == "__main__":
-    demo.queue(max_size=3).launch(
-        share=False,
-        server_name="0.0.0.0",
-        server_port=7860
-    )

+import logging
 import random
 import warnings
+import os
 import gradio as gr
 import numpy as np
 import spaces
 import torch
 from diffusers import FluxImg2ImgPipeline
+from transformers import AutoProcessor, AutoModelForCausalLM
 from gradio_imageslider import ImageSlider
 from PIL import Image
 from huggingface_hub import snapshot_download
 import requests
+# For ESRGAN (requires pip install basicsr gfpgan)
+try:
+    from basicsr.archs.rrdbnet_arch import RRDBNet
+    from basicsr.utils import img2tensor, tensor2img
+    USE_ESRGAN = True
+except ImportError:
+    USE_ESRGAN = False
+    warnings.warn("basicsr not installed; falling back to LANCZOS interpolation.")
 css = """
 #col-container {
 }
 """
+# Device setup - Default to CPU, let runtime handle GPU
+power_device = "ZeroGPU"
+device = "cpu"
 # Get HuggingFace token
 huggingface_token = os.getenv("HF_TOKEN")
+# Download FLUX model
 print("📥 Downloading FLUX model...")
 model_path = snapshot_download(
     repo_id="black-forest-labs/FLUX.1-dev",
     token=huggingface_token,
 )
+# Load Florence-2 model for image captioning on CPU
+# trust_remote_code=True is passed to both the model and the processor below,
+# which allows code shipped with the "microsoft/Florence-2-large" repo to run.
+print("📥 Loading Florence-2 model...")
+florence_model = AutoModelForCausalLM.from_pretrained(
+    "microsoft/Florence-2-large",
+    torch_dtype=torch.float32,  # Force CPU dtype
+    trust_remote_code=True,
+    attn_implementation="eager"
+).to(device)
+florence_processor = AutoProcessor.from_pretrained(
+    "microsoft/Florence-2-large",
+    trust_remote_code=True
+)
+# Load FLUX Img2Img pipeline on CPU
+# NOTE(review): the pipeline is loaded in float32 and later moved to CUDA
+# unchanged by enhance_image; the previous revision loaded it in bfloat16.
+# Presumably float32 costs considerably more GPU memory - TODO confirm it fits.
+print("📥 Loading FLUX Img2Img...")
 pipe = FluxImg2ImgPipeline.from_pretrained(
     model_path,
+    torch_dtype=torch.float32  # Force CPU dtype
 )
 # VAE tiling and slicing are enabled on the pipeline right after loading.
 pipe.enable_vae_tiling()
 pipe.enable_vae_slicing()
 print("✅ All models loaded successfully!")
+# Download (once) and load the ESRGAN 4x-UltraSharp weights, if basicsr is available
+if USE_ESRGAN:
+    esrgan_path = "4x-UltraSharp.pth"
+    if not os.path.exists(esrgan_path):
+        url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
+        # Fetch first and fail loudly on HTTP errors, so an error page or an
+        # empty file is never cached under the weights filename.
+        response = requests.get(url, timeout=120)
+        response.raise_for_status()
+        partial_path = esrgan_path + ".part"
+        with open(partial_path, "wb") as f:
+            f.write(response.content)
+        # Atomic rename: an interrupted download cannot leave a truncated checkpoint behind.
+        os.replace(partial_path, esrgan_path)
+    esrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+    # NOTE(security): torch.load unpickles a file downloaded from the network;
+    # consider weights_only=True if the installed torch version supports it.
+    state_dict = torch.load(esrgan_path, map_location="cpu")
+    # Checkpoints may wrap the weights under 'params_ema' or 'params', or be a bare state dict.
+    for wrapper_key in ("params_ema", "params"):
+        if wrapper_key in state_dict:
+            state_dict = state_dict[wrapper_key]
+            break
+    # Drop the 'module.' prefix left by DataParallel-style checkpoints.
+    state_dict = {
+        (key[len("module."):] if key.startswith("module.") else key): value
+        for key, value in state_dict.items()
+    }
+    esrgan_model.load_state_dict(state_dict)
+    esrgan_model.eval()
 MAX_SEED = 1000000
+MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
+def generate_caption(image):
+    """Generate a detailed caption for a PIL image using Florence-2.
+
+    Args:
+        image: PIL image to describe.
+
+    Returns:
+        The caption string, or a generic fallback prompt if captioning fails.
+    """
+    fallback_caption = "a high quality detailed image"
+    try:
+        task_prompt = "<MORE_DETAILED_CAPTION>"
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        # Follow the model wherever it currently lives: the caller may have moved
+        # it to the GPU, so the module-level `device` can be stale.
+        inputs = florence_processor(text=task_prompt, images=image, return_tensors="pt").to(
+            florence_model.device, florence_model.dtype
+        )
+        generated_ids = florence_model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=1024,
+            num_beams=3,
+            # Deterministic beam search: the caption is used as the prompt, so
+            # sampling here would make a fixed seed non-reproducible.
+            do_sample=False,
+        )
+        generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+        parsed_answer = florence_processor.post_process_generation(
+            generated_text, task=task_prompt, image_size=(image.width, image.height)
+        )
+        caption = parsed_answer[task_prompt]
+        return caption or fallback_caption
+    except Exception as e:
+        # Best effort: captioning must never abort the upscale itself.
+        print(f"Caption generation failed: {e}")
+        return fallback_caption
+def process_input(input_image, upscale_factor):
+    """Shrink the input if the requested output would exceed MAX_PIXEL_BUDGET.
+
+    Args:
+        input_image: PIL image supplied by the user.
+        upscale_factor: Linear upscale factor requested for the output.
+
+    Returns:
+        Tuple ``(image, w_original, h_original, was_resized)`` where ``image`` is
+        the (possibly downscaled) input and the original dimensions are those of
+        the image as passed in.
+    """
+    w, h = input_image.size
+    w_original, h_original = w, h
+    was_resized = False
+    if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
+        warnings.warn(
+            f"Requested output image is too large ({w * upscale_factor}x{h * upscale_factor}). Resizing to fit budget."
+        )
+        gr.Info(
+            "Requested output image is too large. Resizing input to fit within pixel budget."
+        )
+        target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)
+        scale = (target_input_pixels / (w * h)) ** 0.5
+        # Floor to a multiple of 8, but never below 8: an extreme aspect ratio
+        # could otherwise floor one side to 0 and make resize() fail.
+        new_w = max(8, int(w * scale) // 8 * 8)
+        new_h = max(8, int(h * scale) // 8 * 8)
+        input_image = input_image.resize((new_w, new_h), resample=Image.LANCZOS)
+        was_resized = True
+    return input_image, w_original, h_original, was_resized
+def load_image_from_url(url):
+    """Download an image from ``url`` and return it as a fully loaded PIL image.
+
+    Args:
+        url: HTTP(S) address of the image, as typed by the user.
+
+    Returns:
+        The decoded PIL image.
+
+    Raises:
+        gr.Error: If the download or the decoding fails.
+    """
+    import io
+    # NOTE(security): the URL is user-controlled and fetched server-side;
+    # restrict the allowed hosts if this app can reach internal services.
+    try:
+        # A timeout keeps a stalled server from holding the GPU worker.
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+        # Decode from the buffered body rather than response.raw: the raw stream
+        # skips content-encoding handling and keeps the connection open while
+        # PIL reads lazily.
+        image = Image.open(io.BytesIO(response.content))
+        image.load()
+        return image
+    except Exception as e:
+        raise gr.Error(f"Failed to load image from URL: {e}") from e
+def esrgan_upscale(image, scale=4):
+    """Upscale a PIL image by ``scale``, using ESRGAN when it is available.
+
+    Args:
+        image: PIL image to upscale.
+        scale: Linear upscale factor for the returned image.
+
+    Returns:
+        The upscaled PIL image, sized ``(width * scale, height * scale)``.
+        Falls back to LANCZOS resampling when basicsr is not installed.
+    """
+    target_size = (image.width * scale, image.height * scale)
+    if not USE_ESRGAN:
+        return image.resize(target_size, resample=Image.LANCZOS)
+    # The network is built with num_in_ch=3, so drop alpha / expand grayscale first.
+    rgb_image = image if image.mode == "RGB" else image.convert("RGB")
+    img = img2tensor(np.array(rgb_image) / 255., bgr2rgb=False, float32=True)
+    # Run on whatever device/dtype the model currently uses (the caller moves it to the GPU).
+    reference_param = next(esrgan_model.parameters())
+    batch = img.unsqueeze(0).to(device=reference_param.device, dtype=reference_param.dtype)
     with torch.no_grad():
+        # squeeze(0) removes only the batch axis; a bare squeeze() would also
+        # drop a spatial axis of size 1.
+        output = esrgan_model(batch).squeeze(0)
+    output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
+    upscaled = Image.fromarray(output_img)
+    # The model upscales by its own fixed factor; honour `scale` when it differs.
+    if upscaled.size != target_size:
+        upscaled = upscaled.resize(target_size, resample=Image.LANCZOS)
+    return upscaled
+def _tile_origins(length, tile_size, stride):
+    """Return tile start offsets covering ``[0, length)``; the last tile ends flush with the edge."""
+    if length <= tile_size:
+        return [0]
+    origins = list(range(0, length - tile_size, stride))
+    origins.append(length - tile_size)
+    return origins
+def _seam_mask(tile_w, tile_h, overlap, fade_left, fade_top):
+    """Build an 'L' paste mask ramping 0 -> 255 across the overlapped left and/or top edge."""
+    alpha = np.full((tile_h, tile_w), 255.0, dtype=np.float32)
+    if fade_left:
+        span = min(overlap, tile_w)
+        ramp = 255.0 * np.arange(span, dtype=np.float32) / overlap
+        alpha[:, :span] = np.minimum(alpha[:, :span], ramp[np.newaxis, :])
+    if fade_top:
+        span = min(overlap, tile_h)
+        ramp = 255.0 * np.arange(span, dtype=np.float32) / overlap
+        # minimum() keeps the left ramp intact in the corner instead of overwriting it.
+        alpha[:span, :] = np.minimum(alpha[:span, :], ramp[:, np.newaxis])
+    return Image.fromarray(alpha.astype(np.uint8))
+def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
+    """Refine ``image`` tile by tile with an img2img pipeline and blend the seams.
+
+    Args:
+        pipe: Callable img2img pipeline returning an object with ``.images``.
+        prompt: Text prompt passed to every tile.
+        image: PIL image to refine (the upscaled control image).
+        strength: Denoising strength passed to the pipeline.
+        steps: Number of inference steps per tile.
+        guidance: Guidance scale passed to the pipeline.
+        generator: Random generator shared by all tiles.
+        tile_size: Maximum tile edge length in pixels.
+        overlap: Width in pixels of the blended band between neighbouring tiles.
+
+    Returns:
+        A new PIL image of the same size as ``image``.
+
+    Raises:
+        ValueError: If ``overlap`` is negative or not smaller than ``tile_size``.
+    """
+    if not 0 <= overlap < tile_size:
+        raise ValueError("overlap must satisfy 0 <= overlap < tile_size")
+    w, h = image.size
+    output = image.copy()  # Start with the control image
+    if strength <= 0:
+        # Nothing to denoise; return the control image unchanged.
+        return output
+    stride = tile_size - overlap
+    # Edge tiles are shifted back so they end flush with the image edge: no
+    # sliver narrower than the blend band is ever generated.
+    for x in _tile_origins(w, tile_size, stride):
+        for y in _tile_origins(h, tile_size, stride):
+            tile_w = min(tile_size, w - x)
+            tile_h = min(tile_size, h - y)
+            tile = image.crop((x, y, x + tile_w, y + tile_h))
+            # Run Flux on tile
+            gen_tile = pipe(
+                prompt=prompt,
+                image=tile,
+                strength=strength,
+                num_inference_steps=steps,
+                guidance_scale=guidance,
+                height=tile_h,
+                width=tile_w,
+                generator=generator,
+            ).images[0]
+            # The pipeline may snap dimensions to its own multiple; restore the
+            # crop size so the paste below cannot fail on a size mismatch.
+            if gen_tile.size != (tile_w, tile_h):
+                gen_tile = gen_tile.resize((tile_w, tile_h), resample=Image.LANCZOS)
+            fade_left = overlap > 0 and x > 0
+            fade_top = overlap > 0 and y > 0
+            if fade_left or fade_top:
+                # Linear blend over the band shared with already-pasted neighbours
+                output.paste(gen_tile, (x, y), _seam_mask(tile_w, tile_h, overlap, fade_left, fade_top))
+            else:
+                output.paste(gen_tile, (x, y))
+    return output
+@spaces.GPU(duration=120)
 def enhance_image(
+    image_input,
+    image_url,
     seed,
     randomize_seed,
     num_inference_steps,
     upscale_factor,
+    denoising_strength,
+    use_generated_caption,
+    custom_prompt,
     progress=gr.Progress(track_tqdm=True),
 ):
+    """Upscale an image and refine it with tiled FLUX img2img.
+
+    Args:
+        image_input: Uploaded PIL image, or None.
+        image_url: URL to fetch the image from when nothing was uploaded.
+        seed: Seed used when ``randomize_seed`` is false.
+        randomize_seed: Draw a random seed in ``[0, MAX_SEED]`` instead.
+        num_inference_steps: Inference steps per tile.
+        upscale_factor: Linear upscale factor.
+        denoising_strength: img2img strength.
+        use_generated_caption: Use a Florence-2 caption as the prompt.
+        custom_prompt: Prompt used when captioning is disabled.
+        progress: Gradio progress tracker (tracks tqdm).
+
+    Returns:
+        ``[resized_input, image]`` for the before/after slider.
+
+    Raises:
+        gr.Error: If neither an upload nor a URL is provided.
+    """
+    # Validate the request before claiming GPU memory
+    if image_input is not None:
+        input_image = image_input
+    elif image_url:
+        input_image = load_image_from_url(image_url)
+    else:
+        raise gr.Error("Please provide an image (upload or URL)")
+    if input_image.mode != "RGB":
+        # Palette / alpha / grayscale inputs are normalised once, up front.
+        input_image = input_image.convert("RGB")
+    # Slider values may arrive as floats; sizes, step counts and seeds must be ints.
+    upscale_factor = int(upscale_factor)
+    num_inference_steps = int(num_inference_steps)
+    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+    true_input_image = input_image
+    models = [pipe, florence_model]
+    if USE_ESRGAN:
+        models.append(esrgan_model)
     try:
+        # Move models to GPU with fallback to CPU
+        try:
+            device = "cuda"
+            for model in models:
+                model.to(device)
+        except Exception as e:
+            print(f"GPU error: {e}, falling back to CPU")
+            device = "cpu"
+            # Undo a partial move so every model sits on the same device.
+            for model in models:
+                model.to(device)
+        # Process input image
+        input_image, w_original, h_original, was_resized = process_input(
+            input_image, upscale_factor
+        )
+        # Generate caption if requested
+        if use_generated_caption:
+            gr.Info("🔍 Generating image caption...")
+            prompt = generate_caption(input_image)
+        else:
+            # The textbox value may be None when left untouched.
+            prompt = (custom_prompt or "").strip()
+        generator = torch.Generator(device=device).manual_seed(seed)
+        gr.Info("🚀 Upscaling image...")
+        # Initial upscale
+        if USE_ESRGAN and upscale_factor == 4:
+            control_image = esrgan_upscale(input_image, upscale_factor)
+        else:
+            w, h = input_image.size
+            control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
+        # Tiled Flux Img2Img for refinement
+        image = tiled_flux_img2img(
+            pipe,
+            prompt,
+            control_image,
+            denoising_strength,
+            num_inference_steps,
+            1.0,  # Hardcoded guidance_scale to 1
+            generator,
+            tile_size=1024,
+            overlap=32
+        )
+        if was_resized:
+            gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
+            image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
+        # Resize input image to match output size for slider alignment
+        resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
+        return [resized_input, image]
+    finally:
+        # Always move back to CPU to release the GPU, including when a step above raised
+        for model in models:
+            model.to("cpu")
 # Create Gradio interface
+with gr.Blocks(css=css, title="🎨 AI Image Upscaler - Florence-2 + FLUX") as demo:
     gr.HTML("""
     <div class="main-header">
+        <h1>🎨 AI Image Upscaler</h1>
+        <p>Upload an image or provide a URL to upscale it using Florence-2 captioning and FLUX upscaling</p>
+        <p>Currently running on <strong>{}</strong></p>
     </div>
+    """.format(power_device))
     with gr.Row():
         with gr.Column(scale=1):
+            gr.HTML("<h3>📤 Input</h3>")
+            with gr.Tabs():
+                with gr.TabItem("📁 Upload Image"):
+                    input_image = gr.Image(
+                        label="Upload Image",
+                        type="pil",
+                        height=200  # Made smaller
+                    )
+                with gr.TabItem("🔗 Image URL"):
+                    image_url = gr.Textbox(
+                        label="Image URL",
+                        placeholder="https://example.com/image.jpg",
+                        value="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg"
+                    )
+            gr.HTML("<h3>🎛️ Caption Settings</h3>")
+            use_generated_caption = gr.Checkbox(
+                label="Use AI-generated caption (Florence-2)",
+                value=True,
+                info="Generate detailed caption automatically"
             )
+            custom_prompt = gr.Textbox(
+                label="Custom Prompt (optional)",
+                placeholder="Enter custom prompt or leave empty for generated caption",
                 lines=2
             )
+            gr.HTML("<h3>⚙️ Upscaling Settings</h3>")
             upscale_factor = gr.Slider(
+                label="Upscale Factor",
+                minimum=1,
                 maximum=4,
                 step=1,
+                value=2,
+                info="How much to upscale the image"
             )
             num_inference_steps = gr.Slider(
+                label="Number of Inference Steps",
+                minimum=8,
+                maximum=50,
                 step=1,
+                value=25,
                 info="More steps = better quality but slower"
             )
             denoising_strength = gr.Slider(
+                label="Denoising Strength",
+                minimum=0.0,
+                maximum=1.0,
                 step=0.05,
+                value=0.3,
+                info="Controls how much the image is transformed"
             )
             with gr.Row():
                     label="Randomize seed",
                     value=True
                 )
+                seed = gr.Slider(
                     label="Seed",
+                    minimum=0,
+                    maximum=MAX_SEED,
+                    step=1,
+                    value=42,
+                    interactive=True
                 )
             enhance_btn = gr.Button(
+                "🚀 Upscale Image",
                 variant="primary",
                 size="lg"
             )
+        with gr.Column(scale=2):  # Larger scale for results
+            gr.HTML("<h3>📊 Results</h3>")
             result_slider = ImageSlider(
                 type="pil",
+                interactive=False,  # Disable interactivity to prevent uploads
+                height=600,  # Made larger
+                elem_id="result_slider",
+                label=None  # Remove default label
             )
     # Event handler
     enhance_btn.click(
         fn=enhance_image,
         inputs=[
             input_image,
+            image_url,
             seed,
             randomize_seed,
             num_inference_steps,
             upscale_factor,
+            denoising_strength,
+            use_generated_caption,
+            custom_prompt,
         ],
+        outputs=[result_slider]
     )
     gr.HTML("""
+    <div style="margin-top: 2rem; padding: 1rem; background: #f0f0f0; border-radius: 8px;">
+        <p><strong>Note:</strong> This upscaler uses the Flux dev model. Users are responsible for obtaining commercial rights if used commercially under their license.</p>
     </div>
     """)
+    # Custom CSS for slider
+    gr.HTML("""
+    <style>
+        #result_slider .slider {
+            width: 100% !important;
+            max-width: inherit !important;
+        }
+        #result_slider img {
+            object-fit: contain !important;
+            width: 100% !important;
+            height: auto !important;
+        }
+        #result_slider .gr-button-tool {
+            display: none !important;
+        }
+        #result_slider .gr-button-undo {
+            display: none !important;
+        }
+        #result_slider .gr-button-clear {
+            display: none !important;
+        }
+        #result_slider .badge-container .badge {
+            display: none !important;
+        }
+        #result_slider .badge-container::before {
+            content: "Before";
+            position: absolute;
+            top: 10px;
+            left: 10px;
+            background: rgba(0,0,0,0.5);
+            color: white;
+            padding: 5px;
+            border-radius: 5px;
+            z-index: 10;
+        }
+        #result_slider .badge-container::after {
+            content: "After";
+            position: absolute;
+            top: 10px;
+            right: 10px;
+            background: rgba(0,0,0,0.5);
+            color: white;
+            padding: 5px;
+            border-radius: 5px;
+            z-index: 10;
+        }
+        #result_slider .fullscreen img {
+            object-fit: contain !important;
+            width: 100vw !important;
+            height: 100vh !important;
+            position: absolute;
+            top: 0;
+            left: 0;
+        }
+    </style>
+    """)
+    # JS to set slider default position to middle
+    gr.HTML("""
+    <script>
+        document.addEventListener('DOMContentLoaded', function() {
+            const sliderInput = document.querySelector('#result_slider input[type="range"]');
+            if (sliderInput) {
+                sliderInput.value = 50;
+                sliderInput.dispatchEvent(new Event('input'));
+            }
+        });
+    </script>
+    """)
 if __name__ == "__main__":
+    demo.queue().launch(share=True, server_name="0.0.0.0", server_port=7860)