comrender committed
Commit b0a9f3e · verified · 1 Parent(s): 93af3e2

Update app.py

Files changed (1):
  1. app.py  +294 -206
app.py CHANGED
@@ -13,7 +13,7 @@ from PIL import Image
 from huggingface_hub import snapshot_download
 import requests

-# For ESRGAN (requires pip install basicsr gfpgan)
+# For ESRGAN (optional - will work without it)
 try:
     from basicsr.archs.rrdbnet_arch import RRDBNet
     from basicsr.utils import img2tensor, tensor2img
@@ -33,9 +33,9 @@ css = """
 }
 """

-# Device setup - Force CPU for startup in ZeroGPU
+# Device setup
 power_device = "ZeroGPU"
-device = "cpu"
+device = "cpu"  # Start on CPU, will move to GPU when needed

 # Get HuggingFace token
 huggingface_token = os.getenv("HF_TOKEN")
@@ -54,7 +54,7 @@ model_path = snapshot_download(
 print("📥 Loading Florence-2 model...")
 florence_model = AutoModelForCausalLM.from_pretrained(
     "microsoft/Florence-2-large",
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    torch_dtype=torch.float32,  # Use float32 on CPU to avoid dtype issues
     trust_remote_code=True,
     attn_implementation="eager"
 ).to(device)
@@ -67,7 +67,7 @@ florence_processor = AutoProcessor.from_pretrained(
 print("📥 Loading FLUX Img2Img...")
 pipe = FluxImg2ImgPipeline.from_pretrained(
     model_path,
-    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
+    torch_dtype=torch.float32  # Start with float32 on CPU
 )
 pipe.enable_vae_tiling()
 pipe.enable_vae_slicing()
@@ -76,27 +76,51 @@ print("✅ All models loaded successfully!")

 # Download ESRGAN model if using
 if USE_ESRGAN:
-    esrgan_path = "4x-UltraSharp.pth"
-    if not os.path.exists(esrgan_path):
-        url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
-        with open(esrgan_path, "wb") as f:
-            f.write(requests.get(url).content)
-    esrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
-    state_dict = torch.load(esrgan_path)['params_ema']
-    esrgan_model.load_state_dict(state_dict)
-    esrgan_model.eval()
+    try:
+        esrgan_path = "4x-UltraSharp.pth"
+        if not os.path.exists(esrgan_path):
+            url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
+            print("📥 Downloading ESRGAN model...")
+            with open(esrgan_path, "wb") as f:
+                f.write(requests.get(url).content)
+        esrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+        state_dict = torch.load(esrgan_path, map_location='cpu')['params_ema']
+        esrgan_model.load_state_dict(state_dict)
+        esrgan_model.eval()
+        print("✅ ESRGAN model loaded!")
+    except Exception as e:
+        print(f"Failed to load ESRGAN: {e}")
+        USE_ESRGAN = False

 MAX_SEED = 1000000
-MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
+MAX_PIXEL_BUDGET = 8192 * 8192
+
+
+def make_multiple_16(n):
+    """Round up to nearest multiple of 16"""
+    return ((n + 15) // 16) * 16


 def generate_caption(image):
     """Generate detailed caption using Florence-2"""
     try:
+        # Ensure model is on the correct device with correct dtype
+        if florence_model.device.type == "cuda":
+            florence_model.to(torch.float16)
+
         task_prompt = "<MORE_DETAILED_CAPTION>"
         prompt = task_prompt

-        inputs = florence_processor(text=prompt, images=image, return_tensors="pt").to(florence_model.device)  # Fixed: Use model's current device instead of static 'device'
+        inputs = florence_processor(
+            text=prompt,
+            images=image,
+            return_tensors="pt"
+        ).to(florence_model.device)
+
+        # Ensure dtype consistency
+        if florence_model.device.type == "cuda":
+            if hasattr(inputs, "pixel_values"):
+                inputs["pixel_values"] = inputs["pixel_values"].to(torch.float16)

         generated_ids = florence_model.generate(
             input_ids=inputs["input_ids"],
@@ -107,7 +131,11 @@ def generate_caption(image):
         )

         generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-        parsed_answer = florence_processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
+        parsed_answer = florence_processor.post_process_generation(
+            generated_text,
+            task=task_prompt,
+            image_size=(image.width, image.height)
+        )

         caption = parsed_answer[task_prompt]
         return caption
@@ -120,10 +148,9 @@ def process_input(input_image, upscale_factor):
     """Process input image and handle size constraints"""
     w, h = input_image.size
     w_original, h_original = w, h
-    aspect_ratio = w / h
-
+
     was_resized = False
-
+
     if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
         warnings.warn(
             f"Requested output image is too large ({w * upscale_factor}x{h * upscale_factor}). Resizing to fit budget."
@@ -133,11 +160,11 @@
         )
         target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)
         scale = (target_input_pixels / (w * h)) ** 0.5
-        new_w = int(w * scale) - int(w * scale) % 16  # Fixed: Use % 16 for FLUX alignment (was % 8)
-        new_h = int(h * scale) - int(h * scale) % 16  # Fixed: Use % 16 for FLUX alignment (was % 8)
+        new_w = make_multiple_16(int(w * scale))
+        new_h = make_multiple_16(int(h * scale))
         input_image = input_image.resize((new_w, new_h), resample=Image.LANCZOS)
         was_resized = True
-
+
     return input_image, w_original, h_original, was_resized


@@ -152,61 +179,168 @@ def load_image_from_url(url):


 def esrgan_upscale(image, scale=4):
+    """Upscale image using ESRGAN or fallback to LANCZOS"""
     if not USE_ESRGAN:
         return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
-    img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
-    with torch.no_grad():
-        output = esrgan_model(img.unsqueeze(0)).squeeze()
-    output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
-    return Image.fromarray(output_img)
+
+    try:
+        img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
+        with torch.no_grad():
+            # Move model to same device as image tensor
+            if torch.cuda.is_available():
+                esrgan_model.to("cuda")
+                img = img.to("cuda")
+            output = esrgan_model(img.unsqueeze(0)).squeeze()
+        output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
+        return Image.fromarray(output_img)
+    except Exception as e:
+        print(f"ESRGAN upscale failed: {e}, falling back to LANCZOS")
+        return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)


-def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
-    """Tiled Img2Img to mimic Ultimate SD Upscaler tiling"""
-    w, h = image.size
-    output = image.copy()  # Start with the control image
+def create_blend_mask(width, height, overlap, edge_x, edge_y):
+    """Create a gradient blend mask for smooth tile transitions"""
+    mask = Image.new('L', (width, height), 255)
+    pixels = mask.load()
+
+    # Horizontal blend (left edge)
+    if edge_x and overlap > 0:
+        for x in range(min(overlap, width)):
+            alpha = x / overlap
+            for y in range(height):
+                pixels[x, y] = int(255 * alpha)
+
+    # Vertical blend (top edge)
+    if edge_y and overlap > 0:
+        for y in range(min(overlap, height)):
+            alpha = y / overlap
+            for x in range(width):
+                # Combine with existing alpha if both edges
+                existing = pixels[x, y] / 255.0
+                combined = min(existing, alpha)
+                pixels[x, y] = int(255 * combined)
+
+    return mask
+

-    for x in range(0, w, tile_size - overlap):
-        for y in range(0, h, tile_size - overlap):
+def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=64):
+    """Tiled Img2Img to handle large images"""
+    w, h = image.size
+
+    # Ensure tile_size is divisible by 16
+    tile_size = make_multiple_16(tile_size)
+    overlap = make_multiple_16(overlap)
+
+    # If image is small enough, process without tiling
+    if w <= tile_size and h <= tile_size:
+        # Ensure dimensions are divisible by 16
+        new_w = make_multiple_16(w)
+        new_h = make_multiple_16(h)
+
+        if new_w != w or new_h != h:
+            padded_image = Image.new('RGB', (new_w, new_h))
+            padded_image.paste(image, (0, 0))
+        else:
+            padded_image = image
+
+        result = pipe(
+            prompt=prompt,
+            image=padded_image,
+            strength=strength,
+            num_inference_steps=steps,
+            guidance_scale=guidance,
+            height=new_h,
+            width=new_w,
+            generator=generator,
+        ).images[0]
+
+        # Crop back to original size if padded
+        if new_w != w or new_h != h:
+            result = result.crop((0, 0, w, h))
+
+        return result
+
+    # Process with tiling for large images
+    output = Image.new('RGB', (w, h))
+
+    # Calculate tile positions
+    tiles = []
+    for y in range(0, h, tile_size - overlap):
+        for x in range(0, w, tile_size - overlap):
             tile_w = min(tile_size, w - x)
             tile_h = min(tile_size, h - y)
-            tile = image.crop((x, y, x + tile_w, y + tile_h))
-
-            # Run Flux on tile
+
+            # Ensure tile dimensions are divisible by 16
+            tile_w_padded = make_multiple_16(tile_w)
+            tile_h_padded = make_multiple_16(tile_h)
+
+            tiles.append({
+                'x': x,
+                'y': y,
+                'w': tile_w,
+                'h': tile_h,
+                'w_padded': tile_w_padded,
+                'h_padded': tile_h_padded,
+                'edge_x': x > 0,
+                'edge_y': y > 0
+            })
+
+    # Process each tile
+    for i, tile_info in enumerate(tiles):
+        print(f"Processing tile {i+1}/{len(tiles)}...")
+
+        # Extract tile from image
+        tile = image.crop((
+            tile_info['x'],
+            tile_info['y'],
+            tile_info['x'] + tile_info['w'],
+            tile_info['y'] + tile_info['h']
+        ))
+
+        # Pad if necessary
+        if tile_info['w_padded'] != tile_info['w'] or tile_info['h_padded'] != tile_info['h']:
+            padded_tile = Image.new('RGB', (tile_info['w_padded'], tile_info['h_padded']))
+            padded_tile.paste(tile, (0, 0))
+            tile = padded_tile
+
+        # Process tile with FLUX
+        try:
             gen_tile = pipe(
                 prompt=prompt,
                 image=tile,
                 strength=strength,
                 num_inference_steps=steps,
                 guidance_scale=guidance,
-                height=tile_h,
-                width=tile_w,
+                height=tile_info['h_padded'],
+                width=tile_info['w_padded'],
                 generator=generator,
             ).images[0]
-
-            # Fixed: Resize generated tile back to exact tile dimensions if pipeline auto-resized for multiple-of-16 requirement
-            gen_tile = gen_tile.resize((tile_w, tile_h), resample=Image.LANCZOS)
-
-            # Paste with blending if overlap
-            if overlap > 0:
-                paste_box = (x, y, x + tile_w, y + tile_h)
-                if x > 0 or y > 0:
-                    # Simple linear blend on overlaps
-                    mask = Image.new('L', (tile_w, tile_h), 255)
-                    if x > 0:
-                        for i in range(overlap):
-                            for j in range(tile_h):
-                                mask.putpixel((i, j), int(255 * (i / overlap)))
-                    if y > 0:
-                        for i in range(tile_w):
-                            for j in range(overlap):
-                                mask.putpixel((i, j), int(255 * (j / overlap)))
-                    output.paste(gen_tile, paste_box, mask)
-                else:
-                    output.paste(gen_tile, paste_box)
+
+            # Crop back to original tile size if padded
+            if tile_info['w_padded'] != tile_info['w'] or tile_info['h_padded'] != tile_info['h']:
+                gen_tile = gen_tile.crop((0, 0, tile_info['w'], tile_info['h']))
+
+            # Create blend mask if needed
+            if overlap > 0 and (tile_info['edge_x'] or tile_info['edge_y']):
+                mask = create_blend_mask(
+                    tile_info['w'],
+                    tile_info['h'],
+                    overlap,
+                    tile_info['edge_x'],
+                    tile_info['edge_y']
+                )
+
+                # Composite with blending
+                output.paste(gen_tile, (tile_info['x'], tile_info['y']), mask)
             else:
-                output.paste(gen_tile, (x, y))
-
+                # Direct paste for first tile or no overlap
+                output.paste(gen_tile, (tile_info['x'], tile_info['y']))
+
+        except Exception as e:
+            print(f"Error processing tile: {e}")
+            # Fallback: paste original tile
+            output.paste(tile, (tile_info['x'], tile_info['y']))
+
     return output


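Aside: the new `create_blend_mask` fills the mask pixel-by-pixel in Python, which is O(width·height) interpreter work per tile. Since app.py already uses numpy (`np.array(image)` above), an equivalent vectorized construction is possible; a sketch with the same min-combine semantics (illustrative only, not part of the commit):

    import numpy as np
    from PIL import Image

    def create_blend_mask_np(width, height, overlap, edge_x, edge_y):
        m = np.ones((height, width), dtype=np.float32)  # start fully opaque
        if edge_x and overlap > 0:
            # Linear ramp over the left edge (columns = x, as in pixels[x, y])
            ramp = np.arange(min(overlap, width), dtype=np.float32) / overlap
            m[:, :ramp.size] = np.minimum(m[:, :ramp.size], ramp[None, :])
        if edge_y and overlap > 0:
            # Linear ramp over the top edge, min-combined where both edges overlap
            ramp = np.arange(min(overlap, height), dtype=np.float32) / overlap
            m[:ramp.size, :] = np.minimum(m[:ramp.size, :], ramp[:, None])
        return Image.fromarray((m * 255).astype(np.uint8), mode='L')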
@@ -224,85 +358,106 @@ def enhance_image(
     progress=gr.Progress(track_tqdm=True),
 ):
     """Main enhancement function"""
-    # Move models to GPU inside the function
-    pipe.to("cuda")
-    florence_model.to("cuda")
-
-    # Handle image input
-    if image_input is not None:
-        input_image = image_input
-    elif image_url:
-        input_image = load_image_from_url(image_url)
-    else:
-        raise gr.Error("Please provide an image (upload or URL)")
-
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-
-    true_input_image = input_image
-
-    # Process input image
-    input_image, w_original, h_original, was_resized = process_input(
-        input_image, upscale_factor
-    )
-
-    # Generate caption if requested
-    if use_generated_caption:
-        gr.Info("🔍 Generating image caption...")
-        generated_caption = generate_caption(input_image)
-        prompt = generated_caption
-    else:
-        prompt = custom_prompt if custom_prompt.strip() else ""
-
-    generator = torch.Generator(device="cuda").manual_seed(seed)
-
-    gr.Info("🚀 Upscaling image...")
-
-    # Initial upscale
-    if USE_ESRGAN and upscale_factor == 4:
-        esrgan_model.to("cuda")
-        control_image = esrgan_upscale(input_image, upscale_factor)
-        esrgan_model.to("cpu")
-    else:
-        w, h = input_image.size
-        control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
-
-    # Tiled Flux Img2Img for refinement
-    image = tiled_flux_img2img(
-        pipe,
-        prompt,
-        control_image,
-        denoising_strength,
-        num_inference_steps,
-        1.0,  # Hardcoded guidance_scale to 1
-        generator,
-        tile_size=1024,
-        overlap=32
-    )
-
-    if was_resized:
-        gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
-        image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
-
-    # Resize input image to match output size for slider alignment
-    resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
-
-    # Move back to CPU to release GPU
-    pipe.to("cpu")
-    florence_model.to("cpu")
-
-    return [resized_input, image]
+    try:
+        # Move models to GPU and convert to appropriate dtype
+        pipe.to("cuda")
+        pipe.to(torch.bfloat16)
+
+        florence_model.to("cuda")
+        florence_model.to(torch.float16)
+
+        # Handle image input
+        if image_input is not None:
+            input_image = image_input
+        elif image_url:
+            input_image = load_image_from_url(image_url)
+        else:
+            raise gr.Error("Please provide an image (upload or URL)")
+
+        if randomize_seed:
+            seed = random.randint(0, MAX_SEED)
+
+        true_input_image = input_image
+
+        # Process input image
+        input_image, w_original, h_original, was_resized = process_input(
+            input_image, upscale_factor
+        )
+
+        # Generate caption if requested
+        if use_generated_caption:
+            gr.Info("🔍 Generating image caption...")
+            generated_caption = generate_caption(input_image)
+            prompt = generated_caption
+            print(f"Generated caption: {prompt}")
+        else:
+            prompt = custom_prompt if custom_prompt.strip() else ""
+
+        generator = torch.Generator(device="cuda").manual_seed(seed)
+
+        gr.Info("🚀 Upscaling image...")
+
+        # Initial upscale
+        if USE_ESRGAN and upscale_factor == 4:
+            if torch.cuda.is_available():
+                esrgan_model.to("cuda")
+            control_image = esrgan_upscale(input_image, upscale_factor)
+            if torch.cuda.is_available():
+                esrgan_model.to("cpu")
+        else:
+            w, h = input_image.size
+            control_image = input_image.resize(
+                (w * upscale_factor, h * upscale_factor),
+                resample=Image.LANCZOS
+            )
+
+        # Tiled Flux Img2Img for refinement
+        image = tiled_flux_img2img(
+            pipe,
+            prompt,
+            control_image,
+            denoising_strength,
+            num_inference_steps,
+            1.0,  # guidance_scale fixed to 1.0
+            generator,
+            tile_size=1024,
+            overlap=64
+        )
+
+        if was_resized:
+            gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
+            image = image.resize(
+                (w_original * upscale_factor, h_original * upscale_factor),
+                resample=Image.LANCZOS
+            )
+
+        # Resize input image to match output size for slider alignment
+        resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
+
+        # Move models back to CPU to release GPU
+        pipe.to("cpu")
+        florence_model.to("cpu")
+        torch.cuda.empty_cache()
+
+        return [resized_input, image]
+
+    except Exception as e:
+        # Ensure models are moved back to CPU even on error
+        pipe.to("cpu")
+        florence_model.to("cpu")
+        torch.cuda.empty_cache()
+        raise gr.Error(f"Enhancement failed: {str(e)}")


 # Create Gradio interface
 with gr.Blocks(css=css, title="🎨 AI Image Upscaler - Florence-2 + FLUX") as demo:
-    gr.HTML("""
+    gr.HTML(f"""
     <div class="main-header">
         <h1>🎨 AI Image Upscaler</h1>
         <p>Upload an image or provide a URL to upscale it using Florence-2 captioning and FLUX upscaling</p>
-        <p>Currently running on <strong>{}</strong></p>
+        <p>Currently running on <strong>{power_device}</strong></p>
     </div>
-    """.format(power_device))
+    """)

     with gr.Row():
         with gr.Column(scale=1):
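Aside: the new try/except duplicates the CPU-offload cleanup in both the success and failure paths. The same "always release the GPU" guarantee is conventionally spelled with try/finally; a standalone sketch (illustrative only, not part of the commit):

    import torch

    def with_gpu(model, fn):
        """Run fn() with model on CUDA, always offloading afterwards."""
        try:
            model.to("cuda")
            return fn()
        finally:
            # Runs on success and on any exception, so cleanup is written once
            model.to("cpu")
            torch.cuda.empty_cache()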
@@ -313,14 +468,14 @@ with gr.Blocks(css=css, title="🎨 AI Image Upscaler - Florence-2 + FLUX") as d
                     input_image = gr.Image(
                         label="Upload Image",
                         type="pil",
-                        height=200  # Made smaller
+                        height=200
                     )

                 with gr.TabItem("🔗 Image URL"):
                     image_url = gr.Textbox(
                         label="Image URL",
                         placeholder="https://example.com/image.jpg",
-                        value="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg"
+                        value=""
                     )

             gr.HTML("<h3>🎛️ Caption Settings</h3>")
@@ -386,15 +541,15 @@ with gr.Blocks(css=css, title="🎨 AI Image Upscaler - Florence-2 + FLUX") as d
                 size="lg"
             )

-        with gr.Column(scale=2):  # Larger scale for results
+        with gr.Column(scale=2):
             gr.HTML("<h3>📊 Results</h3>")

             result_slider = ImageSlider(
                 type="pil",
-                interactive=False,  # Disable interactivity to prevent uploads
-                height=600,  # Made larger
+                interactive=False,
+                height=600,
                 elem_id="result_slider",
-                label=None  # Remove default label
+                label=None
             )

     # Event handler
@@ -419,73 +574,6 @@ with gr.Blocks(css=css, title="🎨 AI Image Upscaler - Florence-2 + FLUX") as d
         <p><strong>Note:</strong> This upscaler uses the Flux dev model. Users are responsible for obtaining commercial rights if used commercially under their license.</p>
     </div>
     """)
-
-    # Custom CSS for slider
-    gr.HTML("""
-    <style>
-    #result_slider .slider {
-        width: 100% !important;
-        max-width: inherit !important;
-    }
-    #result_slider img {
-        object-fit: contain !important;
-        width: 100% !important;
-        height: auto !important;
-    }
-    #result_slider .gr-button-tool {
-        display: none !important;
-    }
-    #result_slider .gr-button-undo {
-        display: none !important;
-    }
-    #result_slider .gr-button-clear {
-        display: none !important;
-    }
-    #result_slider .badge-container .badge {
-        display: none !important;
-    }
-    #result_slider .badge-container::before {
-        content: "Before";
-        position: absolute;
-        top: 10px;
-        left: 10px;
-        background: rgba(0,0,0,0.5);
-        color: white;
-        padding: 5px;
-        border-radius: 5px;
-        z-index: 10;
-    }
-    #result_slider .badge-container::after {
-        content: "After";
-        position: absolute;
-        top: 10px;
-        right: 10px;
-        background: rgba(0,0,0,0.5);
-        color: white;
-        padding: 5px;
-        border-radius: 5px;
-        z-index: 10;
-    }
-    #result_slider .fullscreen img {
-        object-fit: contain !important;
-        width: 100vw !important;
-        height: 100vh !important;
-    }
-    </style>
-    """)
-
-    # JS to set slider default position to middle
-    gr.HTML("""
-    <script>
-    document.addEventListener('DOMContentLoaded', function() {
-        const sliderInput = document.querySelector('#result_slider input[type="range"]');
-        if (sliderInput) {
-            sliderInput.value = 50;
-            sliderInput.dispatchEvent(new Event('input'));
-        }
-    });
-    </script>
-    """)

 if __name__ == "__main__":
     demo.queue().launch(share=True, server_name="0.0.0.0", server_port=7860)