Spaces:

comrender
/

fluxhdupscaler

Running on Zero

App Files Files Community

comrender committed 8 days ago

Commit

74168bc

verified ·

1 Parent(s): 5d03bff

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -31

app.py CHANGED Viewed

@@ -6,15 +6,70 @@ import gradio as gr
 import numpy as np
 import spaces
 import torch
 from diffusers import FluxImg2ImgPipeline
 from gradio_imageslider import ImageSlider
 from PIL import Image
 from huggingface_hub import snapshot_download
 import requests
-# ESRGAN imports
-from basicsr.archs.rrdbnet_arch import RRDBNet
-from basicsr.utils import img2tensor, tensor2img
 css = """
 #col-container {
@@ -73,12 +128,23 @@ esrgan_model = RRDBNet(
     num_grow_ch=32,
     scale=4
 )
 state_dict = torch.load(esrgan_path, map_location='cpu')
 if 'params_ema' in state_dict:
     state_dict = state_dict['params_ema']
 elif 'params' in state_dict:
     state_dict = state_dict['params']
-esrgan_model.load_state_dict(state_dict)
 esrgan_model.eval()
 print("✅ All models loaded successfully!")
@@ -114,18 +180,21 @@ def prepare_image(image, max_size=MAX_INPUT_SIZE):
     return image
-def esrgan_upscale(image):
     """Upscale image 4x using ESRGAN"""
-    # Convert PIL to tensor
     img_np = np.array(image).astype(np.float32) / 255.
-    img_tensor = img2tensor(img_np, bgr2rgb=False, float32=True)
     # Upscale
     with torch.no_grad():
-        output = esrgan_model(img_tensor.unsqueeze(0).cpu())
-    # Convert back to PIL
-    output_np = tensor2img(output.squeeze(0), rgb2bgr=False, min_max=(0, 1))
     return Image.fromarray(output_np)
@@ -159,27 +228,16 @@ def enhance_image(
         input_image = prepare_image(input_image)
         original_size = input_image.size
-        # Step 1: ESRGAN upscale (4x) on CPU
         gr.Info("🔍 Upscaling with ESRGAN 4x...")
-        with torch.no_grad():
-            # Move ESRGAN to GPU for faster processing
-            esrgan_model.to("cuda")
-            # Convert image for ESRGAN
-            img_np = np.array(input_image).astype(np.float32) / 255.
-            img_tensor = img2tensor(img_np, bgr2rgb=False, float32=True)
-            img_tensor = img_tensor.unsqueeze(0).to("cuda")
-            # Upscale
-            output_tensor = esrgan_model(img_tensor)
-            # Convert back to PIL
-            output_np = tensor2img(output_tensor.squeeze(0).cpu(), rgb2bgr=False, min_max=(0, 1))
-            upscaled_image = Image.fromarray(output_np)
-            # Move ESRGAN back to CPU to free memory
-            esrgan_model.to("cpu")
-            torch.cuda.empty_cache()
         # Ensure dimensions are multiples of 16 for FLUX
         w, h = upscaled_image.size

 import numpy as np
 import spaces
 import torch
+import torch.nn as nn
 from diffusers import FluxImg2ImgPipeline
 from gradio_imageslider import ImageSlider
 from PIL import Image
 from huggingface_hub import snapshot_download
 import requests
+# Minimal ESRGAN implementation (without basicsr dependency)
class ResidualDenseBlock(nn.Module):
    """Densely connected block of five 3x3 convolutions with a scaled skip.

    Each of the first four convolutions sees the block input concatenated
    (along the channel axis) with every earlier convolution's activated
    output. The fifth convolution maps everything back to ``num_feat``
    channels; its output is scaled by 0.2 and added to the block input.

    Args:
        num_feat: Channel count of the block input and output.
        num_grow_ch: Channels produced by each of the first four convolutions.
    """

    def __init__(self, num_feat=64, num_grow_ch=32):
        super().__init__()
        # All convolutions are 3x3, stride 1, padding 1 (spatial size is kept).
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)
        # Attribute names conv1..conv5 are the state-dict key prefixes.
        self.conv1 = nn.Conv2d(num_feat, num_grow_ch, **conv_kwargs)
        self.conv2 = nn.Conv2d(num_feat + num_grow_ch, num_grow_ch, **conv_kwargs)
        self.conv3 = nn.Conv2d(num_feat + 2 * num_grow_ch, num_grow_ch, **conv_kwargs)
        self.conv4 = nn.Conv2d(num_feat + 3 * num_grow_ch, num_grow_ch, **conv_kwargs)
        self.conv5 = nn.Conv2d(num_feat + 4 * num_grow_ch, num_feat, **conv_kwargs)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

    def forward(self, x):
        """Return ``conv5(dense features) * 0.2 + x``."""
        dense_feats = [x]
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            stacked = torch.cat(dense_feats, 1)
            dense_feats.append(self.lrelu(conv(stacked)))
        fused = self.conv5(torch.cat(dense_feats, 1))
        return fused * 0.2 + x
class RRDB(nn.Module):
    """Three chained ``ResidualDenseBlock`` modules wrapped in a scaled skip.

    Args:
        num_feat: Channel count of the input and output feature maps.
        num_grow_ch: Growth channels passed to each dense block.
    """

    def __init__(self, num_feat, num_grow_ch=32):
        super().__init__()
        # Attribute names rdb1..rdb3 are the state-dict key prefixes.
        self.rdb1 = ResidualDenseBlock(num_feat, num_grow_ch)
        self.rdb2 = ResidualDenseBlock(num_feat, num_grow_ch)
        self.rdb3 = ResidualDenseBlock(num_feat, num_grow_ch)

    def forward(self, x):
        """Run the three dense blocks in order and add 0.2x of the result to ``x``."""
        residual = self.rdb3(self.rdb2(self.rdb1(x)))
        return residual * 0.2 + x
class RRDBNet(nn.Module):
    """RRDB-based super-resolution network (ESRGAN-style generator).

    The trunk always upsamples by 4x (two nearest-neighbour 2x stages, each
    followed by a convolution). To honour smaller ``scale`` values the input
    is first pixel-unshuffled (space-to-depth): by 2 for ``scale=2`` and by 4
    for ``scale=1``. The first convolution then takes 4x / 16x the input
    channels and the net output/input size ratio equals ``scale``.
    NOTE(review): this mirrors the layout used by the BasicSR RRDBNet so that
    x2/x1 checkpoints have matching shapes - confirm against the checkpoints
    you intend to load.

    Args:
        num_in_ch: Channels of the input image tensor.
        num_out_ch: Channels of the output image tensor.
        num_feat: Width of the intermediate feature maps.
        num_block: Number of ``RRDB`` blocks in the trunk.
        num_grow_ch: Growth channels inside every dense block.
        scale: Overall upscaling factor; one of 4, 2 or 1.

    Raises:
        ValueError: If ``scale`` is not 1, 2 or 4.
    """

    # scale -> pixel-unshuffle factor applied to the input before the trunk.
    _UNSHUFFLE_FACTOR = {4: 1, 2: 2, 1: 4}

    def __init__(self, num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4):
        super().__init__()
        if scale not in self._UNSHUFFLE_FACTOR:
            raise ValueError(f"Unsupported scale {scale!r}; expected 1, 2 or 4")
        self.scale = scale
        self._unshuffle = self._UNSHUFFLE_FACTOR[scale]
        # Pixel-unshuffle by r multiplies the channel count by r * r.
        first_in_ch = num_in_ch * self._unshuffle ** 2
        self.conv_first = nn.Conv2d(first_in_ch, num_feat, 3, 1, 1)
        self.body = nn.Sequential(*[RRDB(num_feat, num_grow_ch) for _ in range(num_block)])
        self.conv_body = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
        # Upsampling
        self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
        self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
        self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
        self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

    def forward(self, x):
        """Upscale ``x`` (N, num_in_ch, H, W) to (N, num_out_ch, H*scale, W*scale).

        For ``scale`` 2 or 1, H and W must be divisible by the unshuffle
        factor (2 or 4 respectively).
        """
        if self._unshuffle > 1:
            # Trade spatial resolution for channels so the fixed 4x trunk
            # yields the requested overall factor.
            x = nn.functional.pixel_unshuffle(x, self._unshuffle)
        fea = self.conv_first(x)
        trunk = self.conv_body(self.body(fea))
        fea = fea + trunk  # long skip connection around the RRDB trunk
        fea = self.lrelu(self.conv_up1(nn.functional.interpolate(fea, scale_factor=2, mode='nearest')))
        fea = self.lrelu(self.conv_up2(nn.functional.interpolate(fea, scale_factor=2, mode='nearest')))
        out = self.conv_last(self.lrelu(self.conv_hr(fea)))
        return out
 css = """
 #col-container {
     num_grow_ch=32,
     scale=4
 )
# Load state dict
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source; consider passing
# weights_only=True where the installed torch version supports it.
state_dict = torch.load(esrgan_path, map_location='cpu')
# Some checkpoints nest the weights; 'params_ema' takes precedence over 'params'.
if 'params_ema' in state_dict:
    state_dict = state_dict['params_ema']
elif 'params' in state_dict:
    state_dict = state_dict['params']
# Clean state dict keys if needed: drop a leading 'module.' prefix
# (presumably left by a wrapped-model save - confirm against the checkpoint).
cleaned_state_dict = {
    (k[len('module.'):] if k.startswith('module.') else k): v
    for k, v in state_dict.items()
}
# strict=False tolerates key mismatches, so report them explicitly: any
# missing parameter keeps its random initialisation and would silently
# degrade the upscaler output.
load_result = esrgan_model.load_state_dict(cleaned_state_dict, strict=False)
if load_result.missing_keys:
    print(
        f"⚠️ ESRGAN checkpoint is missing {len(load_result.missing_keys)} "
        f"parameter(s) (left randomly initialised), e.g. {load_result.missing_keys[:5]}"
    )
if load_result.unexpected_keys:
    print(
        f"⚠️ ESRGAN checkpoint has {len(load_result.unexpected_keys)} "
        f"unused key(s), e.g. {load_result.unexpected_keys[:5]}"
    )
esrgan_model.eval()
print("✅ All models loaded successfully!")
     return image
def esrgan_upscale(image, model, device='cuda'):
    """Upscale an image with an ESRGAN-style model and return a PIL image.

    The upscaling factor is whatever ``model`` implements (4x for the ESRGAN
    model configured in this file).

    Args:
        image: PIL image (converted to RGB if it is in another mode) or an
            H x W x 3 array-like with values in the 0-255 range.
        model: Callable mapping a (1, 3, H, W) float32 tensor with values in
            [0, 1] to a (1, C, H', W') tensor. It must already live on
            ``device``; this function does not move it.
        device: Device the input tensor is sent to before calling ``model``.

    Returns:
        PIL.Image.Image built from the model output, clamped to [0, 1] and
        rounded to 8-bit.
    """
    # Grayscale / palette / RGBA inputs would not give an H x W x 3 array and
    # would break the HWC -> CHW transpose or the 3-channel first convolution.
    if isinstance(image, Image.Image) and image.mode != 'RGB':
        image = image.convert('RGB')

    # Prepare image: HWC uint8 -> 1 x C x H x W float32 in [0, 1]
    img_np = np.array(image).astype(np.float32) / 255.
    img_np = np.transpose(img_np, (2, 0, 1))  # HWC to CHW
    img_tensor = torch.from_numpy(img_np).unsqueeze(0).to(device)

    # Upscale
    with torch.no_grad():
        output = model(img_tensor)
        output = output.squeeze(0).cpu().clamp(0, 1)

    output_np = np.transpose(output.numpy(), (1, 2, 0))  # CHW to HWC
    # Round to nearest instead of truncating: a plain uint8 cast floors every
    # value and darkens the result by up to one intensity level.
    output_np = np.rint(output_np * 255.0).astype(np.uint8)
    return Image.fromarray(output_np)
         input_image = prepare_image(input_image)
         original_size = input_image.size
+        # Step 1: ESRGAN upscale (4x) on GPU
         gr.Info("🔍 Upscaling with ESRGAN 4x...")
+        # Move ESRGAN to GPU for faster processing
+        esrgan_model.to("cuda")
+        upscaled_image = esrgan_upscale(input_image, esrgan_model, device="cuda")
+        # Move ESRGAN back to CPU to free memory
+        esrgan_model.to("cpu")
+        torch.cuda.empty_cache()
         # Ensure dimensions are multiples of 16 for FLUX
         w, h = upscaled_image.size