comrender committed on
Commit
082dbe6
·
verified ·
1 Parent(s): 656f4ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -39
app.py CHANGED
@@ -13,6 +13,15 @@ from PIL import Image
13
  from huggingface_hub import snapshot_download
14
  import requests
15
 
 
 
 
 
 
 
 
 
 
16
  css = """
17
  #col-container {
18
  margin: 0 auto;
@@ -70,8 +79,21 @@ pipe.enable_vae_slicing()
70
 
71
  print("✅ All models loaded successfully!")
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  MAX_SEED = 1000000
74
- MAX_PIXEL_BUDGET = 4096 * 4096
75
 
76
 
77
  def generate_caption(image):
@@ -115,33 +137,82 @@ def process_input(input_image, upscale_factor):
115
  gr.Info(
116
  f"Requested output image is too large. Resizing input to fit within pixel budget."
117
  )
118
- input_image = input_image.resize(
119
- (
120
- int(aspect_ratio * MAX_PIXEL_BUDGET**0.5 // upscale_factor),
121
- int(MAX_PIXEL_BUDGET**0.5 // aspect_ratio // upscale_factor),
122
- ),
123
- resample=Image.LANCZOS
124
- )
125
  was_resized = True
126
 
127
- # Resize to multiple of 8
128
- w, h = input_image.size
129
- w = w - w % 8
130
- h = h - h % 8
131
-
132
- return input_image.resize((w, h), resample=Image.LANCZOS), w_original, h_original, was_resized
133
 
134
 
135
  def load_image_from_url(url):
136
  """Load image from URL"""
137
  try:
138
- response = requests.get(url)
139
  response.raise_for_status()
140
- return Image.open(requests.get(url, stream=True).raw)
141
  except Exception as e:
142
  raise gr.Error(f"Failed to load image from URL: {e}")
143
 
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  @spaces.GPU(duration=120)
146
  def enhance_image(
147
  image_input,
@@ -183,33 +254,35 @@ def enhance_image(
183
  else:
184
  prompt = custom_prompt if custom_prompt.strip() else ""
185
 
186
- # Rescale with upscale factor using LANCZOS
187
- w, h = input_image.size
188
- control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
189
-
190
  generator = torch.Generator().manual_seed(seed)
191
 
192
  gr.Info("🚀 Upscaling image...")
193
-
194
- # Generate upscaled image
195
- image = pipe(
196
- prompt=prompt,
197
- image=control_image,
198
- strength=denoising_strength,
199
- num_inference_steps=num_inference_steps,
200
- guidance_scale=guidance_scale,
201
- height=control_image.size[1],
202
- width=control_image.size[0],
203
- generator=generator,
204
- ).images[0]
 
 
 
 
 
 
 
 
205
 
206
  if was_resized:
207
  gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
208
-
209
- # Resize to target desired size
210
- final_image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
211
 
212
- return [true_input_image, final_image], seed, generated_caption if use_generated_caption else ""
213
 
214
 
215
  # Create Gradio interface
@@ -380,10 +453,10 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as d
380
  <h4>💡 How it works:</h4>
381
  <ol>
382
  <li><strong>Florence-2</strong> analyzes your image and generates a detailed caption</li>
383
- <li>Initial upscale with LANCZOS interpolation</li>
384
- <li><strong>FLUX Img2Img</strong> enhances the upscaled image with AI diffusion guided by the caption</li>
385
  </ol>
386
- <p><strong>Note:</strong> Output limited to 4096x4096 pixels total budget to prevent memory issues.</p>
387
  </div>
388
  """)
389
 
 
13
  from huggingface_hub import snapshot_download
14
  import requests
15
 
16
+ # For ESRGAN (requires pip install basicsr gfpgan)
17
+ try:
18
+ from basicsr.archs.rrdbnet_arch import RRDBNet
19
+ from basicsr.utils import img2tensor, tensor2img
20
+ USE_ESRGAN = True
21
+ except ImportError:
22
+ USE_ESRGAN = False
23
+ warnings.warn("basicsr not installed; falling back to LANCZOS interpolation.")
24
+
25
  css = """
26
  #col-container {
27
  margin: 0 auto;
 
79
 
80
  print("✅ All models loaded successfully!")
81
 
82
+ # Download ESRGAN model if using
83
+ if USE_ESRGAN:
84
+ esrgan_path = "4x-UltraSharp.pth"
85
+ if not os.path.exists(esrgan_path):
86
+ url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
87
+ with open(esrgan_path, "wb") as f:
88
+ f.write(requests.get(url).content)
89
+ esrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
90
+ state_dict = torch.load(esrgan_path)['params_ema']
91
+ esrgan_model.load_state_dict(state_dict)
92
+ esrgan_model.eval()
93
+ esrgan_model.to(device)
94
+
95
  MAX_SEED = 1000000
96
+ MAX_PIXEL_BUDGET = 8192 * 8192 # Increased for tiling support
97
 
98
 
99
  def generate_caption(image):
 
137
  gr.Info(
138
  f"Requested output image is too large. Resizing input to fit within pixel budget."
139
  )
140
+ target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)
141
+ scale = (target_input_pixels / (w * h)) ** 0.5
142
+ new_w = int(w * scale) - int(w * scale) % 8
143
+ new_h = int(h * scale) - int(h * scale) % 8
144
+ input_image = input_image.resize((new_w, new_h), resample=Image.LANCZOS)
 
 
145
  was_resized = True
146
 
147
+ return input_image, w_original, h_original, was_resized
 
 
 
 
 
148
 
149
 
150
  def load_image_from_url(url):
151
  """Load image from URL"""
152
  try:
153
+ response = requests.get(url, stream=True)
154
  response.raise_for_status()
155
+ return Image.open(response.raw)
156
  except Exception as e:
157
  raise gr.Error(f"Failed to load image from URL: {e}")
158
 
159
 
160
+ def esrgan_upscale(image, scale=4):
161
+ if not USE_ESRGAN:
162
+ return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
163
+ img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
164
+ with torch.no_grad():
165
+ output = esrgan_model(img.unsqueeze(0)).squeeze()
166
+ output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
167
+ return Image.fromarray(output_img)
168
+
169
+
170
+ def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
171
+ """Tiled Img2Img to mimic Ultimate SD Upscaler tiling"""
172
+ w, h = image.size
173
+ output = image.copy() # Start with the control image
174
+
175
+ for x in range(0, w, tile_size - overlap):
176
+ for y in range(0, h, tile_size - overlap):
177
+ tile_w = min(tile_size, w - x)
178
+ tile_h = min(tile_size, h - y)
179
+ tile = image.crop((x, y, x + tile_w, y + tile_h))
180
+
181
+ # Run Flux on tile
182
+ gen_tile = pipe(
183
+ prompt=prompt,
184
+ image=tile,
185
+ strength=strength,
186
+ num_inference_steps=steps,
187
+ guidance_scale=guidance,
188
+ height=tile_h,
189
+ width=tile_w,
190
+ generator=generator,
191
+ ).images[0]
192
+
193
+ # Paste with blending if overlap
194
+ if overlap > 0:
195
+ paste_box = (x, y, x + tile_w, y + tile_h)
196
+ if x > 0 or y > 0:
197
+ # Simple linear blend on overlaps
198
+ mask = Image.new('L', (tile_w, tile_h), 255)
199
+ if x > 0:
200
+ for i in range(overlap):
201
+ for j in range(tile_h):
202
+ mask.putpixel((i, j), int(255 * (i / overlap)))
203
+ if y > 0:
204
+ for i in range(tile_w):
205
+ for j in range(overlap):
206
+ mask.putpixel((i, j), int(255 * (j / overlap)))
207
+ output.paste(gen_tile, paste_box, mask)
208
+ else:
209
+ output.paste(gen_tile, paste_box)
210
+ else:
211
+ output.paste(gen_tile, (x, y))
212
+
213
+ return output
214
+
215
+
216
  @spaces.GPU(duration=120)
217
  def enhance_image(
218
  image_input,
 
254
  else:
255
  prompt = custom_prompt if custom_prompt.strip() else ""
256
 
 
 
 
 
257
  generator = torch.Generator().manual_seed(seed)
258
 
259
  gr.Info("🚀 Upscaling image...")
260
+
261
+ # Initial upscale
262
+ if USE_ESRGAN and upscale_factor == 4:
263
+ control_image = esrgan_upscale(input_image, upscale_factor)
264
+ else:
265
+ w, h = input_image.size
266
+ control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
267
+
268
+ # Tiled Flux Img2Img for refinement
269
+ image = tiled_flux_img2img(
270
+ pipe,
271
+ prompt,
272
+ control_image,
273
+ denoising_strength,
274
+ num_inference_steps,
275
+ guidance_scale,
276
+ generator,
277
+ tile_size=1024,
278
+ overlap=32
279
+ )
280
 
281
  if was_resized:
282
  gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
283
+ image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
 
 
284
 
285
+ return [true_input_image, image], seed, generated_caption if use_generated_caption else ""
286
 
287
 
288
  # Create Gradio interface
 
453
  <h4>💡 How it works:</h4>
454
  <ol>
455
  <li><strong>Florence-2</strong> analyzes your image and generates a detailed caption</li>
456
+ <li>Initial upscale with LANCZOS interpolation (or ESRGAN if installed)</li>
457
+ <li><strong>FLUX Img2Img</strong> enhances the upscaled image with tiled AI diffusion guided by the caption</li>
458
  </ol>
459
+ <p><strong>Note:</strong> Output limited to 8192x8192 pixels total budget. Tiling enables larger sizes.</p>
460
  </div>
461
  """)
462