Update app.py

app.py CHANGED
@@ -13,6 +13,15 @@ from PIL import Image
 from huggingface_hub import snapshot_download
 import requests

+# For ESRGAN (requires pip install basicsr gfpgan)
+try:
+    from basicsr.archs.rrdbnet_arch import RRDBNet
+    from basicsr.utils import img2tensor, tensor2img
+    USE_ESRGAN = True
+except ImportError:
+    USE_ESRGAN = False
+    warnings.warn("basicsr not installed; falling back to LANCZOS interpolation.")
+
 css = """
 #col-container {
     margin: 0 auto;
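A caveat on this hunk: the fallback branch calls `warnings.warn`, which only works if `warnings` is imported earlier in `app.py` (not visible in the diff context). A self-contained sketch of the guarded import, assuming no prior `warnings` import exists:

```python
# Sketch of the import guard with its own dependency; assumes basicsr
# may or may not be installed in the Space environment.
import warnings  # needed for the fallback warning below

try:
    from basicsr.archs.rrdbnet_arch import RRDBNet    # ESRGAN architecture
    from basicsr.utils import img2tensor, tensor2img  # numpy <-> tensor helpers
    USE_ESRGAN = True
except ImportError:
    USE_ESRGAN = False
    warnings.warn("basicsr not installed; falling back to LANCZOS interpolation.")
```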
@@ -70,8 +79,21 @@ pipe.enable_vae_slicing()

 print("✅ All models loaded successfully!")

+# Download ESRGAN model if using
+if USE_ESRGAN:
+    esrgan_path = "4x-UltraSharp.pth"
+    if not os.path.exists(esrgan_path):
+        url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
+        with open(esrgan_path, "wb") as f:
+            f.write(requests.get(url).content)
+    esrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+    state_dict = torch.load(esrgan_path)['params_ema']
+    esrgan_model.load_state_dict(state_dict)
+    esrgan_model.eval()
+    esrgan_model.to(device)
+
 MAX_SEED = 1000000
-MAX_PIXEL_BUDGET =
+MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support


 def generate_caption(image):
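Two defensive tweaks worth noting on the download/load above: `requests.get(url).content` buffers the whole checkpoint in memory, and `torch.load` without `map_location` tries to restore tensors to whatever device they were saved from. A sketch of a streamed variant (the `params_ema` key and `esrgan_path` come from the diff; the helper name and chunk size are assumptions):

```python
import os
import requests
import torch

def download_esrgan_weights(esrgan_path, url):
    """Stream the checkpoint to disk instead of holding it in memory."""
    if os.path.exists(esrgan_path):
        return
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(esrgan_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)

# These ESRGAN checkpoints store EMA weights under 'params_ema';
# map_location avoids CUDA deserialization errors on CPU-only startup.
# state_dict = torch.load(esrgan_path, map_location="cpu")["params_ema"]
```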
@@ -115,33 +137,82 @@ def process_input(input_image, upscale_factor):
         gr.Info(
             f"Requested output image is too large. Resizing input to fit within pixel budget."
         )
-
-
-
-
-
-            resample=Image.LANCZOS
-        )
+        target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)
+        scale = (target_input_pixels / (w * h)) ** 0.5
+        new_w = int(w * scale) - int(w * scale) % 8
+        new_h = int(h * scale) - int(h * scale) % 8
+        input_image = input_image.resize((new_w, new_h), resample=Image.LANCZOS)
         was_resized = True

-
-    w, h = input_image.size
-    w = w - w % 8
-    h = h - h % 8
-
-    return input_image.resize((w, h), resample=Image.LANCZOS), w_original, h_original, was_resized
+    return input_image, w_original, h_original, was_resized


 def load_image_from_url(url):
     """Load image from URL"""
     try:
-        response = requests.get(url)
+        response = requests.get(url, stream=True)
         response.raise_for_status()
-        return Image.open(
+        return Image.open(response.raw)
     except Exception as e:
         raise gr.Error(f"Failed to load image from URL: {e}")


+def esrgan_upscale(image, scale=4):
+    if not USE_ESRGAN:
+        return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
+    img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
+    with torch.no_grad():
+        output = esrgan_model(img.unsqueeze(0)).squeeze()
+    output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
+    return Image.fromarray(output_img)
+
+
+def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
+    """Tiled Img2Img to mimic Ultimate SD Upscaler tiling"""
+    w, h = image.size
+    output = image.copy()  # Start with the control image
+
+    for x in range(0, w, tile_size - overlap):
+        for y in range(0, h, tile_size - overlap):
+            tile_w = min(tile_size, w - x)
+            tile_h = min(tile_size, h - y)
+            tile = image.crop((x, y, x + tile_w, y + tile_h))
+
+            # Run Flux on tile
+            gen_tile = pipe(
+                prompt=prompt,
+                image=tile,
+                strength=strength,
+                num_inference_steps=steps,
+                guidance_scale=guidance,
+                height=tile_h,
+                width=tile_w,
+                generator=generator,
+            ).images[0]
+
+            # Paste with blending if overlap
+            if overlap > 0:
+                paste_box = (x, y, x + tile_w, y + tile_h)
+                if x > 0 or y > 0:
+                    # Simple linear blend on overlaps
+                    mask = Image.new('L', (tile_w, tile_h), 255)
+                    if x > 0:
+                        for i in range(overlap):
+                            for j in range(tile_h):
+                                mask.putpixel((i, j), int(255 * (i / overlap)))
+                    if y > 0:
+                        for i in range(tile_w):
+                            for j in range(overlap):
+                                mask.putpixel((i, j), int(255 * (j / overlap)))
+                    output.paste(gen_tile, paste_box, mask)
+                else:
+                    output.paste(gen_tile, paste_box)
+            else:
+                output.paste(gen_tile, (x, y))
+
+    return output
+
+
 @spaces.GPU(duration=120)
 def enhance_image(
     image_input,
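One caveat in the added `esrgan_upscale`: the model is moved to `device` at load time, but the input tensor is not, which would raise a device-mismatch error on GPU. A minimal sketch with explicit device handling (assumes `esrgan_model`, `img2tensor`, and `tensor2img` as defined in this commit):

```python
import numpy as np
import torch
from PIL import Image

def esrgan_upscale_fixed(image, scale=4):
    """Variant of esrgan_upscale that keeps model and input on one device.
    Assumes esrgan_model, img2tensor, tensor2img from the commit above."""
    img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
    img = img.unsqueeze(0).to(next(esrgan_model.parameters()).device)
    with torch.no_grad():
        output = esrgan_model(img).squeeze(0).cpu()
    # min_max=(0, 1) clamps before converting back to uint8 HWC
    return Image.fromarray(tensor2img(output, rgb2bgr=False, min_max=(0, 1)))
```

Separately, the per-pixel `putpixel` loops in the blend mask scale with tile area; building the gradient once with numpy (`np.tile(np.linspace(...))`) and wrapping it with `Image.fromarray` would be much faster for 1024-pixel tiles.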
@@ -183,33 +254,35 @@ def enhance_image(
     else:
         prompt = custom_prompt if custom_prompt.strip() else ""

-    # Rescale with upscale factor using LANCZOS
-    w, h = input_image.size
-    control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
-
     generator = torch.Generator().manual_seed(seed)

     gr.Info("🔄 Upscaling image...")
-
-    #
-
-
-
-
-
-
-
-
-
+
+    # Initial upscale
+    if USE_ESRGAN and upscale_factor == 4:
+        control_image = esrgan_upscale(input_image, upscale_factor)
+    else:
+        w, h = input_image.size
+        control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
+
+    # Tiled Flux Img2Img for refinement
+    image = tiled_flux_img2img(
+        pipe,
+        prompt,
+        control_image,
+        denoising_strength,
+        num_inference_steps,
+        guidance_scale,
+        generator,
+        tile_size=1024,
+        overlap=32
+    )

     if was_resized:
         gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
-
-        # Resize to target desired size
-        final_image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
+        image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)

-    return [true_input_image,
+    return [true_input_image, image], seed, generated_caption if use_generated_caption else ""


 # Create Gradio interface
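The `USE_ESRGAN and upscale_factor == 4` gate matches the fixed 4x scale of the UltraSharp weights, so 2x requests silently fall back to LANCZOS. If other factors should still benefit from ESRGAN, one option (a sketch, not part of this commit; reuses `USE_ESRGAN` and `esrgan_upscale` from the diff) is to run the 4x model and resample to the requested factor:

```python
from PIL import Image

def esrgan_upscale_any(image, upscale_factor):
    """Run the fixed 4x ESRGAN model, then LANCZOS-resample to the
    requested factor; falls back to plain LANCZOS when unavailable."""
    if not USE_ESRGAN:
        return image.resize((image.width * upscale_factor,
                             image.height * upscale_factor),
                            resample=Image.LANCZOS)
    up4 = esrgan_upscale(image, 4)  # native model scale
    if upscale_factor == 4:
        return up4
    target = (image.width * upscale_factor, image.height * upscale_factor)
    return up4.resize(target, resample=Image.LANCZOS)
```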
@@ -380,10 +453,10 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as d
     <h4>💡 How it works:</h4>
     <ol>
         <li><strong>Florence-2</strong> analyzes your image and generates a detailed caption</li>
-        <li>Initial upscale with LANCZOS interpolation</li>
-        <li><strong>FLUX Img2Img</strong> enhances the upscaled image with AI diffusion guided by the caption</li>
+        <li>Initial upscale with LANCZOS interpolation (or ESRGAN if installed)</li>
+        <li><strong>FLUX Img2Img</strong> enhances the upscaled image with tiled AI diffusion guided by the caption</li>
     </ol>
-    <p><strong>Note:</strong> Output limited to
+    <p><strong>Note:</strong> Output limited to 8192x8192 pixels total budget. Tiling enables larger sizes.</p>
 </div>
 """)

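For reference, a minimal driver showing how the pieces in this commit compose (`pipe`, `USE_ESRGAN`, `esrgan_upscale`, and `tiled_flux_img2img` are from the diff; the prompt, file names, and sampler values are placeholders):

```python
import torch
from PIL import Image

img = Image.open("input.png").convert("RGB")  # placeholder input path
control = esrgan_upscale(img, 4) if USE_ESRGAN else img.resize(
    (img.width * 4, img.height * 4), resample=Image.LANCZOS)
gen = torch.Generator().manual_seed(42)
out = tiled_flux_img2img(pipe, "a detailed photo", control,
                         strength=0.3, steps=28, guidance=3.5,  # placeholder values
                         generator=gen, tile_size=1024, overlap=32)
out.save("enhanced.png")
```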