comrender committed
Commit 415a802 · verified · 1 Parent(s): 4a135c6

Update app.py

Files changed (1)
  1. app.py +326 -381
app.py CHANGED
@@ -11,19 +11,14 @@ from transformers import AutoProcessor, AutoModelForCausalLM
  from gradio_imageslider import ImageSlider
  from PIL import Image
  from huggingface_hub import snapshot_download
- import requests
- import io
-
- # For ESRGAN (requires pip install basicsr gfpgan)
  try:
      from basicsr.archs.rrdbnet_arch import RRDBNet
      from basicsr.utils import img2tensor, tensor2img
      USE_ESRGAN = True
  except ImportError:
      USE_ESRGAN = False
-     warnings.warn("basicsr not installed; falling back to LANCZOS interpolation.")
-
- css = """
  #col-container {
      margin: 0 auto;
      max-width: 800px;
@@ -32,27 +27,23 @@ css = """
      text-align: center;
      margin-bottom: 2rem;
  }
- """
-
- # Device setup
- power_device = "ZeroGPU"
- device = "cpu"
-
- # Get HuggingFace token
- huggingface_token = os.getenv("HF_TOKEN")
-
- # Download FLUX model
- print("📥 Downloading FLUX model...")
  model_path = snapshot_download(
      repo_id="black-forest-labs/FLUX.1-dev",
      repo_type="model",
      ignore_patterns=["*.md", "*.gitattributes"],
      local_dir="FLUX.1-dev",
      token=huggingface_token,
- )
-
- # Load Florence-2 model for image captioning
- print("📥 Loading Florence-2 model...")
  florence_model = AutoModelForCausalLM.from_pretrained(
      "microsoft/Florence-2-large",
      torch_dtype=torch.float16,
@@ -62,21 +53,15 @@ florence_model = AutoModelForCausalLM.from_pretrained(
  florence_processor = AutoProcessor.from_pretrained(
      "microsoft/Florence-2-large",
      trust_remote_code=True
- )
-
- # Load FLUX Img2Img pipeline
- print("📥 Loading FLUX Img2Img...")
  pipe = FluxImg2ImgPipeline.from_pretrained(
      model_path,
      torch_dtype=torch.bfloat16
  )
  pipe.to(device)
  pipe.enable_vae_tiling()
- pipe.enable_vae_slicing()
-
- print("✅ All models loaded successfully!")
-
- # Download ESRGAN model if using
  if USE_ESRGAN:
      esrgan_path = "4x-UltraSharp.pth"
      if not os.path.exists(esrgan_path):
@@ -87,149 +72,124 @@ if USE_ESRGAN:
      state_dict = torch.load(esrgan_path)['params_ema']
      esrgan_model.load_state_dict(state_dict)
      esrgan_model.eval()
-     esrgan_model.to(device)
-
- MAX_SEED = 1000000
- MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
-
-
- def generate_caption(image):
      """Generate detailed caption using Florence-2"""
      try:
          task_prompt = "<MORE_DETAILED_CAPTION>"
-         prompt = task_prompt
-
-         inputs = florence_processor(text=prompt, images=image, return_tensors="pt").to(device)
-         inputs["pixel_values"] = inputs["pixel_values"].to(torch.float16)  # Match model dtype
-
-         generated_ids = florence_model.generate(
-             input_ids=inputs["input_ids"],
-             pixel_values=inputs["pixel_values"],
-             max_new_tokens=1024,
-             num_beams=3,
-             do_sample=True,
-         )
-
-         generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-         parsed_answer = florence_processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
-
-         caption = parsed_answer[task_prompt]
-         return caption
-     except Exception as e:
-         print(f"Caption generation failed: {e}")
-         return "a high quality detailed image"
-
-
- def process_input(input_image, upscale_factor):
      """Process input image and handle size constraints"""
      w, h = input_image.size
      w_original, h_original = w, h
-     aspect_ratio = w / h
-
-     was_resized = False
-
-     if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
-         warnings.warn(
-             f"Requested output image is too large ({w * upscale_factor}x{h * upscale_factor}). Resizing to fit budget."
-         )
-         gr.Info(
-             f"Requested output image is too large. Resizing input to fit within pixel budget."
-         )
-         target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)
-         scale = (target_input_pixels / (w * h)) ** 0.5
-         new_w = int(w * scale) - int(w * scale) % 8
-         new_h = int(h * scale) - int(h * scale) % 8
-         input_image = input_image.resize((new_w, new_h), resample=Image.LANCZOS)
-         was_resized = True
 
-     return input_image, w_original, h_original, was_resized
-
-
- def load_image_from_url(url):
      """Load image from URL"""
      try:
          response = requests.get(url, stream=True)
          response.raise_for_status()
          return Image.open(response.raw)
      except Exception as e:
-         raise gr.Error(f"Failed to load image from URL: {e}")
-
-
- def esrgan_upscale(image, scale=4):
      if not USE_ESRGAN:
          return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
      img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
      with torch.no_grad():
          output = esrgan_model(img.unsqueeze(0)).squeeze()
      output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
-     return Image.fromarray(output_img)
-
-
- def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
      """Tiled Img2Img to mimic Ultimate SD Upscaler tiling"""
      w, h = image.size
-     output = image.copy()  # Start with the control image
-
-     # For handling long prompts: truncate for CLIP, full for T5
-     max_clip_tokens = pipe.tokenizer.model_max_length  # Typically 77
-     input_ids = pipe.tokenizer.encode(prompt, return_tensors="pt")
-     if input_ids.shape[1] > max_clip_tokens:
-         input_ids = input_ids[:, :max_clip_tokens]
-         prompt_clip = pipe.tokenizer.decode(input_ids[0], skip_special_tokens=True)
-     else:
-         prompt_clip = prompt
-
-     for x in range(0, w, tile_size - overlap):
-         for y in range(0, h, tile_size - overlap):
-             tile_w = min(tile_size, w - x)
-             tile_h = min(tile_size, h - y)
-             tile = image.crop((x, y, x + tile_w, y + tile_h))
-
-             # Run Flux on tile
-             gen_tile = pipe(
-                 prompt=prompt_clip,
-                 prompt_2=prompt,
-                 image=tile,
-                 strength=strength,
-                 num_inference_steps=steps,
-                 guidance_scale=guidance,
-                 height=tile_h,
-                 width=tile_w,
-                 generator=generator,
-             ).images[0]
-
-             # Resize back to exact tile size if pipeline adjusted it
-             gen_tile = gen_tile.resize((tile_w, tile_h), resample=Image.LANCZOS)
-
-             # Paste with blending if overlap
-             if overlap > 0:
-                 paste_box = (x, y, x + tile_w, y + tile_h)
-                 if x > 0 or y > 0:
-                     # Simple linear blend on overlaps
-                     mask = Image.new('L', (tile_w, tile_h), 255)
-                     if x > 0:
-                         blend_width = min(overlap, tile_w)
-                         for i in range(blend_width):
-                             for j in range(tile_h):
-                                 mask.putpixel((i, j), int(255 * (i / overlap)))
-                     if y > 0:
-                         blend_height = min(overlap, tile_h)
-                         for i in range(tile_w):
-                             for j in range(blend_height):
-                                 mask.putpixel((i, j), int(255 * (j / overlap)))
-                     output.paste(gen_tile, paste_box, mask)
-                 else:
-                     output.paste(gen_tile, paste_box)
              else:
-                 output.paste(gen_tile, (x, y))
-
-     return output
-
-
- @spaces.GPU(duration=120)
  def enhance_image(
      image_input,
      image_url,
      randomize_seed,
      num_inference_steps,
      upscale_factor,
@@ -245,259 +205,244 @@ def enhance_image(
      elif image_url:
          input_image = load_image_from_url(image_url)
      else:
-         raise gr.Error("Please provide an image (upload or URL)")
-
-     # Convert input image to PNG in backend
-     buffer = io.BytesIO()
-     input_image.save(buffer, format="PNG")
-     buffer.seek(0)
-     input_image = Image.open(buffer)
 
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-     else:
-         seed = 42
 
-     true_input_image = input_image
-
-     # Process input image
-     input_image, w_original, h_original, was_resized = process_input(
-         input_image, upscale_factor
-     )
 
-     # Generate caption if requested
-     if use_generated_caption:
-         gr.Info("🔍 Generating image caption...")
-         generated_caption = generate_caption(input_image)
-         prompt = generated_caption
-     else:
-         prompt = custom_prompt if custom_prompt.strip() else ""
 
-     generator = torch.Generator().manual_seed(seed)
 
-     gr.Info("🚀 Upscaling image...")
 
-     # Initial upscale
-     if USE_ESRGAN and upscale_factor == 4:
-         control_image = esrgan_upscale(input_image, upscale_factor)
-     else:
-         w, h = input_image.size
-         control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
 
-     # Tiled Flux Img2Img for refinement
-     image = tiled_flux_img2img(
-         pipe,
-         prompt,
-         control_image,
-         denoising_strength,
-         num_inference_steps,
-         1.0,  # Hardcoded guidance_scale to 1
-         generator,
-         tile_size=1024,
-         overlap=32
-     )
 
-     if was_resized:
-         gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
-         image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
-
-     # Resize input image to match output size for slider alignment
-     resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
-
-     return [resized_input, image], image
 
- # Create Gradio interface
- with gr.Blocks(css=css, title="🎨 Flux dev Creative Upscaler - Florence-2 + FLUX") as demo:
      gr.HTML("""
          <div class="main-header">
-             <h1>🎨 Flux dev Creative Upscaler</h1>
-             <p>Upload an image or provide a URL to upscale it using Florence-2 captioning and FLUX dev with Ultimate SD Upscaler</p>
              <p>Currently running on <strong>{}</strong></p>
          </div>
-     """.format(power_device))
-
-     with gr.Row():
-         with gr.Column(scale=1):
-             gr.HTML("<h3>📤 Input</h3>")
-
-             with gr.Tabs():
-                 with gr.TabItem("📁 Upload Image"):
-                     input_image = gr.Image(
-                         label="Upload Image",
-                         type="pil",
-                         height=200  # Made smaller
-                     )
-
-                 with gr.TabItem("🔗 Image URL"):
-                     image_url = gr.Textbox(
-                         label="Image URL",
-                         placeholder="https://example.com/image.jpg",
-                         value="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg"
-                     )
-
-             gr.HTML("<h3>🎛️ Caption Settings</h3>")
-
-             use_generated_caption = gr.Checkbox(
-                 label="Use AI-generated caption (Florence-2)",
-                 value=True,
-                 info="Generate detailed caption automatically"
-             )
-
-             custom_prompt = gr.Textbox(
-                 label="Custom Prompt (optional)",
-                 placeholder="Enter custom prompt or leave empty for generated caption",
-                 lines=2
-             )
-
-             gr.HTML("<h3>⚙️ Upscaling Settings</h3>")
 
-             upscale_factor = gr.Slider(
-                 label="Upscale Factor",
-                 minimum=1,
-                 maximum=4,
-                 step=1,
-                 value=2,
-                 info="How much to upscale the image"
              )
-
-             num_inference_steps = gr.Slider(
-                 label="Steps (25 Recommended)",
-                 minimum=8,
-                 maximum=50,
                  step=1,
-                 value=25,
-                 info="More steps = better quality but slower"
-             )
-
-             denoising_strength = gr.Slider(
-                 label="Creativity (Denoising)",
-                 minimum=0.0,
-                 maximum=1.0,
-                 step=0.05,
-                 value=0.3,
-                 info="Controls how much the image is transformed"
-             )
-
-             with gr.Row():
-                 randomize_seed = gr.Checkbox(
-                     label="Randomize seed",
-                     value=True
-                 )
-
-             enhance_btn = gr.Button(
-                 "🚀 Upscale Image",
-                 variant="primary",
-                 size="lg"
              )
 
-         with gr.Column(scale=2):  # Larger scale for results
-             gr.HTML("<h3>📊 Results</h3>")
-
-             result_slider = ImageSlider(
-                 type="pil",
-                 interactive=False,  # Disable interactivity to prevent uploads
-                 height=600,  # Made larger
-                 elem_id="result_slider",
-                 label=None  # Remove default label
-             )
 
-             upscaled_output = gr.Image(
-                 label="Upscaled Image (Download as PNG)",
-                 type="pil",
-                 interactive=False,
-                 show_download_button=True,
-                 height=600,
-             )
 
-     # Event handler
-     enhance_btn.click(
-         fn=enhance_image,
-         inputs=[
-             input_image,
-             image_url,
-             randomize_seed,
-             num_inference_steps,
-             upscale_factor,
-             denoising_strength,
-             use_generated_caption,
-             custom_prompt,
-         ],
-         outputs=[result_slider, upscaled_output]
-     )
-
-     gr.HTML("""
-         <div style="margin-top: 2rem; padding: 1rem; background: #f0f0f0; border-radius: 8px;">
-             <p><strong>Note:</strong> This upscaler uses the Flux dev model. Users are responsible for obtaining commercial rights if used commercially under their license.</p>
-         </div>
-     """)
-
-     # Custom CSS for slider
-     gr.HTML("""
-         <style>
-         #result_slider .slider {
-             width: 100% !important;
-             max-width: inherit !important;
-         }
-         #result_slider img {
-             object-fit: contain !important;
-             width: 100% !important;
-             height: auto !important;
-         }
-         #result_slider .gr-button-tool {
-             display: none !important;
-         }
-         #result_slider .gr-button-undo {
-             display: none !important;
          }
-         #result_slider .gr-button-clear {
-             display: none !important;
-         }
-         #result_slider .badge-container .badge {
-             display: none !important;
-         }
-         #result_slider .badge-container::before {
-             content: "Before";
-             position: absolute;
-             top: 10px;
-             left: 10px;
-             background: rgba(0,0,0,0.5);
-             color: white;
-             padding: 5px;
-             border-radius: 5px;
-             z-index: 10;
-         }
-         #result_slider .badge-container::after {
-             content: "After";
-             position: absolute;
-             top: 10px;
-             right: 10px;
-             background: rgba(0,0,0,0.5);
-             color: white;
-             padding: 5px;
-             border-radius: 5px;
-             z-index: 10;
-         }
-         #result_slider .fullscreen img {
-             object-fit: contain !important;
-             width: 100vw !important;
-             height: 100vh !important;
-         }
-         </style>
-     """)
-
-     # JS to set slider default position to middle
-     gr.HTML("""
-         <script>
-         document.addEventListener('DOMContentLoaded', function() {
-             const sliderInput = document.querySelector('#result_slider input[type="range"]');
-             if (sliderInput) {
-                 sliderInput.value = 50;
-                 sliderInput.dispatchEvent(new Event('input'));
-             }
-         });
-         </script>
-     """)
 
- if __name__ == "__main__":
-     demo.queue().launch(share=True, server_name="0.0.0.0", server_port=7860)
  from gradio_imageslider import ImageSlider
  from PIL import Image
  from huggingface_hub import snapshot_download
+ import requests
+
+ # For ESRGAN (requires pip install basicsr gfpgan)
  try:
      from basicsr.archs.rrdbnet_arch import RRDBNet
      from basicsr.utils import img2tensor, tensor2img
      USE_ESRGAN = True
  except ImportError:
      USE_ESRGAN = False
+     warnings.warn("basicsr not installed; falling back to LANCZOS interpolation.")
+
+ css = """
  #col-container {
      margin: 0 auto;
      max-width: 800px;
      text-align: center;
      margin-bottom: 2rem;
  }
+ """
+
+ # Device setup
+ if torch.cuda.is_available():
+     power_device = "GPU"
+     device = "cuda"
+ else:
+     power_device = "CPU"
+     device = "cpu"
+
+ # Get HuggingFace token
+ huggingface_token = os.getenv("HF_TOKEN")
+
+ # Download FLUX model
+ print("📥 Downloading FLUX model...")
  model_path = snapshot_download(
      repo_id="black-forest-labs/FLUX.1-dev",
      repo_type="model",
      ignore_patterns=["*.md", "*.gitattributes"],
      local_dir="FLUX.1-dev",
      token=huggingface_token,
+ )
+
+ # Load Florence-2 model for image captioning
+ print("📥 Loading Florence-2 model...")
  florence_model = AutoModelForCausalLM.from_pretrained(
      "microsoft/Florence-2-large",
      torch_dtype=torch.float16,
  florence_processor = AutoProcessor.from_pretrained(
      "microsoft/Florence-2-large",
      trust_remote_code=True
+ )
+
+ # Load FLUX Img2Img pipeline
+ print("📥 Loading FLUX Img2Img...")
  pipe = FluxImg2ImgPipeline.from_pretrained(
      model_path,
      torch_dtype=torch.bfloat16
  )
  pipe.to(device)
  pipe.enable_vae_tiling()
+ pipe.enable_vae_slicing()
+
+ print("✅ All models loaded successfully!")
+
+ # Download ESRGAN model if using
  if USE_ESRGAN:
      esrgan_path = "4x-UltraSharp.pth"
      if not os.path.exists(esrgan_path):
      state_dict = torch.load(esrgan_path)['params_ema']
      esrgan_model.load_state_dict(state_dict)
      esrgan_model.eval()
+     esrgan_model.to(device)
+
+ MAX_SEED = 1000000
+ MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
+
+
+ def generate_caption(image):
      """Generate detailed caption using Florence-2"""
      try:
          task_prompt = "<MORE_DETAILED_CAPTION>"
+         prompt = task_prompt
+
+         inputs = florence_processor(text=prompt, images=image, return_tensors="pt").to(device)
+         inputs["pixel_values"] = inputs["pixel_values"].to(torch.float16)  # Match model dtype
+
+         generated_ids = florence_model.generate(
+             input_ids=inputs["input_ids"],
+             pixel_values=inputs["pixel_values"],
+             max_new_tokens=1024,
+             num_beams=3,
+             do_sample=True,
+         )
+
+         generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+         parsed_answer = florence_processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
+
+         caption = parsed_answer[task_prompt]
+         return caption
+     except Exception as e:
+         print(f"Caption generation failed: {e}")
+         return "a high quality detailed image"
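Review note (not part of the commit): `generate(num_beams=3, do_sample=True)` mixes beam search with multinomial sampling, so the caption for the same image changes between runs even though the app exposes a seed only for the diffusion stage. A minimal sketch of a deterministic variant, assuming the same `florence_model` and `inputs` objects as above:

    # Hedged sketch, not from the commit: reproducible Florence-2 decoding.
    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
        do_sample=False,  # plain beam search; same caption on every run
    )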
+
+
+ def process_input(input_image, upscale_factor):
      """Process input image and handle size constraints"""
      w, h = input_image.size
      w_original, h_original = w, h
+     aspect_ratio = w / h
+
+     was_resized = False
+
+     if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
+         warnings.warn(
+             f"Requested output image is too large ({w * upscale_factor}x{h * upscale_factor}). Resizing to fit budget."
+         )
+         gr.Info(
+             f"Requested output image is too large. Resizing input to fit within pixel budget."
+         )
+         target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)
+         scale = (target_input_pixels / (w * h)) ** 0.5
+         new_w = int(w * scale) - int(w * scale) % 8
+         new_h = int(h * scale) - int(h * scale) % 8
+         input_image = input_image.resize((new_w, new_h), resample=Image.LANCZOS)
+         was_resized = True
+
+     return input_image, w_original, h_original, was_resized
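Review note (not part of the commit): the budget math above is easiest to sanity-check with concrete numbers. A standalone sketch, using the same MAX_PIXEL_BUDGET of 8192 * 8192; the input size is an arbitrary example:

    # Hedged sketch: the pixel-budget resize math from process_input, run standalone.
    MAX_PIXEL_BUDGET = 8192 * 8192           # 67,108,864 output pixels allowed
    w, h, upscale_factor = 4000, 3000, 4     # 4x on a 12 MP input -> 192 MP requested
    if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
        target_input_pixels = MAX_PIXEL_BUDGET / (upscale_factor ** 2)  # 4,194,304
        scale = (target_input_pixels / (w * h)) ** 0.5                  # ~0.591
        new_w = int(w * scale) - int(w * scale) % 8  # 2364 -> 2360 (multiple of 8)
        new_h = int(h * scale) - int(h * scale) % 8  # 1773 -> 1768 (multiple of 8)
    print(new_w, new_h)  # 2360 1768 -> 9440x7072 output, just under the budget

The `% 8` rounding keeps both sides divisible by 8, which the VAE latent grid requires.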
+
+
+ def load_image_from_url(url):
      """Load image from URL"""
      try:
          response = requests.get(url, stream=True)
          response.raise_for_status()
          return Image.open(response.raw)
      except Exception as e:
+         raise gr.Error(f"Failed to load image from URL: {e}")
+
+
+ def esrgan_upscale(image, scale=4):
      if not USE_ESRGAN:
          return image.resize((image.width * scale, image.height * scale), resample=Image.LANCZOS)
      img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
      with torch.no_grad():
          output = esrgan_model(img.unsqueeze(0)).squeeze()
      output_img = tensor2img(output, rgb2bgr=False, min_max=(0, 1))
+     return Image.fromarray(output_img)
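Review note (not part of the commit): `esrgan_model` is moved to `device`, but `img2tensor` returns a CPU tensor, so this call should fail with a device mismatch whenever `device == "cuda"`. A minimal device-safe sketch of the same function, reusing the surrounding names:

    def esrgan_upscale_safe(image, device):
        # Hedged sketch: same logic as esrgan_upscale, with explicit device moves.
        img = img2tensor(np.array(image) / 255., bgr2rgb=False, float32=True)
        with torch.no_grad():
            # Move input to the model's device, then bring the result back to CPU.
            output = esrgan_model(img.unsqueeze(0).to(device)).squeeze().float().cpu()
        return Image.fromarray(tensor2img(output, rgb2bgr=False, min_max=(0, 1)))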
+
+
+ def tiled_flux_img2img(pipe, prompt, image, strength, steps, guidance, generator, tile_size=1024, overlap=32):
      """Tiled Img2Img to mimic Ultimate SD Upscaler tiling"""
      w, h = image.size
+     output = image.copy()  # Start with the control image
+
+     # For handling long prompts: truncate for CLIP, full for T5
+     max_clip_tokens = pipe.tokenizer.model_max_length  # Typically 77
+     input_ids = pipe.tokenizer.encode(prompt, return_tensors="pt")
+     if input_ids.shape[1] > max_clip_tokens:
+         input_ids = input_ids[:, :max_clip_tokens]
+         prompt_clip = pipe.tokenizer.decode(input_ids[0], skip_special_tokens=True)
+     else:
+         prompt_clip = prompt
+
+     for x in range(0, w, tile_size - overlap):
+         for y in range(0, h, tile_size - overlap):
+             tile_w = min(tile_size, w - x)
+             tile_h = min(tile_size, h - y)
+             tile = image.crop((x, y, x + tile_w, y + tile_h))
+
+             # Run Flux on tile
+             gen_tile = pipe(
+                 prompt=prompt_clip,
+                 prompt_2=prompt,
+                 image=tile,
+                 strength=strength,
+                 num_inference_steps=steps,
+                 guidance_scale=guidance,
+                 height=tile_h,
+                 width=tile_w,
+                 generator=generator,
+             ).images[0]
+
+             # Resize back to exact tile size if pipeline adjusted it
+             gen_tile = gen_tile.resize((tile_w, tile_h), resample=Image.LANCZOS)
+
+             # Paste with blending if overlap
+             if overlap > 0:
+                 paste_box = (x, y, x + tile_w, y + tile_h)
+                 if x > 0 or y > 0:
+                     # Simple linear blend on overlaps
+                     mask = Image.new('L', (tile_w, tile_h), 255)
+                     if x > 0:
+                         blend_width = min(overlap, tile_w)
+                         for i in range(blend_width):
+                             for j in range(tile_h):
+                                 mask.putpixel((i, j), int(255 * (i / overlap)))
+                     if y > 0:
+                         blend_height = min(overlap, tile_h)
+                         for i in range(tile_w):
+                             for j in range(blend_height):
+                                 mask.putpixel((i, j), int(255 * (j / overlap)))
+                     output.paste(gen_tile, paste_box, mask)
                 else:
+                     output.paste(gen_tile, paste_box)
+             else:
+                 output.paste(gen_tile, (x, y))
 
+     return output
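Review note (not part of the commit): the per-pixel `putpixel` loops above run in pure Python, which is over a million calls for each blended 1024-pixel tile, and the vertical ramp overwrites the horizontal one where the two overlap bands meet in the corner instead of combining with it. A hedged NumPy sketch of an equivalent linear mask (the function name and flags are illustrative, not from the file):

    import numpy as np
    from PIL import Image

    def linear_blend_mask(tile_w, tile_h, overlap, blend_left, blend_top):
        # Hedged sketch: build the 'L'-mode paste mask in one shot with NumPy.
        mask = np.ones((tile_h, tile_w), dtype=np.float32)
        if blend_left:  # ramp 0 -> 1 across the left overlap band
            ramp = np.arange(min(overlap, tile_w)) / overlap
            mask[:, :ramp.size] *= ramp[None, :]
        if blend_top:   # ramp 0 -> 1 down the top overlap band
            ramp = np.arange(min(overlap, tile_h)) / overlap
            mask[:ramp.size, :] *= ramp[:, None]
        return Image.fromarray((mask * 255).astype(np.uint8), mode="L")

    # Usage sketch: mask = linear_blend_mask(tile_w, tile_h, overlap, x > 0, y > 0)

Multiplying the two ramps gives a smooth corner blend rather than the overwrite in the loop version.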
+
+
+ @spaces.GPU(duration=120)
  def enhance_image(
      image_input,
      image_url,
+     seed,
      randomize_seed,
      num_inference_steps,
      upscale_factor,
 
      elif image_url:
          input_image = load_image_from_url(image_url)
      else:
+         raise gr.Error("Please provide an image (upload or URL)")
+
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
 
+     true_input_image = input_image
 
+     # Process input image
+     input_image, w_original, h_original, was_resized = process_input(
+         input_image, upscale_factor
+     )
 
+     # Generate caption if requested
+     if use_generated_caption:
+         gr.Info("🔍 Generating image caption...")
+         generated_caption = generate_caption(input_image)
+         prompt = generated_caption
+     else:
+         prompt = custom_prompt if custom_prompt.strip() else ""
 
+     generator = torch.Generator().manual_seed(seed)
 
+     gr.Info("🚀 Upscaling image...")
 
+     # Initial upscale
+     if USE_ESRGAN and upscale_factor == 4:
+         control_image = esrgan_upscale(input_image, upscale_factor)
+     else:
+         w, h = input_image.size
+         control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
 
+     # Tiled Flux Img2Img for refinement
+     image = tiled_flux_img2img(
+         pipe,
+         prompt,
+         control_image,
+         denoising_strength,
+         num_inference_steps,
+         1.0,  # Hardcoded guidance_scale to 1
+         generator,
+         tile_size=1024,
+         overlap=32
+     )
 
+     if was_resized:
+         gr.Info(f"📏 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
+         image = image.resize((w_original * upscale_factor, h_original * upscale_factor), resample=Image.LANCZOS)
 
+     # Resize input image to match output size for slider alignment
+     resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
 
+     return [resized_input, image]
+
+
+ # Create Gradio interface
+ with gr.Blocks(css=css, title="AI Image Upscaler - Florence-2 + FLUX") as demo:
      gr.HTML("""
          <div class="main-header">
+             <h1>Flux Dev Ultimate HD Upscaler</h1>
+             <p>Upload an image or provide a URL to upscale it using Florence-2 captioning and FLUX upscaling</p>
              <p>Currently running on <strong>{}</strong></p>
          </div>
+     """.format(power_device))
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.HTML("<h3>📤 Input</h3>")
+
+             with gr.Tabs():
+                 with gr.TabItem("📁 Upload Image"):
+                     input_image = gr.Image(
+                         label="Upload Image",
+                         type="pil",
+                         height=200  # Made smaller
+                     )
+
+                 with gr.TabItem("🔗 Image URL"):
+                     image_url = gr.Textbox(
+                         label="Image URL",
+                         placeholder="https://example.com/image.jpg",
+                         value="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg"
+                     )
+
+             gr.HTML("<h3>🎛️ Caption Settings</h3>")
+
+             use_generated_caption = gr.Checkbox(
+                 label="Use AI-generated caption (Florence-2)",
+                 value=True,
+                 info="Generate detailed caption automatically"
+             )
+
+             custom_prompt = gr.Textbox(
+                 label="Custom Prompt (optional)",
+                 placeholder="Enter custom prompt or leave empty for generated caption",
+                 lines=2
+             )
+
+             gr.HTML("<h3>⚙️ Upscaling Settings</h3>")
+
+             upscale_factor = gr.Slider(
+                 label="Upscale Factor",
+                 minimum=1,
+                 maximum=4,
+                 step=1,
+                 value=2,
+                 info="How much to upscale the image"
+             )
+
+             num_inference_steps = gr.Slider(
+                 label="Steps (25 Recommended)",
+                 minimum=8,
+                 maximum=50,
+                 step=1,
+                 value=25,
+                 info="More steps = better quality but slower"
+             )
+
+             denoising_strength = gr.Slider(
+                 label="Creativity (Denoising value)",
+                 minimum=0.0,
+                 maximum=1.0,
+                 step=0.05,
+                 value=0.3,
+                 info="More>0.3 = Very Creative, Less<0.1 = More consistent, 0.15-0.3 recommended"
+             )
+
+             with gr.Row():
+                 randomize_seed = gr.Checkbox(
+                     label="Randomize seed",
+                     value=True
                  )
+                 seed = gr.Slider(
+                     label="Seed",
+                     minimum=0,
+                     maximum=MAX_SEED,
                      step=1,
+                     value=42,
+                     interactive=True
                  )
+
+             enhance_btn = gr.Button(
+                 "🚀 Upscale Image",
+                 variant="primary",
+                 size="lg"
+             )
 
+         with gr.Column(scale=2):  # Larger scale for results
+             gr.HTML("<h3>📊 Results</h3>")
+
+             result_slider = ImageSlider(
+                 type="pil",
+                 interactive=False,  # Disable interactivity to prevent uploads
+                 height=600,  # Made larger
+                 elem_id="result_slider",
+                 label=None  # Remove default label
+             )
 
+     # Event handler
+     enhance_btn.click(
+         fn=enhance_image,
+         inputs=[
+             input_image,
+             image_url,
+             seed,
+             randomize_seed,
+             num_inference_steps,
+             upscale_factor,
+             denoising_strength,
+             use_generated_caption,
+             custom_prompt,
+         ],
+         outputs=[result_slider]
+     )
 
+     gr.HTML("""
+         <div style="margin-top: 2rem; padding: 1rem; background: #f0f0f0; border-radius: 8px;">
+             <p><strong>Note:</strong> This upscaler uses the Flux dev model. Users are responsible for obtaining commercial rights if used commercially under their license.</p>
+         </div>
+     """)
+
+     # Custom CSS for slider
+     gr.HTML("""
+         <style>
+         #result_slider .slider {
+             width: 100% !important;
+             max-width: inherit !important;
+         }
+         #result_slider img {
+             object-fit: contain !important;
+             width: 100% !important;
+             height: auto !important;
+         }
+         #result_slider .gr-button-tool {
+             display: none !important;
+         }
+         #result_slider .gr-button-undo {
+             display: none !important;
+         }
+         #result_slider .gr-button-clear {
+             display: none !important;
+         }
+         #result_slider .badge-container .badge {
+             display: none !important;
+         }
+         #result_slider .badge-container::before {
+             content: "Before";
+             position: absolute;
+             top: 10px;
+             left: 10px;
+             background: rgba(0,0,0,0.5);
+             color: white;
+             padding: 5px;
+             border-radius: 5px;
+             z-index: 10;
+         }
+         #result_slider .badge-container::after {
+             content: "After";
+             position: absolute;
+             top: 10px;
+             right: 10px;
+             background: rgba(0,0,0,0.5);
+             color: white;
+             padding: 5px;
+             border-radius: 5px;
+             z-index: 10;
+         }
+         #result_slider .fullscreen img {
+             object-fit: contain !important;
+             width: 100vw !important;
+             height: 100vh !important;
+         }
+         </style>
+     """)
+
+     # JS to set slider default position to middle
+     gr.HTML("""
+         <script>
+         document.addEventListener('DOMContentLoaded', function() {
+             const sliderInput = document.querySelector('#result_slider input[type="range"]');
+             if (sliderInput) {
+                 sliderInput.value = 50;
+                 sliderInput.dispatchEvent(new Event('input'));
              }
+         });
+         </script>
+     """)
+
+
+ if __name__ == "__main__":
+     demo.queue().launch(share=True, server_name="0.0.0.0", server_port=7860)
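Review note (not part of the commit): the new file imports `spaces`, `gradio_imageslider`, and `requests`, and optionally `basicsr`, none of which a bare Space provides. A hedged sketch of the requirements.txt this version appears to assume; the list and the optional pins are assumptions, not from the repo:

    # Hypothetical requirements.txt for this app.py; entries are assumptions.
    torch
    diffusers
    transformers
    accelerate
    gradio
    gradio_imageslider
    spaces
    requests
    Pillow
    numpy
    basicsr   # optional: enables the 4x-UltraSharp ESRGAN path
    gfpgan    # optional: named in the inline comment alongside basicsr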