comrender committed on
Commit
b7cfbcf
·
verified ·
1 Parent(s): c46d203

Update app.py

Files changed (1)
  1. app.py +362 -400
app.py CHANGED
@@ -1,431 +1,393 @@
- import os
  import random
- import sys
- from typing import Sequence, Mapping, Any, Union
- import torch
  import gradio as gr
- from huggingface_hub import hf_hub_download
  import spaces
-
- # Download required models from Hugging Face
- hf_hub_download(repo_id="black-forest-labs/FLUX.1-dev", filename="ae.safetensors", local_dir="models/vae")
- hf_hub_download(repo_id="black-forest-labs/FLUX.1-dev", filename="flux1-dev.safetensors", local_dir="models/diffusion_models")
- hf_hub_download(repo_id="comfyanonymous/flux_text_encoders", filename="clip_l.safetensors", local_dir="models/text_encoders")
- hf_hub_download(repo_id="comfyanonymous/flux_text_encoders", filename="t5xxl_fp16.safetensors", local_dir="models/text_encoders")
- hf_hub_download(repo_id="kim2091/UltraSharp", filename="4x-UltraSharp.pth", local_dir="models/upscale_models")
-
- def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
-     """Returns the value at the given index of a sequence or mapping."""
-     try:
-         return obj[index]
-     except KeyError:
-         return obj["result"][index]
-
- def find_path(name: str, path: str = None) -> str:
-     """Recursively looks at parent folders starting from the given path until it finds the given name."""
-     if path is None:
-         path = os.getcwd()
-
-     if name in os.listdir(path):
-         path_name = os.path.join(path, name)
-         print(f"{name} found: {path_name}")
-         return path_name
-
-     parent_directory = os.path.dirname(path)
-     if parent_directory == path:
-         return None
-
-     return find_path(name, parent_directory)
-
- def add_comfyui_directory_to_sys_path() -> None:
-     """Add 'ComfyUI' to the sys.path"""
-     comfyui_path = find_path("ComfyUI")
-     if comfyui_path is not None and os.path.isdir(comfyui_path):
-         sys.path.append(comfyui_path)
-         print(f"'{comfyui_path}' added to sys.path")
-
- def add_extra_model_paths() -> None:
-     """Parse the optional extra_model_paths.yaml file and add the parsed paths to the sys.path."""
-     try:
-         from main import load_extra_path_config
-         extra_model_paths = find_path("extra_model_paths.yaml")
-         if extra_model_paths is not None:
-             load_extra_path_config(extra_model_paths)
-         else:
-             print("Could not find the extra_model_paths config file.")
-     except ImportError:
-         try:
-             from utils.extra_config import load_extra_path_config
-             extra_model_paths = find_path("extra_model_paths.yaml")
-             if extra_model_paths is not None:
-                 load_extra_path_config(extra_model_paths)
-             else:
-                 print("Could not find the extra_model_paths config file.")
-         except ImportError:
-             print("Could not import extra config. Continuing without extra model paths.")
-
- add_comfyui_directory_to_sys_path()
- try:
-     add_extra_model_paths()
- except Exception as e:
-     print(f"Warning: Could not load extra model paths: {e}")
-
- def import_custom_nodes() -> None:
-     """Find all custom nodes in the custom_nodes folder and add those node objects to NODE_CLASS_MAPPINGS"""
      try:
-         import asyncio
-         import execution
-         from nodes import init_extra_nodes
-         import server
-
-         # Check if we're already in an event loop
-         try:
-             loop = asyncio.get_event_loop()
-             if loop.is_running():
-                 # We're in an existing loop, use it
-                 pass
-             else:
-                 # Loop exists but not running, set a new one
-                 loop = asyncio.new_event_loop()
-                 asyncio.set_event_loop(loop)
-         except RuntimeError:
-             # No loop exists, create one
-             loop = asyncio.new_event_loop()
-             asyncio.set_event_loop(loop)
-
-         server_instance = server.PromptServer(loop)
-         execution.PromptQueue(server_instance)
-         init_extra_nodes()
      except Exception as e:
-         print(f"Warning: Could not initialize custom nodes: {e}")
-         print("Continuing with basic ComfyUI nodes only...")
-
- from nodes import NODE_CLASS_MAPPINGS
-
- # Pre-load models outside the decorated function for ZeroGPU efficiency
- try:
-     import_custom_nodes()
-
-     # Initialize model loaders
-     dualcliploader = NODE_CLASS_MAPPINGS["DualCLIPLoader"]()
-     dualcliploader_54 = dualcliploader.load_clip(
-         clip_name1="clip_l.safetensors",
-         clip_name2="t5xxl_fp16.safetensors",
-         type="flux",
-         device="default",
-     )
-
-     upscalemodelloader = NODE_CLASS_MAPPINGS["UpscaleModelLoader"]()
-     upscalemodelloader_44 = upscalemodelloader.load_model(model_name="4x-UltraSharp.pth")
-
-     vaeloader = NODE_CLASS_MAPPINGS["VAELoader"]()
-     vaeloader_55 = vaeloader.load_vae(vae_name="ae.safetensors")
-
-     unetloader = NODE_CLASS_MAPPINGS["UNETLoader"]()
-     unetloader_58 = unetloader.load_unet(
-         unet_name="flux1-dev.safetensors", weight_dtype="default"
-     )
-
-     downloadandloadflorence2model = NODE_CLASS_MAPPINGS["DownloadAndLoadFlorence2Model"]()
-     downloadandloadflorence2model_52 = downloadandloadflorence2model.loadmodel(
-         model="microsoft/Florence-2-large", precision="fp16", attention="sdpa"
-     )
-
-     # Pre-load models to GPU for efficiency
      try:
-         from comfy import model_management
-         model_loaders = [dualcliploader_54, vaeloader_55, unetloader_58, downloadandloadflorence2model_52]
-         valid_models = [
-             getattr(loader[0], 'patcher', loader[0])
-             for loader in model_loaders
-             if not isinstance(loader[0], dict) and not isinstance(getattr(loader[0], 'patcher', None), dict)
-         ]
-         model_management.load_models_gpu(valid_models)
-         print("Models successfully pre-loaded to GPU")
      except Exception as e:
-         print(f"Warning: Could not pre-load models to GPU: {e}")
-
-     print("ComfyUI setup completed successfully!")
-
- except Exception as e:
-     print(f"Error during ComfyUI setup: {e}")
-     print("Please check that all required custom nodes are installed.")
-     raise
-
- @spaces.GPU(duration=120)  # Adjust duration based on your workflow speed
- def enhance_image(image_input, upscale_factor, steps, cfg_scale, denoise_strength, guidance_scale):
-     """
-     Main function to enhance and upscale images using Florence-2 captioning and FLUX upscaling
-     """
-     try:
-         with torch.inference_mode():
-             # Handle different input types (file upload vs URL)
-             if isinstance(image_input, str) and image_input.startswith(('http://', 'https://')):
-                 # Load from URL
-                 load_image_from_url_mtb = NODE_CLASS_MAPPINGS["Load Image From Url (mtb)"]()
-                 load_image_result = load_image_from_url_mtb.load(url=image_input)
-             else:
-                 # Load from uploaded file
-                 loadimage = NODE_CLASS_MAPPINGS["LoadImage"]()
-                 load_image_result = loadimage.load_image(image=image_input)
-
-             # Generate detailed caption using Florence-2
-             florence2run = NODE_CLASS_MAPPINGS["Florence2Run"]()
-             florence2run_51 = florence2run.encode(
-                 text_input="",
-                 task="more_detailed_caption",
-                 fill_mask=True,
-                 keep_model_loaded=False,
-                 max_new_tokens=1024,
-                 num_beams=3,
-                 do_sample=True,
-                 output_mask_select="",
-                 seed=random.randint(1, 2**64),
-                 image=get_value_at_index(load_image_result, 0),
-                 florence2_model=get_value_at_index(downloadandloadflorence2model_52, 0),
-             )
-
-             # Encode the generated caption
-             cliptextencode = NODE_CLASS_MAPPINGS["CLIPTextEncode"]()
-             cliptextencode_6 = cliptextencode.encode(
-                 text=get_value_at_index(florence2run_51, 2),
-                 clip=get_value_at_index(dualcliploader_54, 0),
-             )
-
-             # Encode empty negative prompt
-             cliptextencode_42 = cliptextencode.encode(
-                 text="", clip=get_value_at_index(dualcliploader_54, 0)
-             )
-
-             # Set up upscale factor
-             primitivefloat = NODE_CLASS_MAPPINGS["PrimitiveFloat"]()
-             primitivefloat_60 = primitivefloat.execute(value=upscale_factor)
-
-             # Apply FLUX guidance
-             fluxguidance = NODE_CLASS_MAPPINGS["FluxGuidance"]()
-             fluxguidance_26 = fluxguidance.append(
-                 guidance=guidance_scale,
-                 conditioning=get_value_at_index(cliptextencode_6, 0)
              )
-
-             # Perform ultimate upscaling
-             ultimatesdupscale = NODE_CLASS_MAPPINGS["UltimateSDUpscale"]()
-             ultimatesdupscale_50 = ultimatesdupscale.upscale(
-                 upscale_by=get_value_at_index(primitivefloat_60, 0),
-                 seed=random.randint(1, 2**64),
-                 steps=steps,
-                 cfg=cfg_scale,
-                 sampler_name="euler",
-                 scheduler="normal",
-                 denoise=denoise_strength,
-                 mode_type="Linear",
-                 tile_width=1024,
-                 tile_height=1024,
-                 mask_blur=8,
-                 tile_padding=32,
-                 seam_fix_mode="None",
-                 seam_fix_denoise=1,
-                 seam_fix_width=64,
-                 seam_fix_mask_blur=8,
-                 seam_fix_padding=16,
-                 force_uniform_tiles=True,
-                 tiled_decode=False,
-                 image=get_value_at_index(load_image_result, 0),
-                 model=get_value_at_index(unetloader_58, 0),
-                 positive=get_value_at_index(fluxguidance_26, 0),
-                 negative=get_value_at_index(cliptextencode_42, 0),
-                 vae=get_value_at_index(vaeloader_55, 0),
-                 upscale_model=get_value_at_index(upscalemodelloader_44, 0),
              )
-
-             # Save the result
-             saveimage = NODE_CLASS_MAPPINGS["SaveImage"]()
-             saveimage_43 = saveimage.save_images(
-                 filename_prefix="enhanced_image",
-                 images=get_value_at_index(ultimatesdupscale_50, 0),
              )
-
-             # Return the path to the saved image
-             saved_path = f"output/{saveimage_43['ui']['images'][0]['filename']}"
-
-             # Also return the generated caption for user feedback
-             generated_caption = get_value_at_index(florence2run_51, 2)
-
-             return saved_path, generated_caption
-
-     except Exception as e:
-         print(f"Error in enhance_image: {str(e)}")
-         raise gr.Error(f"Enhancement failed: {str(e)}")
-
- # Create the Gradio interface
- def create_interface():
-     with gr.Blocks(
-         title="🚀 AI Image Enhancer - Florence-2 + FLUX",
-         theme=gr.themes.Soft(),
-         css="""
-         .gradio-container {
-             max-width: 1200px !important;
-         }
-         .main-header {
-             text-align: center;
-             margin-bottom: 2rem;
-         }
-         .result-gallery {
-             min-height: 400px;
-         }
-         """
-     ) as app:
-
-         gr.HTML("""
-             <div class="main-header">
-                 <h1>🎨 AI Image Enhancer</h1>
-                 <p>Upload an image or provide a URL to enhance it using Florence-2 captioning and FLUX upscaling</p>
-             </div>
-         """)
-
-         with gr.Row():
-             with gr.Column(scale=1):
-                 gr.HTML("<h3>📀 Input Settings</h3>")
-
-                 with gr.Tabs():
-                     with gr.TabItem("📁 Upload Image"):
-                         image_upload = gr.Image(
-                             label="Upload Image",
-                             type="filepath",
-                             height=300
-                         )
-
-                     with gr.TabItem("🔗 Image URL"):
-                         image_url = gr.Textbox(
-                             label="Image URL",
-                             placeholder="https://example.com/image.jpg",
-                             value="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg"
-                         )
-
-                 gr.HTML("<h3>⚙️ Enhancement Settings</h3>")
-
-                 upscale_factor = gr.Slider(
-                     minimum=1.0,
-                     maximum=4.0,
-                     value=2.0,
-                     step=0.5,
-                     label="Upscale Factor",
-                     info="How much to upscale the image"
-                 )
-
-                 steps = gr.Slider(
-                     minimum=10,
-                     maximum=50,
-                     value=25,
-                     step=5,
-                     label="Steps",
-                     info="Number of denoising steps"
-                 )
-
-                 cfg_scale = gr.Slider(
-                     minimum=0.5,
-                     maximum=10.0,
-                     value=1.0,
-                     step=0.5,
-                     label="CFG Scale",
-                     info="Classifier-free guidance scale"
-                 )
-
-                 denoise_strength = gr.Slider(
-                     minimum=0.1,
-                     maximum=1.0,
-                     value=0.3,
-                     step=0.1,
-                     label="Denoise Strength",
-                     info="How much to denoise the image"
-                 )
-
-                 guidance_scale = gr.Slider(
-                     minimum=1.0,
-                     maximum=10.0,
-                     value=3.5,
-                     step=0.5,
-                     label="Guidance Scale",
-                     info="FLUX guidance strength"
                  )
-
-                 enhance_btn = gr.Button(
-                     "🚀 Enhance Image",
-                     variant="primary",
-                     size="lg"
                  )
-
-             with gr.Column(scale=1):
-                 gr.HTML("<h3>📊 Results</h3>")
-
-                 output_image = gr.Image(
-                     label="Enhanced Image",
-                     type="filepath",
-                     height=400,
-                     interactive=False
-                 )
-
-                 generated_caption = gr.Textbox(
-                     label="Generated Caption",
-                     placeholder="The AI-generated caption will appear here...",
-                     lines=3,
                      interactive=False
                  )
-
-         gr.HTML("""
-             <div style="margin-top: 1rem; padding: 1rem; background: #f0f0f0; border-radius: 8px;">
-                 <h4>💡 How it works:</h4>
-                 <ol>
-                     <li>Florence-2 analyzes your image and generates a detailed caption</li>
-                     <li>FLUX uses this caption to guide the upscaling process</li>
-                     <li>The result is an enhanced, higher-resolution image</li>
-                 </ol>
-             </div>
-         """)
-
-         # Event handlers
-         def process_image(img_upload, img_url, upscale_f, steps_val, cfg_val, denoise_val, guidance_val):
-             # Determine input source
-             image_input = img_upload if img_upload is not None else img_url
-
-             if not image_input:
-                 raise gr.Error("Please provide an image (upload or URL)")
-
-             return enhance_image(image_input, upscale_f, steps_val, cfg_val, denoise_val, guidance_val)
-
-         enhance_btn.click(
-             fn=process_image,
-             inputs=[
-                 image_upload,
-                 image_url,
-                 upscale_factor,
-                 steps,
-                 cfg_scale,
-                 denoise_strength,
-                 guidance_scale
-             ],
-             outputs=[output_image, generated_caption]
-         )
-
-         # Example inputs
-         gr.Examples(
-             examples=[
-                 [None, "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg", 2.0, 25, 1.0, 0.3, 3.5],
-                 [None, "https://picsum.photos/512/512", 2.0, 20, 1.5, 0.4, 4.0],
-             ],
-             inputs=[
-                 image_upload,
-                 image_url,
-                 upscale_factor,
-                 steps,
-                 cfg_scale,
-                 denoise_strength,
-                 guidance_scale
-             ]
-         )
-
-     return app
-
  if __name__ == "__main__":
-     app = create_interface()
-     app.launch(share=True, server_name="0.0.0.0", server_port=7860)
+ import logging
  import random
+ import warnings
+ import os
  import gradio as gr
+ import numpy as np
  import spaces
+ import torch
+ from diffusers import FluxControlNetModel, FluxControlNetPipeline
+ from transformers import AutoProcessor, AutoModelForCausalLM
+ from gradio_imageslider import ImageSlider
+ from PIL import Image
+ from huggingface_hub import snapshot_download
+ import requests
+
+ css = """
+ #col-container {
+     margin: 0 auto;
+     max-width: 800px;
+ }
+ .main-header {
+     text-align: center;
+     margin-bottom: 2rem;
+ }
+ """
+
+ # Device setup
+ if torch.cuda.is_available():
+     power_device = "GPU"
+     device = "cuda"
+ else:
+     power_device = "CPU"
+     device = "cpu"
+
+ # Get HuggingFace token
+ huggingface_token = os.getenv("HF_TOKEN")
+
+ # Download FLUX model
+ print("📥 Downloading FLUX model...")
+ model_path = snapshot_download(
+     repo_id="black-forest-labs/FLUX.1-dev",
+     repo_type="model",
+     ignore_patterns=["*.md", "*.gitattributes"],
+     local_dir="FLUX.1-dev",
+     token=huggingface_token,
+ )
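+ # Note (an assumption worth flagging here, not part of the original code):
+ # FLUX.1-dev is a gated repository, so the download above only succeeds when
+ # HF_TOKEN belongs to an account that has accepted the model license.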
+
+ # Load Florence-2 model for image captioning
+ print("📥 Loading Florence-2 model...")
+ florence_model = AutoModelForCausalLM.from_pretrained(
+     "microsoft/Florence-2-large",
+     torch_dtype=torch.float16,
+     trust_remote_code=True
+ ).to(device)
+ florence_processor = AutoProcessor.from_pretrained(
+     "microsoft/Florence-2-large",
+     trust_remote_code=True
+ )
+
+ # Load FLUX ControlNet pipeline
+ print("📥 Loading FLUX ControlNet...")
+ controlnet = FluxControlNetModel.from_pretrained(
+     "jasperai/Flux.1-dev-Controlnet-Upscaler",
+     torch_dtype=torch.bfloat16
+ ).to(device)
+
+ pipe = FluxControlNetPipeline.from_pretrained(
+     model_path,
+     controlnet=controlnet,
+     torch_dtype=torch.bfloat16
+ )
+ pipe.to(device)
+
+ print("✅ All models loaded successfully!")
+
+ MAX_SEED = 1000000
+ MAX_PIXEL_BUDGET = 1024 * 1024
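+ # Budget sketch (illustrative numbers): a 1024x1024 input at upscale_factor=2
+ # would yield 2048*2048 = 4,194,304 output pixels, four times the budget of
+ # 1,048,576, so process_input() below shrinks the input first.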
+
+
+ def generate_caption(image):
+     """Generate detailed caption using Florence-2"""
      try:
+         task_prompt = "<MORE_DETAILED_CAPTION>"
+         prompt = task_prompt
+
+         # Cast floating-point inputs to fp16 so they match the model weights
+         inputs = florence_processor(text=prompt, images=image, return_tensors="pt").to(device, torch.float16)
+
+         generated_ids = florence_model.generate(
+             input_ids=inputs["input_ids"],
+             pixel_values=inputs["pixel_values"],
+             max_new_tokens=1024,
+             num_beams=3,
+             do_sample=True,
+         )
+
+         generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+         parsed_answer = florence_processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
+
+         caption = parsed_answer[task_prompt]
+         return caption
      except Exception as e:
+         print(f"Caption generation failed: {e}")
+         return "a high quality detailed image"
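+ # Output sketch: post_process_generation returns a dict keyed by the task
+ # token, e.g. {"<MORE_DETAILED_CAPTION>": "A photo of ..."}, which is why the
+ # caption is looked up with parsed_answer[task_prompt] above.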
+
+
+ def process_input(input_image, upscale_factor):
+     """Process input image and handle size constraints"""
+     w, h = input_image.size
+     w_original, h_original = w, h
+     aspect_ratio = w / h
+
+     was_resized = False
+
+     if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
+         warnings.warn(
+             f"Requested output image is too large ({w * upscale_factor}x{h * upscale_factor}). Resizing to fit budget."
+         )
+         gr.Info(
+             "Requested output image is too large. Resizing input to fit within pixel budget."
+         )
+         input_image = input_image.resize(
+             (
+                 int(aspect_ratio * MAX_PIXEL_BUDGET**0.5 // upscale_factor),
+                 int(MAX_PIXEL_BUDGET**0.5 // aspect_ratio // upscale_factor),
+             )
+         )
+         was_resized = True
+
+     # Snap dimensions down to a multiple of 8, as the VAE requires
+     w, h = input_image.size
+     w = w - w % 8
+     h = h - h % 8
+
+     return input_image.resize((w, h)), w_original, h_original, was_resized
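+ # Worked example (values illustrative): a 1000x700 input at upscale_factor=2
+ # would need 2000x1400 output pixels, over budget, so the input is resized to
+ # 731x358, snapped down to 728x352, upscaled by the pipeline to 1456x704, and
+ # only stretched back to the requested 2000x1400 at the very end.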
+
+
+ def load_image_from_url(url):
+     """Load image from URL"""
      try:
+         # One streamed request is enough; decode the response body directly
+         response = requests.get(url, stream=True)
+         response.raise_for_status()
+         return Image.open(response.raw)
      except Exception as e:
+         raise gr.Error(f"Failed to load image from URL: {e}")
+
+
+ @spaces.GPU(duration=120)
+ def enhance_image(
+     image_input,
+     image_url,
+     seed,
+     randomize_seed,
+     num_inference_steps,
+     upscale_factor,
+     controlnet_conditioning_scale,
+     guidance_scale,
+     use_generated_caption,
+     custom_prompt,
+     progress=gr.Progress(track_tqdm=True),
+ ):
+     """Main enhancement function"""
+     # Handle image input
+     if image_input is not None:
+         input_image = image_input
+     elif image_url:
+         input_image = load_image_from_url(image_url)
+     else:
+         raise gr.Error("Please provide an image (upload or URL)")
+
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+
+     true_input_image = input_image
+
+     # Process input image
+     input_image, w_original, h_original, was_resized = process_input(
+         input_image, upscale_factor
+     )
+
+     # Generate caption if requested
+     if use_generated_caption:
+         gr.Info("🔍 Generating image caption...")
+         generated_caption = generate_caption(input_image)
+         prompt = generated_caption
+     else:
+         prompt = custom_prompt if custom_prompt.strip() else ""
+
+     # The control image is the input naively rescaled by the upscale factor;
+     # the ControlNet refines it during denoising
+     w, h = input_image.size
+     control_image = input_image.resize((w * upscale_factor, h * upscale_factor))
+
+     generator = torch.Generator().manual_seed(seed)
+
+     gr.Info("🚀 Upscaling image...")
+
+     # Generate upscaled image
+     image = pipe(
+         prompt=prompt,
+         control_image=control_image,
+         controlnet_conditioning_scale=controlnet_conditioning_scale,
+         num_inference_steps=num_inference_steps,
+         guidance_scale=guidance_scale,
+         height=control_image.size[1],
+         width=control_image.size[0],
+         generator=generator,
+     ).images[0]
+
+     if was_resized:
+         gr.Info(f"📐 Resizing output to target size: {w_original * upscale_factor}x{h_original * upscale_factor}")
+
+     # Resize to target desired size
+     final_image = image.resize((w_original * upscale_factor, h_original * upscale_factor))
+
+     # The ImageSlider output expects a (before, after) pair as a single value
+     return [(true_input_image, final_image), seed, generated_caption if use_generated_caption else ""]
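+ # Usage sketch (illustrative, outside the UI; parameter values are examples):
+ #   (before, after), used_seed, caption = enhance_image(
+ #       Image.open("input.jpg"), "", 42, False, 28, 2, 0.6, 3.5, True, "")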
+
+
+ # Create Gradio interface
+ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
+     gr.HTML("""
+     <div class="main-header">
+         <h1>🎨 AI Image Enhancer</h1>
+         <p>Upload an image or provide a URL to enhance it using Florence-2 captioning and FLUX upscaling</p>
+         <p>Currently running on <strong>{}</strong></p>
+     </div>
+     """.format(power_device))
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.HTML("<h3>📀 Input</h3>")
+
+             with gr.Tabs():
+                 with gr.TabItem("📁 Upload Image"):
+                     input_image = gr.Image(
+                         label="Upload Image",
+                         type="pil",
+                         height=300
+                     )
+
+                 with gr.TabItem("🔗 Image URL"):
+                     image_url = gr.Textbox(
+                         label="Image URL",
+                         placeholder="https://example.com/image.jpg",
+                         value="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg"
+                     )
+
+             gr.HTML("<h3>🎛️ Caption Settings</h3>")
+
+             use_generated_caption = gr.Checkbox(
+                 label="Use AI-generated caption (Florence-2)",
+                 value=True,
+                 info="Generate detailed caption automatically"
              )
+
+             custom_prompt = gr.Textbox(
+                 label="Custom Prompt (optional)",
+                 placeholder="Enter custom prompt or leave empty for generated caption",
+                 lines=2
              )
+
+             gr.HTML("<h3>⚙️ Enhancement Settings</h3>")
+
+             upscale_factor = gr.Slider(
+                 label="Upscale Factor",
+                 minimum=1,
+                 maximum=4,
+                 step=1,
+                 value=2,
+                 info="How much to upscale the image"
              )
+
+             num_inference_steps = gr.Slider(
+                 label="Number of Inference Steps",
+                 minimum=8,
+                 maximum=50,
+                 step=1,
+                 value=28,
+                 info="More steps = better quality but slower"
+             )
+
+             controlnet_conditioning_scale = gr.Slider(
+                 label="ControlNet Conditioning Scale",
+                 minimum=0.1,
+                 maximum=1.5,
+                 step=0.1,
+                 value=0.6,
+                 info="How much to preserve original structure"
+             )
+
+             guidance_scale = gr.Slider(
+                 label="Guidance Scale",
+                 minimum=1.0,
+                 maximum=10.0,
+                 step=0.5,
+                 value=3.5,
+                 info="How closely to follow the prompt"
+             )
+
+             with gr.Row():
+                 randomize_seed = gr.Checkbox(
+                     label="Randomize seed",
+                     value=True
                  )
+                 seed = gr.Slider(
+                     label="Seed",
+                     minimum=0,
+                     maximum=MAX_SEED,
+                     step=1,
+                     value=42,
+                     interactive=True
                  )
+
+             enhance_btn = gr.Button(
+                 "🚀 Enhance Image",
+                 variant="primary",
+                 size="lg"
+             )
+
+         with gr.Column(scale=1):
+             gr.HTML("<h3>📊 Results</h3>")
+
+             result_slider = ImageSlider(
+                 label="Input / Enhanced",
+                 type="pil",
+                 interactive=True,
+                 height=400
+             )
+
+             with gr.Row():
+                 output_seed = gr.Number(
+                     label="Used Seed",
+                     precision=0,
                      interactive=False
                  )
+
+             generated_caption_output = gr.Textbox(
+                 label="Generated Caption",
+                 placeholder="AI-generated caption will appear here...",
+                 lines=3,
+                 interactive=False
+             )
+
+     # Examples
+     gr.Examples(
+         examples=[
+             [None, "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg", 42, False, 28, 2, 0.6, 3.5, True, ""],
+             [None, "https://picsum.photos/512/512", 123, False, 25, 3, 0.8, 4.0, True, ""],
+         ],
+         inputs=[
+             input_image,
+             image_url,
+             seed,
+             randomize_seed,
+             num_inference_steps,
+             upscale_factor,
+             controlnet_conditioning_scale,
+             guidance_scale,
+             use_generated_caption,
+             custom_prompt,
+         ]
+     )
+
+     # Event handler
+     enhance_btn.click(
+         fn=enhance_image,
+         inputs=[
+             input_image,
+             image_url,
+             seed,
+             randomize_seed,
+             num_inference_steps,
+             upscale_factor,
+             controlnet_conditioning_scale,
+             guidance_scale,
+             use_generated_caption,
+             custom_prompt,
+         ],
+         outputs=[result_slider, output_seed, generated_caption_output]
+     )
+
+     gr.HTML("""
+     <div style="margin-top: 2rem; padding: 1rem; background: #f0f0f0; border-radius: 8px;">
+         <h4>💡 How it works:</h4>
+         <ol>
+             <li><strong>Florence-2</strong> analyzes your image and generates a detailed caption</li>
+             <li><strong>FLUX ControlNet</strong> uses this caption to guide the upscaling process</li>
+             <li>The result is an enhanced, higher-resolution image with improved details</li>
+         </ol>
+         <p><strong>Note:</strong> Due to memory constraints, output is limited to a total budget of 1024x1024 pixels.</p>
+     </div>
+     """)

  if __name__ == "__main__":
+     demo.queue().launch(share=True, server_name="0.0.0.0", server_port=7860)