Update app.py

app.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
 import numpy as np
 import spaces
 import torch
-from diffusers import FluxControlNetModel, FluxControlNetPipeline
+from diffusers import FluxImg2ImgPipeline
 from transformers import AutoProcessor, AutoModelForCausalLM
 from gradio_imageslider import ImageSlider
 from PIL import Image
@@ -40,7 +40,7 @@ print("📥 Downloading FLUX model...")
 model_path = snapshot_download(
     repo_id="black-forest-labs/FLUX.1-dev",
     repo_type="model",
-    ignore_patterns=["*.md", "
+    ignore_patterns=["*.md", "*.gitattributes"],
     local_dir="FLUX.1-dev",
     token=huggingface_token,
 )
@@ -58,16 +58,10 @@ florence_processor = AutoProcessor.from_pretrained(
     trust_remote_code=True
 )
 
-# Load FLUX ControlNet
-print("📥 Loading FLUX ControlNet...")
-controlnet = FluxControlNetModel.from_pretrained(
-    "jasperai/Flux.1-dev-Controlnet-Upscaler",
-    torch_dtype=torch.bfloat16
-).to(device)
-
-pipe = FluxControlNetPipeline.from_pretrained(
+# Load FLUX Img2Img pipeline
+print("📥 Loading FLUX Img2Img...")
+pipe = FluxImg2ImgPipeline.from_pretrained(
     model_path,
-    controlnet=controlnet,
     torch_dtype=torch.bfloat16
 )
 pipe.to(device)
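For context, a minimal sketch of the upscaling flow this pipeline swap enables: resize conventionally first, then let Flux img2img re-add detail at low strength. This is an illustrative helper under assumed usage of diffusers' `FluxImg2ImgPipeline`, not the app's exact code; the prompt string is a placeholder.

```python
from PIL import Image

def img2img_upscale(pipe, image, factor=2, strength=0.3, steps=28):
    # Round target dims down to multiples of 16, as Flux latents require.
    w = image.width * factor // 16 * 16
    h = image.height * factor // 16 * 16
    control_image = image.resize((w, h), Image.LANCZOS)
    return pipe(
        prompt="a high quality, detailed photograph",  # placeholder prompt
        image=control_image,
        strength=strength,  # low strength keeps the original structure
        num_inference_steps=steps,
        guidance_scale=3.5,
        height=h,
        width=w,
    ).images[0]
```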
@@ -75,7 +69,7 @@ pipe.to(device)
 print("✅ All models loaded successfully!")
 
 MAX_SEED = 1000000
-MAX_PIXEL_BUDGET = 1024 * 1024
+MAX_PIXEL_BUDGET = 4096 * 4096
 
 
 def generate_caption(image):
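The constant only caps total output pixels; the app's actual resizing code is outside this diff, but a clamp along these lines (hypothetical helper) is the usual way to keep width × height under the budget:

```python
import math

MAX_PIXEL_BUDGET = 4096 * 4096

def clamp_to_budget(width, height, upscale_factor):
    # Hypothetical helper: shrink the requested output uniformly so that
    # width * height never exceeds MAX_PIXEL_BUDGET.
    w, h = width * upscale_factor, height * upscale_factor
    if w * h > MAX_PIXEL_BUDGET:
        scale = math.sqrt(MAX_PIXEL_BUDGET / (w * h))
        w, h = int(w * scale), int(h * scale)
    # Keep dimensions divisible by 16 for the Flux VAE/latents.
    return w // 16 * 16, h // 16 * 16
```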
@@ -86,9 +80,6 @@ def generate_caption(image):
 
     inputs = florence_processor(text=prompt, images=image, return_tensors="pt").to(device)
 
-    # Cast floating-point inputs to match model's dtype (float16)
-    inputs["pixel_values"] = inputs["pixel_values"].to(torch.float16)
-
     generated_ids = florence_model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
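The removed cast only matters when the processor's float32 pixel values disagree with the model's dtype, so dropping it presumably means Florence-2 is no longer loaded in float16 here. For reference, the full captioning round-trip per the Florence-2 model card looks roughly like this; the `<DETAILED_CAPTION>` task token is an assumption, since the `prompt` variable is set outside this hunk:

```python
task = "<DETAILED_CAPTION>"  # assumed task token
inputs = florence_processor(text=task, images=image, return_tensors="pt").to(device)
generated_ids = florence_model.generate(
    input_ids=inputs["input_ids"],
    pixel_values=inputs["pixel_values"],
    max_new_tokens=256,
    num_beams=3,
)
raw_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
caption = florence_processor.post_process_generation(
    raw_text, task=task, image_size=(image.width, image.height)
)[task]
```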
@@ -156,8 +147,8 @@ def enhance_image(
     randomize_seed,
     num_inference_steps,
     upscale_factor,
-    controlnet_conditioning_scale,
     guidance_scale,
+    denoising_strength,
     use_generated_caption,
     custom_prompt,
     progress=gr.Progress(track_tqdm=True),
@@ -200,8 +191,8 @@ def enhance_image(
     # Generate upscaled image
     image = pipe(
         prompt=prompt,
-        control_image=control_image,
-        controlnet_conditioning_scale=controlnet_conditioning_scale,
+        image=control_image,
+        strength=denoising_strength,
         num_inference_steps=num_inference_steps,
         guidance_scale=guidance_scale,
         height=control_image.size[1],
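Worth noting when picking defaults: in diffusers img2img pipelines, `strength` controls both how much noise is added to the input and how many of the scheduled steps actually run, roughly `int(num_inference_steps * strength)`:

```python
# Effective denoising steps in img2img are roughly steps * strength,
# so the new slider default of 0.3 with 28 steps runs about 8 real steps.
num_inference_steps = 28
denoising_strength = 0.3
effective_steps = int(num_inference_steps * denoising_strength)  # -> 8
```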
@@ -281,15 +272,6 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
                     info="More steps = better quality but slower"
                 )
 
-                controlnet_conditioning_scale = gr.Slider(
-                    label="ControlNet Conditioning Scale",
-                    minimum=0.1,
-                    maximum=1.5,
-                    step=0.1,
-                    value=0.6,
-                    info="How much to preserve original structure"
-                )
-
                 guidance_scale = gr.Slider(
                     label="Guidance Scale",
                     minimum=1.0,
@@ -299,6 +281,15 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
                     info="How closely to follow the prompt"
                 )
 
+                denoising_strength = gr.Slider(
+                    label="Denoising Strength",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.3,
+                    info="Controls how much the image is transformed (from Ultimate SD Upscaler concept)"
+                )
+
                 with gr.Row():
                     randomize_seed = gr.Checkbox(
                         label="Randomize seed",
@@ -346,8 +337,8 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
     # Examples
     gr.Examples(
         examples=[
-            [None, "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg", 42, False, 28, 2,
-            [None, "https://picsum.photos/512/512", 123, False, 25, 3, 0
+            [None, "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg", 42, False, 28, 2, 3.5, 0.3, True, ""],
+            [None, "https://picsum.photos/512/512", 123, False, 25, 3, 4.0, 0.4, True, ""],
         ],
         inputs=[
             input_image,
@@ -356,8 +347,8 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
             randomize_seed,
             num_inference_steps,
             upscale_factor,
-            controlnet_conditioning_scale,
             guidance_scale,
+            denoising_strength,
             use_generated_caption,
             custom_prompt,
         ]
@@ -373,8 +364,8 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
             randomize_seed,
             num_inference_steps,
             upscale_factor,
-            controlnet_conditioning_scale,
             guidance_scale,
+            denoising_strength,
             use_generated_caption,
             custom_prompt,
         ],
@@ -386,7 +377,7 @@ with gr.Blocks(css=css, title="🎨 AI Image Enhancer - Florence-2 + FLUX") as demo:
     <h4>💡 How it works:</h4>
     <ol>
         <li><strong>Florence-2</strong> analyzes your image and generates a detailed caption</li>
-        <li><strong>FLUX
+        <li><strong>FLUX Img2Img</strong> uses this caption to guide the upscaling process with denoising</li>
         <li>The result is an enhanced, higher-resolution image with improved details</li>
     </ol>
     <p><strong>Note:</strong> Due to memory constraints, output is limited to 1024x1024 pixels total budget.</p>