Spaces: Running on Zero

Commit: test

Files changed:
- __pycache__/dreamfuse_inference.cpython-310.pyc +0 -0
- app.py +20 -16
- dreamfuse/trains/utils/inference_utils.py +4 -32
- dreamfuse_inference.py +1 -38
__pycache__/dreamfuse_inference.cpython-310.pyc
CHANGED
Binary files a/__pycache__/dreamfuse_inference.cpython-310.pyc and b/__pycache__/dreamfuse_inference.cpython-310.pyc differ
app.py
CHANGED
@@ -75,7 +75,7 @@ class DreamFuseGUI:
         color: #222;
     }
     #canvas_preview {
-        min-height: 420px;
+        min-height: 420px;
         border: 2px dashed #ccc;
         background-color: #fafafa;
         border-radius: 8px;
@@ -130,7 +130,7 @@ class DreamFuseGUI:
     }
     .svelte-1ipelgc {
         flex-wrap: nowrap !important;
-        gap: 24px !important;
+        gap: 24px !important;
     }
     """
 
@@ -174,18 +174,15 @@ class DreamFuseGUI:
     };
 
     globalThis.initializeDrag = () => {
-        console.log("✅ Initializing drag & zoom...");
 
        const oldImg = document.getElementById('draggable-img');
        const container = document.getElementById('canvas-container');
        const slider = document.getElementById('scale-slider');
 
        if (!oldImg || !container || !slider) {
-            console.warn("❌ Missing required elements (#draggable-img, #canvas-container, #scale-slider)");
            return;
        }
 
-        // Replace the old img with a clone to drop its previous listeners
        const img = oldImg.cloneNode(true);
        oldImg.replaceWith(img);
 
@@ -242,7 +239,7 @@ class DreamFuseGUI:
            const relativeX = absoluteLeft - offsetLeft;
            const relativeY = absoluteTop - offsetTop;
            document.getElementById("coordinate").textContent =
-
+                `Location: (x=${relativeX.toFixed(2)}, y=${relativeY.toFixed(2)})`;
            updateTransformation();
        }
        scaleAnchor = null;
@@ -306,7 +303,6 @@ class DreamFuseGUI:
 
 
     def pil_to_base64(self, img):
-        """Convert a PIL Image to a base64 string; PNG format preserves the alpha channel"""
         if img is None:
             return ""
         if img.mode != "RGBA":
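For reference, the conversion this method performs (per the removed docstring, PNG is used so the alpha channel survives) can be sketched as a self-contained helper; the buffer handling here is assumed from context, not copied from the repo:

import base64
import io

from PIL import Image


def pil_to_base64(img: Image.Image) -> str:
    # Encode a PIL image as a data URI; PNG preserves the alpha channel,
    # so normalize to RGBA before saving.
    if img is None:
        return ""
    if img.mode != "RGBA":
        img = img.convert("RGBA")
    buffer = io.BytesIO()
    img.save(buffer, format="PNG")
    base64_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{base64_str}"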
@@ -318,7 +314,6 @@ class DreamFuseGUI:
         return f"data:image/png;base64,{base64_str}"
 
     def resize_background_image(self, img, max_size=400):
-        """Proportionally scale the background so its longest side equals max_size (400)"""
         if img is None:
             return None
         w, h = img.size
@@ -329,7 +324,6 @@ class DreamFuseGUI:
         return img
 
     def resize_draggable_image(self, img, max_size=400):
-        """Proportionally scale the foreground so its longest side is at most max_size (400)"""
         if img is None:
             return None
         w, h = img.size
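Both resize helpers' bodies fall outside these hunks; from the removed docstrings, each scales proportionally so the longest side lands at (background) or under (foreground) max_size. A minimal sketch of the foreground variant, with the resampling filter assumed:

from PIL import Image


def resize_to_max_side(img: Image.Image, max_size: int = 400) -> Image.Image:
    # Scale proportionally so the longest side does not exceed max_size;
    # images already within bounds are returned unchanged.
    w, h = img.size
    scale = max_size / max(w, h)
    if scale < 1:
        img = img.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
    return img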
@@ -340,7 +334,6 @@ class DreamFuseGUI:
         return img
 
     def generate_html(self, background_img_b64, bg_width, bg_height, draggable_img_b64, draggable_width, draggable_height, canvas_size=400):
-        """Generate the preview HTML page"""
         html_code = f"""
         <html>
         <head>
@@ -415,9 +408,8 @@ class DreamFuseGUI:
         return html_code
 
     def on_upload(self, background_img, draggable_img):
-        """Handle the uploaded images"""
         if background_img is None or draggable_img is None:
-            return "<p style='color:red;'
+            return "<p style='color:red;'>Please upload the background and foreground images。</p>"
 
         if draggable_img.mode != "RGB":
             draggable_img = draggable_img.convert("RGB")
@@ -449,7 +441,7 @@ class DreamFuseGUI:
         # pipeline = None
         pipeline = DreamFuseInference(config)
         pipeline.gradio_generate = spaces.GPU(duratioin=120)(pipeline.gradio_generate)
-
+
         with gr.Blocks(css=self.css_style) as demo:
             modified_fg_state = gr.State()
             gr.Markdown("# DreamFuse")
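One unchanged context line above is worth flagging: spaces.GPU(duratioin=120) misspells the keyword. The ZeroGPU decorator's documented parameter is duration, so the intended call is presumably the following (decorator form shown for clarity):

import spaces


@spaces.GPU(duration=120)  # request a ZeroGPU slot for up to 120 s per call
def gradio_generate(*args, **kwargs):
    ...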
@@ -476,7 +468,20 @@ class DreamFuseGUI:
             with gr.Row():
                 with gr.Column(scale=1):
                     gr.Examples(
-                        examples=[self.examples[
+                        examples=[self.examples[0]],
+                        inputs=[background_img_in, draggable_img_in],
+                        # elem_id="small-examples"
+                    )
+                with gr.Column(scale=1):
+                    gr.Examples(
+                        examples=[self.examples[2]],
+                        inputs=[background_img_in, draggable_img_in],
+                        # elem_id="small-examples"
+                    )
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Examples(
+                        examples=[self.examples[0]],
                         inputs=[background_img_in, draggable_img_in],
                         # elem_id="small-examples"
                     )
@@ -506,7 +511,7 @@ class DreamFuseGUI:
             )
             prompt_text = gr.Textbox(label="Prompt", placeholder="text prompt", value="")
             text_strength = gr.Slider(minimum=1, maximum=10, step=1, label="Text Strength", value=1, visible=False)
-            enable_gui = gr.Checkbox(label="
+            enable_gui = gr.Checkbox(label="GUI", value=True, visible=False)
             enable_truecfg = gr.Checkbox(label="TrueCFG", value=False, visible=False)
             with gr.Column(scale=1, elem_id="section-results"):
                 gr.Markdown("### Model Result")
@@ -533,7 +538,6 @@ class DreamFuseGUI:
                 prompt_text, enable_gui, cfg_slider, size_select, text_strength, enable_truecfg],
                 outputs=model_output
             )
-            # Initialize the drag/scale handlers after the page loads
             demo.load(None, None, None, js=self.js_script)
             generate_btn.click(fn=None, inputs=None, outputs=None, js="initializeDrag")
dreamfuse/trains/utils/inference_utils.py
CHANGED
@@ -313,37 +313,17 @@ def encode_prompt(
 def warp_affine_tensor(input_tensor, mask_affines, output_size, scale_factor=1/16,
                        align_corners_grid=False, align_corners_sample=True,
                        flatten_output=True, device=None):
-    """
-    Apply an affine transformation to the input tensor and return the warped result.
-
-    Args:
-        input_tensor: image tensor to warp; supported shapes are (H, W, C), (C, H, W), or (1, C, H, W).
-        mask_affines: affine parameters (e.g. [a, 0, t_x, 0, e, t_y]), in units of a 512×512 image.
-        output_size: target spatial size, as (H_out, W_out).
-        scale_factor: scaling factor for the translation terms; e.g. for 512→32, factor = 32/512 = 1/16.
-        align_corners_grid: align_corners argument passed to F.affine_grid.
-        align_corners_sample: align_corners argument passed to F.grid_sample.
-        flatten_output: if True, reshape the warped output from (1, C, H_out, W_out) to (-1, C).
-        device: if set, move the relevant tensors to this device.
-
-    Returns:
-        warped_output: the affine-warped tensor, of shape (H_out*W_out, C) if
-            flatten_output is True, otherwise (1, C, H_out, W_out).
-    """
-    # If the input tensor is not batched (4D), reshape it to (1, C, H, W)
+
     if input_tensor.dim() == 3:
-        # Treat (H, W, C) as RGB when the last dimension is 3
         if input_tensor.shape[-1] == 3:
             input_tensor = input_tensor.permute(2, 0, 1)
         input_tensor = input_tensor.unsqueeze(0)
     elif input_tensor.dim() != 4:
-        raise ValueError("input_tensor
+        raise ValueError("input_tensor must be 3D or 4D Tensor!")
 
-    # Output size
     H_out, W_out = output_size
     B, C, H_in, W_in = input_tensor.shape
 
-    # Convert mask_affines to a tensor of shape (1, 6)
     if not torch.is_tensor(mask_affines):
         theta = torch.tensor(mask_affines, dtype=torch.float32).unsqueeze(0)
     else:
@@ -351,35 +331,27 @@ def warp_affine_tensor(input_tensor, mask_affines, output_size, scale_factor=1/16,
     if theta.dim() == 1:
         theta = theta.unsqueeze(0)
 
-
-    theta[0, 2] *= scale_factor  # x-direction translation
-    theta[0, 5] *= scale_factor  # y-direction translation
+    theta[0, 2] *= scale_factor
+    theta[0, 5] *= scale_factor
 
     a = theta[0, 0]
     t_x = theta[0, 2]
     e = theta[0, 4]
     t_y = theta[0, 5]
 
-    # Using the normalized coordinate convention (range [-1, 1]):
-    # for x, the normalization is x_norm = 2*x/(W_out-1) - 1,
-    # so the affine constant term becomes a + 2*t_x/(W_out-1) - 1
     theta_norm = torch.tensor([
         [a, 0.0, a + 2*t_x/(W_out - 1) - 1],
         [0.0, e, e + 2*t_y/(H_out - 1) - 1]
     ], dtype=torch.float32).unsqueeze(0)
 
-    # Build the affine_grid for the target output size; the grid has size (B, C, H_out, W_out)
     grid = F.affine_grid(theta_norm, size=(B, C, H_out, W_out), align_corners=align_corners_grid)
     if device is not None:
         grid = grid.to(device)
         input_tensor = input_tensor.to(device)
 
-    # Sample from the input tensor
     warped = F.grid_sample(input_tensor, grid, align_corners=align_corners_sample)
 
-    # Optionally flatten the output to (-1, C)
     if flatten_output:
-        # (1, C, H_out, W_out) → (H_out, W_out, C) → reshape(-1, C)
         warped = warped.squeeze(0).permute(1, 2, 0).reshape(-1, C)
     return warped
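The removed comments carried the coordinate derivation, which is worth restating: F.affine_grid works in normalized [-1, 1] coordinates, so with x_norm = 2*x/(W_out-1) - 1 a pixel translation t_x folds into the constant term as a + 2*t_x/(W_out-1) - 1 (and likewise for y). A minimal sketch of that construction, with hypothetical values and the function's own align_corners defaults:

import torch
import torch.nn.functional as F

# Pixel-space affine [a, 0, t_x; 0, e, t_y] -> normalized theta for affine_grid.
# For a = e = 1, output pixel (x, y) samples input pixel (x + t_x, y + t_y)
# (up to align_corners conventions).
a, e = 1.0, 1.0        # hypothetical scales
t_x, t_y = 8.0, 4.0    # hypothetical pixel translations
H_out, W_out = 32, 32

theta_norm = torch.tensor([
    [a, 0.0, a + 2 * t_x / (W_out - 1) - 1],
    [0.0, e, e + 2 * t_y / (H_out - 1) - 1],
]).unsqueeze(0)

image = torch.arange(H_out * W_out, dtype=torch.float32).reshape(1, 1, H_out, W_out)
grid = F.affine_grid(theta_norm, size=(1, 1, H_out, W_out), align_corners=False)
warped = F.grid_sample(image, grid, align_corners=True)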
dreamfuse_inference.py
CHANGED
@@ -241,7 +241,7 @@ class DreamFuseInference:
 
         scale_width = ref_width / target_width
         scale_height = ref_height / target_height
-        scale = min(scale_width, scale_height)
+        scale = min(scale_width, scale_height)
 
         new_width = int(target_width * scale)
         new_height = int(target_height * scale)
@@ -250,17 +250,6 @@ class DreamFuseInference:
         return resized_image
 
     def pad_or_crop(self, img, target_size, fill_color=(255, 255, 255)):
-        """
-        Center-align the input image and crop or pad it to target_size.
-
-        Args:
-            img - a PIL.Image object
-            target_size - the target size (width, height)
-            fill_color - the fill color, white by default
-
-        Returns:
-            the adjusted PIL.Image object, of size target_size
-        """
         iw, ih = img.size
         tw, th = target_size
 
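pad_or_crop's body sits outside the hunk; from the removed docstring it center-aligns the source and pads or crops it to target_size. A minimal sketch (the paste-offset arithmetic is assumed; PIL clips negative offsets, which yields the crop):

from PIL import Image


def pad_or_crop(img: Image.Image, target_size, fill_color=(255, 255, 255)):
    # Paste the image centered on a fill-colored canvas of target_size;
    # a larger source gets cropped, a smaller one gets padded.
    iw, ih = img.size
    tw, th = target_size
    canvas = Image.new("RGB", (tw, th), fill_color)
    canvas.paste(img, ((tw - iw) // 2, (th - ih) // 2))
    return canvas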
@@ -277,30 +266,6 @@ class DreamFuseInference:
         return new_img
 
     def transform_foreground_original(self, original_fg, original_bg, transformation_info, canvas_size=400):
-        """
-        Translate the original foreground image (original_fg) according to transformation_info.
-        Requirements:
-        1. The output image has the same size as original_fg (the original foreground size is preserved);
-        2. When computing the offset, recover the unscaled drag coordinates, i.e. divide drag_left/drag_top by scale_ratio;
-        3. The relative drag offset is measured on the 400x400 preview canvas against the default (centered)
-           unscaled position, then projected to the actual pixel offset at the original foreground size.
-        4. The result is pasted onto a white canvas of the original foreground size (uncovered regions filled with white).
-
-        Args:
-            original_fg: the originally uploaded foreground image (a PIL Image)
-            transformation_info: a dict that must contain:
-                - "drag_left": left coordinate of the displayed foreground after dragging (affected by scaling, in pixels)
-                - "drag_top": top coordinate of the displayed foreground after dragging (affected by scaling, in pixels)
-                - "scale_ratio": the preview scale ratio of the foreground
-                - "data_original_width": unscaled width of the foreground in the preview
-                - "data_original_height": unscaled height of the foreground in the preview
-            canvas_size: preview canvas size (400 by default, matching the frontend)
-
-        Returns:
-            the processed image (a PIL Image), the same size as original_fg,
-            translated by the relative drag offset measured at unscaled coordinates.
-        """
-        # Read the parameters from transformation_info
         drag_left = float(transformation_info.get("drag_left", 0))
         drag_top = float(transformation_info.get("drag_top", 0))
         scale_ratio = float(transformation_info.get("scale_ratio", 1))
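The removed docstring spells out the drag-to-pixel math; as a sketch, the offset recovery it describes (a hypothetical helper, not code from the repo) looks like:

def drag_offset_in_original(info, orig_w, orig_h, canvas_size=400):
    # 1) Undo the preview scaling to recover unscaled drag coordinates.
    drag_left = float(info["drag_left"]) / float(info["scale_ratio"])
    drag_top = float(info["drag_top"]) / float(info["scale_ratio"])
    # 2) Default (centered) position of the unscaled foreground on the canvas.
    default_left = (canvas_size - float(info["data_original_width"])) / 2
    default_top = (canvas_size - float(info["data_original_height"])) / 2
    # 3) Relative offset on the canvas, projected to the original resolution.
    rel_x = (drag_left - default_left) / canvas_size
    rel_y = (drag_top - default_top) / canvas_size
    return int(rel_x * orig_w), int(rel_y * orig_h)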
@@ -382,8 +347,6 @@ class DreamFuseInference:
         images = Image.fromarray(images[0], "RGB")
 
         images = images.resize(background_img.size)
-        # images_save = images.copy()
-
         # images.thumbnail((640, 640), Image.LANCZOS)
         return images
 