LL3RD committed
Commit 6eae0d8 · 1 Parent(s): a1b34c3
__pycache__/dreamfuse_inference.cpython-310.pyc CHANGED
Binary files a/__pycache__/dreamfuse_inference.cpython-310.pyc and b/__pycache__/dreamfuse_inference.cpython-310.pyc differ
 
app.py CHANGED
@@ -75,7 +75,7 @@ class DreamFuseGUI:
         color: #222;
     }
     #canvas_preview {
-        min-height: 420px; /* or whatever height you need */
+        min-height: 420px;
         border: 2px dashed #ccc;
         background-color: #fafafa;
         border-radius: 8px;
@@ -130,7 +130,7 @@ class DreamFuseGUI:
     }
     .svelte-1ipelgc {
         flex-wrap: nowrap !important;
-        gap: 24px !important; /* add spacing between blocks */
+        gap: 24px !important;
     }
     """
 
@@ -174,18 +174,15 @@ class DreamFuseGUI:
     };
 
     globalThis.initializeDrag = () => {
-        console.log("✅ Initializing drag and scale handlers...");
 
        const oldImg = document.getElementById('draggable-img');
        const container = document.getElementById('canvas-container');
        const slider = document.getElementById('scale-slider');
 
        if (!oldImg || !container || !slider) {
-            console.warn("❌ Missing required elements (#draggable-img, #canvas-container, #scale-slider)");
            return;
        }
 
-        // Replace the old img with a clone to clear its previous listeners
        const img = oldImg.cloneNode(true);
        oldImg.replaceWith(img);
 
@@ -242,7 +239,7 @@ class DreamFuseGUI:
        const relativeX = absoluteLeft - offsetLeft;
        const relativeY = absoluteTop - offsetTop;
        document.getElementById("coordinate").textContent =
-            `Foreground position: (x=${relativeX.toFixed(2)}, y=${relativeY.toFixed(2)})`;
+            `Location: (x=${relativeX.toFixed(2)}, y=${relativeY.toFixed(2)})`;
        updateTransformation();
    }
    scaleAnchor = null;
@@ -306,7 +303,6 @@ class DreamFuseGUI:
 
 
    def pil_to_base64(self, img):
-        """Convert a PIL Image to a base64 string; PNG keeps the alpha channel"""
        if img is None:
            return ""
        if img.mode != "RGBA":
@@ -318,7 +314,6 @@ class DreamFuseGUI:
        return f"data:image/png;base64,{base64_str}"
 
    def resize_background_image(self, img, max_size=400):
-        """Proportionally resize the background image so its longest side equals max_size (400)"""
        if img is None:
            return None
        w, h = img.size
@@ -329,7 +324,6 @@ class DreamFuseGUI:
        return img
 
    def resize_draggable_image(self, img, max_size=400):
-        """Proportionally resize the foreground image so its longest side does not exceed max_size (400)"""
        if img is None:
            return None
        w, h = img.size
@@ -340,7 +334,6 @@ class DreamFuseGUI:
        return img
 
    def generate_html(self, background_img_b64, bg_width, bg_height, draggable_img_b64, draggable_width, draggable_height, canvas_size=400):
-        """Generate the preview HTML page"""
        html_code = f"""
        <html>
        <head>
@@ -415,9 +408,8 @@ class DreamFuseGUI:
        return html_code
 
    def on_upload(self, background_img, draggable_img):
-        """Handle the uploaded images"""
        if background_img is None or draggable_img is None:
-            return "<p style='color:red;'>Please upload a background image and a draggable image first.</p>"
+            return "<p style='color:red;'>Please upload the background and foreground images.</p>"
 
        if draggable_img.mode != "RGB":
            draggable_img = draggable_img.convert("RGB")
@@ -449,7 +441,7 @@ class DreamFuseGUI:
        # pipeline = None
        pipeline = DreamFuseInference(config)
        pipeline.gradio_generate = spaces.GPU(duratioin=120)(pipeline.gradio_generate)
-        """Build the Gradio interface"""
+
        with gr.Blocks(css=self.css_style) as demo:
            modified_fg_state = gr.State()
            gr.Markdown("# DreamFuse")
@@ -476,7 +468,20 @@ class DreamFuseGUI:
        with gr.Row():
            with gr.Column(scale=1):
                gr.Examples(
-                    examples=[self.examples[1]],
+                    examples=[self.examples[0]],
+                    inputs=[background_img_in, draggable_img_in],
+                    # elem_id="small-examples"
+                )
+            with gr.Column(scale=1):
+                gr.Examples(
+                    examples=[self.examples[2]],
+                    inputs=[background_img_in, draggable_img_in],
+                    # elem_id="small-examples"
+                )
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Examples(
+                    examples=[self.examples[0]],
                    inputs=[background_img_in, draggable_img_in],
                    # elem_id="small-examples"
                )
@@ -506,7 +511,7 @@ class DreamFuseGUI:
                )
                prompt_text = gr.Textbox(label="Prompt", placeholder="text prompt", value="")
                text_strength = gr.Slider(minimum=1, maximum=10, step=1, label="Text Strength", value=1, visible=False)
-                enable_gui = gr.Checkbox(label="Enable GUI", value=True, visible=False)
+                enable_gui = gr.Checkbox(label="GUI", value=True, visible=False)
                enable_truecfg = gr.Checkbox(label="TrueCFG", value=False, visible=False)
            with gr.Column(scale=1, elem_id="section-results"):
                gr.Markdown("### Model Result")
@@ -533,7 +538,6 @@ class DreamFuseGUI:
                prompt_text, enable_gui, cfg_slider, size_select, text_strength, enable_truecfg],
            outputs=model_output
        )
-        # Initialize the drag/scale handlers after the page loads
        demo.load(None, None, None, js=self.js_script)
        generate_btn.click(fn=None, inputs=None, outputs=None, js="initializeDrag")
 
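The app.py hunks above reference pil_to_base64 and the longest-side resize helpers without showing their bodies. As a rough guide to what they appear to do (an RGBA PNG encoded as a data URI, plus a proportional resize), here is a minimal standalone sketch; the bodies are reconstructed from the signatures and the visible return value, so treat them as an illustration rather than the committed implementation.

```python
import base64
import io
from PIL import Image

def pil_to_base64(img):
    """Encode a PIL image as a PNG data URI; RGBA preserves the alpha channel."""
    if img is None:
        return ""
    if img.mode != "RGBA":
        img = img.convert("RGBA")
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    base64_str = base64.b64encode(buf.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{base64_str}"

def resize_longest_side(img, max_size=400):
    """Proportionally resize so the longest side is at most max_size (illustrative helper)."""
    if img is None:
        return None
    w, h = img.size
    ratio = max_size / max(w, h)
    if ratio < 1:
        img = img.resize((int(w * ratio), int(h * ratio)), Image.LANCZOS)
    return img

# Usage: shrink an 800x600 RGBA foreground to 400x300 and embed it in HTML.
fg = Image.new("RGBA", (800, 600), (255, 0, 0, 128))
print(pil_to_base64(resize_longest_side(fg))[:30])  # data:image/png;base64,...
```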
dreamfuse/trains/utils/inference_utils.py CHANGED
@@ -313,37 +313,17 @@ def encode_prompt(
 def warp_affine_tensor(input_tensor, mask_affines, output_size, scale_factor=1/16,
                        align_corners_grid=False, align_corners_sample=True,
                        flatten_output=True, device=None):
-    """
-    Apply an affine transform to the input tensor and return the warped result.
-
-    Args:
-        input_tensor: image tensor to warp; supported shapes are (H, W, C), (C, H, W) or (1, C, H, W).
-        mask_affines: affine parameters (e.g. [a, 0, t_x, 0, e, t_y]); their units are based on a 512x512 image.
-        output_size: target spatial size, as (H_out, W_out).
-        scale_factor: scaling factor for the translation terms; e.g. for 512 -> 32, factor = 32/512 = 1/16.
-        align_corners_grid: align_corners argument passed to F.affine_grid.
-        align_corners_sample: align_corners argument passed to F.grid_sample.
-        flatten_output: if True, reshape the warped output from (1, C, H_out, W_out) to (-1, C).
-        device: if set, move the relevant tensors to this device.
-
-    Returns:
-        warped_output: the affine-warped tensor,
-                       of shape (H_out*W_out, C) if flatten_output is True, otherwise (1, C, H_out, W_out).
-    """
-    # If the input tensor is not batched (4D), reshape it to (1, C, H, W)
+
     if input_tensor.dim() == 3:
-        # Treat the tensor as (H, W, C) if its last dimension is 3 (RGB)
         if input_tensor.shape[-1] == 3:
             input_tensor = input_tensor.permute(2, 0, 1)
         input_tensor = input_tensor.unsqueeze(0)
     elif input_tensor.dim() != 4:
-        raise ValueError("input_tensor must be a 3D or 4D Tensor!")
+        raise ValueError("input_tensor must be 3D or 4D Tensor!")
 
-    # Output size
     H_out, W_out = output_size
     B, C, H_in, W_in = input_tensor.shape
 
-    # Convert mask_affines to a tensor and make sure its shape is (1, 6)
     if not torch.is_tensor(mask_affines):
         theta = torch.tensor(mask_affines, dtype=torch.float32).unsqueeze(0)
     else:
@@ -351,35 +331,27 @@ def warp_affine_tensor(input_tensor, mask_affines, output_size, scale_factor=1/1
     if theta.dim() == 1:
         theta = theta.unsqueeze(0)
 
-    # Rescale the translation terms (3rd and 6th elements) to the current target resolution
-    theta[0, 2] *= scale_factor  # translation along x
-    theta[0, 5] *= scale_factor  # translation along y
+    theta[0, 2] *= scale_factor
+    theta[0, 5] *= scale_factor
 
     a = theta[0, 0]
     t_x = theta[0, 2]
     e = theta[0, 4]
     t_y = theta[0, 5]
 
-    # Convert to normalized coordinates (range [-1, 1]):
-    # along x the mapping is x_norm = 2*x/(W_out-1) - 1,
-    # so the constant term of the affine becomes a + 2*t_x/(W_out-1) - 1
     theta_norm = torch.tensor([
         [a, 0.0, a + 2*t_x/(W_out - 1) - 1],
         [0.0, e, e + 2*t_y/(H_out - 1) - 1]
     ], dtype=torch.float32).unsqueeze(0)
 
-    # Build the affine_grid for the target output size; the grid size is (B, C, H_out, W_out)
     grid = F.affine_grid(theta_norm, size=(B, C, H_out, W_out), align_corners=align_corners_grid)
     if device is not None:
         grid = grid.to(device)
         input_tensor = input_tensor.to(device)
 
-    # Sample from the input tensor
     warped = F.grid_sample(input_tensor, grid, align_corners=align_corners_sample)
 
-    # Optionally flatten the output to (-1, C)
     if flatten_output:
-        # (1, C, H_out, W_out) -> (H_out, W_out, C) -> reshape(-1, C)
        warped = warped.squeeze(0).permute(1, 2, 0).reshape(-1, C)
    return warped
 
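The comments removed above described how warp_affine_tensor maps a pixel-space translation into the normalized coordinates that F.affine_grid expects: with x_norm = 2*x/(W_out-1) - 1, the constant term of the affine becomes a + 2*t_x/(W_out-1) - 1. A small self-contained sketch of that conversion follows; the helper name and the 512-to-32 example values are illustrative only, not part of the repository.

```python
import torch
import torch.nn.functional as F

def pixel_affine_to_normalized(a, e, t_x, t_y, out_hw):
    """Convert a pixel-space affine [a, 0, t_x, 0, e, t_y] into the normalized
    theta expected by F.affine_grid (hypothetical helper for illustration)."""
    H_out, W_out = out_hw
    # x_norm = 2*x/(W_out-1) - 1, so the constant x term is a + 2*t_x/(W_out-1) - 1
    return torch.tensor([
        [a, 0.0, a + 2 * t_x / (W_out - 1) - 1],
        [0.0, e, e + 2 * t_y / (H_out - 1) - 1],
    ], dtype=torch.float32).unsqueeze(0)

# Example: a 160 px translation defined on a 512x512 image, mapped to a 32x32
# latent grid with scale_factor = 32/512 = 1/16 -> 10 latent pixels.
t_x_latent = 160 * (32 / 512)
theta = pixel_affine_to_normalized(1.0, 1.0, t_x_latent, 0.0, (32, 32))
grid = F.affine_grid(theta, size=(1, 3, 32, 32), align_corners=False)
warped = F.grid_sample(torch.rand(1, 3, 32, 32), grid, align_corners=True)
print(theta[0, 0, 2])   # 2*10/31 ≈ 0.645: a pure normalized x shift
print(warped.shape)     # torch.Size([1, 3, 32, 32])
```

With identity scales (a = e = 1) the constant term reduces to 2*t/(size - 1), i.e. a pure shift in normalized coordinates, which is what the removed comments were pointing out.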
dreamfuse_inference.py CHANGED
@@ -241,7 +241,7 @@ class DreamFuseInference:
 
         scale_width = ref_width / target_width
         scale_height = ref_height / target_height
-        scale = min(scale_width, scale_height)  # pick the smaller ratio so the result never exceeds the reference image's width/height
+        scale = min(scale_width, scale_height)
 
         new_width = int(target_width * scale)
         new_height = int(target_height * scale)
@@ -250,17 +250,6 @@ class DreamFuseInference:
         return resized_image
 
     def pad_or_crop(self, img, target_size, fill_color=(255, 255, 255)):
-        """
-        Center-align the input image and crop or pad it to target_size.
-
-        Args:
-            img - a PIL.Image object
-            target_size - target size (width, height)
-            fill_color - fill color, white by default
-
-        Returns:
-            the adjusted PIL.Image object, with size target_size
-        """
         iw, ih = img.size
         tw, th = target_size
 
@@ -277,30 +266,6 @@ class DreamFuseInference:
         return new_img
 
     def transform_foreground_original(self, original_fg, original_bg, transformation_info, canvas_size=400):
-        """
-        Translate the original foreground image (original_fg) according to transformation_info.
-        Requirements:
-          1. The output image has the same size as original_fg (the original foreground size is kept);
-          2. When computing the offset, recover the unscaled drag coordinates, i.e. divide drag_left/drag_top by scale_ratio;
-          3. The relative drag offset is measured on the 400x400 preview canvas against the default (centered) unscaled position,
-             and that ratio is then mapped to the actual pixel offset at the original foreground size.
-          4. The result pastes the foreground onto a white canvas of the original foreground size (uncovered areas stay white).
-
-        Args:
-          original_fg: the originally uploaded foreground image (a PIL Image object)
-          transformation_info: dict that must contain the following fields:
-            - "drag_left": x coordinate of the displayed foreground's top-left corner after dragging (affected by scaling, in pixels)
-            - "drag_top": y coordinate of the displayed foreground's top-left corner after dragging (affected by scaling, in pixels)
-            - "scale_ratio": the foreground's scale ratio in the preview
-            - "data_original_width": the foreground's unscaled width in the preview
-            - "data_original_height": the foreground's unscaled height in the preview
-          canvas_size: preview canvas size (default 400, matching the front end)
-
-        Returns:
-          the processed image (a PIL Image object), the same size as original_fg,
-          translated according to the relative drag offset measured at the unscaled position.
-        """
-        # Read the parameters from transformation_info
         drag_left = float(transformation_info.get("drag_left", 0))
         drag_top = float(transformation_info.get("drag_top", 0))
         scale_ratio = float(transformation_info.get("scale_ratio", 1))
@@ -382,8 +347,6 @@ class DreamFuseInference:
         images = Image.fromarray(images[0], "RGB")
 
         images = images.resize(background_img.size)
-        # images_save = images.copy()
-
         # images.thumbnail((640, 640), Image.LANCZOS)
         return images
 
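The docstring removed from pad_or_crop fully specifies its behaviour: center-align the image on a target_size canvas, padding with fill_color where the canvas is larger and cropping where it is smaller. Below is a standalone PIL sketch of that behaviour, assuming the usual paste-with-offset approach; it is an illustration, not the repository's implementation.

```python
from PIL import Image

def center_pad_or_crop(img, target_size, fill_color=(255, 255, 255)):
    """Center img on a target_size canvas: pad with fill_color where the canvas
    is larger, crop where it is smaller (illustrative sketch)."""
    iw, ih = img.size
    tw, th = target_size
    canvas = Image.new("RGB", (tw, th), fill_color)
    # Offsets may be negative; PIL clips the pasted image to the canvas, which crops it.
    offset = ((tw - iw) // 2, (th - ih) // 2)
    canvas.paste(img, offset)
    return canvas

# Example: a 600x300 image centered on a 400x400 canvas (sides cropped, top/bottom padded).
out = center_pad_or_crop(Image.new("RGB", (600, 300), (0, 0, 0)), (400, 400))
print(out.size)  # (400, 400)
```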