Sm0kyWu committed on
Commit 05802f8 · verified · 1 Parent(s): 49bda9b

Upload app.py

Files changed (1): app.py +180 -101
app.py CHANGED
@@ -34,62 +34,110 @@ def end_session(req: gr.Request):
     shutil.rmtree(user_dir)
 
 
-def select_point_callback(image, points, evt):
+def select_point(predictor: SamPredictor,
+                 annotated_img: np.ndarray,
+                 orig_img: np.ndarray,
+                 sel_pix: list,
+                 point_type: str,
+                 evt: gr.SelectData):
     """
-    When the user clicks the image, record the clicked point and draw a cross marker on the image.
-    Inputs:
-    - image: the current image (numpy array).
-    - points: the list of recorded points.
-    - evt: Gradio click-event data (contains .index, the click coordinates).
-    Returns:
-    - the updated image (with markers).
-    - the updated point list.
-    - the point list as a string (for display in a textbox).
+    When the user clicks on the annotation image:
+    - append the click coordinates to sel_pix (as a positive/negative prompt per point_type),
+    - call SAM with sel_pix to obtain a mask,
+    - draw markers for all selected points on annotated_img,
+    - return the updated annotated image, the SAM output (for display), and the generated visible_mask (for the later pix2gestalt step).
     """
-    if points is None:
-        points = []
-    annotated_img = image.copy()
-    # If there is no click event, return the original image and the current point list unchanged
-    if evt is None or evt.index is None:
-        return image, points, str(points)
-    coord = evt.index  # expected to be (x, y)
-    points.append((tuple(coord), 1))  # record as a positive prompt
-    # Draw a cross marker in red
-    cv2.drawMarker(annotated_img, tuple(coord), (255, 0, 0),
-                   markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
-    return annotated_img, points
-
-
-def mark_point_on_image(image, points, evt):
+    # Copy the image used for annotation
+    img = annotated_img.copy()
+    h_original, w_original, _ = orig_img.shape
+    h_new, w_new = 256, 256
+    scale_x = w_new / w_original
+    scale_y = h_new / h_original
+
+    # Add the clicked point according to the prompt type (evt.index is (x, y))
+    if point_type == 'positive_prompt':
+        sel_pix.append((evt.index, 1))
+    elif point_type == 'negative_prompt':
+        sel_pix.append((evt.index, 0))
+    else:
+        sel_pix.append((evt.index, 1))
+
+    # Convert the points from the original resolution to 256x256 (the SAM input size)
+    processed_sel_pix = []
+    for point, label in sel_pix:
+        x, y = point
+        new_x = int(x * scale_x)
+        new_y = int(y * scale_y)
+        processed_sel_pix.append(([new_x, new_y], label))
+
+    visible_mask, overlay_mask = run_sam(predictor, processed_sel_pix)
+    # overlay_mask is the SAM output mask (256x256); resize it to the original resolution for display
+    mask = np.squeeze(overlay_mask[0][0])  # (256, 256)
+    resized_mask = cv2.resize(mask.astype(np.uint8) * 255, (w_original, h_original), interpolation=cv2.INTER_AREA)
+    resized_mask = resized_mask > 127
+    # Build the overlay annotation (consumed by output_mask)
+    resized_overlay_mask = [(resized_mask, 'visible_mask')]
+
+    # Draw markers for all selected points
+    COLORS = [(255, 0, 0), (0, 255, 0)]
+    MARKERS = [1, 4]
+    scaling_factor = min(h_original / 256, w_original / 256)
+    marker_size = int(6 * scaling_factor)
+    marker_thickness = int(2 * scaling_factor)
+    for point, label in sel_pix:
+        cv2.drawMarker(img, tuple(point), COLORS[label], markerType=MARKERS[label],
+                       markerSize=marker_size, thickness=marker_thickness)
+
+    return img, (orig_img, resized_overlay_mask), visible_mask
+
+def undo_points(predictor, orig_img, sel_pix):
     """
-    When the user clicks image_prompt, draw a marker directly on the image and update the clicked-point state.
-    :param image: the current image (numpy array, RGB).
-    :param points: the list of recorded points.
-    :param evt: Gradio click-event data with an .index attribute (the click coordinates).
-    :return: the updated image, the point list, and the text to display.
+    Undo the last click:
+    - pop the last point from sel_pix,
+    - re-run SAM with the remaining points to obtain a mask,
+    - return the updated image and mask.
     """
-    if image is None:
-        return None, points, str(points)
-
-    # If there are no points yet, copy the image to preserve the original
-    # (it could be kept in another State for later processing)
-    annotated_image = image.copy()
-    if points is None:
-        points = []
-
-    # Check whether the event data contains click coordinates
-    if evt is None or evt.index is None:
-        return annotated_image, points, str(points)
-
-    # Get the click coordinates (a list or tuple)
-    pt = tuple(evt.index)
-    points.append((pt, 1))  # 1 marks a positive prompt (adjust as needed)
-
-    # Draw markers for all points on the image
-    for p, _ in points:
-        cv2.drawMarker(annotated_image, p, (255, 0, 0),
-                       markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
-
-    return annotated_image, points, str(points)
+    temp = orig_img.copy()
+    h_original, w_original, _ = orig_img.shape
+    COLORS = [(255, 0, 0), (0, 255, 0)]
+    MARKERS = [0, 5]
+    scaling_factor = min(h_original / 256, w_original / 256)
+    marker_size = int(6 * scaling_factor)
+    marker_thickness = int(2 * scaling_factor)
+    if len(sel_pix) > 0:
+        sel_pix.pop()
+        # Redraw the remaining points
+        for point, label in sel_pix:
+            cv2.drawMarker(temp, tuple(point), COLORS[label],
+                           markerType=MARKERS[label], markerSize=marker_size, thickness=marker_thickness)
+    else:
+        dummy_overlay_mask = [(np.zeros((h_original, w_original), dtype=np.uint8), 'visible_mask')]
+        return orig_img, (orig_img, dummy_overlay_mask), []
+
+    visible_mask, overlay_mask = run_sam(predictor, sel_pix)
+    mask = np.squeeze(overlay_mask[0][0])
+    resized_mask = cv2.resize(mask.astype(np.uint8) * 255, (w_original, h_original), interpolation=cv2.INTER_AREA)
+    resized_mask = resized_mask > 127
+    resized_overlay_mask = [(resized_mask, 'visible_mask')]
+    return temp, (orig_img, resized_overlay_mask), visible_mask
+
+def reset_image(predictor, img):
+    """
+    Called when an image is uploaded:
+    - reset the predictor,
+    - set the predictor's input image,
+    - return the original image, the preprocessed image, a cleared sel_pix, and the initial output (no mask).
+    """
+    preprocessed_image = img
+    predictor.set_image(preprocessed_image)
+    # Return the original image, the preprocessed image, an empty point list,
+    # and the initial output (displayed as the SAM mask; initially a copy of the original)
+    return img, preprocessed_image, [], (img.copy(), [(np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8), 'visible_mask')])
+
+def button_clickable(selected_points):
+    if len(selected_points) > 0:
+        return gr.Button.update(interactive=True)
+    else:
+        return gr.Button.update(interactive=False)
 
 
  @spaces.GPU
@@ -131,10 +179,12 @@ def apply_mask_overlay(image: Image.Image, mask: np.ndarray) -> Image.Image:
     return Image.fromarray(overlay)
 
 
-def segment_and_overlay(image: Image.Image, points):
+def segment_and_overlay(image: np.ndarray, points):
     """
     Call run_sam to obtain a mask, then overlay the segmentation result for display.
     """
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
     if image.mode != "RGB":
         image = image.convert("RGB")
     mask, _ = run_sam(sam_predictor, image, points)
@@ -310,6 +360,15 @@ def split_image(image: Image.Image) -> list:
     return [image for image in images]
 
 
+def get_sam_predictor():
+    sam_checkpoint = hf_hub_download("ybelkada/segment-anything", "checkpoints/sam_vit_h_4b8939.pth")
+    model_type = "vit_h"
+    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
+    sam.cuda()
+    sam_predictor = SamPredictor(sam)
+    return sam_predictor
+
+
 with gr.Blocks(delete_cache=(600, 600)) as demo:
     gr.Markdown("""
     ## 3D Amodal Reconstruction with [Amodal3R](https://sm0kywu.github.io/Amodal3R/)
@@ -319,72 +378,92 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
     * Different random seeds can be tried in "Generation Settings", if you think the results are not ideal.
     * If the reconstruction 3D asset is satisfactory, you can extract the GLB file and download it.
     """)
+
+    # Define the state variables
+    predictor = gr.State(value=get_sam_predictor())
+    selected_points = gr.State(value=[])
+    original_image = gr.State(value=None)
+    preprocessed_image = gr.State(value=None)
+    visible_mask = gr.State(value=None)
+
+
     with gr.Row():
         with gr.Column():
-            # Show the uploaded image as-is, without preprocessing
-            image_prompt = gr.Image(type="numpy", label="Input Occlusion Image", interactive=True, height=512)
-            # Image used for interactive annotation; markers update on click
-            # image_annotation = gr.Image(type="numpy", label="Select Point Prompts for Target Object", interactive=True, height=512)
-            # State for the clicked points and a display of their coordinates
-            points_state = gr.State([])
-            segment_button = gr.Button("Run Segmentation")
-            # points_output = gr.Textbox(label="Target Object Prompts", interactive=False)
-            # Show the SAM segmentation result (display only, uploads disabled)
-            segmented_output = gr.Image(label="Segmented Result", height=512, interactive=False)
+            # # Show the uploaded image as-is, without preprocessing
+            # image_prompt = gr.Image(type="numpy", label="Input Occlusion Image", interactive=True, height=512)
+            # # Image used for interactive annotation; markers update on click
+            # # image_annotation = gr.Image(type="numpy", label="Select Point Prompts for Target Object", interactive=True, height=512)
+            # # State for the clicked points and a display of their coordinates
+            # points_state = gr.State([])
+            # segment_button = gr.Button("Run Segmentation")
+            # # points_output = gr.Textbox(label="Target Object Prompts", interactive=False)
+            # # Show the SAM segmentation result (display only, uploads disabled)
+            # segmented_output = gr.Image(label="Segmented Result", height=512, interactive=False)
 
-            with gr.Accordion(label="Generation Settings", open=False):
-                seed = gr.Slider(0, MAX_SEED, label="Seed", value=1, step=1)
-                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-                gr.Markdown("Stage 1: Sparse Structure Generation")
-                with gr.Row():
-                    ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
-                    ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-                gr.Markdown("Stage 2: Structured Latent Generation")
-                with gr.Row():
-                    slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
-                    slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-            # Other components (generation button, video display, GLB extraction, etc.) can be added as needed
+            # with gr.Accordion(label="Generation Settings", open=False):
+            #     seed = gr.Slider(0, MAX_SEED, label="Seed", value=1, step=1)
+            #     randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+            #     gr.Markdown("Stage 1: Sparse Structure Generation")
+            #     with gr.Row():
+            #         ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
+            #         ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+            #     gr.Markdown("Stage 2: Structured Latent Generation")
+            #     with gr.Row():
+            #         slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
+            #         slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+            # # Other components (generation button, video display, GLB extraction, etc.) can be added as needed
+            input_image = gr.Image(type="numpy", label='Input Occlusion Image', height=500)
+            annotation_image = gr.Image(type="numpy", label='Annotate Image', interactive=True, height=500)
+            undo_button = gr.Button('Undo Prompt')
+            fg_bg_radio = gr.Radio(['positive_prompt', 'negative_prompt'], label='Point Prompt Type')
+            gr.Markdown('''
+            ### Instructions:
+            - First, upload an image.
+            - Then, click on the "Annotate Image" to select visible regions.
+            - Use "Undo Prompt" to remove the last point.
+            - Once the SAM mask is satisfactory, click "Run pix2gestalt" to perform amodal completion.
+            ''')
+        with gr.Column():
+            # Display the SAM segmentation result (with overlay); AnnotatedImage makes this clearer
+            output_mask = gr.AnnotatedImage(label='SAM Generated Visible (Modal) Mask', height=500)
+
 
     # Session start and end
     demo.load(start_session)
     demo.unload(end_session)
 
-    # Show the uploaded image directly, without preprocessing
-    image_prompt.upload(
-        lambda x: x,
-        inputs=[image_prompt],
-        outputs=[image_prompt]
+    # On image upload: reset the predictor and write the image into original_image, preprocessed_image, selected_points, and output_mask
+    input_image.upload(
+        reset_image,
+        [predictor, input_image],
+        [original_image, preprocessed_image, selected_points, output_mask]
     )
-
-    # On click in image_annotation, call select_point_callback to
-    # update the displayed image, the point state, and the click-point text
-    image_prompt.select(
-        select_point_callback,
-        inputs=[image_prompt, points_state],
-        outputs=[image_prompt, points_state]
+    # Also update annotation_image (keep it in sync with the uploaded image)
+    input_image.upload(
+        lambda x: x,
+        inputs=[input_image],
+        outputs=[annotation_image]
     )
-
-    # A button to run SAM segmentation and show the overlaid result
-    segment_button.click(
-        segment_and_overlay,
-        inputs=[image_prompt, points_state],
-        outputs=[segmented_output]
+    # Undo button: revert the most recent click
+    undo_button.click(
+        undo_points,
+        [predictor, original_image, selected_points],
+        [annotation_image, output_mask, visible_mask]
+    )
+    # On click in annotation_image: call select_point to update the annotated image and the SAM result
+    annotation_image.select(
+        select_point,
+        [predictor, annotation_image, original_image, selected_points, fg_bg_radio],
+        [annotation_image, output_mask, visible_mask]
     )
 
-    # Later stages (3D model generation, etc.) can be added...
 
 # Launch the Gradio app
 if __name__ == "__main__":
-    sam_checkpoint = hf_hub_download("ybelkada/segment-anything", "checkpoints/sam_vit_h_4b8939.pth")
-    model_type = "vit_h"
-    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
-    sam.cuda()
-    sam_predictor = SamPredictor(sam)
-
     pipeline = Amodal3RImageTo3DPipeline.from_pretrained("Sm0kyWu/Amodal3R")
     pipeline.cuda()
     try:
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
     except:
         pass
-    demo.launch()
+    demo.launch()
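`run_sam` is referenced throughout the new code but is not part of this diff. Below is a minimal sketch of a compatible implementation, assuming it wraps `SamPredictor.predict` from the `segment_anything` package and that `predictor.set_image(...)` has already been called (as `reset_image` does); the nested `overlay_mask` structure is an assumption, chosen so that the `np.squeeze(overlay_mask[0][0])` indexing in `select_point` recovers a 2D mask.

```python
import numpy as np
from segment_anything import SamPredictor


def run_sam(predictor: SamPredictor, sel_pix: list):
    """Hypothetical helper: turn (point, label) prompts into a SAM mask."""
    # sel_pix is a list of ((x, y), label) pairs; label 1 = positive, 0 = negative
    coords = np.array([list(p) for p, _ in sel_pix], dtype=np.float32)  # (N, 2)
    labels = np.array([l for _, l in sel_pix], dtype=np.int32)          # (N,)
    masks, scores, _ = predictor.predict(
        point_coords=coords,
        point_labels=labels,
        multimask_output=False,
    )
    # masks has shape (1, H, W); keep the single returned mask
    visible_mask = masks[0]
    # Nest the raw array so that overlay_mask[0][0] recovers it, matching
    # the np.squeeze(overlay_mask[0][0]) call in select_point
    overlay_mask = [(masks, 'visible_mask')]
    return visible_mask, overlay_mask
```

One thing to check against the real helper: `select_point` rescales clicks into a 256×256 frame before calling `run_sam`, while `reset_image` hands the upload to `set_image` at its native resolution and `undo_points` passes unscaled points. All three call sites have to agree on a single coordinate frame for the prompts to land on the intended object.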
 
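The `(orig_img, [(mask, 'visible_mask')])` tuples returned to `output_mask` follow the value format that `gr.AnnotatedImage` expects: a base image plus a list of `(mask, label)` pairs, where each mask is a numpy array with the same height and width as the base. A self-contained sketch (the variable names are illustrative, not from the app):

```python
import numpy as np
import gradio as gr

# A gray base image with a mask covering its upper-left quadrant
base = np.full((256, 256, 3), 128, dtype=np.uint8)
mask = np.zeros((256, 256), dtype=np.uint8)
mask[:128, :128] = 1

with gr.Blocks() as demo:
    # Value format: (image, [(mask_or_bounding_box, label), ...])
    gr.AnnotatedImage(value=(base, [(mask, 'visible_mask')]), label='Demo Overlay')

demo.launch()
```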
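One compatibility caveat in the new `button_clickable` helper: `gr.Button.update` is the Gradio 3.x idiom and was removed in Gradio 4.x, where component updates are written with `gr.update(...)`. If the Space runs Gradio 4, the helper would need something along these lines (a sketch under that version assumption, not part of this commit):

```python
import gradio as gr

def button_clickable(selected_points):
    # gr.update(...) applies the given properties to the event's output component
    return gr.update(interactive=len(selected_points) > 0)
```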