Spaces:

Sm0kyWu
/

Amodal3R

Runtime error

App Files Files Community

Sm0kyWu committed on Mar 12

Commit

b7fa320

verified ·

1 Parent(s): 0cc751c

Upload app.py

Browse files

Files changed (1) hide show

app.py +115 -265

app.py CHANGED Viewed

@@ -33,102 +33,42 @@ def end_session(req: gr.Request):
     shutil.rmtree(user_dir)
-def preprocess_image(image: Image.Image) -> Image.Image:
     """
-    Preprocess the input image.
-    Args:
-        image (Image.Image): The input image.
-    Returns:
-        Image.Image: The preprocessed image.
-    """
-    processed_image = pipeline.preprocess_image(image)
-    return processed_image
-def preprocess_images(images: List[Tuple[Image.Image, str]]) -> List[Image.Image]:
-    """
-    Preprocess a list of input images.
-    Args:
-        images (List[Tuple[Image.Image, str]]): The input images.
-    Returns:
-        List[Image.Image]: The preprocessed images.
-    """
-    images = [image[0] for image in images]
-    processed_images = [pipeline.preprocess_image(image) for image in images]
-    return processed_images
-def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
-    return {
-        'gaussian': {
-            **gs.init_params,
-            '_xyz': gs._xyz.cpu().numpy(),
-            '_features_dc': gs._features_dc.cpu().numpy(),
-            '_scaling': gs._scaling.cpu().numpy(),
-            '_rotation': gs._rotation.cpu().numpy(),
-            '_opacity': gs._opacity.cpu().numpy(),
-        },
-        'mesh': {
-            'vertices': mesh.vertices.cpu().numpy(),
-            'faces': mesh.faces.cpu().numpy(),
-        },
-    }
-def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
-    gs = Gaussian(
-        aabb=state['gaussian']['aabb'],
-        sh_degree=state['gaussian']['sh_degree'],
-        mininum_kernel_size=state['gaussian']['mininum_kernel_size'],
-        scaling_bias=state['gaussian']['scaling_bias'],
-        opacity_bias=state['gaussian']['opacity_bias'],
-        scaling_activation=state['gaussian']['scaling_activation'],
-    )
-    gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda')
-    gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda')
-    gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
-    gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
-    gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
-    mesh = edict(
-        vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
-        faces=torch.tensor(state['mesh']['faces'], device='cuda'),
-    )
-    return gs, mesh
-def get_seed(randomize_seed: bool, seed: int) -> int:
-    """
-    Get the random seed.
-    """
-    return np.random.randint(0, MAX_SEED) if randomize_seed else seed
-def record_click(evt, points):
-    """
-    记录在图像上点击的位置，默认所有点击均为目标对象的 prompt，标签设为 1
     """
     if points is None:
         points = []
-    if evt is None:
-        return points, str(points)
-    # 假设 evt 中包含 "index" 键，其值为 (x, y)
-    coord = evt.get("index", None)
-    if coord is not None:
-        points.append((coord, 1))
-    return points, str(points)
 @spaces.GPU
 def run_sam(predictor: SamPredictor, image, selected_points):
     """
-    调用 Segment Anything 模型进行分割，返回 mask 及其他信息
     """
-    assert image.mode == 'RGB', "Image should be RGB"
     if len(selected_points) == 0:
         return [], None
     input_points = [p for p, _ in selected_points]
@@ -144,33 +84,24 @@ def run_sam(predictor: SamPredictor, image, selected_points):
 def apply_mask_overlay(image: Image.Image, mask: np.ndarray) -> Image.Image:
     """
-    在原图上叠加 mask：使用红色绘制 mask 的轮廓，
-    非 mask 区域叠加浅灰色半透明遮罩
     """
-    # 转换图像为 numpy 数组
     img_arr = np.array(image)
-    # 如果 mask 为三维，则取第一个通道
     if mask.ndim == 3:
         mask = mask[:, :, 0]
-    # 创建副本用于叠加
     overlay = img_arr.copy()
-    # 定义浅灰色（例如 RGB=(200,200,200)）
     gray_color = np.array([200, 200, 200], dtype=np.uint8)
-    # 对于非 mask 区域（mask == 0），进行半透明混合
     non_mask = mask == 0
     overlay[non_mask] = (0.5 * overlay[non_mask] + 0.5 * gray_color).astype(np.uint8)
-    # 使用 OpenCV 找到 mask 的轮廓
     contours, _ = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    # 在 overlay 上绘制红色轮廓，粗细为2个像素
     cv2.drawContours(overlay, contours, -1, (255, 0, 0), 2)
     return Image.fromarray(overlay)
 def segment_and_overlay(image: Image.Image, points):
     """
-    调用 run_sam 获得 mask，然后调用 apply_mask_overlay 生成叠加图像
     """
-    # 确保输入图像为 RGB
     if image.mode != "RGB":
         image = image.convert("RGB")
     mask, _ = run_sam(sam_predictor, image, points)
@@ -179,9 +110,10 @@ def segment_and_overlay(image: Image.Image, points):
     overlaid = apply_mask_overlay(image, mask)
     return overlaid
 def reset_points():
     """
-    清空 prompt 点
     """
     return [], ""
@@ -189,33 +121,18 @@ def reset_points():
 @spaces.GPU
 def image_to_3d(
     image: Image.Image,
-    multiimages: List[Tuple[Image.Image, str]],
     is_multiimage: bool,
     seed: int,
     ss_guidance_strength: float,
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
-    multiimage_algo: Literal["multidiffusion", "stochastic"],
     req: gr.Request,
-) -> Tuple[dict, str]:
     """
-    Convert an image to a 3D model.
-    Args:
-        image (Image.Image): The input image.
-        multiimages (List[Tuple[Image.Image, str]]): The input images in multi-image mode.
-        is_multiimage (bool): Whether is in multi-image mode.
-        seed (int): The random seed.
-        ss_guidance_strength (float): The guidance strength for sparse structure generation.
-        ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
-        slat_guidance_strength (float): The guidance strength for structured latent generation.
-        slat_sampling_steps (int): The number of sampling steps for structured latent generation.
-        multiimage_algo (Literal["multidiffusion", "stochastic"]): The algorithm for multi-image generation.
-    Returns:
-        dict: The information of the generated 3D model.
-        str: The path to the video of the 3D model.
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     if not is_multiimage:
@@ -235,7 +152,7 @@ def image_to_3d(
         )
     else:
         outputs = pipeline.run_multi_image(
-            [image[0] for image in multiimages],
             seed=seed,
             formats=["gaussian", "mesh"],
             preprocess_image=False,
@@ -265,17 +182,9 @@ def extract_glb(
     mesh_simplify: float,
     texture_size: int,
     req: gr.Request,
-) -> Tuple[str, str]:
     """
-    Extract a GLB file from the 3D model.
-    Args:
-        state (dict): The state of the generated 3D model.
-        mesh_simplify (float): The mesh simplification factor.
-        texture_size (int): The texture resolution.
-    Returns:
-        str: The path to the extracted GLB file.
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     gs, mesh = unpack_state(state)
@@ -287,15 +196,9 @@ def extract_glb(
 @spaces.GPU
-def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
     """
-    Extract a Gaussian file from the 3D model.
-    Args:
-        state (dict): The state of the generated 3D model.
-    Returns:
-        str: The path to the extracted Gaussian file.
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     gs, _ = unpack_state(state)
@@ -305,7 +208,47 @@ def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
     return gaussian_path, gaussian_path
-def prepare_multi_example() -> List[Image.Image]:
     multi_case = list(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")]))
     images = []
     for case in multi_case:
@@ -319,49 +262,37 @@ def prepare_multi_example() -> List[Image.Image]:
     return images
-def split_image(image: Image.Image) -> List[Image.Image]:
     """
-    Split an image into multiple views.
     """
     image = np.array(image)
     alpha = image[..., 3]
-    alpha = np.any(alpha>0, axis=0)
     start_pos = np.where(~alpha[:-1] & alpha[1:])[0].tolist()
     end_pos = np.where(alpha[:-1] & ~alpha[1:])[0].tolist()
     images = []
     for s, e in zip(start_pos, end_pos):
         images.append(Image.fromarray(image[:, s:e+1]))
-    return [preprocess_image(image) for image in images]
 with gr.Blocks(delete_cache=(600, 600)) as demo:
     gr.Markdown("""
     ## 3D Amodal Reconstruction with [Amodal3R](https://sm0kywu.github.io/Amodal3R/)
-    * Upload an image and click "Generate" to create a 3D asset.
-    * Target object selection. Multiple point prompts are supported until you get the ideal visible area.
-    * Occluders selection, this can be done by squential point prompts. You can choose "all occ", then all the other areas except the target object will be treated as occluders.
-    * Different random seeds can be tried in "Generation Settings", if you think the results are not ideal.
-    * If the reconstruction 3D asset is satisfactory, you can extract the GLB file and download it.
     """)
     with gr.Row():
         with gr.Column():
-            with gr.Tabs() as input_tabs:
-                image_prompt = gr.Image(type="numpy", label="Input Occlusion Image", height=512)
-            # 用于交互标注的图像
             image_annotation = gr.Image(type="numpy", label="Select Point Prompts for Target Object", interactive=True, height=512)
-            # 记录用户点击的点及显示当前 prompt 列表
             points_state = gr.State([])
             points_output = gr.Textbox(label="Target Object Prompts", interactive=False)
-            # 为 image_annotation 添加点击事件记录 prompt 点
-            image_annotation.select(
-                record_click,
-                inputs=[points_state],
-                outputs=[points_state, points_output]
-            )
-            # 新增：分割后展示结果的组件
-            segmented_output = gr.Image(label="Segmented Result", height=512)
             with gr.Accordion(label="Generation Settings", open=False):
                 seed = gr.Slider(0, MAX_SEED, label="Seed", value=1, step=1)
@@ -374,119 +305,38 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
                 with gr.Row():
                     slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                     slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-        # with gr.Column():
-    #         generate_btn = gr.Button("Generate")
-    #         with gr.Accordion(label="GLB Extraction Settings", open=False):
-    #             mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
-    #             texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
-    #         with gr.Row():
-    #             extract_glb_btn = gr.Button("Extract GLB", interactive=False)
-    #             extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
-    #         gr.Markdown("""
-    #                     *NOTE: Gaussian file can be very large (~50MB), it will take a while to display and download.*
-    #                     """)
-    #     with gr.Column():
-    #         video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
-    #         model_output = LitModel3D(label="Extracted GLB/Gaussian", exposure=10.0, height=300)
-    #         with gr.Row():
-    #             download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
-    #             download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
-    # is_multiimage = gr.State(False)
-    # output_buf = gr.State()
-    # # Example images at the bottom of the page
-    # with gr.Row() as single_image_example:
-    #     examples = gr.Examples(
-    #         examples=[
-    #             f'assets/example_image/{image}'
-    #             for image in os.listdir("assets/example_image")
-    #         ],
-    #         inputs=[image_prompt],
-    #         fn=preprocess_image,
-    #         outputs=[image_prompt],
-    #         run_on_click=True,
-    #         examples_per_page=64,
-    #     )
-    # with gr.Row(visible=False) as multiimage_example:
-    #     examples_multi = gr.Examples(
-    #         examples=prepare_multi_example(),
-    #         inputs=[image_prompt],
-    #         fn=split_image,
-    #         outputs=[multiimage_prompt],
-    #         run_on_click=True,
-    #         examples_per_page=8,
-    #     )
-    # Handlers
     demo.load(start_session)
     demo.unload(end_session)
-    # single_image_input_tab.select(
-    #     lambda: tuple([False, gr.Row.update(visible=True), gr.Row.update(visible=False)]),
-    #     outputs=[single_image_example]
-    # )
-    # multiimage_input_tab.select(
-    #     lambda: tuple([True, gr.Row.update(visible=False), gr.Row.update(visible=True)]),
-    #     outputs=[is_multiimage, single_image_example, multiimage_example]
-    # )
     image_prompt.upload(
-        preprocess_image,
         inputs=[image_prompt],
-        outputs=[image_prompt],
     )
-    # generate_btn.click(
-    #     get_seed,
-    #     inputs=[randomize_seed, seed],
-    #     outputs=[seed],
-    # ).then(
-    #     image_to_3d,
-    #     inputs=[image_prompt, multiimage_prompt, is_multiimage, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, multiimage_algo],
-    #     outputs=[output_buf, video_output],
-    # ).then(
-    #     lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
-    #     outputs=[extract_glb_btn, extract_gs_btn],
-    # )
-    # video_output.clear(
-    #     lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
-    #     outputs=[extract_glb_btn, extract_gs_btn],
-    # )
-    # extract_glb_btn.click(
-    #     extract_glb,
-    #     inputs=[output_buf, mesh_simplify, texture_size],
-    #     outputs=[model_output, download_glb],
-    # ).then(
-    #     lambda: gr.Button(interactive=True),
-    #     outputs=[download_glb],
-    # )
-    # extract_gs_btn.click(
-    #     extract_gaussian,
-    #     inputs=[output_buf],
-    #     outputs=[model_output, download_gs],
-    # ).then(
-    #     lambda: gr.Button(interactive=True),
-    #     outputs=[download_gs],
-    # )
-    # model_output.clear(
-    #     lambda: gr.Button(interactive=False),
-    #     outputs=[download_glb],
-    # )
-# Launch the Gradio app
 if __name__ == "__main__":
     sam_checkpoint = hf_hub_download("ybelkada/segment-anything", "checkpoints/sam_vit_h_4b8939.pth")
     model_type = "vit_h"
@@ -497,7 +347,7 @@ if __name__ == "__main__":
     pipeline = Amodal3RImageTo3DPipeline.from_pretrained("Sm0kyWu/Amodal3R")
     pipeline.cuda()
     try:
-        pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
     except:
         pass
     demo.launch()

     shutil.rmtree(user_dir)
+def select_point_callback(image, points, evt):
     """
+    当用户点击图像时，记录点击点并在图像上绘制标记（十字）。
+    输入参数：
+      - image：当前图像（numpy 数组）。
+      - points：已记录的点列表。
+      - evt：Gradio 的点击事件数据（包含 .index，即点击坐标）。
+    返回：
+      - 更新后的图像（带标记）。
+      - 更新后的点列表。
+      - 以字符串形式展示的点列表（用于显示在文本框中）。
     """
     if points is None:
         points = []
+    annotated_img = image.copy()
+    # 如果没有点击事件，则直接返回原图和当前点列表
+    if evt is None or evt.index is None:
+        return image, points, str(points)
+    coord = evt.index  # 期望返回 (x, y)
+    points.append((tuple(coord), 1))  # 记录为正样本 prompt
+    # 绘制十字标记，颜色为红色
+    cv2.drawMarker(annotated_img, tuple(coord), (255, 0, 0),
+                   markerType=cv2.MARKER_CROSS, markerSize=10, thickness=2)
+    return annotated_img, points, str(points)
 @spaces.GPU
 def run_sam(predictor: SamPredictor, image, selected_points):
     """
+    调用 SAM 模型进行分割。
     """
+    # 确保图像为 RGB 模式
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    if image.mode != 'RGB':
+        image = image.convert("RGB")
     if len(selected_points) == 0:
         return [], None
     input_points = [p for p, _ in selected_points]
 def apply_mask_overlay(image: Image.Image, mask: np.ndarray) -> Image.Image:
     """
+    在原图上叠加 mask：使用红色绘制 mask 的轮廓，非 mask 区域叠加浅灰色半透明遮罩。
     """
     img_arr = np.array(image)
     if mask.ndim == 3:
         mask = mask[:, :, 0]
     overlay = img_arr.copy()
     gray_color = np.array([200, 200, 200], dtype=np.uint8)
     non_mask = mask == 0
     overlay[non_mask] = (0.5 * overlay[non_mask] + 0.5 * gray_color).astype(np.uint8)
     contours, _ = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     cv2.drawContours(overlay, contours, -1, (255, 0, 0), 2)
     return Image.fromarray(overlay)
 def segment_and_overlay(image: Image.Image, points):
     """
+    调用 run_sam 获得 mask，然后叠加显示分割结果。
     """
     if image.mode != "RGB":
         image = image.convert("RGB")
     mask, _ = run_sam(sam_predictor, image, points)
     overlaid = apply_mask_overlay(image, mask)
     return overlaid
 def reset_points():
     """
+    清空点击点提示。
     """
     return [], ""
 @spaces.GPU
 def image_to_3d(
     image: Image.Image,
+    multiimages: List[tuple],
     is_multiimage: bool,
     seed: int,
     ss_guidance_strength: float,
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
+    multiimage_algo: str,
     req: gr.Request,
+) -> tuple:
     """
+    将图像转换为 3D 模型。
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     if not is_multiimage:
         )
     else:
         outputs = pipeline.run_multi_image(
+            [img[0] for img in multiimages],
             seed=seed,
             formats=["gaussian", "mesh"],
             preprocess_image=False,
     mesh_simplify: float,
     texture_size: int,
     req: gr.Request,
+) -> tuple:
     """
+    从生成的 3D 模型中提取 GLB 文件。
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     gs, mesh = unpack_state(state)
 @spaces.GPU
+def extract_gaussian(state: dict, req: gr.Request) -> tuple:
     """
+    从生成的 3D 模型中提取 Gaussian 文件。
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     gs, _ = unpack_state(state)
     return gaussian_path, gaussian_path
+def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
+    return {
+        'gaussian': {
+            **gs.init_params,
+            '_xyz': gs._xyz.cpu().numpy(),
+            '_features_dc': gs._features_dc.cpu().numpy(),
+            '_scaling': gs._scaling.cpu().numpy(),
+            '_rotation': gs._rotation.cpu().numpy(),
+            '_opacity': gs._opacity.cpu().numpy(),
+        },
+        'mesh': {
+            'vertices': mesh.vertices.cpu().numpy(),
+            'faces': mesh.faces.cpu().numpy(),
+        },
+    }
+def unpack_state(state: dict) -> tuple:
+    gs = Gaussian(
+        aabb=state['gaussian']['aabb'],
+        sh_degree=state['gaussian']['sh_degree'],
+        mininum_kernel_size=state['gaussian']['mininum_kernel_size'],
+        scaling_bias=state['gaussian']['scaling_bias'],
+        opacity_bias=state['gaussian']['opacity_bias'],
+        scaling_activation=state['gaussian']['scaling_activation'],
+    )
+    gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda')
+    gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda')
+    gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
+    gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
+    gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
+    mesh = edict(
+        vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
+        faces=torch.tensor(state['mesh']['faces'], device='cuda'),
+    )
+    return gs, mesh
+def prepare_multi_example() -> list:
     multi_case = list(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")]))
     images = []
     for case in multi_case:
     return images
+def split_image(image: Image.Image) -> list:
     """
+    将图像拆分为多个视图（不进行预处理）。
     """
     image = np.array(image)
     alpha = image[..., 3]
+    alpha = np.any(alpha > 0, axis=0)
     start_pos = np.where(~alpha[:-1] & alpha[1:])[0].tolist()
     end_pos = np.where(alpha[:-1] & ~alpha[1:])[0].tolist()
     images = []
     for s, e in zip(start_pos, end_pos):
         images.append(Image.fromarray(image[:, s:e+1]))
+    return [image for image in images]
 with gr.Blocks(delete_cache=(600, 600)) as demo:
     gr.Markdown("""
     ## 3D Amodal Reconstruction with [Amodal3R](https://sm0kywu.github.io/Amodal3R/)
+    * 上传图像后，点击图像选择目标区域，点击的点会在图像上显示。
     """)
     with gr.Row():
         with gr.Column():
+            # 上传的图像不经过预处理，直接展示原始图像
+            image_prompt = gr.Image(type="numpy", label="Input Occlusion Image", height=512)
+            # 用于交互标注的图像，点击时更新显示标记
             image_annotation = gr.Image(type="numpy", label="Select Point Prompts for Target Object", interactive=True, height=512)
+            # 存储点击点状态以及显示点击点坐标
             points_state = gr.State([])
             points_output = gr.Textbox(label="Target Object Prompts", interactive=False)
+            # 展示 SAM 分割结果（只用于显示，不允许上传）
+            segmented_output = gr.Image(label="Segmented Result", height=512, interactive=False)
             with gr.Accordion(label="Generation Settings", open=False):
                 seed = gr.Slider(0, MAX_SEED, label="Seed", value=1, step=1)
                 with gr.Row():
                     slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                     slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+        # 其他组件（如生成按钮、视频展示、GLB 提取等）可根据需要添加
+    # 会话启动与结束
     demo.load(start_session)
     demo.unload(end_session)
+    # 上传图像后直接显示，不做预处理
     image_prompt.upload(
+        lambda x: x,
         inputs=[image_prompt],
+        outputs=[image_prompt]
     )
+    # 点击 image_annotation 时调用 select_point_callback，
+    # 更新图像显示、点状态以及文本显示点击点信息
+    image_annotation.select(
+        select_point_callback,
+        inputs=[image_annotation, points_state],
+        outputs=[image_annotation, points_state, points_output]
+    )
+    # 添加一个按钮，用于运行 SAM 分割并展示叠加结果
+    segment_button = gr.Button("Run Segmentation")
+    segment_button.click(
+        segment_and_overlay,
+        inputs=[image_prompt, points_state],
+        outputs=[segmented_output]
+    )
+    # 后续可添加生成 3D 模型等其他流程...
+# 启动 Gradio App
 if __name__ == "__main__":
     sam_checkpoint = hf_hub_download("ybelkada/segment-anything", "checkpoints/sam_vit_h_4b8939.pth")
     model_type = "vit_h"
     pipeline = Amodal3RImageTo3DPipeline.from_pretrained("Sm0kyWu/Amodal3R")
     pipeline.cuda()
     try:
+        pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
     except:
         pass
     demo.launch()