Spaces:

Sm0kyWu
/

Amodal3R

Runtime error

App Files Files Community

Sm0kyWu committed on Mar 13

Commit

ec4c704

verified ·

1 Parent(s): cbc2ce6

Upload app.py

Browse files

Files changed (1) hide show

app.py +44 -32

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ import imageio
 from easydict import EasyDict as edict
 from PIL import Image
 from Amodal3R.pipelines import Amodal3RImageTo3DPipeline
 from Amodal3R.representations import Gaussian, MeshExtractResult
 from Amodal3R.utils import render_utils, postprocessing_utils
 from segment_anything import sam_model_registry, SamPredictor
@@ -96,23 +97,33 @@ def segment_and_overlay(image, points, sam_predictor):
 @spaces.GPU
 def image_to_3d(
-    image: List[tuple],
-    masks: List[np.ndarray],
     seed: int,
     ss_guidance_strength: float,
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
-    multiimage_algo: str,
     req: gr.Request,
-) -> tuple:
     """
-    将图像转换为 3D 模型。
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     outputs = pipeline.run_multi_image(
-        [img[0] for img in image],
-        [mask[0] for mask in masks],
         seed=seed,
         formats=["gaussian", "mesh"],
         preprocess_image=False,
@@ -124,7 +135,7 @@ def image_to_3d(
             "steps": slat_sampling_steps,
             "cfg_strength": slat_guidance_strength,
         },
-        mode=multiimage_algo,
     )
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
@@ -430,26 +441,26 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         * Different random seeds can be tried in "Generation Settings", if you think the results are not ideal.
         * If the reconstruction 3D asset is satisfactory, you can extract the GLB file and download it.
         """)
-    # with gr.Row():
-    #     with gr.Column():
-    #         with gr.Accordion(label="Generation Settings", open=True):
-    #             seed = gr.Slider(0, MAX_SEED, label="Seed", value=1, step=1)
-    #             randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-    #             gr.Markdown("Stage 1: Sparse Structure Generation")
-    #             with gr.Row():
-    #                 ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
-    #                 ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-    #             gr.Markdown("Stage 2: Structured Latent Generation")
-    #             with gr.Row():
-    #                 slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
-    #                 slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-    #         generate_btn = gr.Button("Generate")
-    #     with gr.Column():
-    #         video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
     # # Handlers
-    # demo.load(start_session)
-    # demo.unload(end_session)
     # ---------------------------
     # 原有交互逻辑（略）
@@ -535,16 +546,17 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
     #     outputs=[output_buf, video_output],
     # )
-    # generate_btn.click(
-    #     image_to_3d,
-    #     inputs=[vis_input, occluded_mask, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
-    #     outputs=[output_buf, video_output],
-    # )
 # 启动 Gradio App
 if __name__ == "__main__":
-    pipeline = Amodal3RImageTo3DPipeline.from_pretrained("Sm0kyWu/Amodal3R")
     pipeline.cuda()
     try:
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))

 from easydict import EasyDict as edict
 from PIL import Image
 from Amodal3R.pipelines import Amodal3RImageTo3DPipeline
+from trellis.pipelines import TrellisImageTo3DPipeline
 from Amodal3R.representations import Gaussian, MeshExtractResult
 from Amodal3R.utils import render_utils, postprocessing_utils
 from segment_anything import sam_model_registry, SamPredictor
 @spaces.GPU
 def image_to_3d(
+    images: List[Image.Image],
     seed: int,
     ss_guidance_strength: float,
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
     req: gr.Request,
+) -> Tuple[dict, str]:
     """
+    Convert an image to a 3D model.
+    Args:
+        image (Image.Image): The input image.
+        multiimages (List[Tuple[Image.Image, str]]): The input images in multi-image mode.
+        is_multiimage (bool): Whether is in multi-image mode.
+        seed (int): The random seed.
+        ss_guidance_strength (float): The guidance strength for sparse structure generation.
+        ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
+        slat_guidance_strength (float): The guidance strength for structured latent generation.
+        slat_sampling_steps (int): The number of sampling steps for structured latent generation.
+        multiimage_algo (Literal["multidiffusion", "stochastic"]): The algorithm for multi-image generation.
+    Returns:
+        dict: The information of the generated 3D model.
+        str: The path to the video of the 3D model.
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     outputs = pipeline.run_multi_image(
+        [images],
         seed=seed,
         formats=["gaussian", "mesh"],
         preprocess_image=False,
             "steps": slat_sampling_steps,
             "cfg_strength": slat_guidance_strength,
         },
+        mode="stochastic",
     )
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
         * Different random seeds can be tried in "Generation Settings", if you think the results are not ideal.
         * If the reconstruction 3D asset is satisfactory, you can extract the GLB file and download it.
         """)
+    with gr.Row():
+        with gr.Column():
+            with gr.Accordion(label="Generation Settings", open=True):
+                seed = gr.Slider(0, MAX_SEED, label="Seed", value=1, step=1)
+                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                gr.Markdown("Stage 1: Sparse Structure Generation")
+                with gr.Row():
+                    ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
+                    ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+                gr.Markdown("Stage 2: Structured Latent Generation")
+                with gr.Row():
+                    slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
+                    slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+            generate_btn = gr.Button("Generate")
+        with gr.Column():
+            video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
     # # Handlers
+    demo.load(start_session)
+    demo.unload(end_session)
     # ---------------------------
     # 原有交互逻辑（略）
     #     outputs=[output_buf, video_output],
     # )
+    generate_btn.click(
+        image_to_3d,
+        inputs=[vis_input, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
+        outputs=[output_buf, video_output],
+    )
 # 启动 Gradio App
 if __name__ == "__main__":
+    # pipeline = Amodal3RImageTo3DPipeline.from_pretrained("Sm0kyWu/Amodal3R")
+    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
     pipeline.cuda()
     try:
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))