Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,7 @@ import imageio
|
|
13 |
from easydict import EasyDict as edict
|
14 |
from PIL import Image
|
15 |
from Amodal3R.pipelines import Amodal3RImageTo3DPipeline
|
|
|
16 |
from Amodal3R.representations import Gaussian, MeshExtractResult
|
17 |
from Amodal3R.utils import render_utils, postprocessing_utils
|
18 |
from segment_anything import sam_model_registry, SamPredictor
|
@@ -96,23 +97,33 @@ def segment_and_overlay(image, points, sam_predictor):
|
|
96 |
|
97 |
@spaces.GPU
|
98 |
def image_to_3d(
|
99 |
-
|
100 |
-
masks: List[np.ndarray],
|
101 |
seed: int,
|
102 |
ss_guidance_strength: float,
|
103 |
ss_sampling_steps: int,
|
104 |
slat_guidance_strength: float,
|
105 |
slat_sampling_steps: int,
|
106 |
-
multiimage_algo: str,
|
107 |
req: gr.Request,
|
108 |
-
) ->
|
109 |
"""
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
"""
|
112 |
user_dir = os.path.join(TMP_DIR, str(req.session_hash))
|
113 |
outputs = pipeline.run_multi_image(
|
114 |
-
[
|
115 |
-
[mask[0] for mask in masks],
|
116 |
seed=seed,
|
117 |
formats=["gaussian", "mesh"],
|
118 |
preprocess_image=False,
|
@@ -124,7 +135,7 @@ def image_to_3d(
|
|
124 |
"steps": slat_sampling_steps,
|
125 |
"cfg_strength": slat_guidance_strength,
|
126 |
},
|
127 |
-
mode=
|
128 |
)
|
129 |
video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
|
130 |
video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
|
@@ -430,26 +441,26 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
|
|
430 |
* Different random seeds can be tried in "Generation Settings", if you think the results are not ideal.
|
431 |
* If the reconstruction 3D asset is satisfactory, you can extract the GLB file and download it.
|
432 |
""")
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
|
450 |
# # Handlers
|
451 |
-
|
452 |
-
|
453 |
|
454 |
# ---------------------------
|
455 |
# Interaction logic (omitted)
|
@@ -535,16 +546,17 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
|
|
535 |
# outputs=[output_buf, video_output],
|
536 |
# )
|
537 |
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
|
544 |
|
545 |
# Launch the Gradio app
|
546 |
if __name__ == "__main__":
|
547 |
-
pipeline = Amodal3RImageTo3DPipeline.from_pretrained("Sm0kyWu/Amodal3R")
|
|
|
548 |
pipeline.cuda()
|
549 |
try:
|
550 |
pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
|
|
|
13 |
from easydict import EasyDict as edict
|
14 |
from PIL import Image
|
15 |
from Amodal3R.pipelines import Amodal3RImageTo3DPipeline
|
16 |
+
from trellis.pipelines import TrellisImageTo3DPipeline
|
17 |
from Amodal3R.representations import Gaussian, MeshExtractResult
|
18 |
from Amodal3R.utils import render_utils, postprocessing_utils
|
19 |
from segment_anything import sam_model_registry, SamPredictor
|
|
|
97 |
|
98 |
@spaces.GPU
|
99 |
def image_to_3d(
|
100 |
+
images: List[Image.Image],
|
|
|
101 |
seed: int,
|
102 |
ss_guidance_strength: float,
|
103 |
ss_sampling_steps: int,
|
104 |
slat_guidance_strength: float,
|
105 |
slat_sampling_steps: int,
|
|
|
106 |
req: gr.Request,
|
107 |
+
) -> Tuple[dict, str]:
|
108 |
"""
|
109 |
+
Convert an image to a 3D model.
|
110 |
+
Args:
|
111 |
+
image (Image.Image): The input image.
|
112 |
+
multiimages (List[Tuple[Image.Image, str]]): The input images in multi-image mode.
|
113 |
+
is_multiimage (bool): Whether is in multi-image mode.
|
114 |
+
seed (int): The random seed.
|
115 |
+
ss_guidance_strength (float): The guidance strength for sparse structure generation.
|
116 |
+
ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
|
117 |
+
slat_guidance_strength (float): The guidance strength for structured latent generation.
|
118 |
+
slat_sampling_steps (int): The number of sampling steps for structured latent generation.
|
119 |
+
multiimage_algo (Literal["multidiffusion", "stochastic"]): The algorithm for multi-image generation.
|
120 |
+
Returns:
|
121 |
+
dict: The information of the generated 3D model.
|
122 |
+
str: The path to the video of the 3D model.
|
123 |
"""
|
124 |
user_dir = os.path.join(TMP_DIR, str(req.session_hash))
|
125 |
outputs = pipeline.run_multi_image(
|
126 |
+
[images],
|
|
|
127 |
seed=seed,
|
128 |
formats=["gaussian", "mesh"],
|
129 |
preprocess_image=False,
|
|
|
135 |
"steps": slat_sampling_steps,
|
136 |
"cfg_strength": slat_guidance_strength,
|
137 |
},
|
138 |
+
mode="stochastic",
|
139 |
)
|
140 |
video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
|
141 |
video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
|
|
|
441 |
* Different random seeds can be tried in "Generation Settings", if you think the results are not ideal.
|
442 |
* If the reconstruction 3D asset is satisfactory, you can extract the GLB file and download it.
|
443 |
""")
|
444 |
+
with gr.Row():
|
445 |
+
with gr.Column():
|
446 |
+
with gr.Accordion(label="Generation Settings", open=True):
|
447 |
+
seed = gr.Slider(0, MAX_SEED, label="Seed", value=1, step=1)
|
448 |
+
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
|
449 |
+
gr.Markdown("Stage 1: Sparse Structure Generation")
|
450 |
+
with gr.Row():
|
451 |
+
ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
|
452 |
+
ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
|
453 |
+
gr.Markdown("Stage 2: Structured Latent Generation")
|
454 |
+
with gr.Row():
|
455 |
+
slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
|
456 |
+
slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
|
457 |
+
generate_btn = gr.Button("Generate")
|
458 |
+
with gr.Column():
|
459 |
+
video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
|
460 |
|
461 |
# # Handlers
|
462 |
+
demo.load(start_session)
|
463 |
+
demo.unload(end_session)
|
464 |
|
465 |
# ---------------------------
|
466 |
# Interaction logic (omitted)
|
|
|
546 |
# outputs=[output_buf, video_output],
|
547 |
# )
|
548 |
|
549 |
+
generate_btn.click(
|
550 |
+
image_to_3d,
|
551 |
+
inputs=[vis_input, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
|
552 |
+
outputs=[output_buf, video_output],
|
553 |
+
)
|
554 |
|
555 |
|
556 |
# Launch the Gradio app
|
557 |
if __name__ == "__main__":
|
558 |
+
# pipeline = Amodal3RImageTo3DPipeline.from_pretrained("Sm0kyWu/Amodal3R")
|
559 |
+
pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
|
560 |
pipeline.cuda()
|
561 |
try:
|
562 |
pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
|