Update

Files changed:
- app_image_to_3d.py +8 -17
- app_text_to_3d.py +2 -14
- model.py +29 -43
- requirements.txt +1 -1
- style.css +0 -8
app_image_to_3d.py
CHANGED

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import pathlib
 import shlex
 import subprocess
 
@@ -11,14 +12,15 @@ from utils import randomize_seed_fn
 
 
 def create_demo(model: Model) -> gr.Blocks:
-
-
-
-
+    if not pathlib.Path('corgi.png').exists():
+        subprocess.run(
+            shlex.split(
+                'wget https://raw.githubusercontent.com/openai/shap-e/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/examples/example_data/corgi.png -O corgi.png'
+            ))
     examples = ['corgi.png']
 
     def process_example_fn(image_path: str) -> str:
-        return model.run_image(image_path
+        return model.run_image(image_path)
 
     with gr.Blocks() as demo:
         with gr.Box():
@@ -26,7 +28,7 @@ def create_demo(model: Model) -> gr.Blocks:
                              show_label=False,
                              type='filepath')
            run_button = gr.Button('Run')
-           result = gr.
+           result = gr.Model3D(label='Result', show_label=False)
            with gr.Accordion('Advanced options', open=False):
                seed = gr.Slider(label='Seed',
                                 minimum=0,
@@ -46,15 +48,6 @@ def create_demo(model: Model) -> gr.Blocks:
                                                maximum=100,
                                                step=1,
                                                value=64)
-               image_size = gr.Slider(label='Image size',
-                                      minimum=64,
-                                      maximum=256,
-                                      step=64,
-                                      value=128)
-               render_mode = gr.Dropdown(label='Render mode',
-                                         choices=['nerf', 'stf'],
-                                         value='nerf',
-                                         visible=False)
 
        gr.Examples(examples=examples,
                    inputs=image,
@@ -67,8 +60,6 @@ def create_demo(model: Model) -> gr.Blocks:
            seed,
            guidance_scale,
            num_inference_steps,
-           image_size,
-           render_mode,
        ]
 
        run_button.click(
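The key UI change here is that `result` becomes a `gr.Model3D` component, so the click handler is now expected to return the path of a .glb file rather than a rendered video, and the image-size/render-mode controls tied to the old video path are dropped. A minimal, self-contained sketch of that wiring, assuming Gradio 3.28.x (`fake_run` and the `corgi.glb` path are placeholders standing in for `model.run_image` and its output, not code from the Space):

#!/usr/bin/env python
# Minimal sketch of the new output wiring, not the Space's full demo.
import gradio as gr


def fake_run(image_path: str) -> str:
    # Placeholder for model.run_image: the real function samples a Shap-E
    # latent from the input image and returns the path of an exported .glb.
    return 'corgi.glb'


with gr.Blocks() as demo:
    image = gr.Image(label='Input image', show_label=False, type='filepath')
    run_button = gr.Button('Run')
    result = gr.Model3D(label='Result', show_label=False)
    run_button.click(fn=fake_run, inputs=image, outputs=result)

if __name__ == '__main__':
    demo.queue().launch()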
app_text_to_3d.py
CHANGED

@@ -21,7 +21,7 @@ def create_demo(model: Model) -> gr.Blocks:
    ]
 
    def process_example_fn(prompt: str) -> str:
-       return model.run_text(prompt
+       return model.run_text(prompt)
 
    with gr.Blocks() as demo:
        with gr.Box():
@@ -32,7 +32,7 @@ def create_demo(model: Model) -> gr.Blocks:
                             max_lines=1,
                             placeholder='Enter your prompt').style(container=False)
            run_button = gr.Button('Run').style(full_width=False)
-           result = gr.
+           result = gr.Model3D(label='Result', show_label=False)
            with gr.Accordion('Advanced options', open=False):
                seed = gr.Slider(label='Seed',
                                 minimum=0,
@@ -52,15 +52,6 @@ def create_demo(model: Model) -> gr.Blocks:
                                                maximum=100,
                                                step=1,
                                                value=64)
-               image_size = gr.Slider(label='Image size',
-                                      minimum=64,
-                                      maximum=256,
-                                      step=64,
-                                      value=128)
-               render_mode = gr.Dropdown(label='Render mode',
-                                         choices=['nerf', 'stf'],
-                                         value='nerf',
-                                         visible=False)
 
        gr.Examples(examples=examples,
                    inputs=prompt,
@@ -73,8 +64,6 @@ def create_demo(model: Model) -> gr.Blocks:
            seed,
            guidance_scale,
            num_inference_steps,
-           image_size,
-           render_mode,
        ]
        prompt.submit(
            fn=randomize_seed_fn,
@@ -86,7 +75,6 @@ def create_demo(model: Model) -> gr.Blocks:
            inputs=inputs,
            outputs=result,
        )
-
        run_button.click(
            fn=randomize_seed_fn,
            inputs=[seed, randomize_seed],
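Both event handlers above run `randomize_seed_fn` before calling the model, so the seed shown in the slider is refreshed when the user asks for a random seed. That helper lives in `utils.py`, which is not part of this diff; the following is only a plausible sketch of how such a helper behaves, not the file's actual contents:

# Assumed shape of utils.randomize_seed_fn (utils.py is not in this diff).
import random

MAX_SEED = 2**32 - 1


def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    # When the "Randomize seed" option is enabled, draw a fresh seed;
    # otherwise pass the current slider value through unchanged.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed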
model.py
CHANGED

@@ -1,15 +1,15 @@
 import tempfile
 
-import imageio
 import numpy as np
-import PIL.Image
 import torch
+import trimesh
 from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
 from shap_e.diffusion.sample import sample_latents
 from shap_e.models.download import load_config, load_model
 from shap_e.models.nn.camera import (DifferentiableCameraBatch,
                                      DifferentiableProjectiveCamera)
 from shap_e.models.transmitter.base import Transmitter, VectorDecoder
+from shap_e.rendering.torch_mesh import TorchMesh
 from shap_e.util.collections import AttrDict
 from shap_e.util.image_util import load_image
 
@@ -47,23 +47,20 @@ def create_pan_cameras(size: int,
     )
 
 
-# Copied from https://github.com/openai/shap-e/blob/
+# Copied from https://github.com/openai/shap-e/blob/8625e7c15526d8510a2292f92165979268d0e945/shap_e/util/notebooks.py#LL64C1-L76C33
 @torch.no_grad()
-def decode_latent_images(
+def decode_latent_mesh(
     xm: Transmitter | VectorDecoder,
     latent: torch.Tensor,
-    cameras: DifferentiableCameraBatch,
-    rendering_mode: str = 'stf',
-):
+) -> TorchMesh:
     decoded = xm.renderer.render_views(
-        AttrDict(cameras=cameras),
+        AttrDict(cameras=create_pan_cameras(
+            2, latent.device)),  # lowest resolution possible
         params=(xm.encoder if isinstance(xm, Transmitter) else
                 xm).bottleneck_to_params(latent[None]),
-        options=AttrDict(rendering_mode=rendering_mode,
-                         render_with_direction=False),
+        options=AttrDict(rendering_mode='stf', render_with_direction=False),
     )
-    arr = decoded.channels.clamp(0, 255).to(torch.uint8)[0].cpu().numpy()
-    return [PIL.Image.fromarray(x) for x in arr]
+    return decoded.raw_meshes[0]
 
 
 class Model:

@@ -82,24 +79,29 @@
         self.model = load_model(model_name, device=self.device)
         self.model_name = model_name
 
-
-
-
-
-
-
-
-
+    def to_glb(self, latent: torch.Tensor) -> str:
+        ply_path = tempfile.NamedTemporaryFile(suffix='.ply',
+                                               delete=False,
+                                               mode='w+b')
+        decode_latent_mesh(self.xm, latent).tri_mesh().write_ply(ply_path)
+
+        mesh = trimesh.load(ply_path.name)
+        rot = trimesh.transformations.rotation_matrix(-np.pi / 2, [1, 0, 0])
+        mesh = mesh.apply_transform(rot)
+        rot = trimesh.transformations.rotation_matrix(np.pi, [0, 1, 0])
+        mesh = mesh.apply_transform(rot)
+
+        mesh_path = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
+        mesh.export(mesh_path.name, file_type='glb')
+
+        return mesh_path.name
 
     def run_text(self,
                  prompt: str,
                  seed: int = 0,
                  guidance_scale: float = 15.0,
-                 num_steps: int = 64,
-                 output_image_size: int = 64,
-                 render_mode: str = 'nerf') -> str:
+                 num_steps: int = 64) -> str:
         self.load_model('text300M')
-
         torch.manual_seed(seed)
 
         latents = sample_latents(

@@ -117,27 +119,17 @@
             sigma_max=160,
             s_churn=0,
         )
-
-        cameras = create_pan_cameras(output_image_size, self.device)
-        frames = decode_latent_images(self.xm,
-                                      latents[0],
-                                      cameras,
-                                      rendering_mode=render_mode)
-        return self.to_video(frames)
+        return self.to_glb(latents[0])
 
     def run_image(self,
                   image_path: str,
                   seed: int = 0,
                   guidance_scale: float = 3.0,
-                  num_steps: int = 64,
-                  output_image_size: int = 64,
-                  render_mode: str = 'nerf') -> str:
+                  num_steps: int = 64) -> str:
         self.load_model('image300M')
-
         torch.manual_seed(seed)
 
         image = load_image(image_path)
-
         latents = sample_latents(
             batch_size=1,
             model=self.model,

@@ -153,10 +145,4 @@
             sigma_max=160,
             s_churn=0,
         )
-
-        cameras = create_pan_cameras(output_image_size, self.device)
-        frames = decode_latent_images(self.xm,
-                                      latents[0],
-                                      cameras,
-                                      rendering_mode=render_mode)
-        return self.to_video(frames)
+        return self.to_glb(latents[0])
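The new `to_glb` replaces the old render-to-video path: the sampled latent is decoded to a mesh with `decode_latent_mesh`, written out as a temporary PLY, reloaded with trimesh, rotated so it sits the right way up in the viewer, and exported as binary glTF for `gr.Model3D`. The reorientation step can be tried in isolation with a trimesh primitive; the sketch below is standalone, and reading the two rotations as a z-up to y-up viewer adjustment is an interpretation, not something stated in the diff:

# Standalone sketch of the PLY -> GLB post-processing that to_glb performs,
# using a trimesh primitive in place of the decoded Shap-E mesh.
import numpy as np
import trimesh

mesh = trimesh.creation.icosphere()  # stand-in for the decoded mesh

# Rotate -90 degrees about X, then 180 degrees about Y, matching to_glb;
# this reorients the mesh for display in the gr.Model3D viewer.
rot = trimesh.transformations.rotation_matrix(-np.pi / 2, [1, 0, 0])
mesh.apply_transform(rot)
rot = trimesh.transformations.rotation_matrix(np.pi, [0, 1, 0])
mesh.apply_transform(rot)

# Binary glTF (.glb) is the format gr.Model3D consumes.
mesh.export('output.glb', file_type='glb')

Going through an intermediate PLY via `tri_mesh().write_ply(...)` and reloading it with `trimesh.load` keeps the conversion simple at the cost of one extra temporary file.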
requirements.txt
CHANGED

@@ -1,5 +1,5 @@
 git+https://github.com/openai/shap-e@8625e7c
 gradio==3.28.3
-imageio[ffmpeg]==2.28.1
 torch==2.0.0
 torchvision==0.15.1
+trimesh==3.21.5
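The dependency change mirrors the output change: `imageio[ffmpeg]` was only needed to encode the turntable video, while mesh export now goes through trimesh. A quick local sanity check of the pinned trimesh build (an assumed workflow, not part of the Space) could look like this:

# Confirm the pinned trimesh can export and reload a binary glTF file.
import trimesh

box = trimesh.creation.box(extents=(1, 1, 1))
box.export('box.glb', file_type='glb')
print(trimesh.load('box.glb'))  # GLB files load back as a trimesh.Scene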
style.css
CHANGED

@@ -8,14 +8,6 @@ h1 {
   padding-top: 1.5rem;
 }
 
-#result-1 video {
-  object-fit: scale-down;
-}
-
-#result-2 video {
-  object-fit: scale-down;
-}
-
 #prompt-container {
   gap: 0;
 }