Update

Files changed:
- app_image_to_3d.py +8 -17
- app_text_to_3d.py +2 -14
- model.py +29 -43
- requirements.txt +1 -1
- style.css +0 -8
app_image_to_3d.py
CHANGED

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import pathlib
 import shlex
 import subprocess
 
@@ -11,14 +12,15 @@ from utils import randomize_seed_fn
 
 
 def create_demo(model: Model) -> gr.Blocks:
-
-
-
-
+    if not pathlib.Path('corgi.png').exists():
+        subprocess.run(
+            shlex.split(
+                'wget https://raw.githubusercontent.com/openai/shap-e/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/examples/example_data/corgi.png -O corgi.png'
+            ))
     examples = ['corgi.png']
 
     def process_example_fn(image_path: str) -> str:
-        return model.run_image(image_path
+        return model.run_image(image_path)
 
     with gr.Blocks() as demo:
         with gr.Box():
@@ -26,7 +28,7 @@ def create_demo(model: Model) -> gr.Blocks:
                              show_label=False,
                              type='filepath')
            run_button = gr.Button('Run')
-           result = gr.
+           result = gr.Model3D(label='Result', show_label=False)
            with gr.Accordion('Advanced options', open=False):
                seed = gr.Slider(label='Seed',
                                 minimum=0,
@@ -46,15 +48,6 @@ def create_demo(model: Model) -> gr.Blocks:
                                                maximum=100,
                                                step=1,
                                                value=64)
-               image_size = gr.Slider(label='Image size',
-                                      minimum=64,
-                                      maximum=256,
-                                      step=64,
-                                      value=128)
-               render_mode = gr.Dropdown(label='Render mode',
-                                         choices=['nerf', 'stf'],
-                                         value='nerf',
-                                         visible=False)
 
        gr.Examples(examples=examples,
                    inputs=image,
@@ -67,8 +60,6 @@ def create_demo(model: Model) -> gr.Blocks:
            seed,
            guidance_scale,
            num_inference_steps,
-           image_size,
-           render_mode,
        ]
 
        run_button.click(
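The key UI change here is that `result` becomes a `gr.Model3D` component, so the click handler is now expected to return the path of a .glb file rather than a rendered video, and the image-size/render-mode controls tied to the old video path are dropped. A minimal, self-contained sketch of that wiring, assuming Gradio 3.28.x (`fake_run` and the `corgi.glb` path are placeholders standing in for `model.run_image` and its output, not code from the Space):

#!/usr/bin/env python
# Minimal sketch of the new output wiring, not the Space's full demo.
import gradio as gr


def fake_run(image_path: str) -> str:
    # Placeholder for model.run_image: the real function samples a Shap-E
    # latent from the input image and returns the path of an exported .glb.
    return 'corgi.glb'


with gr.Blocks() as demo:
    image = gr.Image(label='Input image', show_label=False, type='filepath')
    run_button = gr.Button('Run')
    result = gr.Model3D(label='Result', show_label=False)
    run_button.click(fn=fake_run, inputs=image, outputs=result)

if __name__ == '__main__':
    demo.queue().launch()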
app_text_to_3d.py
CHANGED

@@ -21,7 +21,7 @@ def create_demo(model: Model) -> gr.Blocks:
    ]
 
    def process_example_fn(prompt: str) -> str:
-       return model.run_text(prompt
+       return model.run_text(prompt)
 
    with gr.Blocks() as demo:
        with gr.Box():
@@ -32,7 +32,7 @@ def create_demo(model: Model) -> gr.Blocks:
                             max_lines=1,
                             placeholder='Enter your prompt').style(container=False)
            run_button = gr.Button('Run').style(full_width=False)
-           result = gr.
+           result = gr.Model3D(label='Result', show_label=False)
            with gr.Accordion('Advanced options', open=False):
                seed = gr.Slider(label='Seed',
                                 minimum=0,
@@ -52,15 +52,6 @@ def create_demo(model: Model) -> gr.Blocks:
                                                maximum=100,
                                                step=1,
                                                value=64)
-               image_size = gr.Slider(label='Image size',
-                                      minimum=64,
-                                      maximum=256,
-                                      step=64,
-                                      value=128)
-               render_mode = gr.Dropdown(label='Render mode',
-                                         choices=['nerf', 'stf'],
-                                         value='nerf',
-                                         visible=False)
 
        gr.Examples(examples=examples,
                    inputs=prompt,
@@ -73,8 +64,6 @@ def create_demo(model: Model) -> gr.Blocks:
            seed,
            guidance_scale,
            num_inference_steps,
-           image_size,
-           render_mode,
        ]
        prompt.submit(
            fn=randomize_seed_fn,
@@ -86,7 +75,6 @@ def create_demo(model: Model) -> gr.Blocks:
            inputs=inputs,
            outputs=result,
        )
-
        run_button.click(
            fn=randomize_seed_fn,
            inputs=[seed, randomize_seed],
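Both event handlers above run `randomize_seed_fn` before calling the model, so the seed shown in the slider is refreshed when the user asks for a random seed. That helper lives in `utils.py`, which is not part of this diff; the following is only a plausible sketch of how such a helper behaves, not the file's actual contents:

# Assumed shape of utils.randomize_seed_fn (utils.py is not in this diff).
import random

MAX_SEED = 2**32 - 1


def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    # When the "Randomize seed" option is enabled, draw a fresh seed;
    # otherwise pass the current slider value through unchanged.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed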
model.py
CHANGED

@@ -1,15 +1,15 @@
 import tempfile
 
-import imageio
 import numpy as np
-import PIL.Image
 import torch
+import trimesh
 from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
 from shap_e.diffusion.sample import sample_latents
 from shap_e.models.download import load_config, load_model
 from shap_e.models.nn.camera import (DifferentiableCameraBatch,
                                      DifferentiableProjectiveCamera)
 from shap_e.models.transmitter.base import Transmitter, VectorDecoder
+from shap_e.rendering.torch_mesh import TorchMesh
 from shap_e.util.collections import AttrDict
 from shap_e.util.image_util import load_image
 
@@ -47,23 +47,20 @@ def create_pan_cameras(size: int,
     )
 
 
-# Copied from https://github.com/openai/shap-e/blob/
+# Copied from https://github.com/openai/shap-e/blob/8625e7c15526d8510a2292f92165979268d0e945/shap_e/util/notebooks.py#LL64C1-L76C33
 @torch.no_grad()
-def decode_latent_images(
+def decode_latent_mesh(
     xm: Transmitter | VectorDecoder,
     latent: torch.Tensor,
-    cameras: DifferentiableCameraBatch,
-    rendering_mode: str = 'stf',
-):
+) -> TorchMesh:
     decoded = xm.renderer.render_views(
-        AttrDict(cameras=cameras),
+        AttrDict(cameras=create_pan_cameras(
+            2, latent.device)),  # lowest resolution possible
         params=(xm.encoder if isinstance(xm, Transmitter) else
                 xm).bottleneck_to_params(latent[None]),
-        options=AttrDict(rendering_mode=rendering_mode,
-                         render_with_direction=False),
+        options=AttrDict(rendering_mode='stf', render_with_direction=False),
     )
-    arr = decoded.channels.clamp(0, 255).to(torch.uint8)[0].cpu().numpy()
-    return [PIL.Image.fromarray(x) for x in arr]
+    return decoded.raw_meshes[0]
 
 
 class Model:

@@ -82,24 +79,29 @@
         self.model = load_model(model_name, device=self.device)
         self.model_name = model_name
 
-
-
-
-
-
-
-
-
+    def to_glb(self, latent: torch.Tensor) -> str:
+        ply_path = tempfile.NamedTemporaryFile(suffix='.ply',
+                                               delete=False,
+                                               mode='w+b')
+        decode_latent_mesh(self.xm, latent).tri_mesh().write_ply(ply_path)
+
+        mesh = trimesh.load(ply_path.name)
+        rot = trimesh.transformations.rotation_matrix(-np.pi / 2, [1, 0, 0])
+        mesh = mesh.apply_transform(rot)
+        rot = trimesh.transformations.rotation_matrix(np.pi, [0, 1, 0])
+        mesh = mesh.apply_transform(rot)
+
+        mesh_path = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
+        mesh.export(mesh_path.name, file_type='glb')
+
+        return mesh_path.name
 
     def run_text(self,
                  prompt: str,
                  seed: int = 0,
                  guidance_scale: float = 15.0,
-                 num_steps: int = 64,
-                 output_image_size: int = 64,
-                 render_mode: str = 'nerf') -> str:
+                 num_steps: int = 64) -> str:
         self.load_model('text300M')
-
         torch.manual_seed(seed)
 
         latents = sample_latents(

@@ -117,27 +119,17 @@
             sigma_max=160,
             s_churn=0,
         )
-
-        cameras = create_pan_cameras(output_image_size, self.device)
-        frames = decode_latent_images(self.xm,
-                                      latents[0],
-                                      cameras,
-                                      rendering_mode=render_mode)
-        return self.to_video(frames)
+        return self.to_glb(latents[0])
 
     def run_image(self,
                   image_path: str,
                   seed: int = 0,
                   guidance_scale: float = 3.0,
-                  num_steps: int = 64,
-                  output_image_size: int = 64,
-                  render_mode: str = 'nerf') -> str:
+                  num_steps: int = 64) -> str:
         self.load_model('image300M')
-
         torch.manual_seed(seed)
 
         image = load_image(image_path)
-
         latents = sample_latents(
             batch_size=1,
             model=self.model,

@@ -153,10 +145,4 @@
             sigma_max=160,
             s_churn=0,
         )
-
-        cameras = create_pan_cameras(output_image_size, self.device)
-        frames = decode_latent_images(self.xm,
-                                      latents[0],
-                                      cameras,
-                                      rendering_mode=render_mode)
-        return self.to_video(frames)
+        return self.to_glb(latents[0])
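The new `to_glb` replaces the old render-to-video path: the sampled latent is decoded to a mesh with `decode_latent_mesh`, written out as a temporary PLY, reloaded with trimesh, rotated so it sits the right way up in the viewer, and exported as binary glTF for `gr.Model3D`. The reorientation step can be tried in isolation with a trimesh primitive; the sketch below is standalone, and reading the two rotations as a z-up to y-up viewer adjustment is an interpretation, not something stated in the diff:

# Standalone sketch of the PLY -> GLB post-processing that to_glb performs,
# using a trimesh primitive in place of the decoded Shap-E mesh.
import numpy as np
import trimesh

mesh = trimesh.creation.icosphere()  # stand-in for the decoded mesh

# Rotate -90 degrees about X, then 180 degrees about Y, matching to_glb;
# this reorients the mesh for display in the gr.Model3D viewer.
rot = trimesh.transformations.rotation_matrix(-np.pi / 2, [1, 0, 0])
mesh.apply_transform(rot)
rot = trimesh.transformations.rotation_matrix(np.pi, [0, 1, 0])
mesh.apply_transform(rot)

# Binary glTF (.glb) is the format gr.Model3D consumes.
mesh.export('output.glb', file_type='glb')

Going through an intermediate PLY via `tri_mesh().write_ply(...)` and reloading it with `trimesh.load` keeps the conversion simple at the cost of one extra temporary file.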
requirements.txt
CHANGED

@@ -1,5 +1,5 @@
 git+https://github.com/openai/shap-e@8625e7c
 gradio==3.28.3
-imageio[ffmpeg]==2.28.1
 torch==2.0.0
 torchvision==0.15.1
+trimesh==3.21.5
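The dependency change mirrors the output change: `imageio[ffmpeg]` was only needed to encode the turntable video, while mesh export now goes through trimesh. A quick local sanity check of the pinned trimesh build (an assumed workflow, not part of the Space) could look like this:

# Confirm the pinned trimesh can export and reload a binary glTF file.
import trimesh

box = trimesh.creation.box(extents=(1, 1, 1))
box.export('box.glb', file_type='glb')
print(trimesh.load('box.glb'))  # GLB files load back as a trimesh.Scene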
style.css
CHANGED

@@ -8,14 +8,6 @@ h1 {
   padding-top: 1.5rem;
 }
 
-#result-1 video {
-  object-fit: scale-down;
-}
-
-#result-2 video {
-  object-fit: scale-down;
-}
-
 #prompt-container {
   gap: 0;
 }