Commit fdeb0ad
Parent(s): 597a667
Refactor device handling across multiple modules to enforce CUDA usage explicitly. This change ensures consistent device allocation for model operations, enhancing performance on systems with GPU support; note that CPU-only execution now fails fast with a RuntimeError instead of falling back.
Files changed:
- app.py (+8 -4)
- imagedream/ldm/modules/encoders/modules.py (+1 -1)
- inference.py (+8 -29)
- libs/sample.py (+10 -17)
- pipelines.py (+2 -2)
- util/flexicubes.py (+1 -1)
- util/flexicubes_geometry.py (+1 -1)
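The recurring pattern across these files: fail fast when CUDA is missing, then pin weights and modules to an explicit torch.device("cuda") instead of silently falling back to CPU. A minimal sketch of that pattern, assuming a PyTorch nn.Module and a checkpoint path (the load_model helper is illustrative, not from the repo):

import torch

# Fail fast rather than degrading to a CPU fallback.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available! Please check your GPU and CUDA installation.")

device = torch.device("cuda")

def load_model(model: torch.nn.Module, ckpt_path: str) -> torch.nn.Module:
    # map_location places deserialized tensors on the GPU directly,
    # and .to(device) moves any remaining parameters/buffers there.
    state = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(state, strict=False)
    return model.to(device)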
app.py
@@ -158,11 +158,16 @@ parser.add_argument(
 parser.add_argument("--device", type=str, default="cpu")
 args = parser.parse_args()
 
+if not torch.cuda.is_available():
+    raise RuntimeError("CUDA is not available! Please check your GPU and CUDA installation.")
+
+device = torch.device("cuda")
+
 crm_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="CRM.pth")
 specs = json.load(open("configs/specs_objaverse_total.json"))
 model = CRM(specs)
-model.load_state_dict(torch.load(crm_path, map_location="cuda"
-model = model.to("cuda"
+model.load_state_dict(torch.load(crm_path, map_location="cuda"), strict=False)
+model = model.to("cuda")
 
 stage1_config = OmegaConf.load(args.stage1_config).config
 stage2_config = OmegaConf.load(args.stage2_config).config
@@ -177,13 +182,12 @@ pixel_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="pixel-diffusion.pt
 stage1_model_config.resume = pixel_path
 stage2_model_config.resume = xyz_path
 
-device = args.device if hasattr(args, 'device') else ("cuda" if torch.cuda.is_available() else "cpu")
 pipeline = TwoStagePipeline(
     stage1_model_config,
     stage2_model_config,
     stage1_sampler_config,
     stage2_sampler_config,
-    device=
+    device="cuda",
    dtype=torch.float32
 )
 
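Two details of the app.py hunk worth noting: the --device CLI flag is still parsed but no longer consulted once device is hardcoded, and map_location controls where torch.load materializes tensors. A short sketch of the latter (the checkpoint filename mirrors the one above):

import torch

# Tensors are deserialized straight onto the current CUDA device,
# avoiding a CPU round-trip before model.to("cuda").
state = torch.load("CRM.pth", map_location="cuda")

# Equivalent, with an explicit device object:
state = torch.load("CRM.pth", map_location=torch.device("cuda"))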
imagedream/ldm/modules/encoders/modules.py
@@ -310,7 +310,7 @@ class FrozenCLIPT5Encoder(AbstractEncoder):
         clip_max_length=77,
         t5_max_length=77,
     ):
-        device = device
+        device = torch.device("cuda")
         super().__init__()
         self.clip_encoder = FrozenCLIPEmbedder(
             clip_version, device, max_length=clip_max_length
inference.py
@@ -92,38 +92,27 @@ def vertex_color_to_uv_textured_glb(obj_path, glb_path, texture_size=640):
     mesh.export(glb_path)
     image_texture.save("debug_texture.png")
 
-def generate3d(model, rgb, ccm, device):
-
+def generate3d(model, rgb, ccm, device=None):
+    device = torch.device("cuda")
     model.renderer = Renderer(tet_grid_size=model.tet_grid_size, camera_angle_num=model.camera_angle_num,
                               scale=model.input.scale, geo_type = model.geo_type)
-
-    color_tri = torch.from_numpy(rgb)/255
-    xyz_tri = torch.from_numpy(ccm[:,:,(2,1,0)])/255
+    color_tri = torch.from_numpy(rgb).to(device)/255
+    xyz_tri = torch.from_numpy(ccm[:,:,(2,1,0)]).to(device)/255
     color = color_tri.permute(2,0,1)
     xyz = xyz_tri.permute(2,0,1)
-
-
     def get_imgs(color):
-        # color : [C, H, W*6]
         color_list = []
         color_list.append(color[:,:,256*5:256*(1+5)])
         for i in range(0,5):
             color_list.append(color[:,:,256*i:256*(1+i)])
-        return torch.stack(color_list, dim=0)
-
-    triplane_color = get_imgs(color).permute(0,2,3,1).unsqueeze(0).to(device)# [1, 6, H, W, C]
-
+        return torch.stack(color_list, dim=0)
+    triplane_color = get_imgs(color).permute(0,2,3,1).unsqueeze(0).to(device)
     color = get_imgs(color)
     xyz = get_imgs(xyz)
-
-
-    xyz = get_tri(xyz, dim=0, blender= True, scale = 1, fix= True).unsqueeze(0)
-
+    color = get_tri(color, dim=0, blender= True, scale = 1).unsqueeze(0).to(device)
+    xyz = get_tri(xyz, dim=0, blender= True, scale = 1, fix= True).unsqueeze(0).to(device)
     triplane = torch.cat([color,xyz],dim=1).to(device)
-    # 3D visualize
     model.eval()
-
-
     if model.denoising == True:
         tnew = 20
         tnew = torch.randint(tnew, tnew+1, [triplane.shape[0]], dtype=torch.long, device=triplane.device)
@@ -137,35 +126,25 @@ def generate3d(model, rgb, ccm, device):
         print(f"unet takes {elapsed_time}s")
     else:
         triplane_feature2 = model.unet2(triplane)
-
-
     with torch.no_grad():
         data_config = {
             'resolution': [1024, 1024],
             "triview_color": triplane_color.to(device),
         }
-
         verts, faces = model.decode(data_config, triplane_feature2)
-
         data_config['verts'] = verts[0]
         data_config['faces'] = faces
-
-
    from kiui.mesh_utils import clean_mesh
    verts, faces = clean_mesh(data_config['verts'].squeeze().cpu().numpy().astype(np.float32), data_config['faces'].squeeze().cpu().numpy().astype(np.int32), repair = False, remesh=True, remesh_size=0.005, remesh_iters=1)
    data_config['verts'] = torch.from_numpy(verts).to(device).contiguous()
    data_config['faces'] = torch.from_numpy(faces).to(device).contiguous()
-
    start_time = time.time()
    with torch.no_grad():
        mesh_path_glb = tempfile.NamedTemporaryFile(suffix=f"", delete=False).name
        model.export_mesh(data_config, mesh_path_glb, tri_fea_2 = triplane_feature2)
-
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"uv takes {elapsed_time}s")
-
-    # Convert .obj (with vertex colors) to UV-mapped textured .glb
    obj_path = mesh_path_glb + ".obj"
    glb_path = mesh_path_glb + ".glb"
    vertex_color_to_uv_textured_glb(obj_path, glb_path)
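In generate3d, get_imgs slices a horizontally concatenated [C, H, W*6] strip of six 256-pixel-wide views into a [6, C, H, 256] stack, moving the sixth view to the front. A self-contained sketch with dummy data (the 256 tile width comes from the code above):

import torch

def get_imgs(color: torch.Tensor) -> torch.Tensor:
    # color: [C, H, W*6], six 256-px-wide views side by side.
    color_list = [color[:, :, 256*5:256*6]]  # view at index 5 goes first
    for i in range(5):
        color_list.append(color[:, :, 256*i:256*(i+1)])
    return torch.stack(color_list, dim=0)    # [6, C, H, 256]

strip = torch.rand(3, 256, 256 * 6)
print(get_imgs(strip).shape)  # torch.Size([6, 3, 256, 256])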
libs/sample.py
@@ -23,25 +23,20 @@ class ImageDreamDiffusion:
         image_size=256,
         seed=1234,
     ) -> None:
-        device = device
-
-        size = image_size
+        device = torch.device("cuda")
+        self.device = device
         self.seed = seed
         batch_size = max(4, num_frames)
-
         neg_texts = "uniform low no texture ugly, boring, bad anatomy, blurry, pixelated, obscure, unnatural colors, poor lighting, dull, and unclear."
         uc = model.get_learned_conditioning([neg_texts]).to(device)
         sampler = DDIMSampler(model)
-
-        # pre-compute camera matrices
         camera = [get_camera_for_index(i).squeeze() for i in camera_views]
-        camera[ref_position] = torch.zeros_like(camera[ref_position])
+        camera[ref_position] = torch.zeros_like(camera[ref_position])
         camera = torch.stack(camera)
         camera = camera.repeat(batch_size // num_frames, 1).to(device)
-
         self.image_transform = T.Compose(
             [
-                T.Resize((
+                T.Resize((image_size, image_size)),
                 T.ToTensor(),
                 T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
             ]
@@ -52,8 +47,7 @@ class ImageDreamDiffusion:
         self.random_background = random_background
         self.resize_rate = resize_rate
         self.num_frames = num_frames
-        self.size =
-        self.device = device
+        self.size = image_size
         self.batch_size = batch_size
         self.model = model
         self.sampler = sampler
@@ -205,11 +199,11 @@ class ImageDreamDiffusionStage2:
     def __init__(
         self,
        model,
-        device
-        dtype
-        num_frames
-        camera_views
-        ref_position
+        device,
+        dtype,
+        num_frames,
+        camera_views,
+        ref_position,
        random_background=False,
        offset_noise=False,
        resize_rate=1,
@@ -217,7 +211,6 @@ class ImageDreamDiffusionStage2:
        image_size=256,
        seed=1234,
    ) -> None:
-        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        assert mode in ["pixel", "local"]
 
        size = image_size
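The rebuilt image_transform resizes inputs to a square image_size and normalizes each channel with mean and std 0.5, mapping pixel values from [0, 1] to [-1, 1] as diffusion models typically expect. A standalone sketch using a blank test image:

import torchvision.transforms as T
from PIL import Image

image_size = 256  # the constructor default above
image_transform = T.Compose([
    T.Resize((image_size, image_size)),
    T.ToTensor(),                                   # PIL image -> float tensor in [0, 1]
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # (x - 0.5) / 0.5 -> [-1, 1]
])

x = image_transform(Image.new("RGB", (512, 320)))
print(x.shape)  # torch.Size([3, 256, 256])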
pipelines.py
@@ -16,7 +16,7 @@ class TwoStagePipeline(object):
         stage2_model_config,
         stage1_sampler_config,
         stage2_sampler_config,
-        device=
+        device=None,
         dtype=torch.float16,
         resize_rate=1,
     ) -> None:
@@ -25,7 +25,7 @@ class TwoStagePipeline(object):
         - the first stage was condition on single pixel image, gererate multi-view pixel image, based on the v2pp config
         - the second stage was condition on multiview pixel image generated by the first stage, generate the final image, based on the stage2-test config
         """
-        device = device
+        device = torch.device("cuda")
         self.resize_rate = resize_rate
 
         self.stage1_model = instantiate_from_config(OmegaConf.load(stage1_model_config.config).model)
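After this hunk, TwoStagePipeline's device parameter defaults to None but is unconditionally overwritten in the body, so the device="cuda" passed from app.py is effectively ignored as well. If honoring the argument were ever wanted back, one common pattern looks like this (a sketch, not the repo's code):

import torch

def resolve_device(device=None) -> torch.device:
    # Use an explicit argument when given; otherwise require CUDA,
    # matching this commit's fail-fast behavior.
    if device is not None:
        return torch.device(device)
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available!")
    return torch.device("cuda")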
util/flexicubes.py
@@ -65,7 +65,7 @@ class FlexiCubes:
     """
 
     def __init__(self, device=None, qef_reg_scale=1e-3, weight_scale=0.99):
-        device = device
+        device = torch.device("cuda")
         self.device = device
         self.dmc_table = torch.tensor(dmc_table, dtype=torch.long, device=device, requires_grad=False)
         self.num_vd_table = torch.tensor(num_vd_table,
util/flexicubes_geometry.py
@@ -33,7 +33,7 @@ class FlexiCubesGeometry(object):
     def __init__(
             self, grid_res=64, scale=2.0, device=None, renderer=None,
             render_type='neural_render', args=None):
-        device = device
+        device = torch.device("cuda")
         super(FlexiCubesGeometry, self).__init__()
         self.grid_res = grid_res
         self.device = device