Commit fdeb0ad
Parent(s): 597a667
Refactor device handling across multiple modules to enforce CUDA usage explicitly. This change ensures consistent device allocation for model operations, enhancing performance on systems with GPU support; note that CPU-only execution now fails fast with a RuntimeError instead of falling back.
Files changed:
- app.py (+8 -4)
- imagedream/ldm/modules/encoders/modules.py (+1 -1)
- inference.py (+8 -29)
- libs/sample.py (+10 -17)
- pipelines.py (+2 -2)
- util/flexicubes.py (+1 -1)
- util/flexicubes_geometry.py (+1 -1)
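The recurring pattern across these files: fail fast when CUDA is missing, then pin weights and modules to an explicit torch.device("cuda") instead of silently falling back to CPU. A minimal sketch of that pattern, assuming a PyTorch nn.Module and a checkpoint path (the load_model helper is illustrative, not from the repo):

import torch

# Fail fast rather than degrading to a CPU fallback.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available! Please check your GPU and CUDA installation.")

device = torch.device("cuda")

def load_model(model: torch.nn.Module, ckpt_path: str) -> torch.nn.Module:
    # map_location places deserialized tensors on the GPU directly,
    # and .to(device) moves any remaining parameters/buffers there.
    state = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(state, strict=False)
    return model.to(device)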
app.py
@@ -158,11 +158,16 @@ parser.add_argument(
 parser.add_argument("--device", type=str, default="cpu")
 args = parser.parse_args()
 
+if not torch.cuda.is_available():
+    raise RuntimeError("CUDA is not available! Please check your GPU and CUDA installation.")
+
+device = torch.device("cuda")
+
 crm_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="CRM.pth")
 specs = json.load(open("configs/specs_objaverse_total.json"))
 model = CRM(specs)
-model.load_state_dict(torch.load(crm_path, map_location="cuda"
-model = model.to("cuda"
+model.load_state_dict(torch.load(crm_path, map_location="cuda"), strict=False)
+model = model.to("cuda")
 
 stage1_config = OmegaConf.load(args.stage1_config).config
 stage2_config = OmegaConf.load(args.stage2_config).config
@@ -177,13 +182,12 @@ pixel_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="pixel-diffusion.pt
 stage1_model_config.resume = pixel_path
 stage2_model_config.resume = xyz_path
 
-device = args.device if hasattr(args, 'device') else ("cuda" if torch.cuda.is_available() else "cpu")
 pipeline = TwoStagePipeline(
     stage1_model_config,
     stage2_model_config,
     stage1_sampler_config,
     stage2_sampler_config,
-    device=
+    device="cuda",
    dtype=torch.float32
 )
 
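Two details of the app.py hunk worth noting: the --device CLI flag is still parsed but no longer consulted once device is hardcoded, and map_location controls where torch.load materializes tensors. A short sketch of the latter (the checkpoint filename mirrors the one above):

import torch

# Tensors are deserialized straight onto the current CUDA device,
# avoiding a CPU round-trip before model.to("cuda").
state = torch.load("CRM.pth", map_location="cuda")

# Equivalent, with an explicit device object:
state = torch.load("CRM.pth", map_location=torch.device("cuda"))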
imagedream/ldm/modules/encoders/modules.py
@@ -310,7 +310,7 @@ class FrozenCLIPT5Encoder(AbstractEncoder):
         clip_max_length=77,
         t5_max_length=77,
     ):
-        device = device
+        device = torch.device("cuda")
         super().__init__()
         self.clip_encoder = FrozenCLIPEmbedder(
             clip_version, device, max_length=clip_max_length
inference.py
@@ -92,38 +92,27 @@ def vertex_color_to_uv_textured_glb(obj_path, glb_path, texture_size=640):
     mesh.export(glb_path)
     image_texture.save("debug_texture.png")
 
-def generate3d(model, rgb, ccm, device):
-
+def generate3d(model, rgb, ccm, device=None):
+    device = torch.device("cuda")
     model.renderer = Renderer(tet_grid_size=model.tet_grid_size, camera_angle_num=model.camera_angle_num,
                               scale=model.input.scale, geo_type = model.geo_type)
-
-    color_tri = torch.from_numpy(rgb)/255
-    xyz_tri = torch.from_numpy(ccm[:,:,(2,1,0)])/255
+    color_tri = torch.from_numpy(rgb).to(device)/255
+    xyz_tri = torch.from_numpy(ccm[:,:,(2,1,0)]).to(device)/255
     color = color_tri.permute(2,0,1)
     xyz = xyz_tri.permute(2,0,1)
-
-
     def get_imgs(color):
-        # color : [C, H, W*6]
         color_list = []
         color_list.append(color[:,:,256*5:256*(1+5)])
         for i in range(0,5):
             color_list.append(color[:,:,256*i:256*(1+i)])
-        return torch.stack(color_list, dim=0)
-
-    triplane_color = get_imgs(color).permute(0,2,3,1).unsqueeze(0).to(device)# [1, 6, H, W, C]
-
+        return torch.stack(color_list, dim=0)
+    triplane_color = get_imgs(color).permute(0,2,3,1).unsqueeze(0).to(device)
     color = get_imgs(color)
     xyz = get_imgs(xyz)
-
-
-    xyz = get_tri(xyz, dim=0, blender= True, scale = 1, fix= True).unsqueeze(0)
-
+    color = get_tri(color, dim=0, blender= True, scale = 1).unsqueeze(0).to(device)
+    xyz = get_tri(xyz, dim=0, blender= True, scale = 1, fix= True).unsqueeze(0).to(device)
     triplane = torch.cat([color,xyz],dim=1).to(device)
-    # 3D visualize
     model.eval()
-
-
     if model.denoising == True:
         tnew = 20
         tnew = torch.randint(tnew, tnew+1, [triplane.shape[0]], dtype=torch.long, device=triplane.device)
@@ -137,35 +126,25 @@ def generate3d(model, rgb, ccm, device):
         print(f"unet takes {elapsed_time}s")
     else:
         triplane_feature2 = model.unet2(triplane)
-
-
     with torch.no_grad():
         data_config = {
             'resolution': [1024, 1024],
             "triview_color": triplane_color.to(device),
         }
-
         verts, faces = model.decode(data_config, triplane_feature2)
-
         data_config['verts'] = verts[0]
         data_config['faces'] = faces
-
-
    from kiui.mesh_utils import clean_mesh
    verts, faces = clean_mesh(data_config['verts'].squeeze().cpu().numpy().astype(np.float32), data_config['faces'].squeeze().cpu().numpy().astype(np.int32), repair = False, remesh=True, remesh_size=0.005, remesh_iters=1)
    data_config['verts'] = torch.from_numpy(verts).to(device).contiguous()
    data_config['faces'] = torch.from_numpy(faces).to(device).contiguous()
-
    start_time = time.time()
    with torch.no_grad():
        mesh_path_glb = tempfile.NamedTemporaryFile(suffix=f"", delete=False).name
        model.export_mesh(data_config, mesh_path_glb, tri_fea_2 = triplane_feature2)
-
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"uv takes {elapsed_time}s")
-
-    # Convert .obj (with vertex colors) to UV-mapped textured .glb
    obj_path = mesh_path_glb + ".obj"
    glb_path = mesh_path_glb + ".glb"
    vertex_color_to_uv_textured_glb(obj_path, glb_path)
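In generate3d, get_imgs slices a horizontally concatenated [C, H, W*6] strip of six 256-pixel-wide views into a [6, C, H, 256] stack, moving the sixth view to the front. A self-contained sketch with dummy data (the 256 tile width comes from the code above):

import torch

def get_imgs(color: torch.Tensor) -> torch.Tensor:
    # color: [C, H, W*6], six 256-px-wide views side by side.
    color_list = [color[:, :, 256*5:256*6]]  # view at index 5 goes first
    for i in range(5):
        color_list.append(color[:, :, 256*i:256*(i+1)])
    return torch.stack(color_list, dim=0)    # [6, C, H, 256]

strip = torch.rand(3, 256, 256 * 6)
print(get_imgs(strip).shape)  # torch.Size([6, 3, 256, 256])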
libs/sample.py
@@ -23,25 +23,20 @@ class ImageDreamDiffusion:
         image_size=256,
         seed=1234,
     ) -> None:
-        device = device
-
-        size = image_size
+        device = torch.device("cuda")
+        self.device = device
         self.seed = seed
         batch_size = max(4, num_frames)
-
         neg_texts = "uniform low no texture ugly, boring, bad anatomy, blurry, pixelated, obscure, unnatural colors, poor lighting, dull, and unclear."
         uc = model.get_learned_conditioning([neg_texts]).to(device)
         sampler = DDIMSampler(model)
-
-        # pre-compute camera matrices
         camera = [get_camera_for_index(i).squeeze() for i in camera_views]
-        camera[ref_position] = torch.zeros_like(camera[ref_position])
+        camera[ref_position] = torch.zeros_like(camera[ref_position])
         camera = torch.stack(camera)
         camera = camera.repeat(batch_size // num_frames, 1).to(device)
-
         self.image_transform = T.Compose(
             [
-                T.Resize((
+                T.Resize((image_size, image_size)),
                 T.ToTensor(),
                 T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
             ]
@@ -52,8 +47,7 @@ class ImageDreamDiffusion:
         self.random_background = random_background
         self.resize_rate = resize_rate
         self.num_frames = num_frames
-        self.size =
-        self.device = device
+        self.size = image_size
         self.batch_size = batch_size
         self.model = model
         self.sampler = sampler
@@ -205,11 +199,11 @@ class ImageDreamDiffusionStage2:
     def __init__(
         self,
        model,
-        device
-        dtype
-        num_frames
-        camera_views
-        ref_position
+        device,
+        dtype,
+        num_frames,
+        camera_views,
+        ref_position,
        random_background=False,
        offset_noise=False,
        resize_rate=1,
@@ -217,7 +211,6 @@ class ImageDreamDiffusionStage2:
        image_size=256,
        seed=1234,
    ) -> None:
-        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        assert mode in ["pixel", "local"]
 
        size = image_size
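The rebuilt image_transform resizes inputs to a square image_size and normalizes each channel with mean and std 0.5, mapping pixel values from [0, 1] to [-1, 1] as diffusion models typically expect. A standalone sketch using a blank test image:

import torchvision.transforms as T
from PIL import Image

image_size = 256  # the constructor default above
image_transform = T.Compose([
    T.Resize((image_size, image_size)),
    T.ToTensor(),                                   # PIL image -> float tensor in [0, 1]
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # (x - 0.5) / 0.5 -> [-1, 1]
])

x = image_transform(Image.new("RGB", (512, 320)))
print(x.shape)  # torch.Size([3, 256, 256])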
pipelines.py
@@ -16,7 +16,7 @@ class TwoStagePipeline(object):
         stage2_model_config,
         stage1_sampler_config,
         stage2_sampler_config,
-        device=
+        device=None,
         dtype=torch.float16,
         resize_rate=1,
     ) -> None:
@@ -25,7 +25,7 @@ class TwoStagePipeline(object):
         - the first stage was condition on single pixel image, gererate multi-view pixel image, based on the v2pp config
         - the second stage was condition on multiview pixel image generated by the first stage, generate the final image, based on the stage2-test config
         """
-        device = device
+        device = torch.device("cuda")
         self.resize_rate = resize_rate
 
         self.stage1_model = instantiate_from_config(OmegaConf.load(stage1_model_config.config).model)
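After this hunk, TwoStagePipeline's device parameter defaults to None but is unconditionally overwritten in the body, so the device="cuda" passed from app.py is effectively ignored as well. If honoring the argument were ever wanted back, one common pattern looks like this (a sketch, not the repo's code):

import torch

def resolve_device(device=None) -> torch.device:
    # Use an explicit argument when given; otherwise require CUDA,
    # matching this commit's fail-fast behavior.
    if device is not None:
        return torch.device(device)
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available!")
    return torch.device("cuda")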
util/flexicubes.py
@@ -65,7 +65,7 @@ class FlexiCubes:
     """
 
     def __init__(self, device=None, qef_reg_scale=1e-3, weight_scale=0.99):
-        device = device
+        device = torch.device("cuda")
         self.device = device
         self.dmc_table = torch.tensor(dmc_table, dtype=torch.long, device=device, requires_grad=False)
         self.num_vd_table = torch.tensor(num_vd_table,
util/flexicubes_geometry.py
@@ -33,7 +33,7 @@ class FlexiCubesGeometry(object):
     def __init__(
             self, grid_res=64, scale=2.0, device=None, renderer=None,
             render_type='neural_render', args=None):
-        device = device
+        device = torch.device("cuda")
         super(FlexiCubesGeometry, self).__init__()
         self.grid_res = grid_res
         self.device = device