JiantaoLin committed on
Commit
c5daa2d
·
1 Parent(s): ebe241c
Files changed (1) hide show
  1. pipeline/kiss3d_wrapper.py +24 -28
pipeline/kiss3d_wrapper.py CHANGED
@@ -78,7 +78,7 @@ def init_wrapper_from_config(config_path):
78
  flux_pipe.vae.enable_tiling()
79
 
80
  # load flux model and controlnet
81
- if flux_controlnet_pth is not None:
82
  flux_controlnet = FluxControlNetModel.from_pretrained(flux_controlnet_pth, torch_dtype=torch.bfloat16)
83
  flux_pipe = convert_flux_pipeline(flux_pipe, FluxControlNetImg2ImgPipeline, controlnet=[flux_controlnet])
84
 
@@ -90,7 +90,7 @@ def init_wrapper_from_config(config_path):
90
 
91
  # load redux model
92
  flux_redux_pipe = None
93
- if flux_redux_pth is not None:
94
  flux_redux_pipe = FluxPriorReduxPipeline.from_pretrained(flux_redux_pth, torch_dtype=torch.bfloat16, token=access_token)
95
  flux_redux_pipe.text_encoder = flux_pipe.text_encoder
96
  flux_redux_pipe.text_encoder_2 = flux_pipe.text_encoder_2
@@ -101,41 +101,37 @@ def init_wrapper_from_config(config_path):
101
 
102
  # logger.warning(f"GPU memory allocated after load flux model on {flux_device}: {torch.cuda.memory_allocated(device=flux_device) / 1024**3} GB")
103
 
104
- # TODO: load pulid model
105
-
106
  # init multiview model
107
- logger.info('==> Loading multiview diffusion model ...')
108
- multiview_device = config_['multiview'].get('device', 'cpu')
109
- multiview_pipeline = DiffusionPipeline.from_pretrained(
110
- config_['multiview']['base_model'],
111
- custom_pipeline=config_['multiview']['custom_pipeline'],
112
- torch_dtype=torch.float16,
113
- )
114
- multiview_pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
115
- multiview_pipeline.scheduler.config, timestep_spacing='trailing'
116
- )
117
 
118
- # unet_ckpt_path = hf_hub_download(repo_id="LTT/Kiss3DGen", filename="flexgen_19w.ckpt", repo_type="model", token=access_token)
119
- unet_ckpt_path = hf_hub_download(repo_id="LTT/Kiss3DGen", filename="flexgen.ckpt", repo_type="model", token=access_token)
120
- if unet_ckpt_path is not None:
121
- state_dict = torch.load(unet_ckpt_path, map_location='cpu')
122
- # state_dict = {k[10:]: v for k, v in state_dict.items() if k.startswith('unet.unet.')}
123
- multiview_pipeline.unet.load_state_dict(state_dict, strict=True)
124
 
125
  # multiview_pipeline.to(multiview_device)
126
  # logger.warning(f"GPU memory allocated after load multiview model on {multiview_device}: {torch.cuda.memory_allocated(device=multiview_device) / 1024**3} GB")
127
- # multiview_pipeline = None
128
 
129
 
130
  # load caption model
131
- logger.info('==> Loading caption model ...')
132
- caption_device = config_['caption'].get('device', 'cpu')
133
- caption_model = AutoModelForCausalLM.from_pretrained(config_['caption']['base_model'], \
134
- torch_dtype=torch.bfloat16, trust_remote_code=True)
135
- caption_processor = AutoProcessor.from_pretrained(config_['caption']['base_model'], trust_remote_code=True)
136
  # logger.warning(f"GPU memory allocated after load caption model on {caption_device}: {torch.cuda.memory_allocated(device=caption_device) / 1024**3} GB")
137
- # caption_processor = None
138
- # caption_model = None
139
 
140
  # load reconstruction model
141
  logger.info('==> Loading reconstruction model ...')
 
78
  flux_pipe.vae.enable_tiling()
79
 
80
  # load flux model and controlnet
81
+ if flux_controlnet_pth is not None and False:
82
  flux_controlnet = FluxControlNetModel.from_pretrained(flux_controlnet_pth, torch_dtype=torch.bfloat16)
83
  flux_pipe = convert_flux_pipeline(flux_pipe, FluxControlNetImg2ImgPipeline, controlnet=[flux_controlnet])
84
 
 
90
 
91
  # load redux model
92
  flux_redux_pipe = None
93
+ if flux_redux_pth is not None and False:
94
  flux_redux_pipe = FluxPriorReduxPipeline.from_pretrained(flux_redux_pth, torch_dtype=torch.bfloat16, token=access_token)
95
  flux_redux_pipe.text_encoder = flux_pipe.text_encoder
96
  flux_redux_pipe.text_encoder_2 = flux_pipe.text_encoder_2
 
101
 
102
  # logger.warning(f"GPU memory allocated after load flux model on {flux_device}: {torch.cuda.memory_allocated(device=flux_device) / 1024**3} GB")
103
 
 
 
104
  # init multiview model
105
+ # logger.info('==> Loading multiview diffusion model ...')
106
+ # multiview_device = config_['multiview'].get('device', 'cpu')
107
+ # multiview_pipeline = DiffusionPipeline.from_pretrained(
108
+ # config_['multiview']['base_model'],
109
+ # custom_pipeline=config_['multiview']['custom_pipeline'],
110
+ # torch_dtype=torch.float16,
111
+ # )
112
+ # multiview_pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
113
+ # multiview_pipeline.scheduler.config, timestep_spacing='trailing'
114
+ # )
115
 
116
+ # unet_ckpt_path = hf_hub_download(repo_id="LTT/Kiss3DGen", filename="flexgen.ckpt", repo_type="model", token=access_token)
117
+ # if unet_ckpt_path is not None:
118
+ # state_dict = torch.load(unet_ckpt_path, map_location='cpu')
119
+ # multiview_pipeline.unet.load_state_dict(state_dict, strict=True)
 
 
120
 
121
  # multiview_pipeline.to(multiview_device)
122
  # logger.warning(f"GPU memory allocated after load multiview model on {multiview_device}: {torch.cuda.memory_allocated(device=multiview_device) / 1024**3} GB")
123
+ multiview_pipeline = None
124
 
125
 
126
  # load caption model
127
+ # logger.info('==> Loading caption model ...')
128
+ # caption_device = config_['caption'].get('device', 'cpu')
129
+ # caption_model = AutoModelForCausalLM.from_pretrained(config_['caption']['base_model'], \
130
+ # torch_dtype=torch.bfloat16, trust_remote_code=True)
131
+ # caption_processor = AutoProcessor.from_pretrained(config_['caption']['base_model'], trust_remote_code=True)
132
  # logger.warning(f"GPU memory allocated after load caption model on {caption_device}: {torch.cuda.memory_allocated(device=caption_device) / 1024**3} GB")
133
+ caption_processor = None
134
+ caption_model = None
135
 
136
  # load reconstruction model
137
  logger.info('==> Loading reconstruction model ...')