jallenjia committed on
Commit
8ced43a
·
1 Parent(s): c422602

manual load weight

Browse files
.gitignore CHANGED
@@ -21,3 +21,4 @@ venv/
21
  *.log
22
  web_custom_versions/
23
  .DS_Store
 
 
21
  *.log
22
  web_custom_versions/
23
  .DS_Store
24
+ python_lib/
custom_nodes/comfyui-florence2/nodes.py CHANGED
@@ -128,29 +128,29 @@ class DownloadAndLoadFlorence2Model:
128
 
129
  print(f"Florence2 using {attention} for attention")
130
 
131
- if convert_to_safetensors:
132
- model_weight_path = os.path.join(model_path, 'pytorch_model.bin')
133
- if os.path.exists(model_weight_path):
134
- safetensors_weight_path = os.path.join(model_path, 'model.safetensors')
135
- print(f"Converting {model_weight_path} to {safetensors_weight_path}")
136
- if not os.path.exists(safetensors_weight_path):
137
- sd = torch.load(model_weight_path, map_location=offload_device)
138
- sd_new = {}
139
- for k, v in sd.items():
140
- sd_new[k] = v.clone()
141
- save_file(sd_new, safetensors_weight_path)
142
- if os.path.exists(safetensors_weight_path):
143
- print(f"Conversion successful. Deleting original file: {model_weight_path}")
144
- os.remove(model_weight_path)
145
- print(f"Original {model_weight_path} file deleted.")
146
 
147
  if transformers.__version__ < '4.51.0':
148
  with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): #workaround for unnecessary flash_attn requirement
149
- model = AutoModelForCausalLM.from_pretrained(model_path, attn_implementation=attention, torch_dtype=dtype,trust_remote_code=True, device_map="cpu", low_cpu_mem_usage=False)
 
 
 
150
  else:
151
  from .modeling_florence2 import Florence2ForConditionalGeneration
152
- model = Florence2ForConditionalGeneration.from_pretrained(model_path, attn_implementation=attention, torch_dtype=dtype, device_map="cpu", low_cpu_mem_usage=False)
153
-
 
 
 
 
154
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
155
 
156
  if lora is not None:
@@ -231,28 +231,32 @@ class Florence2ModelLoader:
231
  model_path = Florence2ModelLoader.model_paths.get(model)
232
  print(f"Loading model from {model_path}")
233
  print(f"Florence2 using {attention} for attention")
234
- if convert_to_safetensors:
235
- model_weight_path = os.path.join(model_path, 'pytorch_model.bin')
236
- if os.path.exists(model_weight_path):
237
- safetensors_weight_path = os.path.join(model_path, 'model.safetensors')
238
- print(f"Converting {model_weight_path} to {safetensors_weight_path}")
239
- if not os.path.exists(safetensors_weight_path):
240
- sd = torch.load(model_weight_path, map_location=offload_device)
241
- sd_new = {}
242
- for k, v in sd.items():
243
- sd_new[k] = v.clone()
244
- save_file(sd_new, safetensors_weight_path)
245
- if os.path.exists(safetensors_weight_path):
246
- print(f"Conversion successful. Deleting original file: {model_weight_path}")
247
- os.remove(model_weight_path)
248
- print(f"Original {model_weight_path} file deleted.")
249
 
250
  if transformers.__version__ < '4.51.0':
251
  with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): #workaround for unnecessary flash_attn requirement
252
- model = AutoModelForCausalLM.from_pretrained(model_path, attn_implementation=attention, torch_dtype=dtype,trust_remote_code=True, device_map="cpu", low_cpu_mem_usage=False)
 
 
 
253
  else:
254
  from .modeling_florence2 import Florence2ForConditionalGeneration
255
- model = Florence2ForConditionalGeneration.from_pretrained(model_path, attn_implementation=attention, torch_dtype=dtype, device_map="cpu", low_cpu_mem_usage=False)
 
 
 
256
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
257
 
258
  if lora is not None:
 
128
 
129
  print(f"Florence2 using {attention} for attention")
130
 
131
+ from transformers import AutoConfig
132
+
133
+ # Manually load the state dict to CPU to avoid issues with ZeroGPU patching
134
+ print("Manually loading weights to CPU...")
135
+ weights_path = os.path.join(model_path, "pytorch_model.bin")
136
+ state_dict = torch.load(weights_path, map_location="cpu")
137
+
138
+ config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
 
 
 
 
 
 
 
139
 
140
  if transformers.__version__ < '4.51.0':
141
  with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): #workaround for unnecessary flash_attn requirement
142
+ model = AutoModelForCausalLM.from_pretrained(
143
+ None, config=config, state_dict=state_dict, attn_implementation=attention,
144
+ torch_dtype=dtype, trust_remote_code=True
145
+ )
146
  else:
147
  from .modeling_florence2 import Florence2ForConditionalGeneration
148
+ model = Florence2ForConditionalGeneration.from_pretrained(
149
+ None, config=config, state_dict=state_dict, attn_implementation=attention, torch_dtype=dtype
150
+ )
151
+
152
+ # We don't need to call .to(offload_device) here as it's already on CPU
153
+ # and the run node will handle moving it to the GPU.
154
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
155
 
156
  if lora is not None:
 
231
  model_path = Florence2ModelLoader.model_paths.get(model)
232
  print(f"Loading model from {model_path}")
233
  print(f"Florence2 using {attention} for attention")
234
+
235
+ from transformers import AutoConfig
236
+
237
+ # Manually load the state dict to CPU to avoid issues with ZeroGPU patching
238
+ print("Manually loading weights to CPU...")
239
+ # Prefer safetensors if they exist (potentially after conversion)
240
+ weights_path = os.path.join(model_path, "model.safetensors")
241
+ if not os.path.exists(weights_path):
242
+ weights_path = os.path.join(model_path, "pytorch_model.bin")
243
+
244
+ state_dict = torch.load(weights_path, map_location="cpu")
245
+
246
+ config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
 
 
247
 
248
  if transformers.__version__ < '4.51.0':
249
  with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): #workaround for unnecessary flash_attn requirement
250
+ model = AutoModelForCausalLM.from_pretrained(
251
+ None, config=config, state_dict=state_dict, attn_implementation=attention,
252
+ torch_dtype=dtype, trust_remote_code=True
253
+ )
254
  else:
255
  from .modeling_florence2 import Florence2ForConditionalGeneration
256
+ model = Florence2ForConditionalGeneration.from_pretrained(
257
+ None, config=config, state_dict=state_dict, attn_implementation=attention, torch_dtype=dtype
258
+ )
259
+
260
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
261
 
262
  if lora is not None: