Update llava/model/builder.py
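This commit hard-codes load_8bit = True ahead of the quantization branch in load_pretrained_model, so the model is always loaded through bitsandbytes 8-bit quantization regardless of the load_8bit flag the caller passes in.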
llava/model/builder.py  CHANGED  (+13 -12)
@@ -29,18 +29,19 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
     # if device != "cuda":
     #     kwargs['device_map'] = {"": device}

-    if load_8bit:
-        kwargs['load_in_8bit'] = True
-    elif load_4bit:
-        kwargs['load_in_4bit'] = True
-        kwargs['quantization_config'] = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type='nf4'
-        )
-    else:
-        kwargs['torch_dtype'] = torch.float16
+    load_8bit = True
+    if load_8bit:
+        kwargs['load_in_8bit'] = True
+    elif load_4bit:
+        kwargs['load_in_4bit'] = True
+        kwargs['quantization_config'] = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type='nf4'
+        )
+    else:
+        kwargs['torch_dtype'] = torch.float16

     if use_flash_attn:
         kwargs['attn_implementation'] = 'flash_attention_2'
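For context, here is a minimal sketch of the loading path these kwargs feed into, assuming the Space follows the stock LLaVA pattern of forwarding them to a Hugging Face from_pretrained call. The model path and the surrounding scaffolding are illustrative assumptions, not part of this commit:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_path = "liuhaotian/llava-v1.5-7b"  # hypothetical checkpoint, for illustration
kwargs = {"device_map": "auto"}

load_8bit = True   # hard-coded by this commit, overriding the caller's flag
load_4bit = False

if load_8bit:
    # bitsandbytes INT8: weights are quantized to 8 bits at load time
    kwargs["load_in_8bit"] = True
elif load_4bit:
    # unreachable while load_8bit is forced True
    kwargs["load_in_4bit"] = True
    kwargs["quantization_config"] = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,  # dtype used for compute on dequantized weights
        bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
        bnb_4bit_quant_type="nf4",             # 4-bit NormalFloat, introduced by QLoRA
    )
else:
    # no quantization: plain fp16 weights
    kwargs["torch_dtype"] = torch.float16

model = AutoModelForCausalLM.from_pretrained(model_path, **kwargs)
# The upstream LLaVA builder passes these same kwargs to LlavaLlamaForCausalLM.from_pretrained.

Note that with load_8bit forced to True, the 4-bit and fp16 branches become dead code, so the Space always loads INT8 weights, presumably a workaround to fit the model into the Space's GPU memory. Newer transformers releases also deprecate passing load_in_8bit / load_in_4bit directly to from_pretrained in favor of quantization_config=BitsAndBytesConfig(load_in_8bit=True).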