Commit 6feb8b1 · 1 Parent(s): bc58293
Update minigpt4/models/mini_gpt4.py
minigpt4/models/mini_gpt4.py +11 -2
minigpt4/models/mini_gpt4.py CHANGED

@@ -92,11 +92,11 @@ class MiniGPT4(Blip2Base):
 
         if llama_cache_dir:
             self.llama_model = LlamaForCausalLM.from_pretrained(
-                llama_model, load_in_8bit=True, torch_dtype=torch.float16, device_map=
+                llama_model, load_in_8bit=True, torch_dtype=torch.float16, device_map="auto", cache_dir=llama_cache_dir
             )
         else:
             self.llama_model = LlamaForCausalLM.from_pretrained(
-                llama_model, load_in_8bit=True, torch_dtype=torch.float16, device_map=
+                llama_model, load_in_8bit=True, torch_dtype=torch.float16, device_map="auto"
             )
         for name, param in self.llama_model.named_parameters():
             param.requires_grad = False
@@ -118,7 +118,16 @@ class MiniGPT4(Blip2Base):
         else:
             self.prompt_list = []
 
+    def vit_to_cpu(self):
+        self.ln_vision.to("cpu")
+        self.ln_vision.float()
+        self.visual_encoder.to("cpu")
+        self.visual_encoder.float()
+
     def encode_img(self, image):
+        device = image.device
+        self.vit_to_cpu()
+        image = image.to("cpu")
         with self.maybe_autocast():
             image_embeds = self.ln_vision(self.visual_encoder(image))
             image_atts = torch.ones(image_embeds.size()[:-1], dtype=torch.long).to(
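The first hunk points both LlamaForCausalLM.from_pretrained calls at device_map="auto" (the old device_map value is truncated in this view) while keeping load_in_8bit=True. A minimal sketch of that loading pattern, assuming the transformers, bitsandbytes, and accelerate packages are installed; the checkpoint path is a placeholder, not the Space's actual weights:

import torch
from transformers import LlamaForCausalLM

llama_model = "path/to/llama-7b-hf"  # placeholder path, not the Space's real checkpoint

model = LlamaForCausalLM.from_pretrained(
    llama_model,
    load_in_8bit=True,           # bitsandbytes int8 quantization of the linear layers
    torch_dtype=torch.float16,   # non-quantized tensors stay in fp16
    device_map="auto",           # accelerate decides where each weight shard lives
)

# The commit freezes LLaMA right after loading, as in the diff above.
for name, param in model.named_parameters():
    param.requires_grad = False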
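The second hunk adds a vit_to_cpu helper and calls it at the top of encode_img: the ViT and its LayerNorm move to the CPU and are cast to float32, the image follows them, and the incoming image's device is saved first, presumably so the embeddings can be handed back to the GPU-resident 8-bit LLaMA afterwards (the rest of encode_img is cut off in this view). A self-contained sketch of that offload pattern, with hypothetical names (TinyEncoder, encode_on_cpu) standing in for the real modules:

import torch
import torch.nn as nn

class TinyEncoder(nn.Module):
    # Stand-in for visual_encoder + ln_vision; illustrative only.
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(16, 16)
        self.norm = nn.LayerNorm(16)

    def forward(self, x):
        return self.norm(self.proj(x))

def encode_on_cpu(encoder: nn.Module, image: torch.Tensor) -> torch.Tensor:
    device = image.device            # remember where the caller's tensor lives
    encoder.to("cpu").float()        # mirror vit_to_cpu: move to CPU, cast to float32
    embeds = encoder(image.to("cpu").float())
    return embeds.to(device)         # hand the result back on the original device

encoder = TinyEncoder()
image = torch.randn(2, 16)           # would arrive on "cuda" in the real Space
print(encode_on_cpu(encoder, image).shape)  # torch.Size([2, 16])

The float32 cast is the important step: the checkpoint holds fp16 weights, and fp16 matmuls are not generally supported on CPU, so casting once inside vit_to_cpu keeps the CPU path working.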