weiyi01191 committed
Commit 92180ac · verified · 1 Parent(s): ff51588

Update minigpt4/models/mini_gpt4_llama_v2.py

Files changed (1)
  1. minigpt4/models/mini_gpt4_llama_v2.py +18 -84
minigpt4/models/mini_gpt4_llama_v2.py CHANGED
@@ -111,94 +111,28 @@ class MiniGPT4_Video(Blip2Base, PreTrainedModel):
 
         print('Loading LLAMA')
 
-        # 🔧 Force-clear the GPU cache before loading Llama
-        import torch
-        import gc
-        if torch.cuda.is_available():
-            print("🔧 Clearing GPU cache...")
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-            gc.collect()
-            available_mem = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)
-            print(f"🔧 Free GPU memory after cleanup: {available_mem / 1024**3:.1f} GB")
-
         self.B_SYS, self.E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
         token=os.environ.get("HF_TKN")
-
-        # 🔧 Pick a tokenizer that matches the model type
-        print(f"🔧 Loading tokenizer for model {self.llama_model}...")
-
-        # Check whether this is a Qwen model
-        if "qwen" in self.llama_model.lower() or "Qwen" in self.llama_model:
-            print("🔧 Qwen model detected, using AutoTokenizer")
-            from transformers import AutoTokenizer
-            self.llama_tokenizer = AutoTokenizer.from_pretrained(
-                self.llama_model,
-                use_fast=False,
-                token=token,
-                trust_remote_code=True
-            )
-            # Special-token setup for Qwen models
-            if self.llama_tokenizer.pad_token is None:
-                self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
-        else:
-            print("🔧 Using LlamaTokenizer")
-            self.llama_tokenizer = LlamaTokenizer.from_pretrained(
-                self.llama_model,
-                use_fast=False,
-                token=token
-            )
-            self.llama_tokenizer.pad_token = "$$"
-
-        print(f"✅ Tokenizer loaded: {type(self.llama_tokenizer)}")
+        self.llama_tokenizer = LlamaTokenizer.from_pretrained(self.llama_model,use_fast=False,token=token) #
+        self.llama_tokenizer.pad_token = "$$"
         print("self.low_resource",self.low_resource)
-
-        # 🔧 Clear memory again to make room for the model load
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            gc.collect()
-
         if self.low_resource:
-            # 🔧 Pick a loading strategy that matches the model type
-            if "qwen" in self.llama_model.lower() or "Qwen" in self.llama_model:
-                print("🔧 Using the Qwen-specific loading strategy")
-                # Qwen models load via AutoModelForCausalLM
-                from transformers import AutoModelForCausalLM
-                self.llama_model = AutoModelForCausalLM.from_pretrained(
-                    self.llama_model,
-                    torch_dtype=torch.float16,
-                    load_in_8bit=True,
-                    device_map={'':f"cuda:{self.minigpt4_gpu_id}"},
-                    token=token,
-                    trust_remote_code=True
-                )
-            else:
-                print("🔧 Using the Llama-specific loading strategy")
-                self.llama_model = llm_model.from_pretrained(
-                    self.llama_model,
-                    torch_dtype=torch.float16,
-                    load_in_8bit=True,
-                    device_map={'':f"cuda:{self.minigpt4_gpu_id}"},
-                    token=token
-                )
+            self.llama_model = llm_model.from_pretrained(
+                self.llama_model,
+                torch_dtype=torch.float16,
+                # torch_dtype = torch.bfloat16,
+                load_in_8bit=True,
+                # device_map = "balanced"
+                # device_map="auto",
+                # device_map={'':torch.cuda.current_device()},token=token
+                device_map={'':f"cuda:{self.minigpt4_gpu_id}"},token=token
+
+            )
         else:
-            # 🔧 Loading strategy for high-resource mode
-            if "qwen" in self.llama_model.lower() or "Qwen" in self.llama_model:
-                print("🔧 Using the Qwen high-resource loading strategy")
-                from transformers import AutoModelForCausalLM
-                self.llama_model = AutoModelForCausalLM.from_pretrained(
-                    self.llama_model,
-                    torch_dtype=torch.float16,
-                    token=token,
-                    trust_remote_code=True
-                )
-            else:
-                print("🔧 Using the Llama high-resource loading strategy")
-                self.llama_model = llm_model.from_pretrained(
-                    self.llama_model,
-                    torch_dtype=torch.float16,
-                    token=token
-                )
+            self.llama_model = llm_model.from_pretrained(
+                self.llama_model,
+                torch_dtype=torch.float16,token=token
+            )
 
         # self.llama_model.resize_token_embeddings(len(self.llama_tokenizer))
         self.llama_model = prepare_model_for_int8_training(self.llama_model)
@@ -874,4 +808,4 @@ def assign_imgs(batched_instruct_list, batched_img_embeds):
         n_assigned.append(None)
         batched_assigned.append(assigned_img)
 
-    return batched_assigned
+    return batched_assigned
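
For reference, here is a minimal, self-contained sketch of the loading path the file ends up with after this commit. The checkpoint name and gpu_id below are illustrative placeholders (the real code reads self.llama_model and self.minigpt4_gpu_id, and llm_model is resolved elsewhere in the class). It also assumes an older transformers release that still accepts load_in_8bit directly in from_pretrained, and an older peft release that still exports prepare_model_for_int8_training (newer versions renamed it prepare_model_for_kbit_training).

    # Hedged sketch of the post-commit loading path; placeholder names are marked below.
    import os

    import torch
    from peft import prepare_model_for_int8_training  # older peft API
    from transformers import LlamaForCausalLM, LlamaTokenizer

    model_name = "meta-llama/Llama-2-7b-chat-hf"  # placeholder for self.llama_model
    gpu_id = 0                                    # placeholder for self.minigpt4_gpu_id
    token = os.environ.get("HF_TKN")

    # Slow (SentencePiece) tokenizer with the "$$" sentinel pad token the diff restores.
    tokenizer = LlamaTokenizer.from_pretrained(model_name, use_fast=False, token=token)
    tokenizer.pad_token = "$$"

    # Low-resource path: fp16 weights loaded in 8-bit and pinned to a single GPU.
    model = LlamaForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        load_in_8bit=True,
        device_map={"": f"cuda:{gpu_id}"},
        token=token,
    )
    model = prepare_model_for_int8_training(model)  # freeze base weights, cast norms for int8 finetuning

Pinning the whole model to one device via device_map={'': ...} mirrors the commit's choice to keep everything on a single GPU rather than letting accelerate shard it, as the commented-out "auto"/"balanced" alternatives in the diff suggest was tried.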