Update minigpt4/models/mini_gpt4_llama_v2.py
minigpt4/models/mini_gpt4_llama_v2.py
CHANGED
@@ -111,94 +111,28 @@ class MiniGPT4_Video(Blip2Base, PreTrainedModel):
 
         print('Loading LLAMA')
 
-        # 🔧 Force-clear the GPU cache before loading Llama
-        import torch
-        import gc
-        if torch.cuda.is_available():
-            print("🔧 Clearing GPU cache...")
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-            gc.collect()
-            available_mem = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)
-            print(f"🔧 Free VRAM after cleanup: {available_mem / 1024**3:.1f} GB")
-
         self.B_SYS, self.E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
         token=os.environ.get("HF_TKN")
-
-
-        print(f"🔧 Loading tokenizer for model {self.llama_model}...")
-
-        # Check whether this is a Qwen model
-        if "qwen" in self.llama_model.lower() or "Qwen" in self.llama_model:
-            print("🔧 Qwen model detected, using AutoTokenizer")
-            from transformers import AutoTokenizer
-            self.llama_tokenizer = AutoTokenizer.from_pretrained(
-                self.llama_model,
-                use_fast=False,
-                token=token,
-                trust_remote_code=True
-            )
-            # Special-token setup for Qwen models
-            if self.llama_tokenizer.pad_token is None:
-                self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
-        else:
-            print("🔧 Using LlamaTokenizer")
-            self.llama_tokenizer = LlamaTokenizer.from_pretrained(
-                self.llama_model,
-                use_fast=False,
-                token=token
-            )
-            self.llama_tokenizer.pad_token = "$$"
-
-        print(f"✅ Tokenizer loaded: {type(self.llama_tokenizer)}")
+        self.llama_tokenizer = LlamaTokenizer.from_pretrained(self.llama_model,use_fast=False,token=token) #
+        self.llama_tokenizer.pad_token = "$$"
         print("self.low_resource",self.low_resource)
-
-        # 🔧 Clear memory again to make room for model loading
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            gc.collect()
-
         if self.low_resource:
-
-
-
-            #
-
-
-
-
-
-
-
-                trust_remote_code=True
-            )
-        else:
-            print("🔧 Using the Llama-specific loading strategy")
-            self.llama_model = llm_model.from_pretrained(
-                self.llama_model,
-                torch_dtype=torch.float16,
-                load_in_8bit=True,
-                device_map={'':f"cuda:{self.minigpt4_gpu_id}"},
-                token=token
-            )
+            self.llama_model = llm_model.from_pretrained(
+                self.llama_model,
+                torch_dtype=torch.float16,
+                # torch_dtype = torch.bfloat16,
+                load_in_8bit=True,
+                # device_map = "balanced"
+                # device_map="auto",
+                # device_map={'':torch.cuda.current_device()},token=token
+                device_map={'':f"cuda:{self.minigpt4_gpu_id}"},token=token
+
+            )
         else:
-
-
-
-
-            self.llama_model = AutoModelForCausalLM.from_pretrained(
-                self.llama_model,
-                torch_dtype=torch.float16,
-                token=token,
-                trust_remote_code=True
-            )
-        else:
-            print("🔧 Using the Llama high-resource loading strategy")
-            self.llama_model = llm_model.from_pretrained(
-                self.llama_model,
-                torch_dtype=torch.float16,
-                token=token
-            )
+            self.llama_model = llm_model.from_pretrained(
+                self.llama_model,
+                torch_dtype=torch.float16,token=token
+            )
 
         # self.llama_model.resize_token_embeddings(len(self.llama_tokenizer))
         self.llama_model = prepare_model_for_int8_training(self.llama_model)
@@ -874,4 +808,4 @@ def assign_imgs(batched_instruct_list, batched_img_embeds):
                 n_assigned.append(None)
         batched_assigned.append(assigned_img)
 
-    return batched_assigned
+    return batched_assigned
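
For reference, the load path that remains after this change reduces to the minimal sketch below. It is a sketch under assumptions, not the Space's exact code: llm_model is assumed to resolve to transformers.LlamaForCausalLM, the checkpoint name and GPU id are hypothetical placeholders, and HF_TKN is read from the environment as in the diff. prepare_model_for_int8_training comes from older peft releases (newer ones rename it prepare_model_for_kbit_training), and load_in_8bit requires bitsandbytes.

import os
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import prepare_model_for_int8_training  # renamed prepare_model_for_kbit_training in newer peft

MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical placeholder checkpoint
GPU_ID = 0                                    # stands in for self.minigpt4_gpu_id
token = os.environ.get("HF_TKN")              # same env var the diff reads

# Slow (SentencePiece) tokenizer with "$$" as the padding token, as in the diff.
tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME, use_fast=False, token=token)
tokenizer.pad_token = "$$"

# Low-resource path: 8-bit weights pinned to one GPU. The {'': device} map
# places every module on that single device, unlike "auto"/"balanced", which
# shard the model across all visible GPUs.
model = LlamaForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    load_in_8bit=True,
    device_map={"": f"cuda:{GPU_ID}"},
    token=token,
)

# Freeze base weights and cast norm/output layers so the int8 model is
# stable for LoRA-style fine-tuning.
model = prepare_model_for_int8_training(model)

Pinning the whole model to cuda:{GPU_ID} keeps the vision encoder and the LLM on the same device in a multi-GPU Space, which is presumably why the commented-out "auto" and "balanced" maps were rejected in the diff.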