Spaces:
Build error
Commit · d52f2be
Parent(s): 9f4c400
Update model/openllama.py
model/openllama.py CHANGED (+15 -15)
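This change moves the Space off CUDA: prompt tokenization and self.device are pinned to torch.device('cpu'), the hand-rolled accelerate dispatch is commented out, and the Vicuna checkpoint and tokenizer are loaded with device_map='auto', disk offload, and torch.bfloat16.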
@@ -43,7 +43,7 @@ for obj in objs:
         for s in prompted_state:
             for template in prompt_templates:
                 prompted_sentence.append(template.format(s))
-        prompted_sentence = data.load_and_transform_text(prompted_sentence, torch.cuda.current_device())
+        prompted_sentence = data.load_and_transform_text(prompted_sentence, torch.device('cpu'))#torch.cuda.current_device())
         prompt_sentence_obj.append(prompted_sentence)
     prompt_sentences[obj] = prompt_sentence_obj
 
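The removed line ran ImageBind's text tokenization on torch.cuda.current_device(), which raises on a machine with no GPU; the replacement pins it to CPU. A minimal sketch of the same call in isolation, assuming the pip-packaged layout (from imagebind import data) and illustrative prompt strings:

import torch
from imagebind import data  # facebookresearch/ImageBind

# Tokenize the prompt sentences on CPU. torch.cuda.current_device()
# raises when PyTorch sees no GPU, which is what broke this build.
prompts = ["a photo of a normal bottle.", "a photo of a damaged bottle."]
tokens = data.load_and_transform_text(prompts, torch.device('cpu'))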
@@ -167,7 +167,7 @@ class OpenLLAMAPEFTModel(nn.Module):
         max_tgt_len = args['max_tgt_len']
         stage = args['stage']
 
-        self.device = torch.cuda.current_device()
+        self.device = torch.device('cpu') # torch.cuda.current_device()
 
         print (f'Initializing visual encoder from {imagebind_ckpt_path} ...')
 
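Same fix one level up: the constructor used to ask CUDA for the current device unconditionally. If the Space should still use a GPU when one is present, a device-agnostic guard (a sketch, not the committed code) avoids hard-coding either backend:

import torch

# Inside __init__ this would assign to self.device.
# torch.cuda.current_device() raises on CPU-only machines;
# the is_available() check covers both cases in one expression.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')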
@@ -205,25 +205,25 @@ class OpenLLAMAPEFTModel(nn.Module):
             target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj']
         )
 
-        config = AutoConfig.from_pretrained(vicuna_ckpt_path)
-        with init_empty_weights():
-            self.llama_model = AutoModelForCausalLM.from_config(config)
-
-        # device_map = infer_auto_device_map(self.llama_model, no_split_module_classes=["OPTDecoderLayer"], dtype="float16")
-        # print(device_map)
-        device_map = {'model.embed_tokens': 0, 'model.layers.0': 0, 'model.layers.1': 0, 'model.layers.2': 0, 'model.layers.3': 0, 'model.layers.4': 0, 'model.layers.5': 0, 'model.layers.6': 0, 'model.layers.7': 0, 'model.layers.8': 0, 'model.layers.9': 0, 'model.layers.10.self_attn': 0, 'model.layers.10.mlp.gate_proj': 0, 'model.layers.10.mlp.down_proj': 'cpu', 'model.layers.10.mlp.up_proj': 'cpu', 'model.layers.10.mlp.act_fn': 'cpu', 'model.layers.10.input_layernorm': 'cpu', 'model.layers.10.post_attention_layernorm': 'cpu', 'model.layers.11': 'cpu', 'model.layers.12': 'cpu', 'model.layers.13': 'cpu', 'model.layers.14': 'cpu', 'model.layers.15': 'cpu', 'model.layers.16': 'cpu', 'model.layers.17': 'cpu', 'model.layers.18': 'cpu', 'model.layers.19': 'cpu', 'model.layers.20': 'cpu', 'model.layers.21': 'cpu', 'model.layers.22': 'cpu', 'model.layers.23': 'cpu', 'model.layers.24': 'disk', 'model.layers.25': 'disk', 'model.layers.26': 'disk', 'model.layers.27': 'disk', 'model.layers.28': 'disk', 'model.layers.29': 'disk', 'model.layers.30': 'disk', 'model.layers.31.self_attn': 'disk', 'model.layers.31.mlp.gate_proj': 'disk', 'model.layers.31.mlp.down_proj': 'disk', 'model.layers.31.mlp.up_proj': 'disk', 'model.layers.31.mlp.act_fn': 'disk', 'model.layers.31.input_layernorm': 'disk', 'model.layers.31.post_attention_layernorm': 'disk', 'model.norm': 'disk', 'lm_head': 'disk'}
-        # self.llama_model = load_checkpoint_and_dispatch(self.llama_model, vicuna_ckpt_path, device_map=device_map, offload_folder="offload", offload_state_dict = True)
-        # self.llama_model.to(torch.float16)
-        # try:
-        self.llama_model = AutoModelForCausalLM.from_pretrained(
-        # except:
+        # config = AutoConfig.from_pretrained(vicuna_ckpt_path)
+        # with init_empty_weights():
+        #     self.llama_model = AutoModelForCausalLM.from_config(config)
+
+        # # device_map = infer_auto_device_map(self.llama_model, no_split_module_classes=["OPTDecoderLayer"], dtype="float16")
+        # # print(device_map)
+        # device_map = {'model.embed_tokens': 0, 'model.layers.0': 0, 'model.layers.1': 0, 'model.layers.2': 0, 'model.layers.3': 0, 'model.layers.4': 0, 'model.layers.5': 0, 'model.layers.6': 0, 'model.layers.7': 0, 'model.layers.8': 0, 'model.layers.9': 0, 'model.layers.10.self_attn': 0, 'model.layers.10.mlp.gate_proj': 0, 'model.layers.10.mlp.down_proj': 'cpu', 'model.layers.10.mlp.up_proj': 'cpu', 'model.layers.10.mlp.act_fn': 'cpu', 'model.layers.10.input_layernorm': 'cpu', 'model.layers.10.post_attention_layernorm': 'cpu', 'model.layers.11': 'cpu', 'model.layers.12': 'cpu', 'model.layers.13': 'cpu', 'model.layers.14': 'cpu', 'model.layers.15': 'cpu', 'model.layers.16': 'cpu', 'model.layers.17': 'cpu', 'model.layers.18': 'cpu', 'model.layers.19': 'cpu', 'model.layers.20': 'cpu', 'model.layers.21': 'cpu', 'model.layers.22': 'cpu', 'model.layers.23': 'cpu', 'model.layers.24': 'disk', 'model.layers.25': 'disk', 'model.layers.26': 'disk', 'model.layers.27': 'disk', 'model.layers.28': 'disk', 'model.layers.29': 'disk', 'model.layers.30': 'disk', 'model.layers.31.self_attn': 'disk', 'model.layers.31.mlp.gate_proj': 'disk', 'model.layers.31.mlp.down_proj': 'disk', 'model.layers.31.mlp.up_proj': 'disk', 'model.layers.31.mlp.act_fn': 'disk', 'model.layers.31.input_layernorm': 'disk', 'model.layers.31.post_attention_layernorm': 'disk', 'model.norm': 'disk', 'lm_head': 'disk'}
+        # # self.llama_model = load_checkpoint_and_dispatch(self.llama_model, vicuna_ckpt_path, device_map=device_map, offload_folder="offload", offload_state_dict = True)
+        # # self.llama_model.to(torch.float16)
+        # # try:
+        self.llama_model = AutoModelForCausalLM.from_pretrained(vicuna_ckpt_path, torch_dtype=torch.bfloat16, device_map='auto', offload_folder="offload", offload_state_dict = True)
+        # # except:
         # pass
         # finally:
         # print(self.llama_model.hf_device_map)
         self.llama_model = get_peft_model(self.llama_model, peft_config)
         self.llama_model.print_trainable_parameters()
 
-        self.llama_tokenizer = LlamaTokenizer.from_pretrained(vicuna_ckpt_path, use_fast=False, torch_dtype=torch.
+        self.llama_tokenizer = LlamaTokenizer.from_pretrained(vicuna_ckpt_path, use_fast=False, torch_dtype=torch.bfloat16, device_map='auto', offload_folder="offload", offload_state_dict = True)
         self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
         self.llama_tokenizer.padding_side = "right"
         print ('Language decoder initialized.')
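The bulk of this hunk comments out the manual accelerate dispatch (empty-weights init plus a hand-written device_map that split the 32 decoder layers across GPU 0, CPU RAM, and disk) in favor of a single from_pretrained call with device_map='auto', which computes an equivalent map from whatever memory is actually available. A side-by-side sketch of the two strategies, assuming accelerate is installed and ckpt stands in for vicuna_ckpt_path; the original's no_split_module_classes=["OPTDecoderLayer"] looks copied from an OPT example, so the sketch uses the LLaMA equivalent:

import torch
from transformers import AutoConfig, AutoModelForCausalLM
from accelerate import init_empty_weights, infer_auto_device_map, load_checkpoint_and_dispatch

ckpt = "path/to/vicuna"  # placeholder for vicuna_ckpt_path

# (a) Manual dispatch, as in the commented-out block: build the model
# without allocating weights, derive a device_map, then stream the
# checkpoint onto gpu/cpu/disk according to that map.
config = AutoConfig.from_pretrained(ckpt)
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(config)
device_map = infer_auto_device_map(
    model, no_split_module_classes=["LlamaDecoderLayer"], dtype="float16")
model = load_checkpoint_and_dispatch(
    model, ckpt, device_map=device_map, offload_folder="offload")

# (b) One-liner, as in the committed line: transformers runs the same
# dispatch internally. bfloat16 is the safer dtype here because CPU
# kernels for float16 are patchy.
model = AutoModelForCausalLM.from_pretrained(
    ckpt,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    offload_folder="offload",
    offload_state_dict=True,
)

One leftover worth noting: torch_dtype, device_map, offload_folder, and offload_state_dict on the LlamaTokenizer.from_pretrained call are model-loading arguments; tokenizers appear to ignore them, so that line works but the extra kwargs do nothing.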