Commit: fix
Files changed: train_llama.py (+3 −1)
@@ -16,8 +16,10 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 MODEL_ID = "meta-llama/Llama-2-7b-hf"
 tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)
 
+# Set pad token to existing <|endoftext|> (ID 2) instead of adding a new one
 if tokenizer.pad_token is None:
-    tokenizer.  [NOTE: removed line truncated in the page extraction — tail unrecoverable]
+    tokenizer.pad_token = tokenizer.eos_token  # Use <|endoftext|> as pad token
+    tokenizer.pad_token_id = tokenizer.eos_token_id  # Should be 2
 
 # Quantization config
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)