fix(tokenizer): update log order after update (#806)
Browse files
src/axolotl/utils/models.py
CHANGED
|
@@ -72,11 +72,6 @@ def load_tokenizer(cfg):
     # set a pad_token, but use eos_token so we don't add a new token
     tokenizer.pad_token = LLAMA_DEFAULT_EOS_TOKEN

-    LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
-    LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
-    LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
-    LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")
-
     if tokenizer.__class__.__name__ == "GPTNeoXTokenizerFast":
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         os.environ["TOKENIZERS_PARALLELISM"] = "false"

@@ -98,6 +93,11 @@ def load_tokenizer(cfg):
         ]
     )

     return tokenizer
|
|
|
Resulting code after the change (special-token debug logging moved to just before the return, so it reflects any tokens added later in the function):

    # set a pad_token, but use eos_token so we don't add a new token
    tokenizer.pad_token = LLAMA_DEFAULT_EOS_TOKEN

    if tokenizer.__class__.__name__ == "GPTNeoXTokenizerFast":
        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
        os.environ["TOKENIZERS_PARALLELISM"] = "false"

        ]
    )

+   LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
+   LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
+   LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
+   LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")
+
    return tokenizer