gracefully handle empty input (#442)
Browse files
src/axolotl/prompt_tokenizers.py
CHANGED
|
@@ -85,7 +85,11 @@ class PromptTokenizingStrategy(abc.ABC):
|
|
| 85 |
result["input_ids"].append(self.tokenizer.eos_token_id)
|
| 86 |
result["attention_mask"].append(1)
|
| 87 |
|
| 88 |
-
if result["input_ids"][0] == self.tokenizer.bos_token_id and strip_bos_token:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
result["input_ids"] = result["input_ids"][1:]
|
| 90 |
result["attention_mask"] = result["attention_mask"][1:]
|
| 91 |
|
|
|
|
| 85 |
result["input_ids"].append(self.tokenizer.eos_token_id)
|
| 86 |
result["attention_mask"].append(1)
|
| 87 |
|
| 88 |
+
if (
|
| 89 |
+
len(result["input_ids"]) > 0
|
| 90 |
+
and result["input_ids"][0] == self.tokenizer.bos_token_id
|
| 91 |
+
and strip_bos_token
|
| 92 |
+
):
|
| 93 |
result["input_ids"] = result["input_ids"][1:]
|
| 94 |
result["attention_mask"] = result["attention_mask"][1:]
|
| 95 |
|