drop empty tokenized rows too (#509)
Browse files
src/axolotl/utils/trainer.py
CHANGED
|
@@ -361,7 +361,7 @@ def add_position_ids(sample):
|
|
| 361 |
|
| 362 |
|
| 363 |
def drop_long_seq(sample, sequence_len=2048):
    """Return True when ``sample`` has a usable number of tokens.

    A sample passes when ``sample["input_ids"]`` is non-empty and its length
    does not exceed ``sequence_len``.

    Args:
        sample: Mapping with an ``"input_ids"`` entry that supports ``len()``.
        sequence_len: Maximum allowed number of tokens (inclusive).

    Returns:
        bool: ``True`` if ``0 < len(sample["input_ids"]) <= sequence_len``,
        otherwise ``False``.

    Raises:
        KeyError: If ``sample`` has no ``"input_ids"`` key.
    """
    # Empty rows must be rejected as well: a length of 0 trivially satisfies
    # the upper bound, so the lower bound has to be checked explicitly.
    # NOTE(review): presumably used as a dataset filter predicate -- confirm
    # against the callers, which are not visible here.
    return 0 < len(sample["input_ids"]) <= sequence_len
|
| 365 |
|
| 366 |
|
| 367 |
@contextmanager
|
|
|
|
| 361 |
|
| 362 |
|
| 363 |
def drop_long_seq(sample, sequence_len=2048):
    """Tell whether ``sample`` should be kept based on its token count.

    Args:
        sample: Mapping with an ``"input_ids"`` entry that supports ``len()``.
        sequence_len: Largest token count that is still accepted.

    Returns:
        bool: ``False`` when ``sample["input_ids"]`` is longer than
        ``sequence_len`` or is empty; ``True`` otherwise.
    """
    num_tokens = len(sample["input_ids"])

    # Too long: reject before looking at the lower bound.
    if num_tokens > sequence_len:
        return False

    # Within the limit; only non-empty rows are accepted.
    return num_tokens > 0
|
| 365 |
|
| 366 |
|
| 367 |
@contextmanager
|