Refactor imports in train.py to improve organization and clarity, adding DataCollatorForLanguageModeling for enhanced data handling during training.
train.py
CHANGED
@@ -32,9 +32,13 @@ from datasets import (
     IterableDatasetDict,
     load_dataset,
 )
-from transformers import
+from transformers import (
+    AutoTokenizer,
+    DataCollatorForLanguageModeling,
+    Trainer,
+    TrainingArguments,
+)
 from trl import SFTTrainer
-from trl.data.data_collator import DataCollatorForLanguageModeling
 
 # Configuration
 max_seq_length = 2048 # Auto supports RoPE Scaling internally
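The change imports DataCollatorForLanguageModeling from transformers directly instead of from trl.data.data_collator, alongside AutoTokenizer, Trainer, and TrainingArguments. The sketch below shows one way these pieces typically fit together for causal-LM training; the checkpoint name, dataset, tokenization step, and trainer arguments are illustrative assumptions, not values taken from the rest of train.py (which trains with SFTTrainer rather than the plain Trainer used here).

# Hedged sketch of the new imports in use. "gpt2" and the wikitext dataset
# are hypothetical placeholders; train.py defines its own model and data.
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

model_name = "gpt2"  # placeholder checkpoint, assumption
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # many causal LMs ship without a pad token

model = AutoModelForCausalLM.from_pretrained(model_name)

raw = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")  # placeholder dataset

def tokenize(batch):
    # Truncate to the same context length used in train.py's configuration.
    return tokenizer(batch["text"], truncation=True, max_length=2048)

tokenized = raw.map(tokenize, batched=True, remove_columns=raw.column_names)

# mlm=False: the collator pads each batch and copies input_ids into labels
# (pad positions set to -100), which is what next-token LM training expects.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="outputs", per_device_train_batch_size=2),
    train_dataset=tokenized,
    data_collator=collator,
)
trainer.train()

The same data_collator=collator argument can be passed to SFTTrainer, since it inherits the Trainer interface; only the collator construction changes with this commit, as it now comes from transformers.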