Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -198,7 +198,7 @@ lm_datasets = tokenized_datasets.map(
|
|
| 198 |
|
| 199 |
# Batches von Daten zusammenfassen
|
| 200 |
tokenizer.pad_token = tokenizer.eos_token
|
| 201 |
-
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
| 202 |
|
| 203 |
|
| 204 |
print ("###############lm datasets####################")
|
|
@@ -264,8 +264,8 @@ trainer = Trainer(
|
|
| 264 |
args=training_args,
|
| 265 |
train_dataset=lm_datasets["train"],
|
| 266 |
eval_dataset=lm_datasets["test"],
|
| 267 |
-
data_collator=data_collator,
|
| 268 |
-
|
| 269 |
compute_metrics=compute_metrics,
|
| 270 |
)
|
| 271 |
|
|
|
|
| 198 |
|
| 199 |
# Batches von Daten zusammenfassen
|
| 200 |
tokenizer.pad_token = tokenizer.eos_token
|
| 201 |
+
#data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
| 202 |
|
| 203 |
|
| 204 |
print ("###############lm datasets####################")
|
|
|
|
| 264 |
args=training_args,
|
| 265 |
train_dataset=lm_datasets["train"],
|
| 266 |
eval_dataset=lm_datasets["test"],
|
| 267 |
+
#data_collator=data_collator,
|
| 268 |
+
tokenizer=tokenizer,
|
| 269 |
compute_metrics=compute_metrics,
|
| 270 |
)
|
| 271 |
|