Merge pull request #85 from NanoCode012/fix/add-dataset-shard-readme
Browse files
README.md
CHANGED
|
@@ -209,6 +209,10 @@ dataset_prepared_path: data/last_run_prepared
|
|
| 209 |
push_dataset_to_hub: # repo path
|
| 210 |
# How much of the dataset to set aside as evaluation. 1 = 100%, 0.50 = 50%, etc
|
| 211 |
val_set_size: 0.04
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
# the maximum length of an input to train with, this should typically be less than 2048
|
| 214 |
# as most models have a token/context limit of 2048
|
|
|
|
| 209 |
push_dataset_to_hub: # repo path
|
| 210 |
# How much of the dataset to set aside as evaluation. 1 = 100%, 0.50 = 50%, etc
|
| 211 |
val_set_size: 0.04
|
| 212 |
+
# Number of shards to split the whole dataset into
|
| 213 |
+
dataset_shard_num:
|
| 214 |
+
# Index of the shard to use (one of the dataset_shard_num shards)
|
| 215 |
+
dataset_shard_idx:
|
| 216 |
|
| 217 |
# the maximum length of an input to train with, this should typically be less than 2048
|
| 218 |
# as most models have a token/context limit of 2048
|