Update tokenize_and_upload.py
Browse files
- tokenize_and_upload.py +1 -1
tokenize_and_upload.py
CHANGED
|
@@ -18,7 +18,7 @@ SOURCE_DATASET_ID = "UcsTurkey/turkish-general-culture-chunks"
|
|
| 18 |
TRAIN_TARGET_DATASET_ID = "UcsTurkey/turkish-general-culture-tokenized"
|
| 19 |
BUFFER_SIZE = 5
|
| 20 |
START_CHUNK_NUMBER = 0
|
| 21 |
-
PROCESS_CHUNK_COUNT =
|
| 22 |
|
| 23 |
CHUNK_FOLDER = "/data/chunks"
|
| 24 |
PARQUET_FOLDER = "/data/tokenized_chunks"
|
|
|
|
| 18 |
TRAIN_TARGET_DATASET_ID = "UcsTurkey/turkish-general-culture-tokenized"
|
| 19 |
BUFFER_SIZE = 5
|
| 20 |
START_CHUNK_NUMBER = 0
|
| 21 |
+
PROCESS_CHUNK_COUNT = 776
|
| 22 |
|
| 23 |
CHUNK_FOLDER = "/data/chunks"
|
| 24 |
PARQUET_FOLDER = "/data/tokenized_chunks"
|