ciyidogan committed on
Commit
c00e5c0
·
verified ·
1 Parent(s): d3bc855

Update tokenize_and_upload_mistral.py

Browse files
Files changed (1) hide show
  1. tokenize_and_upload_mistral.py +2 -2
tokenize_and_upload_mistral.py CHANGED
@@ -17,8 +17,8 @@ HF_TOKEN = os.getenv("HF_TOKEN")
17
  SOURCE_DATASET_ID = "UcsTurkey/turkish-general-culture-chunks"
18
  TRAIN_TARGET_DATASET_ID = "UcsTurkey/turkish-general-culture-tokenized"
19
  BUFFER_SIZE = 5
20
- START_CHUNK_NUMBER = 0
21
- PROCESS_CHUNK_COUNT = 776
22
 
23
  CHUNK_FOLDER = "/data/chunks"
24
  PARQUET_FOLDER = "/data/tokenized_chunks"
 
17
  SOURCE_DATASET_ID = "UcsTurkey/turkish-general-culture-chunks"
18
  TRAIN_TARGET_DATASET_ID = "UcsTurkey/turkish-general-culture-tokenized"
19
  BUFFER_SIZE = 5
20
+ START_CHUNK_NUMBER = 776
21
+ PROCESS_CHUNK_COUNT = 2
22
 
23
  CHUNK_FOLDER = "/data/chunks"
24
  PARQUET_FOLDER = "/data/tokenized_chunks"