File size: 236 Bytes
611e31d
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
# Download the skymizer/fineweb-edu-dedup-45B repo from the Hugging Face Hub
# and run books.py over the downloaded files.
#
# Requires `huggingface-cli` and `python` on PATH, and books.py in the current
# working directory. Both directories below are relative to the current
# working directory.

# Stop at the first failing command so books.py never runs against a failed
# or partial download; also treat unset variables as errors.
set -eu

raw_dir=./data_raw    # download target; also passed to books.py as --data-path
proc_dir=./data_proc  # passed to books.py as --save-path

# NOTE(review): huggingface-cli assumes a model repo unless --repo-type is
# given. The repo name looks like a dataset; if so this may need
# `--repo-type dataset` -- confirm against the Hub.
huggingface-cli download skymizer/fineweb-edu-dedup-45B --local-dir "$raw_dir"

# The last option deliberately has no trailing backslash: a dangling
# continuation would swallow any line appended after this command.
python books.py \
    --data-path "$raw_dir" \
    --save-path "$proc_dir" \
    --content-key text \
    --processes-num 64 \
    --write-batch-size 100