# Step 1: fetch the skymizer/fineweb-edu-dedup-45B repository from the
# Hugging Face Hub into ./data_raw.
# NOTE(review): huggingface-cli treats the repo as a model by default; if this
# repo is published as a dataset, `--repo-type dataset` is required -- confirm.
#
# Step 2: run books.py over the downloaded files, writing results to
# ./data_proc. Flag meanings are defined by books.py (not visible here);
# presumably `--content-key` names the record field holding the text,
# `--processes-num` the worker count, and `--write-batch-size` the number of
# records per write -- verify against books.py.
#
# The steps are chained with `&&` so preprocessing never starts on a failed or
# partial download. The final argument line deliberately has no trailing
# backslash, so the command ends there.
huggingface-cli download skymizer/fineweb-edu-dedup-45B --local-dir ./data_raw &&
  python books.py \
    --data-path ./data_raw \
    --save-path ./data_proc \
    --content-key text \
    --processes-num 64 \
    --write-batch-size 100