# Makefile — test, lint, and example-training targets for the TRL repository.
# Every target in this file is a command, not a file it produces — declare
# them all phony so a stray file with the same name can never mask a target.
.PHONY: test precommit common_tests slow_tests test_examples tests_gpu \
	run_rm_1 run_rm_2 run_ppo_1 run_ppo_2

# Source directories covered by lint/format tooling.
check_dirs := examples tests trl

# Use `:=` with $(CURDIR) so the paths are resolved once at parse time,
# instead of the recursive `=` + backtick-`pwd` form, which forked a shell
# every time the variable was expanded inside a recipe.
ACCELERATE_CONFIG_PATH := $(CURDIR)/examples/accelerate_configs
COMMAND_FILES_PATH := $(CURDIR)/commands
# Fast test suite: run in parallel, deselecting slow and low-priority tests.
# Known-flaky failures (network hiccups, loose numeric tolerances) are
# retried up to 5 times via pytest-rerunfailures.
# Fixes vs. previous version:
#   * regex had an empty alternation (`504||not`) — an empty branch matches
#     every failure message, so --only-rerun retried ALL failures, not just
#     the listed flaky ones;
#   * marker was `low-priority`, but pytest markers are applied as Python
#     identifiers (`@pytest.mark.low_priority`), so the hyphenated name
#     never matched and low-priority tests were not deselected.
test:
	pytest -n auto -m "not slow and not low_priority" -s -v --reruns 5 --reruns-delay 1 --only-rerun '(OSError|Timeout|HTTPError.*502|HTTPError.*504|not less than or equal to 0.01)' tests/
# Repo housekeeping: stamp missing copyright headers, then run every
# configured pre-commit hook (lint/format) over the whole tree.
precommit:
	python scripts/add_copyrights.py
	pre-commit run --all-files
# Slow test suite only. When IS_GITHUB_CI is set (CI runs), also write a
# pytest report log so the workflow can collect and inspect it.
slow_tests:
	pytest -m "slow" tests/ $(if $(IS_GITHUB_CI),--report-log "slow_tests.log",)
# Smoke-test the example commands: run the SFT and DPO launcher scripts once
# per accelerate config YAML. Each iteration appends "<exit_code>,<config>"
# to a temp results file; a failing run does NOT abort the loop, so every
# config is exercised and the results file records which ones failed.
test_examples:
	touch temp_results_sft_tests.txt
	for file in $(ACCELERATE_CONFIG_PATH)/*.yaml; do \
		TRL_ACCELERATE_CONFIG=$${file} bash $(COMMAND_FILES_PATH)/run_sft.sh; \
		echo $$?','$${file} >> temp_results_sft_tests.txt; \
	done
	touch temp_results_dpo_tests.txt
	for file in $(ACCELERATE_CONFIG_PATH)/*.yaml; do \
		TRL_ACCELERATE_CONFIG=$${file} bash $(COMMAND_FILES_PATH)/run_dpo.sh; \
		echo $$?','$${file} >> temp_results_dpo_tests.txt; \
	done
# ------------------------------------------------------------------------------
# Reward-model training, full fine-tune: Qwen2-0.5B-Instruct on the
# trl-lib/ultrafeedback_binarized preference dataset.
run_rm_1:
	python examples/scripts/reward_modeling.py \
		--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
		--dataset_name trl-lib/ultrafeedback_binarized \
		--output_dir Qwen2-0.5B-Reward \
		--per_device_train_batch_size 8 \
		--num_train_epochs 1 \
		--gradient_checkpointing True \
		--learning_rate 1.0e-5 \
		--logging_steps 25 \
		--eval_strategy steps \
		--eval_steps 50 \
		--max_length 2048
# Reward-model training, LoRA variant of run_rm_1: same model/dataset, but
# PEFT adapters (r=32, alpha=16) with a 10x higher learning rate — the usual
# adjustment when only adapter weights are trained.
run_rm_2:
	python examples/scripts/reward_modeling.py \
		--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
		--dataset_name trl-lib/ultrafeedback_binarized \
		--output_dir Qwen2-0.5B-Reward-LoRA \
		--per_device_train_batch_size 8 \
		--num_train_epochs 1 \
		--gradient_checkpointing True \
		--learning_rate 1.0e-4 \
		--logging_steps 25 \
		--eval_strategy steps \
		--eval_steps 50 \
		--max_length 2048 \
		--use_peft \
		--lora_r 32 \
		--lora_alpha 16
# Minimal single-process PPO run: pythia-1b-deduped on the descriptiveness
# split of the internal sentiment dataset.
run_ppo_1:
	python examples/scripts/ppo/ppo.py \
		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
		--dataset_train_split descriptiveness \
		--learning_rate 3e-6 \
		--output_dir models/minimal/ppo \
		--per_device_train_batch_size 64 \
		--gradient_accumulation_steps 1 \
		--total_episodes 10000 \
		--model_name_or_path EleutherAI/pythia-1b-deduped \
		--missing_eos_penalty 1.0
# Distributed PPO run: same dataset/model as run_ppo_1, launched through
# accelerate with the DeepSpeed ZeRO-3 config. Per-device batch size drops
# to 1 with 16 gradient-accumulation steps; the same pythia checkpoint is
# reused as policy, SFT reference, and reward model (smoke-test setup).
run_ppo_2:
	accelerate launch --config_file examples/accelerate_configs/deepspeed_zero3.yaml \
		examples/scripts/ppo/ppo.py \
		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
		--dataset_train_split descriptiveness \
		--output_dir models/minimal/ppo \
		--num_ppo_epochs 1 \
		--num_mini_batches 1 \
		--learning_rate 3e-6 \
		--per_device_train_batch_size 1 \
		--gradient_accumulation_steps 16 \
		--total_episodes 10000 \
		--model_name_or_path EleutherAI/pythia-1b-deduped \
		--sft_model_path EleutherAI/pythia-1b-deduped \
		--reward_model_path EleutherAI/pythia-1b-deduped \
		--local_rollout_forward_batch_size 1 \
		--missing_eos_penalty 1.0