# Uploaded by: HarryJoshAI
# Commit message: Upload folder using huggingface_hub
# Commit: 93f34b7 (verified)
# Flat hyperparameter mapping for the `train_rl` experiment.
# Keys are grouped by topic (judged from their names) purely for readability;
# the mapping is flat and key order carries no meaning.
# String values are single-quoted so no parser can re-type them.

# --- Run identity and tracking ---
exp_name: 'train_rl'
task_name: null
seed: 0
log_with: 'tensorboard'
tracker_project_name: 'trl'
# The "/" is part of the key itself; this is NOT a nested `project_kwargs` mapping.
project_kwargs/logging_dir: '01/medical_model_rl/final/logs'

# --- Model, data and reward source ---
# NOTE(review): model_name points at a "medical_model" path while query_dataset
# and reward_model both reference imdb - confirm this pairing is intended.
model_name: '01/medical_model/final'
query_dataset: 'imdb'
reward_model: 'sentiment-analysis:lvwerra/distilbert-imdb'
is_encoder_decoder: false
is_peft_model: false
remove_unused_columns: true

# --- Step counts and batch sizes ---
steps: 20000
# NOTE(review): 625 == steps / batch_size (20000 / 32); presumably derived - confirm.
total_ppo_epochs: 625
ppo_epochs: 4
batch_size: 32
mini_batch_size: 1
forward_batch_size: null
backward_batch_size: 1
gradient_accumulation_steps: 1
# NOTE(review): the global_* values equal the per-process values here, which is
# consistent with world_size being 1; presumably derived - confirm.
global_batch_size: 32
global_backward_batch_size: 1
world_size: 1
compare_steps: 1

# --- Optimisation and memory ---
learning_rate: 1.41e-05
max_grad_norm: null
gradient_checkpointing: false
optimize_cuda_cache: null
optimize_device_cache: false

# --- KL control ---
# NOTE(review): `target`, `horizon` and `early_stopping` are grouped here on the
# assumption that they belong to KL control - verify against the consumer.
adap_kl_ctrl: true
init_kl_coef: 0.2
kl_penalty: 'kl'
target: 6
horizon: 10000
target_kl: 0.1
early_stopping: false

# --- PPO objective ---
gamma: 1
lam: 0.95
cliprange: 0.2
cliprange_value: 0.2
vf_coef: 0.1
ratio_threshold: 10.0

# --- Reward / score post-processing ---
whiten_rewards: false
use_score_norm: false
use_score_scaling: false
score_clip: null