Spaces:
Sleeping
Sleeping
adap_kl_ctrl: true | |
backward_batch_size: 1 | |
batch_size: 32 | |
cliprange: 0.2 | |
cliprange_value: 0.2 | |
compare_steps: 1 | |
early_stopping: false | |
exp_name: train_rl | |
forward_batch_size: null | |
gamma: 1 | |
global_backward_batch_size: 1 | |
global_batch_size: 32 | |
gradient_accumulation_steps: 1 | |
gradient_checkpointing: false | |
horizon: 10000 | |
init_kl_coef: 0.2 | |
is_encoder_decoder: false | |
is_peft_model: false | |
kl_penalty: kl | |
lam: 0.95 | |
learning_rate: 1.41e-05 | |
log_with: tensorboard | |
max_grad_norm: null | |
mini_batch_size: 1 | |
model_name: 01/medical_model/final | |
optimize_cuda_cache: null | |
optimize_device_cache: false | |
ppo_epochs: 4 | |
project_kwargs/logging_dir: 01/medical_model_rl/final/logs | |
query_dataset: imdb | |
ratio_threshold: 10.0 | |
remove_unused_columns: true | |
reward_model: sentiment-analysis:lvwerra/distilbert-imdb | |
score_clip: null | |
seed: 0 | |
steps: 20000 | |
target: 6 | |
target_kl: 0.1 | |
task_name: null | |
total_ppo_epochs: 625 | |
tracker_project_name: trl | |
use_score_norm: false | |
use_score_scaling: false | |
vf_coef: 0.1 | |
whiten_rewards: false | |
world_size: 1 | |