Upload configs.yaml with huggingface_hub
Browse files- configs.yaml +22 -24
configs.yaml
CHANGED
@@ -1,36 +1,34 @@
|
|
1 |
assistant_tag: gpt
|
2 |
-
bf16:
|
3 |
content_tag: value
|
4 |
-
cutoff_len: 16384
|
5 |
-
dataloader_num_workers: 4
|
6 |
-
dataloader_persistent_workers:
|
7 |
-
dataloader_pin_memory:
|
8 |
dataset: mlfoundations-dev/a1_math_open2math
|
9 |
dataset_dir: ONLINE
|
10 |
-
ddp_timeout: 180000000
|
11 |
-
deepspeed:
|
12 |
-
do_train:
|
13 |
-
enable_liger_kernel:
|
14 |
-
eval_strategy: 'no'
|
15 |
finetuning_type: full
|
16 |
formatting: sharegpt
|
17 |
-
global_batch_size: 128
|
18 |
-
gradient_accumulation_steps:
|
19 |
hub_model_id: mlfoundations-dev/a1_math_open2math
|
20 |
-
|
21 |
-
|
22 |
-
logging_steps: 1
|
23 |
lr_scheduler_type: cosine
|
24 |
messages: conversations
|
25 |
model_name_or_path: Qwen/Qwen2.5-7B-Instruct
|
26 |
-
num_train_epochs: 5.0
|
27 |
-
output_dir: /
|
28 |
-
overwrite_cache:
|
29 |
-
per_device_train_batch_size: 1
|
30 |
-
plot_loss:
|
31 |
-
preprocessing_num_workers:
|
32 |
-
push_to_db:
|
33 |
-
push_to_hub:
|
34 |
report_to: wandb
|
35 |
role_tag: from
|
36 |
run_name: a1_math_open2math
|
@@ -38,4 +36,4 @@ save_strategy: epoch
|
|
38 |
stage: sft
|
39 |
template: qwen25
|
40 |
user_tag: human
|
41 |
-
warmup_ratio: 0.1
|
|
|
1 |
assistant_tag: gpt
|
2 |
+
bf16: 'True'
|
3 |
content_tag: value
|
4 |
+
cutoff_len: '16384'
|
5 |
+
dataloader_num_workers: '4'
|
6 |
+
dataloader_persistent_workers: 'True'
|
7 |
+
dataloader_pin_memory: 'True'
|
8 |
dataset: mlfoundations-dev/a1_math_open2math
|
9 |
dataset_dir: ONLINE
|
10 |
+
ddp_timeout: '180000000'
|
11 |
+
deepspeed: /opt/ml/code/zero3.json
|
12 |
+
do_train: 'True'
|
13 |
+
enable_liger_kernel: 'True'
|
|
|
14 |
finetuning_type: full
|
15 |
formatting: sharegpt
|
16 |
+
global_batch_size: '128'
|
17 |
+
gradient_accumulation_steps: '16'
|
18 |
hub_model_id: mlfoundations-dev/a1_math_open2math
|
19 |
+
learning_rate: 4e-05
|
20 |
+
logging_steps: '1'
|
|
|
21 |
lr_scheduler_type: cosine
|
22 |
messages: conversations
|
23 |
model_name_or_path: Qwen/Qwen2.5-7B-Instruct
|
24 |
+
num_train_epochs: '5.0'
|
25 |
+
output_dir: /opt/ml/model
|
26 |
+
overwrite_cache: 'True'
|
27 |
+
per_device_train_batch_size: '1'
|
28 |
+
plot_loss: 'True'
|
29 |
+
preprocessing_num_workers: '16'
|
30 |
+
push_to_db: 'True'
|
31 |
+
push_to_hub: 'True'
|
32 |
report_to: wandb
|
33 |
role_tag: from
|
34 |
run_name: a1_math_open2math
|
|
|
36 |
stage: sft
|
37 |
template: qwen25
|
38 |
user_tag: human
|
39 |
+
warmup_ratio: '0.1'
|