rasdani commited on
Commit
d0524b1
·
verified ·
1 Parent(s): e69a51e

Training in progress, step 12, checkpoint

Browse files
checkpoint-12/adapter_config.json CHANGED
@@ -16,24 +16,24 @@
16
  "layers_pattern": null,
17
  "layers_to_transform": null,
18
  "loftq_config": {},
19
- "lora_alpha": 8,
20
  "lora_bias": false,
21
  "lora_dropout": 0.0,
22
  "megatron_config": null,
23
  "megatron_core": "megatron.core",
24
  "modules_to_save": null,
25
  "peft_type": "LORA",
26
- "r": 8,
27
  "rank_pattern": {},
28
  "revision": null,
29
  "target_modules": [
30
- "down_proj",
 
31
  "o_proj",
32
- "v_proj",
33
  "gate_proj",
34
- "q_proj",
35
- "up_proj",
36
- "k_proj"
37
  ],
38
  "task_type": null,
39
  "trainable_token_indices": null,
 
16
  "layers_pattern": null,
17
  "layers_to_transform": null,
18
  "loftq_config": {},
19
+ "lora_alpha": 16,
20
  "lora_bias": false,
21
  "lora_dropout": 0.0,
22
  "megatron_config": null,
23
  "megatron_core": "megatron.core",
24
  "modules_to_save": null,
25
  "peft_type": "LORA",
26
+ "r": 16,
27
  "rank_pattern": {},
28
  "revision": null,
29
  "target_modules": [
30
+ "up_proj",
31
+ "k_proj",
32
  "o_proj",
 
33
  "gate_proj",
34
+ "v_proj",
35
+ "down_proj",
36
+ "q_proj"
37
  ],
38
  "task_type": null,
39
  "trainable_token_indices": null,
checkpoint-12/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:851f4b0fe956eac509642f70fd14815493e76066844d87f487c1d0ec98575524
3
- size 8841928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b49989a6af1fef377ce007ebbb3081e857efdd9ac5ed32f0572b399f54638528
3
+ size 17640808
checkpoint-12/global_step12/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78a75e2ea5a78d36924f5dad07d8c356999b5449bd6dce79c735da1d4208e705
3
- size 52792816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deb87890f0d8546ed6dd84ca0c41ac09657443cf10153ed506ca3b47dd93d8f1
3
+ size 105582064
checkpoint-12/global_step12/zero_pp_rank_0_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba10126db40fa1da3e0640e6c7a9549056758e44fb6c2aa6c123abc0b8ef91d7
3
  size 332955
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d588e76fb3c7b7eedb51521f9c49f616d9d8ac5c2c9cfc7d8e22297a7c040817
3
  size 332955
checkpoint-12/trainer_state.json CHANGED
@@ -10,13 +10,13 @@
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
13
- "completion_length": 598.6875,
14
  "epoch": 0.0002457304337142155,
15
  "grad_norm": 0.0,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
  "loss": 0.0,
19
- "num_tokens": 12883.0,
20
  "reward": 0.0,
21
  "reward_std": 0.0,
22
  "rewards/reward_func": 0.0,
@@ -24,13 +24,13 @@
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
- "completion_length": 579.75,
28
  "epoch": 0.000491460867428431,
29
- "grad_norm": 0.057158572334860595,
30
  "kl": 0.0,
31
  "learning_rate": 5e-07,
32
- "loss": 0.0991,
33
- "num_tokens": 23183.0,
34
  "reward": 0.125,
35
  "reward_std": 0.2314550280570984,
36
  "rewards/reward_func": 0.125,
@@ -38,13 +38,13 @@
38
  },
39
  {
40
  "clip_ratio": 0.0,
41
- "completion_length": 712.5625,
42
  "epoch": 0.0007371913011426465,
43
- "grad_norm": 9.369531689325602e-07,
44
- "kl": 0.00038623809814453125,
45
  "learning_rate": 5e-07,
46
  "loss": 0.0,
47
- "num_tokens": 36072.0,
48
  "reward": 0.0,
49
  "reward_std": 0.0,
50
  "rewards/reward_func": 0.0,
@@ -52,69 +52,69 @@
52
  },
53
  {
54
  "clip_ratio": 0.0,
55
- "completion_length": 578.1875,
56
  "epoch": 0.000982921734856862,
57
- "grad_norm": 0.1134692235444953,
58
- "kl": 0.00043392181396484375,
59
  "learning_rate": 5e-07,
60
- "loss": -0.0204,
61
- "num_tokens": 46523.0,
62
- "reward": 0.0625,
63
- "reward_std": 0.1767766922712326,
64
- "rewards/reward_func": 0.0625,
65
  "step": 4
66
  },
67
  {
68
  "clip_ratio": 0.0,
69
- "completion_length": 708.0625,
70
  "epoch": 0.0012286521685710775,
71
- "grad_norm": 0.13726250703666534,
72
- "kl": 0.0003414154052734375,
73
  "learning_rate": 5e-07,
74
- "loss": -0.0446,
75
- "num_tokens": 59748.0,
76
- "reward": 0.125,
77
- "reward_std": 0.2314550280570984,
78
- "rewards/reward_func": 0.125,
79
  "step": 5
80
  },
81
  {
82
  "clip_ratio": 0.0,
83
- "completion_length": 351.9375,
84
  "epoch": 0.001474382602285293,
85
- "grad_norm": 0.13704685404012662,
86
- "kl": 0.00045680999755859375,
87
  "learning_rate": 5e-07,
88
- "loss": -0.0854,
89
- "num_tokens": 66971.0,
90
- "reward": 0.125,
91
- "reward_std": 0.2314550280570984,
92
- "rewards/reward_func": 0.125,
93
  "step": 6
94
  },
95
  {
96
  "clip_ratio": 0.0,
97
- "completion_length": 670.375,
98
  "epoch": 0.0017201130359995086,
99
- "grad_norm": 0.05810748919014082,
100
- "kl": 0.000301361083984375,
101
  "learning_rate": 5e-07,
102
- "loss": 0.0394,
103
- "num_tokens": 80449.0,
104
- "reward": 0.125,
105
- "reward_std": 0.2314550280570984,
106
- "rewards/reward_func": 0.125,
107
  "step": 7
108
  },
109
  {
110
  "clip_ratio": 0.0,
111
- "completion_length": 797.375,
112
  "epoch": 0.001965843469713724,
113
- "grad_norm": 0.09117945345657646,
114
- "kl": 0.0003681182861328125,
115
  "learning_rate": 5e-07,
116
- "loss": 0.0619,
117
- "num_tokens": 95399.0,
118
  "reward": 0.25,
119
  "reward_std": 0.4355512708425522,
120
  "rewards/reward_func": 0.25,
@@ -122,41 +122,41 @@
122
  },
123
  {
124
  "clip_ratio": 0.0,
125
- "completion_length": 455.1875,
126
  "epoch": 0.0022115739034279398,
127
- "grad_norm": 0.059279116418829485,
128
- "kl": 0.0003509521484375,
129
  "learning_rate": 5e-07,
130
- "loss": 0.0553,
131
- "num_tokens": 104498.0,
132
- "reward": 0.125,
133
- "reward_std": 0.2314550280570984,
134
- "rewards/reward_func": 0.125,
135
  "step": 9
136
  },
137
  {
138
  "clip_ratio": 0.0,
139
- "completion_length": 569.25,
140
  "epoch": 0.002457304337142155,
141
- "grad_norm": 0.11258647371214518,
142
- "kl": 0.0002951622009277344,
143
  "learning_rate": 5e-07,
144
- "loss": -0.118,
145
- "num_tokens": 114646.0,
146
- "reward": 0.1875,
147
- "reward_std": 0.2587745785713196,
148
- "rewards/reward_func": 0.1875,
149
  "step": 10
150
  },
151
  {
152
  "clip_ratio": 0.0,
153
- "completion_length": 461.0625,
154
  "epoch": 0.0027030347708563705,
155
- "grad_norm": 1.7798072854936817e-06,
156
- "kl": 0.0003662109375,
157
  "learning_rate": 5e-07,
158
  "loss": 0.0,
159
- "num_tokens": 128663.0,
160
  "reward": 0.0,
161
  "reward_std": 0.0,
162
  "rewards/reward_func": 0.0,
@@ -164,16 +164,16 @@
164
  },
165
  {
166
  "clip_ratio": 0.0,
167
- "completion_length": 378.375,
168
  "epoch": 0.002948765204570586,
169
- "grad_norm": 0.09280689232901017,
170
- "kl": 0.0002918243408203125,
171
  "learning_rate": 5e-07,
172
- "loss": -0.1028,
173
- "num_tokens": 135957.0,
174
- "reward": 0.1875,
175
- "reward_std": 0.2587745785713196,
176
- "rewards/reward_func": 0.1875,
177
  "step": 12
178
  }
179
  ],
 
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
13
+ "completion_length": 576.9375,
14
  "epoch": 0.0002457304337142155,
15
  "grad_norm": 0.0,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
  "loss": 0.0,
19
+ "num_tokens": 12535.0,
20
  "reward": 0.0,
21
  "reward_std": 0.0,
22
  "rewards/reward_func": 0.0,
 
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
+ "completion_length": 879.0625,
28
  "epoch": 0.000491460867428431,
29
+ "grad_norm": 0.11151622953359844,
30
  "kl": 0.0,
31
  "learning_rate": 5e-07,
32
+ "loss": 0.0776,
33
+ "num_tokens": 27624.0,
34
  "reward": 0.125,
35
  "reward_std": 0.2314550280570984,
36
  "rewards/reward_func": 0.125,
 
38
  },
39
  {
40
  "clip_ratio": 0.0,
41
+ "completion_length": 923.75,
42
  "epoch": 0.0007371913011426465,
43
+ "grad_norm": 1.7342617962764825e-06,
44
+ "kl": 0.0003814697265625,
45
  "learning_rate": 5e-07,
46
  "loss": 0.0,
47
+ "num_tokens": 43892.0,
48
  "reward": 0.0,
49
  "reward_std": 0.0,
50
  "rewards/reward_func": 0.0,
 
52
  },
53
  {
54
  "clip_ratio": 0.0,
55
+ "completion_length": 682.9375,
56
  "epoch": 0.000982921734856862,
57
+ "grad_norm": 0.1605198642986145,
58
+ "kl": 0.00042629241943359375,
59
  "learning_rate": 5e-07,
60
+ "loss": -0.1736,
61
+ "num_tokens": 56019.0,
62
+ "reward": 0.1875,
63
+ "reward_std": 0.408231720328331,
64
+ "rewards/reward_func": 0.1875,
65
  "step": 4
66
  },
67
  {
68
  "clip_ratio": 0.0,
69
+ "completion_length": 729.5,
70
  "epoch": 0.0012286521685710775,
71
+ "grad_norm": 0.15191784640809727,
72
+ "kl": 0.00035190582275390625,
73
  "learning_rate": 5e-07,
74
+ "loss": 0.1212,
75
+ "num_tokens": 69587.0,
76
+ "reward": 0.0625,
77
+ "reward_std": 0.1767766922712326,
78
+ "rewards/reward_func": 0.0625,
79
  "step": 5
80
  },
81
  {
82
  "clip_ratio": 0.0,
83
+ "completion_length": 680.3125,
84
  "epoch": 0.001474382602285293,
85
+ "grad_norm": 0.00010216089929885942,
86
+ "kl": 0.0011739730834960938,
87
  "learning_rate": 5e-07,
88
+ "loss": 0.0,
89
+ "num_tokens": 82064.0,
90
+ "reward": 0.0,
91
+ "reward_std": 0.0,
92
+ "rewards/reward_func": 0.0,
93
  "step": 6
94
  },
95
  {
96
  "clip_ratio": 0.0,
97
+ "completion_length": 723.1875,
98
  "epoch": 0.0017201130359995086,
99
+ "grad_norm": 0.053916157377856225,
100
+ "kl": 0.00029850006103515625,
101
  "learning_rate": 5e-07,
102
+ "loss": 0.0843,
103
+ "num_tokens": 96387.0,
104
+ "reward": 0.0625,
105
+ "reward_std": 0.1767766922712326,
106
+ "rewards/reward_func": 0.0625,
107
  "step": 7
108
  },
109
  {
110
  "clip_ratio": 0.0,
111
+ "completion_length": 646.1875,
112
  "epoch": 0.001965843469713724,
113
+ "grad_norm": 0.16676206881832856,
114
+ "kl": 0.0003566741943359375,
115
  "learning_rate": 5e-07,
116
+ "loss": 0.0941,
117
+ "num_tokens": 108918.0,
118
  "reward": 0.25,
119
  "reward_std": 0.4355512708425522,
120
  "rewards/reward_func": 0.25,
 
122
  },
123
  {
124
  "clip_ratio": 0.0,
125
+ "completion_length": 605.875,
126
  "epoch": 0.0022115739034279398,
127
+ "grad_norm": 1.5988321121918276e-06,
128
+ "kl": 0.0003833770751953125,
129
  "learning_rate": 5e-07,
130
+ "loss": 0.0,
131
+ "num_tokens": 120428.0,
132
+ "reward": 0.0,
133
+ "reward_std": 0.0,
134
+ "rewards/reward_func": 0.0,
135
  "step": 9
136
  },
137
  {
138
  "clip_ratio": 0.0,
139
+ "completion_length": 551.4375,
140
  "epoch": 0.002457304337142155,
141
+ "grad_norm": 0.16315342243657902,
142
+ "kl": 0.00034236907958984375,
143
  "learning_rate": 5e-07,
144
+ "loss": -0.0335,
145
+ "num_tokens": 130291.0,
146
+ "reward": 0.0625,
147
+ "reward_std": 0.1767766922712326,
148
+ "rewards/reward_func": 0.0625,
149
  "step": 10
150
  },
151
  {
152
  "clip_ratio": 0.0,
153
+ "completion_length": 757.3125,
154
  "epoch": 0.0027030347708563705,
155
+ "grad_norm": 1.5633163558139792e-06,
156
+ "kl": 0.00029850006103515625,
157
  "learning_rate": 5e-07,
158
  "loss": 0.0,
159
+ "num_tokens": 149048.0,
160
  "reward": 0.0,
161
  "reward_std": 0.0,
162
  "rewards/reward_func": 0.0,
 
164
  },
165
  {
166
  "clip_ratio": 0.0,
167
+ "completion_length": 766.375,
168
  "epoch": 0.002948765204570586,
169
+ "grad_norm": 0.08173214068073498,
170
+ "kl": 0.00022268295288085938,
171
  "learning_rate": 5e-07,
172
+ "loss": 0.1401,
173
+ "num_tokens": 162550.0,
174
+ "reward": 0.125,
175
+ "reward_std": 0.2314550280570984,
176
+ "rewards/reward_func": 0.125,
177
  "step": 12
178
  }
179
  ],
checkpoint-12/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fff5d866f3f58eded13df4e5d0f17d86b3f86cb9253959e5d718e586d0617838
3
  size 7544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af3974b5d650bad10616ac85d5be43a43be3a632e9e6ca9838e55c33bd2fac4e
3
  size 7544