nRuaif commited on
Commit
614fdf5
·
1 Parent(s): 2ec64ca

Upload folder using huggingface_hub

Browse files
Untitled.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 4,
6
  "id": "127668ee-44f0-4438-9337-e7c4a486aea3",
7
  "metadata": {},
8
  "outputs": [
@@ -16,12 +16,12 @@
16
  {
17
  "data": {
18
  "application/vnd.jupyter.widget-view+json": {
19
- "model_id": "2529d1e8ef8c41e7acffc3b4b361e2d1",
20
  "version_major": 2,
21
  "version_minor": 0
22
  },
23
  "text/plain": [
24
- "Upload 5 LFS files: 0%| | 0/5 [00:00<?, ?it/s]"
25
  ]
26
  },
27
  "metadata": {},
@@ -30,7 +30,7 @@
30
  {
31
  "data": {
32
  "application/vnd.jupyter.widget-view+json": {
33
- "model_id": "829fa95283aa4168978a8e583d10f286",
34
  "version_major": 2,
35
  "version_minor": 0
36
  },
@@ -44,7 +44,7 @@
44
  {
45
  "data": {
46
  "application/vnd.jupyter.widget-view+json": {
47
- "model_id": "3c2f0437b42a4fa997c9427f891609a6",
48
  "version_major": 2,
49
  "version_minor": 0
50
  },
@@ -58,12 +58,12 @@
58
  {
59
  "data": {
60
  "application/vnd.jupyter.widget-view+json": {
61
- "model_id": "4ecfcef382ee44a1acc18a868e3dbef3",
62
  "version_major": 2,
63
  "version_minor": 0
64
  },
65
  "text/plain": [
66
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
67
  ]
68
  },
69
  "metadata": {},
@@ -72,12 +72,12 @@
72
  {
73
  "data": {
74
  "application/vnd.jupyter.widget-view+json": {
75
- "model_id": "818bf55fe9124089b9378164fcb70319",
76
  "version_major": 2,
77
  "version_minor": 0
78
  },
79
  "text/plain": [
80
- "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
81
  ]
82
  },
83
  "metadata": {},
@@ -86,7 +86,7 @@
86
  {
87
  "data": {
88
  "application/vnd.jupyter.widget-view+json": {
89
- "model_id": "a067333d049c449b9b1a31a7eba92b40",
90
  "version_major": 2,
91
  "version_minor": 0
92
  },
@@ -103,7 +103,7 @@
103
  "'https://huggingface.co/nRuaif/Blind-test01/tree/main/'"
104
  ]
105
  },
106
- "execution_count": 4,
107
  "metadata": {},
108
  "output_type": "execute_result"
109
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 5,
6
  "id": "127668ee-44f0-4438-9337-e7c4a486aea3",
7
  "metadata": {},
8
  "outputs": [
 
16
  {
17
  "data": {
18
  "application/vnd.jupyter.widget-view+json": {
19
+ "model_id": "a5456eb0827a47208f743d419ca6357e",
20
  "version_major": 2,
21
  "version_minor": 0
22
  },
23
  "text/plain": [
24
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
25
  ]
26
  },
27
  "metadata": {},
 
30
  {
31
  "data": {
32
  "application/vnd.jupyter.widget-view+json": {
33
+ "model_id": "0512e81c2a754626b56f6f22dbd177b1",
34
  "version_major": 2,
35
  "version_minor": 0
36
  },
 
44
  {
45
  "data": {
46
  "application/vnd.jupyter.widget-view+json": {
47
+ "model_id": "7a8e119cbaf6458e98e80c346c708f84",
48
  "version_major": 2,
49
  "version_minor": 0
50
  },
 
58
  {
59
  "data": {
60
  "application/vnd.jupyter.widget-view+json": {
61
+ "model_id": "3d700a1d569b449983ea1ddb3ae1cc04",
62
  "version_major": 2,
63
  "version_minor": 0
64
  },
65
  "text/plain": [
66
+ "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
67
  ]
68
  },
69
  "metadata": {},
 
72
  {
73
  "data": {
74
  "application/vnd.jupyter.widget-view+json": {
75
+ "model_id": "130538451397407c9cab9699b33c5a62",
76
  "version_major": 2,
77
  "version_minor": 0
78
  },
79
  "text/plain": [
80
+ "Upload 5 LFS files: 0%| | 0/5 [00:00<?, ?it/s]"
81
  ]
82
  },
83
  "metadata": {},
 
86
  {
87
  "data": {
88
  "application/vnd.jupyter.widget-view+json": {
89
+ "model_id": "732072b5d2c443a6a7d306ed25621586",
90
  "version_major": 2,
91
  "version_minor": 0
92
  },
 
103
  "'https://huggingface.co/nRuaif/Blind-test01/tree/main/'"
104
  ]
105
  },
106
+ "execution_count": 5,
107
  "metadata": {},
108
  "output_type": "execute_result"
109
  }
checkpoint-100/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: bfloat16
18
+ ### Framework versions
19
+
20
+
21
+ - PEFT 0.6.0.dev0
checkpoint-100/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": null,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj",
19
+ "up_proj",
20
+ "gate_proj",
21
+ "o_proj",
22
+ "k_proj",
23
+ "down_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
checkpoint-100/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ca1158380ea99796227216d7452a932238214cf50ef44cd31b5a292d9984063
3
+ size 62788109
checkpoint-100/adapter_model/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: bfloat16
18
+ ### Framework versions
19
+
20
+
21
+ - PEFT 0.6.0.dev0
checkpoint-100/adapter_model/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": null,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj",
19
+ "up_proj",
20
+ "gate_proj",
21
+ "o_proj",
22
+ "k_proj",
23
+ "down_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
checkpoint-100/adapter_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ca1158380ea99796227216d7452a932238214cf50ef44cd31b5a292d9984063
3
+ size 62788109
checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b4e74a79fd8d951361913a9772c17120c9a8293dee4b77a54fde377d3e5133c
3
+ size 250681597
checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d06187c9399b35d9c4214283dc0b9fc38078ee2466c57de37fea6b9192e43a
3
+ size 14575
checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d041a7e96c17819841a9a0c03258454e444965f215930b8240eeecb6ee97e88
3
+ size 627
checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,699 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.01399302482605,
3
+ "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-100",
4
+ "epoch": 2.5848142164781907,
5
+ "eval_steps": 10,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 1e-05,
14
+ "loss": 1.5707,
15
+ "step": 1
16
+ },
17
+ {
18
+ "epoch": 0.05,
19
+ "learning_rate": 2e-05,
20
+ "loss": 1.5621,
21
+ "step": 2
22
+ },
23
+ {
24
+ "epoch": 0.08,
25
+ "learning_rate": 3e-05,
26
+ "loss": 1.4812,
27
+ "step": 3
28
+ },
29
+ {
30
+ "epoch": 0.1,
31
+ "learning_rate": 4e-05,
32
+ "loss": 1.5197,
33
+ "step": 4
34
+ },
35
+ {
36
+ "epoch": 0.13,
37
+ "learning_rate": 5e-05,
38
+ "loss": 1.5567,
39
+ "step": 5
40
+ },
41
+ {
42
+ "epoch": 0.16,
43
+ "learning_rate": 5e-05,
44
+ "loss": 1.4645,
45
+ "step": 6
46
+ },
47
+ {
48
+ "epoch": 0.18,
49
+ "learning_rate": 5e-05,
50
+ "loss": 1.6122,
51
+ "step": 7
52
+ },
53
+ {
54
+ "epoch": 0.21,
55
+ "learning_rate": 5e-05,
56
+ "loss": 1.5596,
57
+ "step": 8
58
+ },
59
+ {
60
+ "epoch": 0.23,
61
+ "learning_rate": 5e-05,
62
+ "loss": 1.5608,
63
+ "step": 9
64
+ },
65
+ {
66
+ "epoch": 0.26,
67
+ "learning_rate": 5e-05,
68
+ "loss": 1.5456,
69
+ "step": 10
70
+ },
71
+ {
72
+ "epoch": 0.26,
73
+ "eval_loss": 2.098437547683716,
74
+ "eval_runtime": 119.6161,
75
+ "eval_samples_per_second": 1.555,
76
+ "eval_steps_per_second": 0.777,
77
+ "step": 10
78
+ },
79
+ {
80
+ "epoch": 0.28,
81
+ "learning_rate": 5e-05,
82
+ "loss": 1.5645,
83
+ "step": 11
84
+ },
85
+ {
86
+ "epoch": 0.31,
87
+ "learning_rate": 5e-05,
88
+ "loss": 1.538,
89
+ "step": 12
90
+ },
91
+ {
92
+ "epoch": 0.34,
93
+ "learning_rate": 5e-05,
94
+ "loss": 1.6388,
95
+ "step": 13
96
+ },
97
+ {
98
+ "epoch": 0.36,
99
+ "learning_rate": 5e-05,
100
+ "loss": 1.4943,
101
+ "step": 14
102
+ },
103
+ {
104
+ "epoch": 0.39,
105
+ "learning_rate": 5e-05,
106
+ "loss": 1.5469,
107
+ "step": 15
108
+ },
109
+ {
110
+ "epoch": 0.41,
111
+ "learning_rate": 5e-05,
112
+ "loss": 1.6149,
113
+ "step": 16
114
+ },
115
+ {
116
+ "epoch": 0.44,
117
+ "learning_rate": 5e-05,
118
+ "loss": 1.5345,
119
+ "step": 17
120
+ },
121
+ {
122
+ "epoch": 0.47,
123
+ "learning_rate": 5e-05,
124
+ "loss": 1.4903,
125
+ "step": 18
126
+ },
127
+ {
128
+ "epoch": 0.49,
129
+ "learning_rate": 5e-05,
130
+ "loss": 1.5499,
131
+ "step": 19
132
+ },
133
+ {
134
+ "epoch": 0.52,
135
+ "learning_rate": 5e-05,
136
+ "loss": 1.5934,
137
+ "step": 20
138
+ },
139
+ {
140
+ "epoch": 0.52,
141
+ "eval_loss": 2.066981077194214,
142
+ "eval_runtime": 119.5781,
143
+ "eval_samples_per_second": 1.555,
144
+ "eval_steps_per_second": 0.778,
145
+ "step": 20
146
+ },
147
+ {
148
+ "epoch": 0.54,
149
+ "learning_rate": 5e-05,
150
+ "loss": 1.4554,
151
+ "step": 21
152
+ },
153
+ {
154
+ "epoch": 0.57,
155
+ "learning_rate": 5e-05,
156
+ "loss": 1.5512,
157
+ "step": 22
158
+ },
159
+ {
160
+ "epoch": 0.59,
161
+ "learning_rate": 5e-05,
162
+ "loss": 1.4636,
163
+ "step": 23
164
+ },
165
+ {
166
+ "epoch": 0.62,
167
+ "learning_rate": 5e-05,
168
+ "loss": 1.5398,
169
+ "step": 24
170
+ },
171
+ {
172
+ "epoch": 0.65,
173
+ "learning_rate": 5e-05,
174
+ "loss": 1.5623,
175
+ "step": 25
176
+ },
177
+ {
178
+ "epoch": 0.67,
179
+ "learning_rate": 5e-05,
180
+ "loss": 1.4658,
181
+ "step": 26
182
+ },
183
+ {
184
+ "epoch": 0.7,
185
+ "learning_rate": 5e-05,
186
+ "loss": 1.4723,
187
+ "step": 27
188
+ },
189
+ {
190
+ "epoch": 0.72,
191
+ "learning_rate": 5e-05,
192
+ "loss": 1.432,
193
+ "step": 28
194
+ },
195
+ {
196
+ "epoch": 0.75,
197
+ "learning_rate": 5e-05,
198
+ "loss": 1.4814,
199
+ "step": 29
200
+ },
201
+ {
202
+ "epoch": 0.78,
203
+ "learning_rate": 5e-05,
204
+ "loss": 1.4924,
205
+ "step": 30
206
+ },
207
+ {
208
+ "epoch": 0.78,
209
+ "eval_loss": 2.046339750289917,
210
+ "eval_runtime": 119.5771,
211
+ "eval_samples_per_second": 1.555,
212
+ "eval_steps_per_second": 0.778,
213
+ "step": 30
214
+ },
215
+ {
216
+ "epoch": 0.8,
217
+ "learning_rate": 5e-05,
218
+ "loss": 1.5809,
219
+ "step": 31
220
+ },
221
+ {
222
+ "epoch": 0.83,
223
+ "learning_rate": 5e-05,
224
+ "loss": 1.4803,
225
+ "step": 32
226
+ },
227
+ {
228
+ "epoch": 0.85,
229
+ "learning_rate": 5e-05,
230
+ "loss": 1.4878,
231
+ "step": 33
232
+ },
233
+ {
234
+ "epoch": 0.88,
235
+ "learning_rate": 5e-05,
236
+ "loss": 1.3871,
237
+ "step": 34
238
+ },
239
+ {
240
+ "epoch": 0.9,
241
+ "learning_rate": 5e-05,
242
+ "loss": 1.5151,
243
+ "step": 35
244
+ },
245
+ {
246
+ "epoch": 0.93,
247
+ "learning_rate": 5e-05,
248
+ "loss": 1.4212,
249
+ "step": 36
250
+ },
251
+ {
252
+ "epoch": 0.96,
253
+ "learning_rate": 5e-05,
254
+ "loss": 1.6284,
255
+ "step": 37
256
+ },
257
+ {
258
+ "epoch": 0.98,
259
+ "learning_rate": 5e-05,
260
+ "loss": 1.5002,
261
+ "step": 38
262
+ },
263
+ {
264
+ "epoch": 1.01,
265
+ "learning_rate": 5e-05,
266
+ "loss": 1.4452,
267
+ "step": 39
268
+ },
269
+ {
270
+ "epoch": 1.03,
271
+ "learning_rate": 5e-05,
272
+ "loss": 1.4399,
273
+ "step": 40
274
+ },
275
+ {
276
+ "epoch": 1.03,
277
+ "eval_loss": 2.0354697704315186,
278
+ "eval_runtime": 119.5875,
279
+ "eval_samples_per_second": 1.555,
280
+ "eval_steps_per_second": 0.778,
281
+ "step": 40
282
+ },
283
+ {
284
+ "epoch": 1.06,
285
+ "learning_rate": 5e-05,
286
+ "loss": 1.465,
287
+ "step": 41
288
+ },
289
+ {
290
+ "epoch": 1.09,
291
+ "learning_rate": 5e-05,
292
+ "loss": 1.4199,
293
+ "step": 42
294
+ },
295
+ {
296
+ "epoch": 1.11,
297
+ "learning_rate": 5e-05,
298
+ "loss": 1.5403,
299
+ "step": 43
300
+ },
301
+ {
302
+ "epoch": 1.14,
303
+ "learning_rate": 5e-05,
304
+ "loss": 1.4499,
305
+ "step": 44
306
+ },
307
+ {
308
+ "epoch": 1.16,
309
+ "learning_rate": 5e-05,
310
+ "loss": 1.5751,
311
+ "step": 45
312
+ },
313
+ {
314
+ "epoch": 1.19,
315
+ "learning_rate": 5e-05,
316
+ "loss": 1.4809,
317
+ "step": 46
318
+ },
319
+ {
320
+ "epoch": 1.21,
321
+ "learning_rate": 5e-05,
322
+ "loss": 1.5022,
323
+ "step": 47
324
+ },
325
+ {
326
+ "epoch": 1.24,
327
+ "learning_rate": 5e-05,
328
+ "loss": 1.4663,
329
+ "step": 48
330
+ },
331
+ {
332
+ "epoch": 1.27,
333
+ "learning_rate": 5e-05,
334
+ "loss": 1.4435,
335
+ "step": 49
336
+ },
337
+ {
338
+ "epoch": 1.29,
339
+ "learning_rate": 5e-05,
340
+ "loss": 1.4246,
341
+ "step": 50
342
+ },
343
+ {
344
+ "epoch": 1.29,
345
+ "eval_loss": 2.0276732444763184,
346
+ "eval_runtime": 119.5811,
347
+ "eval_samples_per_second": 1.555,
348
+ "eval_steps_per_second": 0.778,
349
+ "step": 50
350
+ },
351
+ {
352
+ "epoch": 1.32,
353
+ "learning_rate": 5e-05,
354
+ "loss": 1.4877,
355
+ "step": 51
356
+ },
357
+ {
358
+ "epoch": 1.34,
359
+ "learning_rate": 5e-05,
360
+ "loss": 1.4066,
361
+ "step": 52
362
+ },
363
+ {
364
+ "epoch": 1.37,
365
+ "learning_rate": 5e-05,
366
+ "loss": 1.3559,
367
+ "step": 53
368
+ },
369
+ {
370
+ "epoch": 1.4,
371
+ "learning_rate": 5e-05,
372
+ "loss": 1.5591,
373
+ "step": 54
374
+ },
375
+ {
376
+ "epoch": 1.42,
377
+ "learning_rate": 5e-05,
378
+ "loss": 1.4942,
379
+ "step": 55
380
+ },
381
+ {
382
+ "epoch": 1.45,
383
+ "learning_rate": 5e-05,
384
+ "loss": 1.4685,
385
+ "step": 56
386
+ },
387
+ {
388
+ "epoch": 1.47,
389
+ "learning_rate": 5e-05,
390
+ "loss": 1.4165,
391
+ "step": 57
392
+ },
393
+ {
394
+ "epoch": 1.5,
395
+ "learning_rate": 5e-05,
396
+ "loss": 1.3995,
397
+ "step": 58
398
+ },
399
+ {
400
+ "epoch": 1.53,
401
+ "learning_rate": 5e-05,
402
+ "loss": 1.3931,
403
+ "step": 59
404
+ },
405
+ {
406
+ "epoch": 1.55,
407
+ "learning_rate": 5e-05,
408
+ "loss": 1.4234,
409
+ "step": 60
410
+ },
411
+ {
412
+ "epoch": 1.55,
413
+ "eval_loss": 2.0228564739227295,
414
+ "eval_runtime": 119.5715,
415
+ "eval_samples_per_second": 1.556,
416
+ "eval_steps_per_second": 0.778,
417
+ "step": 60
418
+ },
419
+ {
420
+ "epoch": 1.58,
421
+ "learning_rate": 5e-05,
422
+ "loss": 1.4732,
423
+ "step": 61
424
+ },
425
+ {
426
+ "epoch": 1.6,
427
+ "learning_rate": 5e-05,
428
+ "loss": 1.4349,
429
+ "step": 62
430
+ },
431
+ {
432
+ "epoch": 1.63,
433
+ "learning_rate": 5e-05,
434
+ "loss": 1.4548,
435
+ "step": 63
436
+ },
437
+ {
438
+ "epoch": 1.65,
439
+ "learning_rate": 5e-05,
440
+ "loss": 1.48,
441
+ "step": 64
442
+ },
443
+ {
444
+ "epoch": 1.68,
445
+ "learning_rate": 5e-05,
446
+ "loss": 1.3789,
447
+ "step": 65
448
+ },
449
+ {
450
+ "epoch": 1.71,
451
+ "learning_rate": 5e-05,
452
+ "loss": 1.3915,
453
+ "step": 66
454
+ },
455
+ {
456
+ "epoch": 1.73,
457
+ "learning_rate": 5e-05,
458
+ "loss": 1.3789,
459
+ "step": 67
460
+ },
461
+ {
462
+ "epoch": 1.76,
463
+ "learning_rate": 5e-05,
464
+ "loss": 1.5206,
465
+ "step": 68
466
+ },
467
+ {
468
+ "epoch": 1.78,
469
+ "learning_rate": 5e-05,
470
+ "loss": 1.4851,
471
+ "step": 69
472
+ },
473
+ {
474
+ "epoch": 1.81,
475
+ "learning_rate": 5e-05,
476
+ "loss": 1.5251,
477
+ "step": 70
478
+ },
479
+ {
480
+ "epoch": 1.81,
481
+ "eval_loss": 2.0199856758117676,
482
+ "eval_runtime": 119.5994,
483
+ "eval_samples_per_second": 1.555,
484
+ "eval_steps_per_second": 0.778,
485
+ "step": 70
486
+ },
487
+ {
488
+ "epoch": 1.84,
489
+ "learning_rate": 5e-05,
490
+ "loss": 1.4152,
491
+ "step": 71
492
+ },
493
+ {
494
+ "epoch": 1.86,
495
+ "learning_rate": 5e-05,
496
+ "loss": 1.4262,
497
+ "step": 72
498
+ },
499
+ {
500
+ "epoch": 1.89,
501
+ "learning_rate": 5e-05,
502
+ "loss": 1.5563,
503
+ "step": 73
504
+ },
505
+ {
506
+ "epoch": 1.91,
507
+ "learning_rate": 5e-05,
508
+ "loss": 1.42,
509
+ "step": 74
510
+ },
511
+ {
512
+ "epoch": 1.94,
513
+ "learning_rate": 5e-05,
514
+ "loss": 1.4407,
515
+ "step": 75
516
+ },
517
+ {
518
+ "epoch": 1.96,
519
+ "learning_rate": 5e-05,
520
+ "loss": 1.3953,
521
+ "step": 76
522
+ },
523
+ {
524
+ "epoch": 1.99,
525
+ "learning_rate": 5e-05,
526
+ "loss": 1.4186,
527
+ "step": 77
528
+ },
529
+ {
530
+ "epoch": 2.02,
531
+ "learning_rate": 5e-05,
532
+ "loss": 1.3863,
533
+ "step": 78
534
+ },
535
+ {
536
+ "epoch": 2.04,
537
+ "learning_rate": 5e-05,
538
+ "loss": 1.3907,
539
+ "step": 79
540
+ },
541
+ {
542
+ "epoch": 2.07,
543
+ "learning_rate": 5e-05,
544
+ "loss": 1.5147,
545
+ "step": 80
546
+ },
547
+ {
548
+ "epoch": 2.07,
549
+ "eval_loss": 2.0174262523651123,
550
+ "eval_runtime": 119.6245,
551
+ "eval_samples_per_second": 1.555,
552
+ "eval_steps_per_second": 0.777,
553
+ "step": 80
554
+ },
555
+ {
556
+ "epoch": 2.09,
557
+ "learning_rate": 5e-05,
558
+ "loss": 1.3683,
559
+ "step": 81
560
+ },
561
+ {
562
+ "epoch": 2.12,
563
+ "learning_rate": 5e-05,
564
+ "loss": 1.4289,
565
+ "step": 82
566
+ },
567
+ {
568
+ "epoch": 2.15,
569
+ "learning_rate": 5e-05,
570
+ "loss": 1.4033,
571
+ "step": 83
572
+ },
573
+ {
574
+ "epoch": 2.17,
575
+ "learning_rate": 5e-05,
576
+ "loss": 1.384,
577
+ "step": 84
578
+ },
579
+ {
580
+ "epoch": 2.2,
581
+ "learning_rate": 5e-05,
582
+ "loss": 1.3379,
583
+ "step": 85
584
+ },
585
+ {
586
+ "epoch": 2.22,
587
+ "learning_rate": 5e-05,
588
+ "loss": 1.3916,
589
+ "step": 86
590
+ },
591
+ {
592
+ "epoch": 2.25,
593
+ "learning_rate": 5e-05,
594
+ "loss": 1.5267,
595
+ "step": 87
596
+ },
597
+ {
598
+ "epoch": 2.27,
599
+ "learning_rate": 5e-05,
600
+ "loss": 1.4465,
601
+ "step": 88
602
+ },
603
+ {
604
+ "epoch": 2.3,
605
+ "learning_rate": 5e-05,
606
+ "loss": 1.4349,
607
+ "step": 89
608
+ },
609
+ {
610
+ "epoch": 2.33,
611
+ "learning_rate": 5e-05,
612
+ "loss": 1.4927,
613
+ "step": 90
614
+ },
615
+ {
616
+ "epoch": 2.33,
617
+ "eval_loss": 2.0155017375946045,
618
+ "eval_runtime": 119.6021,
619
+ "eval_samples_per_second": 1.555,
620
+ "eval_steps_per_second": 0.778,
621
+ "step": 90
622
+ },
623
+ {
624
+ "epoch": 2.35,
625
+ "learning_rate": 5e-05,
626
+ "loss": 1.3299,
627
+ "step": 91
628
+ },
629
+ {
630
+ "epoch": 2.38,
631
+ "learning_rate": 5e-05,
632
+ "loss": 1.5347,
633
+ "step": 92
634
+ },
635
+ {
636
+ "epoch": 2.4,
637
+ "learning_rate": 5e-05,
638
+ "loss": 1.4727,
639
+ "step": 93
640
+ },
641
+ {
642
+ "epoch": 2.43,
643
+ "learning_rate": 5e-05,
644
+ "loss": 1.4209,
645
+ "step": 94
646
+ },
647
+ {
648
+ "epoch": 2.46,
649
+ "learning_rate": 5e-05,
650
+ "loss": 1.42,
651
+ "step": 95
652
+ },
653
+ {
654
+ "epoch": 2.48,
655
+ "learning_rate": 5e-05,
656
+ "loss": 1.3787,
657
+ "step": 96
658
+ },
659
+ {
660
+ "epoch": 2.51,
661
+ "learning_rate": 5e-05,
662
+ "loss": 1.4292,
663
+ "step": 97
664
+ },
665
+ {
666
+ "epoch": 2.53,
667
+ "learning_rate": 5e-05,
668
+ "loss": 1.4144,
669
+ "step": 98
670
+ },
671
+ {
672
+ "epoch": 2.56,
673
+ "learning_rate": 5e-05,
674
+ "loss": 1.3595,
675
+ "step": 99
676
+ },
677
+ {
678
+ "epoch": 2.58,
679
+ "learning_rate": 5e-05,
680
+ "loss": 1.3445,
681
+ "step": 100
682
+ },
683
+ {
684
+ "epoch": 2.58,
685
+ "eval_loss": 2.01399302482605,
686
+ "eval_runtime": 119.5983,
687
+ "eval_samples_per_second": 1.555,
688
+ "eval_steps_per_second": 0.778,
689
+ "step": 100
690
+ }
691
+ ],
692
+ "logging_steps": 1,
693
+ "max_steps": 114,
694
+ "num_train_epochs": 3,
695
+ "save_steps": 10,
696
+ "total_flos": 4.5795234960900096e+17,
697
+ "trial_name": null,
698
+ "trial_params": null
699
+ }
checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1
3
+ size 4219