Nadav commited on
Commit
df68b64
·
1 Parent(s): bfe5855

Training in progress, step 5000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07f4c191169bdefbf903ed26706aa4d7421568132260ed3c336a7bb1e9b346b7
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4d1ae83eac6be8f8c2b954c370c8aeadc7267bda680236a7a24a7649af5362c
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ec70d2781e97f4d329ba4c23b171ff2d9e095d57e1d6f397b53ab37773b559f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0baba8b4569c45a203ea379a2bd3967765a89b7b77984726e0ce3ce542327d3
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89bfe126fed4c4ad4dd5e48270f02e29322c35de34e86277f87fea6ea6285c49
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b67640377e5e9bee3b80f7da077f845affe0a102a4d4e09d30cfa4a924a22956
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dba64a6270ae61fb92642c233ac6acc0599e93af9e8dc843c1040d701d07852c
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a01f1f0436989f1373566c8148c19011dd0ac4c035b4f3ba4538506b2eac4732
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f84d441103b80fa19700bab6a191373aa187c4bff502da3579fb8fa36762b6ca
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,57 +1,116 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00810142990237777,
5
- "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.0,
12
- "eval_runtime": 4.2859,
13
- "eval_samples_per_second": 11.666,
14
- "eval_steps_per_second": 0.467,
15
- "step": 100
16
  },
17
  {
18
- "epoch": 0.0,
19
- "eval_runtime": 6.7254,
20
- "eval_samples_per_second": 7.435,
21
- "eval_steps_per_second": 0.297,
22
- "step": 200
23
  },
24
  {
25
- "epoch": 0.0,
26
- "eval_runtime": 4.4562,
27
- "eval_samples_per_second": 11.22,
28
- "eval_steps_per_second": 0.449,
29
- "step": 300
 
30
  },
31
  {
32
- "epoch": 0.01,
33
- "eval_runtime": 4.3116,
34
- "eval_samples_per_second": 11.597,
35
- "eval_steps_per_second": 0.464,
36
- "step": 400
37
  },
38
  {
39
- "epoch": 0.01,
40
- "learning_rate": 1e-05,
41
- "loss": 0.535,
42
- "step": 500
43
  },
44
  {
45
- "epoch": 0.01,
46
- "eval_runtime": 4.3937,
47
- "eval_samples_per_second": 11.38,
48
- "eval_steps_per_second": 0.455,
49
- "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "max_steps": 1000000,
53
- "num_train_epochs": 17,
54
- "total_flos": 4.38126636957696e+18,
55
  "trial_name": null,
56
  "trial_params": null
57
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4253147329023477,
5
+ "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.04,
12
+ "learning_rate": 9.999999999999999e-06,
13
+ "loss": 0.5192,
14
+ "step": 500
 
15
  },
16
  {
17
+ "epoch": 0.09,
18
+ "learning_rate": 9.999999999999999e-06,
19
+ "loss": 0.4978,
20
+ "step": 1000
 
21
  },
22
  {
23
+ "epoch": 0.09,
24
+ "eval_loss": 0.4652232229709625,
25
+ "eval_runtime": 21.3928,
26
+ "eval_samples_per_second": 23.372,
27
+ "eval_steps_per_second": 0.748,
28
+ "step": 1000
29
  },
30
  {
31
+ "epoch": 0.13,
32
+ "learning_rate": 9.999999999999999e-06,
33
+ "loss": 0.4877,
34
+ "step": 1500
 
35
  },
36
  {
37
+ "epoch": 0.17,
38
+ "learning_rate": 9.999999999999999e-06,
39
+ "loss": 0.4811,
40
+ "step": 2000
41
  },
42
  {
43
+ "epoch": 0.17,
44
+ "eval_loss": 0.4523410201072693,
45
+ "eval_runtime": 15.3182,
46
+ "eval_samples_per_second": 32.641,
47
+ "eval_steps_per_second": 1.045,
48
+ "step": 2000
49
+ },
50
+ {
51
+ "epoch": 0.21,
52
+ "learning_rate": 9.999999999999999e-06,
53
+ "loss": 0.4747,
54
+ "step": 2500
55
+ },
56
+ {
57
+ "epoch": 0.26,
58
+ "learning_rate": 9.999999999999999e-06,
59
+ "loss": 0.4714,
60
+ "step": 3000
61
+ },
62
+ {
63
+ "epoch": 0.26,
64
+ "eval_loss": 0.44367074966430664,
65
+ "eval_runtime": 16.026,
66
+ "eval_samples_per_second": 31.199,
67
+ "eval_steps_per_second": 0.998,
68
+ "step": 3000
69
+ },
70
+ {
71
+ "epoch": 0.3,
72
+ "learning_rate": 9.999999999999999e-06,
73
+ "loss": 0.4671,
74
+ "step": 3500
75
+ },
76
+ {
77
+ "epoch": 0.34,
78
+ "learning_rate": 9.999999999999999e-06,
79
+ "loss": 0.4648,
80
+ "step": 4000
81
+ },
82
+ {
83
+ "epoch": 0.34,
84
+ "eval_loss": 0.4375583827495575,
85
+ "eval_runtime": 16.9713,
86
+ "eval_samples_per_second": 29.461,
87
+ "eval_steps_per_second": 0.943,
88
+ "step": 4000
89
+ },
90
+ {
91
+ "epoch": 0.38,
92
+ "learning_rate": 9.999999999999999e-06,
93
+ "loss": 0.4628,
94
+ "step": 4500
95
+ },
96
+ {
97
+ "epoch": 0.43,
98
+ "learning_rate": 9.999999999999999e-06,
99
+ "loss": 0.4611,
100
+ "step": 5000
101
+ },
102
+ {
103
+ "epoch": 0.43,
104
+ "eval_loss": 0.4329264163970947,
105
+ "eval_runtime": 20.3173,
106
+ "eval_samples_per_second": 24.61,
107
+ "eval_steps_per_second": 0.788,
108
+ "step": 5000
109
  }
110
  ],
111
  "max_steps": 1000000,
112
+ "num_train_epochs": 86,
113
+ "total_flos": 2.300164844027904e+20,
114
  "trial_name": null,
115
  "trial_params": null
116
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6270bc289a88f0f959604cd35e12f62592ad82a5a0194af541981581f153ae0
3
  size 5551
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06d7793bf7f922dc898c4ecfc820797566f809efb7c64280000f142e6ced9387
3
  size 5551
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ec70d2781e97f4d329ba4c23b171ff2d9e095d57e1d6f397b53ab37773b559f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0baba8b4569c45a203ea379a2bd3967765a89b7b77984726e0ce3ce542327d3
3
  size 449471589
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6270bc289a88f0f959604cd35e12f62592ad82a5a0194af541981581f153ae0
3
  size 5551
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06d7793bf7f922dc898c4ecfc820797566f809efb7c64280000f142e6ced9387
3
  size 5551