sedrickkeh commited on
Commit
a203155
·
verified ·
1 Parent(s): 5c5f62b

Training in progress, epoch 0

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6027e4c3ec9912f3d864b5465d03990d7d7a6988d567865bb1fa2f4b7ace0481
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44d21829e555f333697eda1de893ff826a6b716549b982f487052a9b17ab0e0d
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:241e7669b6a1f8d94ce77e5a1cadebb996fb761978f116a9ea67b7483d153a9d
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a94b64b6d06b5adf117c17fe6af07844006a6106db718b77eb6fd149e4e2716f
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a033e21446cde70d8fccd86b106953d787a459d2b8aed05da9f6bfceebbaddf
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4ff9c2db8c4f50f5a519be1f8512cb0f4781b72c048d9525a947d36a65c0002
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d66780024130591baf4b2232ec8bba90dc3b0eee35cb6fc7e85df8b17c0a45ce
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee150fa11b028ac46c6696fd113b6bed1b5961f5529916d102f7e4c56dd6e64a
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -1,93 +1,93 @@
1
- {"current_steps": 1, "total_steps": 276, "loss": 0.8567, "lr": 8e-05, "epoch": 0.010825439783491205, "percentage": 0.36, "elapsed_time": "0:02:31", "remaining_time": "11:33:00"}
2
- {"current_steps": 2, "total_steps": 276, "loss": 2.1698, "lr": 8e-05, "epoch": 0.02165087956698241, "percentage": 0.72, "elapsed_time": "0:04:08", "remaining_time": "9:27:18"}
3
- {"current_steps": 3, "total_steps": 276, "loss": 1.5305, "lr": 8e-05, "epoch": 0.03247631935047361, "percentage": 1.09, "elapsed_time": "0:05:45", "remaining_time": "8:44:26"}
4
- {"current_steps": 4, "total_steps": 276, "loss": 1.7845, "lr": 8e-05, "epoch": 0.04330175913396482, "percentage": 1.45, "elapsed_time": "0:07:22", "remaining_time": "8:21:55"}
5
- {"current_steps": 5, "total_steps": 276, "loss": 1.566, "lr": 8e-05, "epoch": 0.05412719891745602, "percentage": 1.81, "elapsed_time": "0:09:00", "remaining_time": "8:08:00"}
6
- {"current_steps": 6, "total_steps": 276, "loss": 1.246, "lr": 8e-05, "epoch": 0.06495263870094722, "percentage": 2.17, "elapsed_time": "0:10:37", "remaining_time": "7:58:24"}
7
- {"current_steps": 7, "total_steps": 276, "loss": 1.1493, "lr": 8e-05, "epoch": 0.07577807848443843, "percentage": 2.54, "elapsed_time": "0:12:15", "remaining_time": "7:51:14"}
8
- {"current_steps": 8, "total_steps": 276, "loss": 0.9818, "lr": 8e-05, "epoch": 0.08660351826792964, "percentage": 2.9, "elapsed_time": "0:13:52", "remaining_time": "7:45:02"}
9
- {"current_steps": 9, "total_steps": 276, "loss": 0.912, "lr": 8e-05, "epoch": 0.09742895805142084, "percentage": 3.26, "elapsed_time": "0:15:30", "remaining_time": "7:39:55"}
10
- {"current_steps": 10, "total_steps": 276, "loss": 0.8394, "lr": 8e-05, "epoch": 0.10825439783491204, "percentage": 3.62, "elapsed_time": "0:17:07", "remaining_time": "7:35:22"}
11
- {"current_steps": 11, "total_steps": 276, "loss": 0.8007, "lr": 8e-05, "epoch": 0.11907983761840325, "percentage": 3.99, "elapsed_time": "0:18:44", "remaining_time": "7:31:31"}
12
- {"current_steps": 12, "total_steps": 276, "loss": 0.7525, "lr": 8e-05, "epoch": 0.12990527740189445, "percentage": 4.35, "elapsed_time": "0:20:21", "remaining_time": "7:27:56"}
13
- {"current_steps": 13, "total_steps": 276, "loss": 0.7229, "lr": 8e-05, "epoch": 0.14073071718538566, "percentage": 4.71, "elapsed_time": "0:21:59", "remaining_time": "7:24:45"}
14
- {"current_steps": 14, "total_steps": 276, "loss": 0.734, "lr": 8e-05, "epoch": 0.15155615696887687, "percentage": 5.07, "elapsed_time": "0:23:36", "remaining_time": "7:21:46"}
15
- {"current_steps": 15, "total_steps": 276, "loss": 0.6876, "lr": 8e-05, "epoch": 0.16238159675236807, "percentage": 5.43, "elapsed_time": "0:25:13", "remaining_time": "7:18:57"}
16
- {"current_steps": 16, "total_steps": 276, "loss": 0.6733, "lr": 8e-05, "epoch": 0.17320703653585928, "percentage": 5.8, "elapsed_time": "0:26:50", "remaining_time": "7:16:18"}
17
- {"current_steps": 17, "total_steps": 276, "loss": 0.6572, "lr": 8e-05, "epoch": 0.18403247631935046, "percentage": 6.16, "elapsed_time": "0:28:28", "remaining_time": "7:13:46"}
18
- {"current_steps": 18, "total_steps": 276, "loss": 0.6307, "lr": 8e-05, "epoch": 0.19485791610284167, "percentage": 6.52, "elapsed_time": "0:30:06", "remaining_time": "7:11:26"}
19
- {"current_steps": 19, "total_steps": 276, "loss": 0.6293, "lr": 8e-05, "epoch": 0.20568335588633288, "percentage": 6.88, "elapsed_time": "0:31:43", "remaining_time": "7:09:03"}
20
- {"current_steps": 20, "total_steps": 276, "loss": 0.6136, "lr": 8e-05, "epoch": 0.2165087956698241, "percentage": 7.25, "elapsed_time": "0:33:20", "remaining_time": "7:06:42"}
21
- {"current_steps": 21, "total_steps": 276, "loss": 0.6195, "lr": 8e-05, "epoch": 0.2273342354533153, "percentage": 7.61, "elapsed_time": "0:34:57", "remaining_time": "7:04:26"}
22
- {"current_steps": 22, "total_steps": 276, "loss": 0.6091, "lr": 8e-05, "epoch": 0.2381596752368065, "percentage": 7.97, "elapsed_time": "0:36:34", "remaining_time": "7:02:14"}
23
- {"current_steps": 23, "total_steps": 276, "loss": 0.5969, "lr": 8e-05, "epoch": 0.2489851150202977, "percentage": 8.33, "elapsed_time": "0:38:11", "remaining_time": "7:00:09"}
24
- {"current_steps": 24, "total_steps": 276, "loss": 0.5993, "lr": 8e-05, "epoch": 0.2598105548037889, "percentage": 8.7, "elapsed_time": "0:39:49", "remaining_time": "6:58:05"}
25
- {"current_steps": 25, "total_steps": 276, "loss": 0.588, "lr": 8e-05, "epoch": 0.2706359945872801, "percentage": 9.06, "elapsed_time": "0:41:26", "remaining_time": "6:56:02"}
26
- {"current_steps": 26, "total_steps": 276, "loss": 0.5587, "lr": 8e-05, "epoch": 0.2814614343707713, "percentage": 9.42, "elapsed_time": "0:43:03", "remaining_time": "6:53:58"}
27
- {"current_steps": 27, "total_steps": 276, "loss": 0.5655, "lr": 8e-05, "epoch": 0.2922868741542625, "percentage": 9.78, "elapsed_time": "0:44:41", "remaining_time": "6:52:04"}
28
- {"current_steps": 28, "total_steps": 276, "loss": 0.5648, "lr": 8e-05, "epoch": 0.30311231393775373, "percentage": 10.14, "elapsed_time": "0:46:18", "remaining_time": "6:50:07"}
29
- {"current_steps": 29, "total_steps": 276, "loss": 0.5678, "lr": 8e-05, "epoch": 0.31393775372124494, "percentage": 10.51, "elapsed_time": "0:47:55", "remaining_time": "6:48:13"}
30
- {"current_steps": 30, "total_steps": 276, "loss": 0.5592, "lr": 8e-05, "epoch": 0.32476319350473615, "percentage": 10.87, "elapsed_time": "0:49:32", "remaining_time": "6:46:18"}
31
- {"current_steps": 31, "total_steps": 276, "loss": 0.5617, "lr": 8e-05, "epoch": 0.33558863328822736, "percentage": 11.23, "elapsed_time": "0:51:10", "remaining_time": "6:44:28"}
32
- {"current_steps": 32, "total_steps": 276, "loss": 0.5551, "lr": 8e-05, "epoch": 0.34641407307171856, "percentage": 11.59, "elapsed_time": "0:52:48", "remaining_time": "6:42:38"}
33
- {"current_steps": 33, "total_steps": 276, "loss": 0.5482, "lr": 8e-05, "epoch": 0.3572395128552097, "percentage": 11.96, "elapsed_time": "0:54:25", "remaining_time": "6:40:47"}
34
- {"current_steps": 34, "total_steps": 276, "loss": 0.553, "lr": 8e-05, "epoch": 0.3680649526387009, "percentage": 12.32, "elapsed_time": "0:56:02", "remaining_time": "6:38:55"}
35
- {"current_steps": 35, "total_steps": 276, "loss": 0.5331, "lr": 8e-05, "epoch": 0.37889039242219213, "percentage": 12.68, "elapsed_time": "0:57:39", "remaining_time": "6:37:04"}
36
- {"current_steps": 36, "total_steps": 276, "loss": 0.5214, "lr": 8e-05, "epoch": 0.38971583220568334, "percentage": 13.04, "elapsed_time": "0:59:17", "remaining_time": "6:35:13"}
37
- {"current_steps": 37, "total_steps": 276, "loss": 0.5325, "lr": 8e-05, "epoch": 0.40054127198917455, "percentage": 13.41, "elapsed_time": "1:00:54", "remaining_time": "6:33:24"}
38
- {"current_steps": 38, "total_steps": 276, "loss": 0.5209, "lr": 8e-05, "epoch": 0.41136671177266576, "percentage": 13.77, "elapsed_time": "1:02:31", "remaining_time": "6:31:38"}
39
- {"current_steps": 39, "total_steps": 276, "loss": 0.5307, "lr": 8e-05, "epoch": 0.42219215155615697, "percentage": 14.13, "elapsed_time": "1:04:09", "remaining_time": "6:29:51"}
40
- {"current_steps": 40, "total_steps": 276, "loss": 0.5354, "lr": 8e-05, "epoch": 0.4330175913396482, "percentage": 14.49, "elapsed_time": "1:05:46", "remaining_time": "6:28:04"}
41
- {"current_steps": 41, "total_steps": 276, "loss": 0.531, "lr": 8e-05, "epoch": 0.4438430311231394, "percentage": 14.86, "elapsed_time": "1:07:23", "remaining_time": "6:26:18"}
42
- {"current_steps": 42, "total_steps": 276, "loss": 0.5162, "lr": 8e-05, "epoch": 0.4546684709066306, "percentage": 15.22, "elapsed_time": "1:09:01", "remaining_time": "6:24:33"}
43
- {"current_steps": 43, "total_steps": 276, "loss": 0.5166, "lr": 8e-05, "epoch": 0.4654939106901218, "percentage": 15.58, "elapsed_time": "1:10:38", "remaining_time": "6:22:48"}
44
- {"current_steps": 44, "total_steps": 276, "loss": 0.5177, "lr": 8e-05, "epoch": 0.476319350473613, "percentage": 15.94, "elapsed_time": "1:12:15", "remaining_time": "6:21:02"}
45
- {"current_steps": 45, "total_steps": 276, "loss": 0.5167, "lr": 8e-05, "epoch": 0.4871447902571042, "percentage": 16.3, "elapsed_time": "1:13:53", "remaining_time": "6:19:18"}
46
- {"current_steps": 46, "total_steps": 276, "loss": 0.5125, "lr": 8e-05, "epoch": 0.4979702300405954, "percentage": 16.67, "elapsed_time": "1:15:30", "remaining_time": "6:17:33"}
47
- {"current_steps": 47, "total_steps": 276, "loss": 0.5095, "lr": 8e-05, "epoch": 0.5087956698240866, "percentage": 17.03, "elapsed_time": "1:17:07", "remaining_time": "6:15:48"}
48
- {"current_steps": 48, "total_steps": 276, "loss": 0.5086, "lr": 8e-05, "epoch": 0.5196211096075778, "percentage": 17.39, "elapsed_time": "1:18:44", "remaining_time": "6:14:02"}
49
- {"current_steps": 49, "total_steps": 276, "loss": 0.5041, "lr": 8e-05, "epoch": 0.530446549391069, "percentage": 17.75, "elapsed_time": "1:20:21", "remaining_time": "6:12:17"}
50
- {"current_steps": 50, "total_steps": 276, "loss": 0.5012, "lr": 8e-05, "epoch": 0.5412719891745602, "percentage": 18.12, "elapsed_time": "1:21:58", "remaining_time": "6:10:33"}
51
- {"current_steps": 51, "total_steps": 276, "loss": 0.4978, "lr": 8e-05, "epoch": 0.5520974289580515, "percentage": 18.48, "elapsed_time": "1:23:36", "remaining_time": "6:08:50"}
52
- {"current_steps": 52, "total_steps": 276, "loss": 0.4932, "lr": 8e-05, "epoch": 0.5629228687415426, "percentage": 18.84, "elapsed_time": "1:25:13", "remaining_time": "6:07:08"}
53
- {"current_steps": 53, "total_steps": 276, "loss": 0.4922, "lr": 8e-05, "epoch": 0.5737483085250338, "percentage": 19.2, "elapsed_time": "1:26:51", "remaining_time": "6:05:25"}
54
- {"current_steps": 54, "total_steps": 276, "loss": 0.5062, "lr": 8e-05, "epoch": 0.584573748308525, "percentage": 19.57, "elapsed_time": "1:28:28", "remaining_time": "6:03:43"}
55
- {"current_steps": 55, "total_steps": 276, "loss": 0.4914, "lr": 8e-05, "epoch": 0.5953991880920162, "percentage": 19.93, "elapsed_time": "1:30:05", "remaining_time": "6:02:01"}
56
- {"current_steps": 56, "total_steps": 276, "loss": 0.4946, "lr": 8e-05, "epoch": 0.6062246278755075, "percentage": 20.29, "elapsed_time": "1:31:43", "remaining_time": "6:00:19"}
57
- {"current_steps": 57, "total_steps": 276, "loss": 0.495, "lr": 8e-05, "epoch": 0.6170500676589986, "percentage": 20.65, "elapsed_time": "1:33:20", "remaining_time": "5:58:37"}
58
- {"current_steps": 58, "total_steps": 276, "loss": 0.4931, "lr": 8e-05, "epoch": 0.6278755074424899, "percentage": 21.01, "elapsed_time": "1:34:57", "remaining_time": "5:56:55"}
59
- {"current_steps": 59, "total_steps": 276, "loss": 0.4792, "lr": 8e-05, "epoch": 0.638700947225981, "percentage": 21.38, "elapsed_time": "1:36:35", "remaining_time": "5:55:14"}
60
- {"current_steps": 60, "total_steps": 276, "loss": 0.4856, "lr": 8e-05, "epoch": 0.6495263870094723, "percentage": 21.74, "elapsed_time": "1:38:12", "remaining_time": "5:53:32"}
61
- {"current_steps": 61, "total_steps": 276, "loss": 0.4989, "lr": 8e-05, "epoch": 0.6603518267929634, "percentage": 22.1, "elapsed_time": "1:39:49", "remaining_time": "5:51:51"}
62
- {"current_steps": 62, "total_steps": 276, "loss": 0.4907, "lr": 8e-05, "epoch": 0.6711772665764547, "percentage": 22.46, "elapsed_time": "1:41:27", "remaining_time": "5:50:10"}
63
- {"current_steps": 63, "total_steps": 276, "loss": 0.5004, "lr": 8e-05, "epoch": 0.6820027063599459, "percentage": 22.83, "elapsed_time": "1:43:04", "remaining_time": "5:48:29"}
64
- {"current_steps": 64, "total_steps": 276, "loss": 0.4912, "lr": 8e-05, "epoch": 0.6928281461434371, "percentage": 23.19, "elapsed_time": "1:44:41", "remaining_time": "5:46:48"}
65
- {"current_steps": 65, "total_steps": 276, "loss": 0.4874, "lr": 8e-05, "epoch": 0.7036535859269283, "percentage": 23.55, "elapsed_time": "1:46:19", "remaining_time": "5:45:07"}
66
- {"current_steps": 66, "total_steps": 276, "loss": 0.4949, "lr": 8e-05, "epoch": 0.7144790257104194, "percentage": 23.91, "elapsed_time": "1:47:56", "remaining_time": "5:43:26"}
67
- {"current_steps": 67, "total_steps": 276, "loss": 0.4942, "lr": 8e-05, "epoch": 0.7253044654939107, "percentage": 24.28, "elapsed_time": "1:49:33", "remaining_time": "5:41:46"}
68
- {"current_steps": 68, "total_steps": 276, "loss": 0.5007, "lr": 8e-05, "epoch": 0.7361299052774019, "percentage": 24.64, "elapsed_time": "1:51:11", "remaining_time": "5:40:06"}
69
- {"current_steps": 69, "total_steps": 276, "loss": 0.4893, "lr": 8e-05, "epoch": 0.7469553450608931, "percentage": 25.0, "elapsed_time": "1:52:48", "remaining_time": "5:38:26"}
70
- {"current_steps": 70, "total_steps": 276, "loss": 0.4744, "lr": 8e-05, "epoch": 0.7577807848443843, "percentage": 25.36, "elapsed_time": "1:54:26", "remaining_time": "5:36:45"}
71
- {"current_steps": 71, "total_steps": 276, "loss": 0.4763, "lr": 8e-05, "epoch": 0.7686062246278755, "percentage": 25.72, "elapsed_time": "1:56:03", "remaining_time": "5:35:05"}
72
- {"current_steps": 72, "total_steps": 276, "loss": 0.4856, "lr": 8e-05, "epoch": 0.7794316644113667, "percentage": 26.09, "elapsed_time": "1:57:40", "remaining_time": "5:33:25"}
73
- {"current_steps": 73, "total_steps": 276, "loss": 0.4931, "lr": 8e-05, "epoch": 0.790257104194858, "percentage": 26.45, "elapsed_time": "1:59:18", "remaining_time": "5:31:45"}
74
- {"current_steps": 74, "total_steps": 276, "loss": 0.4881, "lr": 8e-05, "epoch": 0.8010825439783491, "percentage": 26.81, "elapsed_time": "2:00:55", "remaining_time": "5:30:05"}
75
- {"current_steps": 75, "total_steps": 276, "loss": 0.4827, "lr": 8e-05, "epoch": 0.8119079837618404, "percentage": 27.17, "elapsed_time": "2:02:32", "remaining_time": "5:28:25"}
76
- {"current_steps": 76, "total_steps": 276, "loss": 0.4779, "lr": 8e-05, "epoch": 0.8227334235453315, "percentage": 27.54, "elapsed_time": "2:04:10", "remaining_time": "5:26:45"}
77
- {"current_steps": 77, "total_steps": 276, "loss": 0.4735, "lr": 8e-05, "epoch": 0.8335588633288228, "percentage": 27.9, "elapsed_time": "2:05:47", "remaining_time": "5:25:05"}
78
- {"current_steps": 78, "total_steps": 276, "loss": 0.4772, "lr": 8e-05, "epoch": 0.8443843031123139, "percentage": 28.26, "elapsed_time": "2:07:24", "remaining_time": "5:23:25"}
79
- {"current_steps": 79, "total_steps": 276, "loss": 0.4822, "lr": 8e-05, "epoch": 0.8552097428958051, "percentage": 28.62, "elapsed_time": "2:09:02", "remaining_time": "5:21:46"}
80
- {"current_steps": 80, "total_steps": 276, "loss": 0.4755, "lr": 8e-05, "epoch": 0.8660351826792964, "percentage": 28.99, "elapsed_time": "2:10:39", "remaining_time": "5:20:05"}
81
- {"current_steps": 81, "total_steps": 276, "loss": 0.4863, "lr": 8e-05, "epoch": 0.8768606224627875, "percentage": 29.35, "elapsed_time": "2:12:16", "remaining_time": "5:18:26"}
82
- {"current_steps": 82, "total_steps": 276, "loss": 0.4751, "lr": 8e-05, "epoch": 0.8876860622462788, "percentage": 29.71, "elapsed_time": "2:13:53", "remaining_time": "5:16:46"}
83
- {"current_steps": 83, "total_steps": 276, "loss": 0.484, "lr": 8e-05, "epoch": 0.8985115020297699, "percentage": 30.07, "elapsed_time": "2:15:31", "remaining_time": "5:15:07"}
84
- {"current_steps": 84, "total_steps": 276, "loss": 0.4859, "lr": 8e-05, "epoch": 0.9093369418132612, "percentage": 30.43, "elapsed_time": "2:17:08", "remaining_time": "5:13:27"}
85
- {"current_steps": 85, "total_steps": 276, "loss": 0.475, "lr": 8e-05, "epoch": 0.9201623815967523, "percentage": 30.8, "elapsed_time": "2:18:45", "remaining_time": "5:11:48"}
86
- {"current_steps": 86, "total_steps": 276, "loss": 0.4827, "lr": 8e-05, "epoch": 0.9309878213802436, "percentage": 31.16, "elapsed_time": "2:20:22", "remaining_time": "5:10:08"}
87
- {"current_steps": 87, "total_steps": 276, "loss": 0.4756, "lr": 8e-05, "epoch": 0.9418132611637348, "percentage": 31.52, "elapsed_time": "2:21:59", "remaining_time": "5:08:28"}
88
- {"current_steps": 88, "total_steps": 276, "loss": 0.465, "lr": 8e-05, "epoch": 0.952638700947226, "percentage": 31.88, "elapsed_time": "2:23:36", "remaining_time": "5:06:48"}
89
- {"current_steps": 89, "total_steps": 276, "loss": 0.4802, "lr": 8e-05, "epoch": 0.9634641407307172, "percentage": 32.25, "elapsed_time": "2:25:14", "remaining_time": "5:05:09"}
90
- {"current_steps": 90, "total_steps": 276, "loss": 0.4768, "lr": 8e-05, "epoch": 0.9742895805142084, "percentage": 32.61, "elapsed_time": "2:26:51", "remaining_time": "5:03:29"}
91
- {"current_steps": 91, "total_steps": 276, "loss": 0.4754, "lr": 8e-05, "epoch": 0.9851150202976996, "percentage": 32.97, "elapsed_time": "2:28:28", "remaining_time": "5:01:50"}
92
- {"current_steps": 92, "total_steps": 276, "loss": 0.4793, "lr": 8e-05, "epoch": 0.9959404600811907, "percentage": 33.33, "elapsed_time": "2:30:05", "remaining_time": "5:00:11"}
93
- {"current_steps": 93, "total_steps": 276, "loss": 0.7437, "lr": 8e-05, "epoch": 1.006765899864682, "percentage": 33.7, "elapsed_time": "2:33:13", "remaining_time": "5:01:29"}
 
1
+ {"current_steps": 1, "total_steps": 276, "loss": 0.8567, "lr": 8e-05, "epoch": 0.010825439783491205, "percentage": 0.36, "elapsed_time": "0:02:34", "remaining_time": "11:47:02"}
2
+ {"current_steps": 2, "total_steps": 276, "loss": 2.1738, "lr": 8e-05, "epoch": 0.02165087956698241, "percentage": 0.72, "elapsed_time": "0:04:10", "remaining_time": "9:32:54"}
3
+ {"current_steps": 3, "total_steps": 276, "loss": 1.5416, "lr": 8e-05, "epoch": 0.03247631935047361, "percentage": 1.09, "elapsed_time": "0:05:47", "remaining_time": "8:47:25"}
4
+ {"current_steps": 4, "total_steps": 276, "loss": 1.7788, "lr": 8e-05, "epoch": 0.04330175913396482, "percentage": 1.45, "elapsed_time": "0:07:24", "remaining_time": "8:23:32"}
5
+ {"current_steps": 5, "total_steps": 276, "loss": 1.4445, "lr": 8e-05, "epoch": 0.05412719891745602, "percentage": 1.81, "elapsed_time": "0:09:00", "remaining_time": "8:08:41"}
6
+ {"current_steps": 6, "total_steps": 276, "loss": 1.0873, "lr": 8e-05, "epoch": 0.06495263870094722, "percentage": 2.17, "elapsed_time": "0:10:37", "remaining_time": "7:58:29"}
7
+ {"current_steps": 7, "total_steps": 276, "loss": 1.1336, "lr": 8e-05, "epoch": 0.07577807848443843, "percentage": 2.54, "elapsed_time": "0:12:15", "remaining_time": "7:50:51"}
8
+ {"current_steps": 8, "total_steps": 276, "loss": 0.9486, "lr": 8e-05, "epoch": 0.08660351826792964, "percentage": 2.9, "elapsed_time": "0:13:51", "remaining_time": "7:44:28"}
9
+ {"current_steps": 9, "total_steps": 276, "loss": 0.8582, "lr": 8e-05, "epoch": 0.09742895805142084, "percentage": 3.26, "elapsed_time": "0:15:28", "remaining_time": "7:39:11"}
10
+ {"current_steps": 10, "total_steps": 276, "loss": 0.8317, "lr": 8e-05, "epoch": 0.10825439783491204, "percentage": 3.62, "elapsed_time": "0:17:05", "remaining_time": "7:34:29"}
11
+ {"current_steps": 11, "total_steps": 276, "loss": 1.8099, "lr": 8e-05, "epoch": 0.11907983761840325, "percentage": 3.99, "elapsed_time": "0:18:41", "remaining_time": "7:30:22"}
12
+ {"current_steps": 12, "total_steps": 276, "loss": 1.0518, "lr": 8e-05, "epoch": 0.12990527740189445, "percentage": 4.35, "elapsed_time": "0:20:18", "remaining_time": "7:26:42"}
13
+ {"current_steps": 13, "total_steps": 276, "loss": 0.9205, "lr": 8e-05, "epoch": 0.14073071718538566, "percentage": 4.71, "elapsed_time": "0:21:55", "remaining_time": "7:23:23"}
14
+ {"current_steps": 14, "total_steps": 276, "loss": 0.7999, "lr": 8e-05, "epoch": 0.15155615696887687, "percentage": 5.07, "elapsed_time": "0:23:31", "remaining_time": "7:20:15"}
15
+ {"current_steps": 15, "total_steps": 276, "loss": 0.7662, "lr": 8e-05, "epoch": 0.16238159675236807, "percentage": 5.43, "elapsed_time": "0:25:08", "remaining_time": "7:17:24"}
16
+ {"current_steps": 16, "total_steps": 276, "loss": 0.7253, "lr": 8e-05, "epoch": 0.17320703653585928, "percentage": 5.8, "elapsed_time": "0:26:44", "remaining_time": "7:14:40"}
17
+ {"current_steps": 17, "total_steps": 276, "loss": 0.7021, "lr": 8e-05, "epoch": 0.18403247631935046, "percentage": 6.16, "elapsed_time": "0:28:21", "remaining_time": "7:12:05"}
18
+ {"current_steps": 18, "total_steps": 276, "loss": 0.6775, "lr": 8e-05, "epoch": 0.19485791610284167, "percentage": 6.52, "elapsed_time": "0:29:58", "remaining_time": "7:09:43"}
19
+ {"current_steps": 19, "total_steps": 276, "loss": 0.6562, "lr": 8e-05, "epoch": 0.20568335588633288, "percentage": 6.88, "elapsed_time": "0:31:35", "remaining_time": "7:07:22"}
20
+ {"current_steps": 20, "total_steps": 276, "loss": 0.6493, "lr": 8e-05, "epoch": 0.2165087956698241, "percentage": 7.25, "elapsed_time": "0:33:12", "remaining_time": "7:05:03"}
21
+ {"current_steps": 21, "total_steps": 276, "loss": 0.6413, "lr": 8e-05, "epoch": 0.2273342354533153, "percentage": 7.61, "elapsed_time": "0:34:49", "remaining_time": "7:02:50"}
22
+ {"current_steps": 22, "total_steps": 276, "loss": 0.6327, "lr": 8e-05, "epoch": 0.2381596752368065, "percentage": 7.97, "elapsed_time": "0:36:26", "remaining_time": "7:00:39"}
23
+ {"current_steps": 23, "total_steps": 276, "loss": 0.6203, "lr": 8e-05, "epoch": 0.2489851150202977, "percentage": 8.33, "elapsed_time": "0:38:02", "remaining_time": "6:58:32"}
24
+ {"current_steps": 24, "total_steps": 276, "loss": 0.6164, "lr": 8e-05, "epoch": 0.2598105548037889, "percentage": 8.7, "elapsed_time": "0:39:39", "remaining_time": "6:56:24"}
25
+ {"current_steps": 25, "total_steps": 276, "loss": 0.6026, "lr": 8e-05, "epoch": 0.2706359945872801, "percentage": 9.06, "elapsed_time": "0:41:16", "remaining_time": "6:54:23"}
26
+ {"current_steps": 26, "total_steps": 276, "loss": 0.5784, "lr": 8e-05, "epoch": 0.2814614343707713, "percentage": 9.42, "elapsed_time": "0:42:52", "remaining_time": "6:52:20"}
27
+ {"current_steps": 27, "total_steps": 276, "loss": 0.5776, "lr": 8e-05, "epoch": 0.2922868741542625, "percentage": 9.78, "elapsed_time": "0:44:30", "remaining_time": "6:50:27"}
28
+ {"current_steps": 28, "total_steps": 276, "loss": 0.5768, "lr": 8e-05, "epoch": 0.30311231393775373, "percentage": 10.14, "elapsed_time": "0:46:07", "remaining_time": "6:48:29"}
29
+ {"current_steps": 29, "total_steps": 276, "loss": 0.5752, "lr": 8e-05, "epoch": 0.31393775372124494, "percentage": 10.51, "elapsed_time": "0:47:43", "remaining_time": "6:46:32"}
30
+ {"current_steps": 30, "total_steps": 276, "loss": 0.5631, "lr": 8e-05, "epoch": 0.32476319350473615, "percentage": 10.87, "elapsed_time": "0:49:20", "remaining_time": "6:44:34"}
31
+ {"current_steps": 31, "total_steps": 276, "loss": 0.5633, "lr": 8e-05, "epoch": 0.33558863328822736, "percentage": 11.23, "elapsed_time": "0:50:57", "remaining_time": "6:42:41"}
32
+ {"current_steps": 32, "total_steps": 276, "loss": 0.5564, "lr": 8e-05, "epoch": 0.34641407307171856, "percentage": 11.59, "elapsed_time": "0:52:34", "remaining_time": "6:40:50"}
33
+ {"current_steps": 33, "total_steps": 276, "loss": 0.5549, "lr": 8e-05, "epoch": 0.3572395128552097, "percentage": 11.96, "elapsed_time": "0:54:10", "remaining_time": "6:38:58"}
34
+ {"current_steps": 34, "total_steps": 276, "loss": 0.5596, "lr": 8e-05, "epoch": 0.3680649526387009, "percentage": 12.32, "elapsed_time": "0:55:47", "remaining_time": "6:37:06"}
35
+ {"current_steps": 35, "total_steps": 276, "loss": 0.5519, "lr": 8e-05, "epoch": 0.37889039242219213, "percentage": 12.68, "elapsed_time": "0:57:24", "remaining_time": "6:35:16"}
36
+ {"current_steps": 36, "total_steps": 276, "loss": 0.5339, "lr": 8e-05, "epoch": 0.38971583220568334, "percentage": 13.04, "elapsed_time": "0:59:00", "remaining_time": "6:33:25"}
37
+ {"current_steps": 37, "total_steps": 276, "loss": 0.539, "lr": 8e-05, "epoch": 0.40054127198917455, "percentage": 13.41, "elapsed_time": "1:00:37", "remaining_time": "6:31:36"}
38
+ {"current_steps": 38, "total_steps": 276, "loss": 0.5298, "lr": 8e-05, "epoch": 0.41136671177266576, "percentage": 13.77, "elapsed_time": "1:02:14", "remaining_time": "6:29:49"}
39
+ {"current_steps": 39, "total_steps": 276, "loss": 0.5365, "lr": 8e-05, "epoch": 0.42219215155615697, "percentage": 14.13, "elapsed_time": "1:03:51", "remaining_time": "6:28:01"}
40
+ {"current_steps": 40, "total_steps": 276, "loss": 0.5266, "lr": 8e-05, "epoch": 0.4330175913396482, "percentage": 14.49, "elapsed_time": "1:05:27", "remaining_time": "6:26:13"}
41
+ {"current_steps": 41, "total_steps": 276, "loss": 0.521, "lr": 8e-05, "epoch": 0.4438430311231394, "percentage": 14.86, "elapsed_time": "1:07:04", "remaining_time": "6:24:26"}
42
+ {"current_steps": 42, "total_steps": 276, "loss": 0.5164, "lr": 8e-05, "epoch": 0.4546684709066306, "percentage": 15.22, "elapsed_time": "1:08:40", "remaining_time": "6:22:39"}
43
+ {"current_steps": 43, "total_steps": 276, "loss": 0.5146, "lr": 8e-05, "epoch": 0.4654939106901218, "percentage": 15.58, "elapsed_time": "1:10:17", "remaining_time": "6:20:54"}
44
+ {"current_steps": 44, "total_steps": 276, "loss": 0.5125, "lr": 8e-05, "epoch": 0.476319350473613, "percentage": 15.94, "elapsed_time": "1:11:54", "remaining_time": "6:19:08"}
45
+ {"current_steps": 45, "total_steps": 276, "loss": 0.5159, "lr": 8e-05, "epoch": 0.4871447902571042, "percentage": 16.3, "elapsed_time": "1:13:30", "remaining_time": "6:17:22"}
46
+ {"current_steps": 46, "total_steps": 276, "loss": 0.5107, "lr": 8e-05, "epoch": 0.4979702300405954, "percentage": 16.67, "elapsed_time": "1:15:07", "remaining_time": "6:15:37"}
47
+ {"current_steps": 47, "total_steps": 276, "loss": 0.5116, "lr": 8e-05, "epoch": 0.5087956698240866, "percentage": 17.03, "elapsed_time": "1:16:44", "remaining_time": "6:13:53"}
48
+ {"current_steps": 48, "total_steps": 276, "loss": 0.5121, "lr": 8e-05, "epoch": 0.5196211096075778, "percentage": 17.39, "elapsed_time": "1:18:20", "remaining_time": "6:12:09"}
49
+ {"current_steps": 49, "total_steps": 276, "loss": 0.5056, "lr": 8e-05, "epoch": 0.530446549391069, "percentage": 17.75, "elapsed_time": "1:19:57", "remaining_time": "6:10:26"}
50
+ {"current_steps": 50, "total_steps": 276, "loss": 0.5035, "lr": 8e-05, "epoch": 0.5412719891745602, "percentage": 18.12, "elapsed_time": "1:21:34", "remaining_time": "6:08:42"}
51
+ {"current_steps": 51, "total_steps": 276, "loss": 0.5004, "lr": 8e-05, "epoch": 0.5520974289580515, "percentage": 18.48, "elapsed_time": "1:23:10", "remaining_time": "6:06:58"}
52
+ {"current_steps": 52, "total_steps": 276, "loss": 0.4954, "lr": 8e-05, "epoch": 0.5629228687415426, "percentage": 18.84, "elapsed_time": "1:24:47", "remaining_time": "6:05:15"}
53
+ {"current_steps": 53, "total_steps": 276, "loss": 0.4931, "lr": 8e-05, "epoch": 0.5737483085250338, "percentage": 19.2, "elapsed_time": "1:26:24", "remaining_time": "6:03:32"}
54
+ {"current_steps": 54, "total_steps": 276, "loss": 0.5058, "lr": 8e-05, "epoch": 0.584573748308525, "percentage": 19.57, "elapsed_time": "1:28:00", "remaining_time": "6:01:50"}
55
+ {"current_steps": 55, "total_steps": 276, "loss": 0.4945, "lr": 8e-05, "epoch": 0.5953991880920162, "percentage": 19.93, "elapsed_time": "1:29:37", "remaining_time": "6:00:08"}
56
+ {"current_steps": 56, "total_steps": 276, "loss": 0.4981, "lr": 8e-05, "epoch": 0.6062246278755075, "percentage": 20.29, "elapsed_time": "1:31:14", "remaining_time": "5:58:26"}
57
+ {"current_steps": 57, "total_steps": 276, "loss": 0.4985, "lr": 8e-05, "epoch": 0.6170500676589986, "percentage": 20.65, "elapsed_time": "1:32:51", "remaining_time": "5:56:44"}
58
+ {"current_steps": 58, "total_steps": 276, "loss": 0.4961, "lr": 8e-05, "epoch": 0.6278755074424899, "percentage": 21.01, "elapsed_time": "1:34:27", "remaining_time": "5:55:03"}
59
+ {"current_steps": 59, "total_steps": 276, "loss": 0.4846, "lr": 8e-05, "epoch": 0.638700947225981, "percentage": 21.38, "elapsed_time": "1:36:04", "remaining_time": "5:53:22"}
60
+ {"current_steps": 60, "total_steps": 276, "loss": 0.4898, "lr": 8e-05, "epoch": 0.6495263870094723, "percentage": 21.74, "elapsed_time": "1:37:41", "remaining_time": "5:51:40"}
61
+ {"current_steps": 61, "total_steps": 276, "loss": 0.5023, "lr": 8e-05, "epoch": 0.6603518267929634, "percentage": 22.1, "elapsed_time": "1:39:18", "remaining_time": "5:50:00"}
62
+ {"current_steps": 62, "total_steps": 276, "loss": 0.4907, "lr": 8e-05, "epoch": 0.6711772665764547, "percentage": 22.46, "elapsed_time": "1:40:54", "remaining_time": "5:48:19"}
63
+ {"current_steps": 63, "total_steps": 276, "loss": 0.4978, "lr": 8e-05, "epoch": 0.6820027063599459, "percentage": 22.83, "elapsed_time": "1:42:31", "remaining_time": "5:46:38"}
64
+ {"current_steps": 64, "total_steps": 276, "loss": 0.4945, "lr": 8e-05, "epoch": 0.6928281461434371, "percentage": 23.19, "elapsed_time": "1:44:08", "remaining_time": "5:44:58"}
65
+ {"current_steps": 65, "total_steps": 276, "loss": 0.4843, "lr": 8e-05, "epoch": 0.7036535859269283, "percentage": 23.55, "elapsed_time": "1:45:45", "remaining_time": "5:43:18"}
66
+ {"current_steps": 66, "total_steps": 276, "loss": 0.4927, "lr": 8e-05, "epoch": 0.7144790257104194, "percentage": 23.91, "elapsed_time": "1:47:22", "remaining_time": "5:41:37"}
67
+ {"current_steps": 67, "total_steps": 276, "loss": 0.4897, "lr": 8e-05, "epoch": 0.7253044654939107, "percentage": 24.28, "elapsed_time": "1:48:59", "remaining_time": "5:39:58"}
68
+ {"current_steps": 68, "total_steps": 276, "loss": 0.5005, "lr": 8e-05, "epoch": 0.7361299052774019, "percentage": 24.64, "elapsed_time": "1:50:36", "remaining_time": "5:38:18"}
69
+ {"current_steps": 69, "total_steps": 276, "loss": 0.4918, "lr": 8e-05, "epoch": 0.7469553450608931, "percentage": 25.0, "elapsed_time": "1:52:12", "remaining_time": "5:36:38"}
70
+ {"current_steps": 70, "total_steps": 276, "loss": 0.4733, "lr": 8e-05, "epoch": 0.7577807848443843, "percentage": 25.36, "elapsed_time": "1:53:49", "remaining_time": "5:34:58"}
71
+ {"current_steps": 71, "total_steps": 276, "loss": 0.4762, "lr": 8e-05, "epoch": 0.7686062246278755, "percentage": 25.72, "elapsed_time": "1:55:26", "remaining_time": "5:33:19"}
72
+ {"current_steps": 72, "total_steps": 276, "loss": 0.4878, "lr": 8e-05, "epoch": 0.7794316644113667, "percentage": 26.09, "elapsed_time": "1:57:03", "remaining_time": "5:31:39"}
73
+ {"current_steps": 73, "total_steps": 276, "loss": 0.4928, "lr": 8e-05, "epoch": 0.790257104194858, "percentage": 26.45, "elapsed_time": "1:58:40", "remaining_time": "5:29:59"}
74
+ {"current_steps": 74, "total_steps": 276, "loss": 0.4879, "lr": 8e-05, "epoch": 0.8010825439783491, "percentage": 26.81, "elapsed_time": "2:00:16", "remaining_time": "5:28:19"}
75
+ {"current_steps": 75, "total_steps": 276, "loss": 0.4861, "lr": 8e-05, "epoch": 0.8119079837618404, "percentage": 27.17, "elapsed_time": "2:01:53", "remaining_time": "5:26:40"}
76
+ {"current_steps": 76, "total_steps": 276, "loss": 0.4765, "lr": 8e-05, "epoch": 0.8227334235453315, "percentage": 27.54, "elapsed_time": "2:03:30", "remaining_time": "5:25:01"}
77
+ {"current_steps": 77, "total_steps": 276, "loss": 0.4751, "lr": 8e-05, "epoch": 0.8335588633288228, "percentage": 27.9, "elapsed_time": "2:05:07", "remaining_time": "5:23:21"}
78
+ {"current_steps": 78, "total_steps": 276, "loss": 0.4796, "lr": 8e-05, "epoch": 0.8443843031123139, "percentage": 28.26, "elapsed_time": "2:06:44", "remaining_time": "5:21:42"}
79
+ {"current_steps": 79, "total_steps": 276, "loss": 0.4834, "lr": 8e-05, "epoch": 0.8552097428958051, "percentage": 28.62, "elapsed_time": "2:08:21", "remaining_time": "5:20:03"}
80
+ {"current_steps": 80, "total_steps": 276, "loss": 0.4786, "lr": 8e-05, "epoch": 0.8660351826792964, "percentage": 28.99, "elapsed_time": "2:09:57", "remaining_time": "5:18:24"}
81
+ {"current_steps": 81, "total_steps": 276, "loss": 0.491, "lr": 8e-05, "epoch": 0.8768606224627875, "percentage": 29.35, "elapsed_time": "2:11:34", "remaining_time": "5:16:45"}
82
+ {"current_steps": 82, "total_steps": 276, "loss": 0.4736, "lr": 8e-05, "epoch": 0.8876860622462788, "percentage": 29.71, "elapsed_time": "2:13:11", "remaining_time": "5:15:06"}
83
+ {"current_steps": 83, "total_steps": 276, "loss": 0.4818, "lr": 8e-05, "epoch": 0.8985115020297699, "percentage": 30.07, "elapsed_time": "2:14:48", "remaining_time": "5:13:27"}
84
+ {"current_steps": 84, "total_steps": 276, "loss": 0.48, "lr": 8e-05, "epoch": 0.9093369418132612, "percentage": 30.43, "elapsed_time": "2:16:25", "remaining_time": "5:11:48"}
85
+ {"current_steps": 85, "total_steps": 276, "loss": 0.4786, "lr": 8e-05, "epoch": 0.9201623815967523, "percentage": 30.8, "elapsed_time": "2:18:01", "remaining_time": "5:10:09"}
86
+ {"current_steps": 86, "total_steps": 276, "loss": 0.4835, "lr": 8e-05, "epoch": 0.9309878213802436, "percentage": 31.16, "elapsed_time": "2:19:38", "remaining_time": "5:08:30"}
87
+ {"current_steps": 87, "total_steps": 276, "loss": 0.4864, "lr": 8e-05, "epoch": 0.9418132611637348, "percentage": 31.52, "elapsed_time": "2:21:15", "remaining_time": "5:06:52"}
88
+ {"current_steps": 88, "total_steps": 276, "loss": 0.4754, "lr": 8e-05, "epoch": 0.952638700947226, "percentage": 31.88, "elapsed_time": "2:22:52", "remaining_time": "5:05:13"}
89
+ {"current_steps": 89, "total_steps": 276, "loss": 0.4865, "lr": 8e-05, "epoch": 0.9634641407307172, "percentage": 32.25, "elapsed_time": "2:24:29", "remaining_time": "5:03:35"}
90
+ {"current_steps": 90, "total_steps": 276, "loss": 0.4879, "lr": 8e-05, "epoch": 0.9742895805142084, "percentage": 32.61, "elapsed_time": "2:26:05", "remaining_time": "5:01:56"}
91
+ {"current_steps": 91, "total_steps": 276, "loss": 0.4864, "lr": 8e-05, "epoch": 0.9851150202976996, "percentage": 32.97, "elapsed_time": "2:27:42", "remaining_time": "5:00:17"}
92
+ {"current_steps": 92, "total_steps": 276, "loss": 0.4882, "lr": 8e-05, "epoch": 0.9959404600811907, "percentage": 33.33, "elapsed_time": "2:29:19", "remaining_time": "4:58:38"}
93
+ {"current_steps": 93, "total_steps": 276, "loss": 0.7533, "lr": 8e-05, "epoch": 1.006765899864682, "percentage": 33.7, "elapsed_time": "2:32:26", "remaining_time": "4:59:58"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20a71ca2824418237dc770fb189d88f4608af86524eadb5c3d8e98002de9d5d0
3
  size 7224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4711bb9e484be25b4ed94b03ee35f3455325789656979eb6abce903268c5b27
3
  size 7224