neginr commited on
Commit
882c88d
·
verified ·
1 Parent(s): 283b895

Training in progress, epoch 0

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b27ce1622b5f0ee360d4122f70efaf7e0b1d087b0bb5d3e3849bd533405dd4c4
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ca864894953f716ae60186c8580f93fd8c235d11e0b7a97f5add7ff6a3180c
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fe7998d012aaea2abdd30326d41261104721516f3714d52efb159e267fa80d9
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6adbd0c2f5a5d6bf087cbec5bf89c28651e512fb5944a3faa614cdd8f157eb3d
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:255140b4a650fa8b7a83cf315a20dc52656098f4cf995af9613d357d20b99890
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020c87e9290b68a7bdf4b30f9137210ba648cb55ffc92804ddf3d8231c323357
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e04e90c4338215c31eb039c9cafb3a8cae6183db19253f57ac95939097420274
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71df631e8ebd2ccb1644080217cddbde8d60ca3bc0b47691e275fcbffb04456c
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -1,91 +1,88 @@
1
- {"current_steps": 1, "total_steps": 364, "loss": 1.1132, "lr": 5.405405405405406e-07, "epoch": 0.01910828025477707, "percentage": 0.27, "elapsed_time": "0:00:11", "remaining_time": "1:10:35"}
2
- {"current_steps": 2, "total_steps": 364, "loss": 1.1203, "lr": 1.0810810810810812e-06, "epoch": 0.03821656050955414, "percentage": 0.55, "elapsed_time": "0:00:17", "remaining_time": "0:52:33"}
3
- {"current_steps": 3, "total_steps": 364, "loss": 1.0717, "lr": 1.6216216216216219e-06, "epoch": 0.05732484076433121, "percentage": 0.82, "elapsed_time": "0:00:22", "remaining_time": "0:44:29"}
4
- {"current_steps": 4, "total_steps": 364, "loss": 1.0972, "lr": 2.1621621621621623e-06, "epoch": 0.07643312101910828, "percentage": 1.1, "elapsed_time": "0:00:26", "remaining_time": "0:39:24"}
5
- {"current_steps": 5, "total_steps": 364, "loss": 1.0653, "lr": 2.702702702702703e-06, "epoch": 0.09554140127388536, "percentage": 1.37, "elapsed_time": "0:00:30", "remaining_time": "0:36:15"}
6
- {"current_steps": 6, "total_steps": 364, "loss": 0.9884, "lr": 3.2432432432432437e-06, "epoch": 0.11464968152866242, "percentage": 1.65, "elapsed_time": "0:00:34", "remaining_time": "0:34:43"}
7
- {"current_steps": 7, "total_steps": 364, "loss": 0.8798, "lr": 3.7837837837837844e-06, "epoch": 0.1337579617834395, "percentage": 1.92, "elapsed_time": "0:00:39", "remaining_time": "0:33:52"}
8
- {"current_steps": 8, "total_steps": 364, "loss": 0.8372, "lr": 4.324324324324325e-06, "epoch": 0.15286624203821655, "percentage": 2.2, "elapsed_time": "0:00:44", "remaining_time": "0:33:00"}
9
- {"current_steps": 9, "total_steps": 364, "loss": 0.8177, "lr": 4.864864864864866e-06, "epoch": 0.17197452229299362, "percentage": 2.47, "elapsed_time": "0:00:48", "remaining_time": "0:31:54"}
10
- {"current_steps": 10, "total_steps": 364, "loss": 0.7615, "lr": 5.405405405405406e-06, "epoch": 0.1910828025477707, "percentage": 2.75, "elapsed_time": "0:00:53", "remaining_time": "0:31:24"}
11
- {"current_steps": 11, "total_steps": 364, "loss": 0.7108, "lr": 5.945945945945947e-06, "epoch": 0.21019108280254778, "percentage": 3.02, "elapsed_time": "0:00:57", "remaining_time": "0:30:43"}
12
- {"current_steps": 12, "total_steps": 364, "loss": 0.6855, "lr": 6.486486486486487e-06, "epoch": 0.22929936305732485, "percentage": 3.3, "elapsed_time": "0:01:01", "remaining_time": "0:30:16"}
13
- {"current_steps": 13, "total_steps": 364, "loss": 0.6099, "lr": 7.027027027027028e-06, "epoch": 0.2484076433121019, "percentage": 3.57, "elapsed_time": "0:01:08", "remaining_time": "0:30:43"}
14
- {"current_steps": 14, "total_steps": 364, "loss": 0.5898, "lr": 7.567567567567569e-06, "epoch": 0.267515923566879, "percentage": 3.85, "elapsed_time": "0:01:12", "remaining_time": "0:30:05"}
15
- {"current_steps": 15, "total_steps": 364, "loss": 0.5502, "lr": 8.108108108108109e-06, "epoch": 0.28662420382165604, "percentage": 4.12, "elapsed_time": "0:01:16", "remaining_time": "0:29:32"}
16
- {"current_steps": 16, "total_steps": 364, "loss": 0.5409, "lr": 8.64864864864865e-06, "epoch": 0.3057324840764331, "percentage": 4.4, "elapsed_time": "0:01:20", "remaining_time": "0:29:13"}
17
- {"current_steps": 17, "total_steps": 364, "loss": 0.5368, "lr": 9.189189189189191e-06, "epoch": 0.3248407643312102, "percentage": 4.67, "elapsed_time": "0:01:26", "remaining_time": "0:29:20"}
18
- {"current_steps": 18, "total_steps": 364, "loss": 0.5146, "lr": 9.729729729729732e-06, "epoch": 0.34394904458598724, "percentage": 4.95, "elapsed_time": "0:01:30", "remaining_time": "0:28:58"}
19
- {"current_steps": 19, "total_steps": 364, "loss": 0.5045, "lr": 1.027027027027027e-05, "epoch": 0.3630573248407643, "percentage": 5.22, "elapsed_time": "0:01:34", "remaining_time": "0:28:34"}
20
- {"current_steps": 20, "total_steps": 364, "loss": 0.4996, "lr": 1.0810810810810812e-05, "epoch": 0.3821656050955414, "percentage": 5.49, "elapsed_time": "0:01:38", "remaining_time": "0:28:10"}
21
- {"current_steps": 21, "total_steps": 364, "loss": 0.5055, "lr": 1.1351351351351352e-05, "epoch": 0.4012738853503185, "percentage": 5.77, "elapsed_time": "0:01:44", "remaining_time": "0:28:22"}
22
- {"current_steps": 22, "total_steps": 364, "loss": 0.483, "lr": 1.1891891891891894e-05, "epoch": 0.42038216560509556, "percentage": 6.04, "elapsed_time": "0:01:49", "remaining_time": "0:28:22"}
23
- {"current_steps": 23, "total_steps": 364, "loss": 0.4741, "lr": 1.2432432432432433e-05, "epoch": 0.4394904458598726, "percentage": 6.32, "elapsed_time": "0:01:54", "remaining_time": "0:28:10"}
24
- {"current_steps": 24, "total_steps": 364, "loss": 0.4616, "lr": 1.2972972972972975e-05, "epoch": 0.4585987261146497, "percentage": 6.59, "elapsed_time": "0:01:58", "remaining_time": "0:27:52"}
25
- {"current_steps": 25, "total_steps": 364, "loss": 0.4759, "lr": 1.3513513513513515e-05, "epoch": 0.47770700636942676, "percentage": 6.87, "elapsed_time": "0:02:02", "remaining_time": "0:27:40"}
26
- {"current_steps": 26, "total_steps": 364, "loss": 0.4263, "lr": 1.4054054054054055e-05, "epoch": 0.4968152866242038, "percentage": 7.14, "elapsed_time": "0:02:06", "remaining_time": "0:27:27"}
27
- {"current_steps": 27, "total_steps": 364, "loss": 0.4682, "lr": 1.4594594594594596e-05, "epoch": 0.5159235668789809, "percentage": 7.42, "elapsed_time": "0:02:11", "remaining_time": "0:27:17"}
28
- {"current_steps": 28, "total_steps": 364, "loss": 0.4007, "lr": 1.5135135135135138e-05, "epoch": 0.535031847133758, "percentage": 7.69, "elapsed_time": "0:02:15", "remaining_time": "0:27:09"}
29
- {"current_steps": 29, "total_steps": 364, "loss": 0.4623, "lr": 1.5675675675675676e-05, "epoch": 0.554140127388535, "percentage": 7.97, "elapsed_time": "0:02:20", "remaining_time": "0:27:03"}
30
- {"current_steps": 30, "total_steps": 364, "loss": 0.4255, "lr": 1.6216216216216218e-05, "epoch": 0.5732484076433121, "percentage": 8.24, "elapsed_time": "0:02:25", "remaining_time": "0:26:55"}
31
- {"current_steps": 31, "total_steps": 364, "loss": 0.436, "lr": 1.6756756756756757e-05, "epoch": 0.5923566878980892, "percentage": 8.52, "elapsed_time": "0:02:30", "remaining_time": "0:26:51"}
32
- {"current_steps": 32, "total_steps": 364, "loss": 0.4332, "lr": 1.72972972972973e-05, "epoch": 0.6114649681528662, "percentage": 8.79, "elapsed_time": "0:02:34", "remaining_time": "0:26:38"}
33
- {"current_steps": 33, "total_steps": 364, "loss": 0.4245, "lr": 1.783783783783784e-05, "epoch": 0.6305732484076433, "percentage": 9.07, "elapsed_time": "0:02:38", "remaining_time": "0:26:29"}
34
- {"current_steps": 34, "total_steps": 364, "loss": 0.4423, "lr": 1.8378378378378383e-05, "epoch": 0.6496815286624203, "percentage": 9.34, "elapsed_time": "0:02:42", "remaining_time": "0:26:17"}
35
- {"current_steps": 35, "total_steps": 364, "loss": 0.4572, "lr": 1.891891891891892e-05, "epoch": 0.6687898089171974, "percentage": 9.62, "elapsed_time": "0:02:46", "remaining_time": "0:26:09"}
36
- {"current_steps": 36, "total_steps": 364, "loss": 0.4302, "lr": 1.9459459459459463e-05, "epoch": 0.6878980891719745, "percentage": 9.89, "elapsed_time": "0:02:50", "remaining_time": "0:25:56"}
37
- {"current_steps": 37, "total_steps": 364, "loss": 0.4196, "lr": 2e-05, "epoch": 0.7070063694267515, "percentage": 10.16, "elapsed_time": "0:02:56", "remaining_time": "0:25:57"}
38
- {"current_steps": 38, "total_steps": 364, "loss": 0.416, "lr": 1.9999538500851633e-05, "epoch": 0.7261146496815286, "percentage": 10.44, "elapsed_time": "0:03:00", "remaining_time": "0:25:51"}
39
- {"current_steps": 39, "total_steps": 364, "loss": 0.4257, "lr": 1.9998154046002822e-05, "epoch": 0.7452229299363057, "percentage": 10.71, "elapsed_time": "0:03:09", "remaining_time": "0:26:16"}
40
- {"current_steps": 40, "total_steps": 364, "loss": 0.4069, "lr": 1.9995846763238514e-05, "epoch": 0.7643312101910829, "percentage": 10.99, "elapsed_time": "0:03:13", "remaining_time": "0:26:08"}
41
- {"current_steps": 41, "total_steps": 364, "loss": 0.3891, "lr": 1.9992616865520515e-05, "epoch": 0.7834394904458599, "percentage": 11.26, "elapsed_time": "0:03:18", "remaining_time": "0:26:00"}
42
- {"current_steps": 42, "total_steps": 364, "loss": 0.4131, "lr": 1.9988464650967834e-05, "epoch": 0.802547770700637, "percentage": 11.54, "elapsed_time": "0:03:22", "remaining_time": "0:25:50"}
43
- {"current_steps": 43, "total_steps": 364, "loss": 0.3983, "lr": 1.9983390502829168e-05, "epoch": 0.821656050955414, "percentage": 11.81, "elapsed_time": "0:03:25", "remaining_time": "0:25:36"}
44
- {"current_steps": 44, "total_steps": 364, "loss": 0.3886, "lr": 1.9977394889447526e-05, "epoch": 0.8407643312101911, "percentage": 12.09, "elapsed_time": "0:03:30", "remaining_time": "0:25:29"}
45
- {"current_steps": 45, "total_steps": 364, "loss": 0.3976, "lr": 1.9970478364217e-05, "epoch": 0.8598726114649682, "percentage": 12.36, "elapsed_time": "0:03:34", "remaining_time": "0:25:23"}
46
- {"current_steps": 46, "total_steps": 364, "loss": 0.3957, "lr": 1.9962641565531694e-05, "epoch": 0.8789808917197452, "percentage": 12.64, "elapsed_time": "0:03:39", "remaining_time": "0:25:15"}
47
- {"current_steps": 47, "total_steps": 364, "loss": 0.4027, "lr": 1.9953885216726788e-05, "epoch": 0.8980891719745223, "percentage": 12.91, "elapsed_time": "0:03:45", "remaining_time": "0:25:18"}
48
- {"current_steps": 48, "total_steps": 364, "loss": 0.3997, "lr": 1.994421012601179e-05, "epoch": 0.9171974522292994, "percentage": 13.19, "elapsed_time": "0:03:49", "remaining_time": "0:25:08"}
49
- {"current_steps": 49, "total_steps": 364, "loss": 0.3829, "lr": 1.9933617186395917e-05, "epoch": 0.9363057324840764, "percentage": 13.46, "elapsed_time": "0:03:53", "remaining_time": "0:25:02"}
50
- {"current_steps": 50, "total_steps": 364, "loss": 0.4186, "lr": 1.99221073756057e-05, "epoch": 0.9554140127388535, "percentage": 13.74, "elapsed_time": "0:03:58", "remaining_time": "0:24:58"}
51
- {"current_steps": 51, "total_steps": 364, "loss": 0.4178, "lr": 1.990968175599471e-05, "epoch": 0.9745222929936306, "percentage": 14.01, "elapsed_time": "0:04:04", "remaining_time": "0:24:58"}
52
- {"current_steps": 52, "total_steps": 364, "loss": 0.3973, "lr": 1.9896341474445526e-05, "epoch": 0.9936305732484076, "percentage": 14.29, "elapsed_time": "0:04:08", "remaining_time": "0:24:51"}
53
- {"current_steps": 53, "total_steps": 364, "loss": 0.3691, "lr": 1.9882087762263857e-05, "epoch": 1.0127388535031847, "percentage": 14.56, "elapsed_time": "0:05:28", "remaining_time": "0:32:04"}
54
- {"current_steps": 54, "total_steps": 364, "loss": 0.332, "lr": 1.9866921935064907e-05, "epoch": 1.0318471337579618, "percentage": 14.84, "elapsed_time": "0:05:32", "remaining_time": "0:31:47"}
55
- {"current_steps": 55, "total_steps": 364, "loss": 0.3547, "lr": 1.985084539265195e-05, "epoch": 1.0509554140127388, "percentage": 15.11, "elapsed_time": "0:05:37", "remaining_time": "0:31:35"}
56
- {"current_steps": 56, "total_steps": 364, "loss": 0.3347, "lr": 1.983385961888711e-05, "epoch": 1.070063694267516, "percentage": 15.38, "elapsed_time": "0:05:42", "remaining_time": "0:31:22"}
57
- {"current_steps": 57, "total_steps": 364, "loss": 0.363, "lr": 1.9815966181554412e-05, "epoch": 1.089171974522293, "percentage": 15.66, "elapsed_time": "0:05:47", "remaining_time": "0:31:09"}
58
- {"current_steps": 58, "total_steps": 364, "loss": 0.3046, "lr": 1.9797166732215078e-05, "epoch": 1.10828025477707, "percentage": 15.93, "elapsed_time": "0:05:51", "remaining_time": "0:30:51"}
59
- {"current_steps": 59, "total_steps": 364, "loss": 0.3207, "lr": 1.977746300605507e-05, "epoch": 1.127388535031847, "percentage": 16.21, "elapsed_time": "0:05:55", "remaining_time": "0:30:37"}
60
- {"current_steps": 60, "total_steps": 364, "loss": 0.3206, "lr": 1.975685682172497e-05, "epoch": 1.1464968152866242, "percentage": 16.48, "elapsed_time": "0:05:59", "remaining_time": "0:30:23"}
61
- {"current_steps": 61, "total_steps": 364, "loss": 0.3382, "lr": 1.973535008117207e-05, "epoch": 1.1656050955414012, "percentage": 16.76, "elapsed_time": "0:06:05", "remaining_time": "0:30:14"}
62
- {"current_steps": 62, "total_steps": 364, "loss": 0.3155, "lr": 1.9712944769464864e-05, "epoch": 1.1847133757961783, "percentage": 17.03, "elapsed_time": "0:06:08", "remaining_time": "0:29:57"}
63
- {"current_steps": 63, "total_steps": 364, "loss": 0.3274, "lr": 1.9689642954609808e-05, "epoch": 1.2038216560509554, "percentage": 17.31, "elapsed_time": "0:06:13", "remaining_time": "0:29:44"}
64
- {"current_steps": 64, "total_steps": 364, "loss": 0.3405, "lr": 1.9665446787360444e-05, "epoch": 1.2229299363057324, "percentage": 17.58, "elapsed_time": "0:06:17", "remaining_time": "0:29:29"}
65
- {"current_steps": 65, "total_steps": 364, "loss": 0.3114, "lr": 1.9640358501018885e-05, "epoch": 1.2420382165605095, "percentage": 17.86, "elapsed_time": "0:06:22", "remaining_time": "0:29:18"}
66
- {"current_steps": 66, "total_steps": 364, "loss": 0.3076, "lr": 1.9614380411229693e-05, "epoch": 1.2611464968152866, "percentage": 18.13, "elapsed_time": "0:06:26", "remaining_time": "0:29:02"}
67
- {"current_steps": 67, "total_steps": 364, "loss": 0.3227, "lr": 1.9587514915766124e-05, "epoch": 1.2802547770700636, "percentage": 18.41, "elapsed_time": "0:06:30", "remaining_time": "0:28:51"}
68
- {"current_steps": 68, "total_steps": 364, "loss": 0.3052, "lr": 1.9559764494308838e-05, "epoch": 1.2993630573248407, "percentage": 18.68, "elapsed_time": "0:06:34", "remaining_time": "0:28:36"}
69
- {"current_steps": 69, "total_steps": 364, "loss": 0.3409, "lr": 1.9531131708217005e-05, "epoch": 1.3184713375796178, "percentage": 18.96, "elapsed_time": "0:06:40", "remaining_time": "0:28:31"}
70
- {"current_steps": 70, "total_steps": 364, "loss": 0.3175, "lr": 1.950161920029191e-05, "epoch": 1.3375796178343948, "percentage": 19.23, "elapsed_time": "0:06:44", "remaining_time": "0:28:17"}
71
- {"current_steps": 71, "total_steps": 364, "loss": 0.3199, "lr": 1.9471229694533003e-05, "epoch": 1.356687898089172, "percentage": 19.51, "elapsed_time": "0:06:48", "remaining_time": "0:28:04"}
72
- {"current_steps": 72, "total_steps": 364, "loss": 0.3403, "lr": 1.943996599588649e-05, "epoch": 1.3757961783439492, "percentage": 19.78, "elapsed_time": "0:06:53", "remaining_time": "0:27:58"}
73
- {"current_steps": 73, "total_steps": 364, "loss": 0.3333, "lr": 1.940783098998643e-05, "epoch": 1.394904458598726, "percentage": 20.05, "elapsed_time": "0:06:58", "remaining_time": "0:27:46"}
74
- {"current_steps": 74, "total_steps": 364, "loss": 0.3086, "lr": 1.93748276428884e-05, "epoch": 1.4140127388535033, "percentage": 20.33, "elapsed_time": "0:07:01", "remaining_time": "0:27:33"}
75
- {"current_steps": 75, "total_steps": 364, "loss": 0.3003, "lr": 1.9340959000795707e-05, "epoch": 1.4331210191082802, "percentage": 20.6, "elapsed_time": "0:07:06", "remaining_time": "0:27:22"}
76
- {"current_steps": 76, "total_steps": 364, "loss": 0.34, "lr": 1.9306228189778255e-05, "epoch": 1.4522292993630574, "percentage": 20.88, "elapsed_time": "0:07:11", "remaining_time": "0:27:14"}
77
- {"current_steps": 77, "total_steps": 364, "loss": 0.3282, "lr": 1.927063841548398e-05, "epoch": 1.4713375796178343, "percentage": 21.15, "elapsed_time": "0:07:15", "remaining_time": "0:27:02"}
78
- {"current_steps": 78, "total_steps": 364, "loss": 0.3189, "lr": 1.9234192962842996e-05, "epoch": 1.4904458598726116, "percentage": 21.43, "elapsed_time": "0:07:19", "remaining_time": "0:26:52"}
79
- {"current_steps": 79, "total_steps": 364, "loss": 0.3211, "lr": 1.9196895195764363e-05, "epoch": 1.5095541401273884, "percentage": 21.7, "elapsed_time": "0:07:24", "remaining_time": "0:26:44"}
80
- {"current_steps": 80, "total_steps": 364, "loss": 0.321, "lr": 1.9158748556825637e-05, "epoch": 1.5286624203821657, "percentage": 21.98, "elapsed_time": "0:07:28", "remaining_time": "0:26:33"}
81
- {"current_steps": 81, "total_steps": 364, "loss": 0.3333, "lr": 1.9119756566955092e-05, "epoch": 1.5477707006369426, "percentage": 22.25, "elapsed_time": "0:07:34", "remaining_time": "0:26:27"}
82
- {"current_steps": 82, "total_steps": 364, "loss": 0.3307, "lr": 1.907992282510675e-05, "epoch": 1.5668789808917198, "percentage": 22.53, "elapsed_time": "0:07:38", "remaining_time": "0:26:17"}
83
- {"current_steps": 83, "total_steps": 364, "loss": 0.3282, "lr": 1.90392510079282e-05, "epoch": 1.5859872611464967, "percentage": 22.8, "elapsed_time": "0:07:42", "remaining_time": "0:26:06"}
84
- {"current_steps": 84, "total_steps": 364, "loss": 0.3137, "lr": 1.8997744869421248e-05, "epoch": 1.605095541401274, "percentage": 23.08, "elapsed_time": "0:07:46", "remaining_time": "0:25:55"}
85
- {"current_steps": 85, "total_steps": 364, "loss": 0.3002, "lr": 1.8955408240595396e-05, "epoch": 1.6242038216560508, "percentage": 23.35, "elapsed_time": "0:07:50", "remaining_time": "0:25:44"}
86
- {"current_steps": 86, "total_steps": 364, "loss": 0.3007, "lr": 1.891224502911428e-05, "epoch": 1.643312101910828, "percentage": 23.63, "elapsed_time": "0:07:54", "remaining_time": "0:25:32"}
87
- {"current_steps": 87, "total_steps": 364, "loss": 0.3279, "lr": 1.886825921893497e-05, "epoch": 1.662420382165605, "percentage": 23.9, "elapsed_time": "0:08:00", "remaining_time": "0:25:28"}
88
- {"current_steps": 88, "total_steps": 364, "loss": 0.3185, "lr": 1.8823454869940243e-05, "epoch": 1.6815286624203822, "percentage": 24.18, "elapsed_time": "0:08:04", "remaining_time": "0:25:20"}
89
- {"current_steps": 89, "total_steps": 364, "loss": 0.3314, "lr": 1.8777836117563894e-05, "epoch": 1.700636942675159, "percentage": 24.45, "elapsed_time": "0:08:09", "remaining_time": "0:25:13"}
90
- {"current_steps": 90, "total_steps": 364, "loss": 0.3278, "lr": 1.873140717240899e-05, "epoch": 1.7197452229299364, "percentage": 24.73, "elapsed_time": "0:08:15", "remaining_time": "0:25:08"}
91
- {"current_steps": 91, "total_steps": 364, "loss": 0.318, "lr": 1.8684172319859258e-05, "epoch": 1.7388535031847132, "percentage": 25.0, "elapsed_time": "0:08:19", "remaining_time": "0:24:59"}
 
1
+ {"current_steps": 1, "total_steps": 364, "loss": 1.1132, "lr": 5.405405405405406e-07, "epoch": 0.01910828025477707, "percentage": 0.27, "elapsed_time": "0:00:09", "remaining_time": "0:54:36"}
2
+ {"current_steps": 2, "total_steps": 364, "loss": 1.1203, "lr": 1.0810810810810812e-06, "epoch": 0.03821656050955414, "percentage": 0.55, "elapsed_time": "0:00:14", "remaining_time": "0:43:38"}
3
+ {"current_steps": 3, "total_steps": 364, "loss": 1.072, "lr": 1.6216216216216219e-06, "epoch": 0.05732484076433121, "percentage": 0.82, "elapsed_time": "0:00:19", "remaining_time": "0:38:08"}
4
+ {"current_steps": 4, "total_steps": 364, "loss": 1.0972, "lr": 2.1621621621621623e-06, "epoch": 0.07643312101910828, "percentage": 1.1, "elapsed_time": "0:00:22", "remaining_time": "0:34:13"}
5
+ {"current_steps": 5, "total_steps": 364, "loss": 1.0656, "lr": 2.702702702702703e-06, "epoch": 0.09554140127388536, "percentage": 1.37, "elapsed_time": "0:00:26", "remaining_time": "0:31:58"}
6
+ {"current_steps": 6, "total_steps": 364, "loss": 0.9882, "lr": 3.2432432432432437e-06, "epoch": 0.11464968152866242, "percentage": 1.65, "elapsed_time": "0:00:31", "remaining_time": "0:30:55"}
7
+ {"current_steps": 7, "total_steps": 364, "loss": 0.8802, "lr": 3.7837837837837844e-06, "epoch": 0.1337579617834395, "percentage": 1.92, "elapsed_time": "0:00:35", "remaining_time": "0:30:34"}
8
+ {"current_steps": 8, "total_steps": 364, "loss": 0.837, "lr": 4.324324324324325e-06, "epoch": 0.15286624203821655, "percentage": 2.2, "elapsed_time": "0:00:40", "remaining_time": "0:29:57"}
9
+ {"current_steps": 9, "total_steps": 364, "loss": 0.8174, "lr": 4.864864864864866e-06, "epoch": 0.17197452229299362, "percentage": 2.47, "elapsed_time": "0:00:44", "remaining_time": "0:29:07"}
10
+ {"current_steps": 10, "total_steps": 364, "loss": 0.7624, "lr": 5.405405405405406e-06, "epoch": 0.1910828025477707, "percentage": 2.75, "elapsed_time": "0:00:48", "remaining_time": "0:28:51"}
11
+ {"current_steps": 11, "total_steps": 364, "loss": 0.7111, "lr": 5.945945945945947e-06, "epoch": 0.21019108280254778, "percentage": 3.02, "elapsed_time": "0:00:53", "remaining_time": "0:28:20"}
12
+ {"current_steps": 12, "total_steps": 364, "loss": 0.6858, "lr": 6.486486486486487e-06, "epoch": 0.22929936305732485, "percentage": 3.3, "elapsed_time": "0:00:57", "remaining_time": "0:28:01"}
13
+ {"current_steps": 13, "total_steps": 364, "loss": 0.61, "lr": 7.027027027027028e-06, "epoch": 0.2484076433121019, "percentage": 3.57, "elapsed_time": "0:01:03", "remaining_time": "0:28:36"}
14
+ {"current_steps": 14, "total_steps": 364, "loss": 0.5898, "lr": 7.567567567567569e-06, "epoch": 0.267515923566879, "percentage": 3.85, "elapsed_time": "0:01:07", "remaining_time": "0:28:08"}
15
+ {"current_steps": 15, "total_steps": 364, "loss": 0.5504, "lr": 8.108108108108109e-06, "epoch": 0.28662420382165604, "percentage": 4.12, "elapsed_time": "0:01:11", "remaining_time": "0:27:36"}
16
+ {"current_steps": 16, "total_steps": 364, "loss": 0.541, "lr": 8.64864864864865e-06, "epoch": 0.3057324840764331, "percentage": 4.4, "elapsed_time": "0:01:15", "remaining_time": "0:27:26"}
17
+ {"current_steps": 17, "total_steps": 364, "loss": 0.5369, "lr": 9.189189189189191e-06, "epoch": 0.3248407643312102, "percentage": 4.67, "elapsed_time": "0:01:21", "remaining_time": "0:27:40"}
18
+ {"current_steps": 18, "total_steps": 364, "loss": 0.5146, "lr": 9.729729729729732e-06, "epoch": 0.34394904458598724, "percentage": 4.95, "elapsed_time": "0:01:25", "remaining_time": "0:27:24"}
19
+ {"current_steps": 19, "total_steps": 364, "loss": 0.5047, "lr": 1.027027027027027e-05, "epoch": 0.3630573248407643, "percentage": 5.22, "elapsed_time": "0:01:29", "remaining_time": "0:27:04"}
20
+ {"current_steps": 20, "total_steps": 364, "loss": 0.5, "lr": 1.0810810810810812e-05, "epoch": 0.3821656050955414, "percentage": 5.49, "elapsed_time": "0:01:33", "remaining_time": "0:26:45"}
21
+ {"current_steps": 21, "total_steps": 364, "loss": 0.5057, "lr": 1.1351351351351352e-05, "epoch": 0.4012738853503185, "percentage": 5.77, "elapsed_time": "0:01:39", "remaining_time": "0:27:03"}
22
+ {"current_steps": 22, "total_steps": 364, "loss": 0.483, "lr": 1.1891891891891894e-05, "epoch": 0.42038216560509556, "percentage": 6.04, "elapsed_time": "0:01:44", "remaining_time": "0:27:07"}
23
+ {"current_steps": 23, "total_steps": 364, "loss": 0.4743, "lr": 1.2432432432432433e-05, "epoch": 0.4394904458598726, "percentage": 6.32, "elapsed_time": "0:01:49", "remaining_time": "0:26:58"}
24
+ {"current_steps": 24, "total_steps": 364, "loss": 0.4617, "lr": 1.2972972972972975e-05, "epoch": 0.4585987261146497, "percentage": 6.59, "elapsed_time": "0:01:53", "remaining_time": "0:26:43"}
25
+ {"current_steps": 25, "total_steps": 364, "loss": 0.4761, "lr": 1.3513513513513515e-05, "epoch": 0.47770700636942676, "percentage": 6.87, "elapsed_time": "0:01:57", "remaining_time": "0:26:34"}
26
+ {"current_steps": 26, "total_steps": 364, "loss": 0.4266, "lr": 1.4054054054054055e-05, "epoch": 0.4968152866242038, "percentage": 7.14, "elapsed_time": "0:02:01", "remaining_time": "0:26:24"}
27
+ {"current_steps": 27, "total_steps": 364, "loss": 0.4683, "lr": 1.4594594594594596e-05, "epoch": 0.5159235668789809, "percentage": 7.42, "elapsed_time": "0:02:06", "remaining_time": "0:26:15"}
28
+ {"current_steps": 28, "total_steps": 364, "loss": 0.4009, "lr": 1.5135135135135138e-05, "epoch": 0.535031847133758, "percentage": 7.69, "elapsed_time": "0:02:10", "remaining_time": "0:26:09"}
29
+ {"current_steps": 29, "total_steps": 364, "loss": 0.4609, "lr": 1.5675675675675676e-05, "epoch": 0.554140127388535, "percentage": 7.97, "elapsed_time": "0:02:15", "remaining_time": "0:26:05"}
30
+ {"current_steps": 30, "total_steps": 364, "loss": 0.4173, "lr": 1.6216216216216218e-05, "epoch": 0.5732484076433121, "percentage": 8.24, "elapsed_time": "0:02:20", "remaining_time": "0:25:58"}
31
+ {"current_steps": 31, "total_steps": 364, "loss": 0.4311, "lr": 1.6756756756756757e-05, "epoch": 0.5923566878980892, "percentage": 8.52, "elapsed_time": "0:02:24", "remaining_time": "0:25:57"}
32
+ {"current_steps": 32, "total_steps": 364, "loss": 0.4295, "lr": 1.72972972972973e-05, "epoch": 0.6114649681528662, "percentage": 8.79, "elapsed_time": "0:02:28", "remaining_time": "0:25:45"}
33
+ {"current_steps": 33, "total_steps": 364, "loss": 0.4211, "lr": 1.783783783783784e-05, "epoch": 0.6305732484076433, "percentage": 9.07, "elapsed_time": "0:02:33", "remaining_time": "0:25:37"}
34
+ {"current_steps": 34, "total_steps": 364, "loss": 0.4391, "lr": 1.8378378378378383e-05, "epoch": 0.6496815286624203, "percentage": 9.34, "elapsed_time": "0:02:37", "remaining_time": "0:25:27"}
35
+ {"current_steps": 35, "total_steps": 364, "loss": 0.4556, "lr": 1.891891891891892e-05, "epoch": 0.6687898089171974, "percentage": 9.62, "elapsed_time": "0:02:41", "remaining_time": "0:25:21"}
36
+ {"current_steps": 36, "total_steps": 364, "loss": 0.4296, "lr": 1.9459459459459463e-05, "epoch": 0.6878980891719745, "percentage": 9.89, "elapsed_time": "0:02:45", "remaining_time": "0:25:08"}
37
+ {"current_steps": 37, "total_steps": 364, "loss": 0.4189, "lr": 2e-05, "epoch": 0.7070063694267515, "percentage": 10.16, "elapsed_time": "0:02:50", "remaining_time": "0:25:10"}
38
+ {"current_steps": 38, "total_steps": 364, "loss": 0.4141, "lr": 1.9999538500851633e-05, "epoch": 0.7261146496815286, "percentage": 10.44, "elapsed_time": "0:02:55", "remaining_time": "0:25:05"}
39
+ {"current_steps": 39, "total_steps": 364, "loss": 0.4248, "lr": 1.9998154046002822e-05, "epoch": 0.7452229299363057, "percentage": 10.71, "elapsed_time": "0:03:03", "remaining_time": "0:25:31"}
40
+ {"current_steps": 40, "total_steps": 364, "loss": 0.4054, "lr": 1.9995846763238514e-05, "epoch": 0.7643312101910829, "percentage": 10.99, "elapsed_time": "0:03:08", "remaining_time": "0:25:24"}
41
+ {"current_steps": 41, "total_steps": 364, "loss": 0.3879, "lr": 1.9992616865520515e-05, "epoch": 0.7834394904458599, "percentage": 11.26, "elapsed_time": "0:03:12", "remaining_time": "0:25:18"}
42
+ {"current_steps": 42, "total_steps": 364, "loss": 0.4109, "lr": 1.9988464650967834e-05, "epoch": 0.802547770700637, "percentage": 11.54, "elapsed_time": "0:03:16", "remaining_time": "0:25:09"}
43
+ {"current_steps": 43, "total_steps": 364, "loss": 0.3974, "lr": 1.9983390502829168e-05, "epoch": 0.821656050955414, "percentage": 11.81, "elapsed_time": "0:03:20", "remaining_time": "0:24:56"}
44
+ {"current_steps": 44, "total_steps": 364, "loss": 0.3866, "lr": 1.9977394889447526e-05, "epoch": 0.8407643312101911, "percentage": 12.09, "elapsed_time": "0:03:24", "remaining_time": "0:24:49"}
45
+ {"current_steps": 45, "total_steps": 364, "loss": 0.3965, "lr": 1.9970478364217e-05, "epoch": 0.8598726114649682, "percentage": 12.36, "elapsed_time": "0:03:29", "remaining_time": "0:24:45"}
46
+ {"current_steps": 46, "total_steps": 364, "loss": 0.3955, "lr": 1.9962641565531694e-05, "epoch": 0.8789808917197452, "percentage": 12.64, "elapsed_time": "0:03:33", "remaining_time": "0:24:37"}
47
+ {"current_steps": 47, "total_steps": 364, "loss": 0.4011, "lr": 1.9953885216726788e-05, "epoch": 0.8980891719745223, "percentage": 12.91, "elapsed_time": "0:03:39", "remaining_time": "0:24:41"}
48
+ {"current_steps": 48, "total_steps": 364, "loss": 0.3994, "lr": 1.994421012601179e-05, "epoch": 0.9171974522292994, "percentage": 13.19, "elapsed_time": "0:03:43", "remaining_time": "0:24:32"}
49
+ {"current_steps": 49, "total_steps": 364, "loss": 0.3816, "lr": 1.9933617186395917e-05, "epoch": 0.9363057324840764, "percentage": 13.46, "elapsed_time": "0:03:48", "remaining_time": "0:24:26"}
50
+ {"current_steps": 50, "total_steps": 364, "loss": 0.4186, "lr": 1.99221073756057e-05, "epoch": 0.9554140127388535, "percentage": 13.74, "elapsed_time": "0:03:52", "remaining_time": "0:24:23"}
51
+ {"current_steps": 51, "total_steps": 364, "loss": 0.4176, "lr": 1.990968175599471e-05, "epoch": 0.9745222929936306, "percentage": 14.01, "elapsed_time": "0:03:58", "remaining_time": "0:24:23"}
52
+ {"current_steps": 52, "total_steps": 364, "loss": 0.3976, "lr": 1.9896341474445526e-05, "epoch": 0.9936305732484076, "percentage": 14.29, "elapsed_time": "0:04:02", "remaining_time": "0:24:17"}
53
+ {"current_steps": 53, "total_steps": 364, "loss": 0.3684, "lr": 1.9882087762263857e-05, "epoch": 1.0127388535031847, "percentage": 14.56, "elapsed_time": "0:05:57", "remaining_time": "0:34:55"}
54
+ {"current_steps": 54, "total_steps": 364, "loss": 0.332, "lr": 1.9866921935064907e-05, "epoch": 1.0318471337579618, "percentage": 14.84, "elapsed_time": "0:06:01", "remaining_time": "0:34:34"}
55
+ {"current_steps": 55, "total_steps": 364, "loss": 0.3523, "lr": 1.985084539265195e-05, "epoch": 1.0509554140127388, "percentage": 15.11, "elapsed_time": "0:06:06", "remaining_time": "0:34:19"}
56
+ {"current_steps": 56, "total_steps": 364, "loss": 0.3353, "lr": 1.983385961888711e-05, "epoch": 1.070063694267516, "percentage": 15.38, "elapsed_time": "0:06:11", "remaining_time": "0:34:02"}
57
+ {"current_steps": 57, "total_steps": 364, "loss": 0.3593, "lr": 1.9815966181554412e-05, "epoch": 1.089171974522293, "percentage": 15.66, "elapsed_time": "0:06:16", "remaining_time": "0:33:45"}
58
+ {"current_steps": 58, "total_steps": 364, "loss": 0.3042, "lr": 1.9797166732215078e-05, "epoch": 1.10828025477707, "percentage": 15.93, "elapsed_time": "0:06:20", "remaining_time": "0:33:25"}
59
+ {"current_steps": 59, "total_steps": 364, "loss": 0.3179, "lr": 1.977746300605507e-05, "epoch": 1.127388535031847, "percentage": 16.21, "elapsed_time": "0:06:24", "remaining_time": "0:33:07"}
60
+ {"current_steps": 60, "total_steps": 364, "loss": 0.3195, "lr": 1.975685682172497e-05, "epoch": 1.1464968152866242, "percentage": 16.48, "elapsed_time": "0:06:28", "remaining_time": "0:32:50"}
61
+ {"current_steps": 61, "total_steps": 364, "loss": 0.3361, "lr": 1.973535008117207e-05, "epoch": 1.1656050955414012, "percentage": 16.76, "elapsed_time": "0:06:34", "remaining_time": "0:32:39"}
62
+ {"current_steps": 62, "total_steps": 364, "loss": 0.3151, "lr": 1.9712944769464864e-05, "epoch": 1.1847133757961783, "percentage": 17.03, "elapsed_time": "0:06:38", "remaining_time": "0:32:18"}
63
+ {"current_steps": 63, "total_steps": 364, "loss": 0.3288, "lr": 1.9689642954609808e-05, "epoch": 1.2038216560509554, "percentage": 17.31, "elapsed_time": "0:06:42", "remaining_time": "0:32:03"}
64
+ {"current_steps": 64, "total_steps": 364, "loss": 0.3403, "lr": 1.9665446787360444e-05, "epoch": 1.2229299363057324, "percentage": 17.58, "elapsed_time": "0:06:46", "remaining_time": "0:31:46"}
65
+ {"current_steps": 65, "total_steps": 364, "loss": 0.3093, "lr": 1.9640358501018885e-05, "epoch": 1.2420382165605095, "percentage": 17.86, "elapsed_time": "0:06:51", "remaining_time": "0:31:32"}
66
+ {"current_steps": 66, "total_steps": 364, "loss": 0.3077, "lr": 1.9614380411229693e-05, "epoch": 1.2611464968152866, "percentage": 18.13, "elapsed_time": "0:06:55", "remaining_time": "0:31:13"}
67
+ {"current_steps": 67, "total_steps": 364, "loss": 0.3222, "lr": 1.9587514915766124e-05, "epoch": 1.2802547770700636, "percentage": 18.41, "elapsed_time": "0:06:59", "remaining_time": "0:30:59"}
68
+ {"current_steps": 68, "total_steps": 364, "loss": 0.304, "lr": 1.9559764494308838e-05, "epoch": 1.2993630573248407, "percentage": 18.68, "elapsed_time": "0:07:03", "remaining_time": "0:30:42"}
69
+ {"current_steps": 69, "total_steps": 364, "loss": 0.3404, "lr": 1.9531131708217005e-05, "epoch": 1.3184713375796178, "percentage": 18.96, "elapsed_time": "0:07:09", "remaining_time": "0:30:34"}
70
+ {"current_steps": 70, "total_steps": 364, "loss": 0.3178, "lr": 1.950161920029191e-05, "epoch": 1.3375796178343948, "percentage": 19.23, "elapsed_time": "0:07:13", "remaining_time": "0:30:18"}
71
+ {"current_steps": 71, "total_steps": 364, "loss": 0.3179, "lr": 1.9471229694533003e-05, "epoch": 1.356687898089172, "percentage": 19.51, "elapsed_time": "0:07:17", "remaining_time": "0:30:04"}
72
+ {"current_steps": 72, "total_steps": 364, "loss": 0.3393, "lr": 1.943996599588649e-05, "epoch": 1.3757961783439492, "percentage": 19.78, "elapsed_time": "0:07:22", "remaining_time": "0:29:56"}
73
+ {"current_steps": 73, "total_steps": 364, "loss": 0.3292, "lr": 1.940783098998643e-05, "epoch": 1.394904458598726, "percentage": 20.05, "elapsed_time": "0:07:27", "remaining_time": "0:29:42"}
74
+ {"current_steps": 74, "total_steps": 364, "loss": 0.304, "lr": 1.93748276428884e-05, "epoch": 1.4140127388535033, "percentage": 20.33, "elapsed_time": "0:07:30", "remaining_time": "0:29:26"}
75
+ {"current_steps": 75, "total_steps": 364, "loss": 0.2991, "lr": 1.9340959000795707e-05, "epoch": 1.4331210191082802, "percentage": 20.6, "elapsed_time": "0:07:35", "remaining_time": "0:29:13"}
76
+ {"current_steps": 76, "total_steps": 364, "loss": 0.3389, "lr": 1.9306228189778255e-05, "epoch": 1.4522292993630574, "percentage": 20.88, "elapsed_time": "0:07:40", "remaining_time": "0:29:03"}
77
+ {"current_steps": 77, "total_steps": 364, "loss": 0.3287, "lr": 1.927063841548398e-05, "epoch": 1.4713375796178343, "percentage": 21.15, "elapsed_time": "0:07:44", "remaining_time": "0:28:50"}
78
+ {"current_steps": 78, "total_steps": 364, "loss": 0.3171, "lr": 1.9234192962842996e-05, "epoch": 1.4904458598726116, "percentage": 21.43, "elapsed_time": "0:07:48", "remaining_time": "0:28:38"}
79
+ {"current_steps": 79, "total_steps": 364, "loss": 0.3211, "lr": 1.9196895195764363e-05, "epoch": 1.5095541401273884, "percentage": 21.7, "elapsed_time": "0:07:53", "remaining_time": "0:28:28"}
80
+ {"current_steps": 80, "total_steps": 364, "loss": 0.3178, "lr": 1.9158748556825637e-05, "epoch": 1.5286624203821657, "percentage": 21.98, "elapsed_time": "0:07:57", "remaining_time": "0:28:16"}
81
+ {"current_steps": 81, "total_steps": 364, "loss": 0.3292, "lr": 1.9119756566955092e-05, "epoch": 1.5477707006369426, "percentage": 22.25, "elapsed_time": "0:08:03", "remaining_time": "0:28:08"}
82
+ {"current_steps": 82, "total_steps": 364, "loss": 0.3301, "lr": 1.907992282510675e-05, "epoch": 1.5668789808917198, "percentage": 22.53, "elapsed_time": "0:08:07", "remaining_time": "0:27:57"}
83
+ {"current_steps": 83, "total_steps": 364, "loss": 0.3269, "lr": 1.90392510079282e-05, "epoch": 1.5859872611464967, "percentage": 22.8, "elapsed_time": "0:08:11", "remaining_time": "0:27:44"}
84
+ {"current_steps": 84, "total_steps": 364, "loss": 0.3106, "lr": 1.8997744869421248e-05, "epoch": 1.605095541401274, "percentage": 23.08, "elapsed_time": "0:08:15", "remaining_time": "0:27:32"}
85
+ {"current_steps": 85, "total_steps": 364, "loss": 0.2997, "lr": 1.8955408240595396e-05, "epoch": 1.6242038216560508, "percentage": 23.35, "elapsed_time": "0:08:19", "remaining_time": "0:27:18"}
86
+ {"current_steps": 86, "total_steps": 364, "loss": 0.3, "lr": 1.891224502911428e-05, "epoch": 1.643312101910828, "percentage": 23.63, "elapsed_time": "0:08:22", "remaining_time": "0:27:05"}
87
+ {"current_steps": 87, "total_steps": 364, "loss": 0.3277, "lr": 1.886825921893497e-05, "epoch": 1.662420382165605, "percentage": 23.9, "elapsed_time": "0:08:28", "remaining_time": "0:27:00"}
88
+ {"current_steps": 88, "total_steps": 364, "loss": 0.3143, "lr": 1.8823454869940243e-05, "epoch": 1.6815286624203822, "percentage": 24.18, "elapsed_time": "0:08:33", "remaining_time": "0:26:51"}
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcf0d2d999b493ae9f8440b69f6203fe98064a65a7717ffc86b999f80d99d964
3
- size 7160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc6fd08039548fd46002289d4c319bd04c39b84522db1b381c10bbed7383d90
3
+ size 7288