SystemAdmin123 committed on
Commit
6095cb3
·
verified ·
1 Parent(s): 85fbda3

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75814a6362e4e11dac5581c59e464280e239c90816a945cbdb7784b7f4a2870f
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ff65fdaabdbf317ebeba7a949d5d0a4c4d92e83844319c0d7fe8a36188ae52
3
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12dadb820ceeb4e269d17b6aa6f86327c5aa969f69dfcd327a64bc9dac04190b
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a8866762d9e391b12fc72c391a88203f43a851b8cb313ef7dad4f518fa97c7
3
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df0e1ea16260f45887d2c677c0ae8e31c8c4d2255fab7d191fcc44bc34c71122
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bdc3634cc2773d067b17c976dcd11b7c7df5940a7571bbb00f5d7df4c638644
3
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22cfae872a9afec0b59d053ac9909bee382518e88fa4a402a4f4847d1eb58185
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f015982ff6bce7542e828f15d4eeef9584a889686885e52763409fdc683cfcdd
3
  size 1168138808
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d446451777d9865628bc2749fb0484e3d91216d8ea6282c691d65006008a8f8
3
  size 16311821444
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65e0c53a4e65a879c4f5b6bbea3d323377988982d8d0b1157cdadf6ac17d04c3
3
  size 16311821444
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:857e5bd047d58fafc1913aa9f90fe3c9025187bb963b8cad4a7d0e2e10d025a2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90b33c83dc0410b94f46921f5ce291ef34ed50ec198c7840b5fe049891543c68
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ee99799f89d8494b24d4cce06afa38d806c0f10acfc9b783e494c77f6ea5559
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a673aaf85c0fe6b6c29cb8f3e7dbd829eef637110e4ad9a775f3fcf001c92591
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.09311950336264874,
5
  "eval_steps": 20,
6
- "global_step": 360,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -411,6 +411,50 @@
411
  "eval_samples_per_second": 9.247,
412
  "eval_steps_per_second": 4.623,
413
  "step": 360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  }
415
  ],
416
  "logging_steps": 10,
@@ -430,7 +474,7 @@
430
  "attributes": {}
431
  }
432
  },
433
- "total_flos": 6.639877034606592e+16,
434
  "train_batch_size": 2,
435
  "trial_name": null,
436
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.10346611484738748,
5
  "eval_steps": 20,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
411
  "eval_samples_per_second": 9.247,
412
  "eval_steps_per_second": 4.623,
413
  "step": 360
414
+ },
415
+ {
416
+ "epoch": 0.09570615623383343,
417
+ "grad_norm": 15.3125,
418
+ "learning_rate": 0.0001947944062577507,
419
+ "loss": 3.3218,
420
+ "step": 370
421
+ },
422
+ {
423
+ "epoch": 0.0982928091050181,
424
+ "grad_norm": 10.75,
425
+ "learning_rate": 0.00019436490477135878,
426
+ "loss": 2.914,
427
+ "step": 380
428
+ },
429
+ {
430
+ "epoch": 0.0982928091050181,
431
+ "eval_loss": 3.1102402210235596,
432
+ "eval_runtime": 113.9573,
433
+ "eval_samples_per_second": 7.547,
434
+ "eval_steps_per_second": 3.773,
435
+ "step": 380
436
+ },
437
+ {
438
+ "epoch": 0.10087946197620279,
439
+ "grad_norm": 18.625,
440
+ "learning_rate": 0.00019391889215899299,
441
+ "loss": 3.2308,
442
+ "step": 390
443
+ },
444
+ {
445
+ "epoch": 0.10346611484738748,
446
+ "grad_norm": 13.9375,
447
+ "learning_rate": 0.0001934564464599461,
448
+ "loss": 2.8181,
449
+ "step": 400
450
+ },
451
+ {
452
+ "epoch": 0.10346611484738748,
453
+ "eval_loss": 3.4344868659973145,
454
+ "eval_runtime": 100.2818,
455
+ "eval_samples_per_second": 8.576,
456
+ "eval_steps_per_second": 4.288,
457
+ "step": 400
458
  }
459
  ],
460
  "logging_steps": 10,
 
474
  "attributes": {}
475
  }
476
  },
477
+ "total_flos": 7.37764114956288e+16,
478
  "train_batch_size": 2,
479
  "trial_name": null,
480
  "trial_params": null