Training in progress, step 2142, checkpoint
Browse files
    	
        checkpoint-2142/adapter_config.json
    CHANGED
    
    | 
         @@ -26,13 +26,13 @@ 
     | 
|
| 26 | 
         
             
              "rank_pattern": {},
         
     | 
| 27 | 
         
             
              "revision": null,
         
     | 
| 28 | 
         
             
              "target_modules": [
         
     | 
| 29 | 
         
            -
                " 
     | 
| 30 | 
         
            -
                " 
     | 
| 31 | 
         
            -
                "v_proj",
         
     | 
| 32 | 
         
             
                "q_proj",
         
     | 
| 
         | 
|
| 
         | 
|
| 33 | 
         
             
                "down_proj",
         
     | 
| 34 | 
         
            -
                " 
     | 
| 35 | 
         
            -
                "gate_proj"
         
     | 
| 36 | 
         
             
              ],
         
     | 
| 37 | 
         
             
              "task_type": null,
         
     | 
| 38 | 
         
             
              "use_dora": false,
         
     | 
| 
         | 
|
| 26 | 
         
             
              "rank_pattern": {},
         
     | 
| 27 | 
         
             
              "revision": null,
         
     | 
| 28 | 
         
             
              "target_modules": [
         
     | 
| 29 | 
         
            +
                "k_proj",
         
     | 
| 30 | 
         
            +
                "gate_proj",
         
     | 
| 
         | 
|
| 31 | 
         
             
                "q_proj",
         
     | 
| 32 | 
         
            +
                "v_proj",
         
     | 
| 33 | 
         
            +
                "o_proj",
         
     | 
| 34 | 
         
             
                "down_proj",
         
     | 
| 35 | 
         
            +
                "up_proj"
         
     | 
| 
         | 
|
| 36 | 
         
             
              ],
         
     | 
| 37 | 
         
             
              "task_type": null,
         
     | 
| 38 | 
         
             
              "use_dora": false,
         
     | 
    	
        checkpoint-2142/adapter_model.safetensors
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 400616360
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:c1c8404dc5f869f364ec72c5769ca22d6cea4956198f9cd005146a7cba699327
         
     | 
| 3 | 
         
             
            size 400616360
         
     | 
    	
        checkpoint-2142/optimizer.pt
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 205100562
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:a8fa25d0df9a77a82d1be0116ca7579231d446e0ede044d6749b460a2b448e28
         
     | 
| 3 | 
         
             
            size 205100562
         
     | 
    	
        checkpoint-2142/rng_state.pth
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 14308
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:58593014ffe5074887df6ba5c4ff4972f82be99bdef3518c2a618376dc4cf03f
         
     | 
| 3 | 
         
             
            size 14308
         
     | 
    	
        checkpoint-2142/trainer_state.json
    CHANGED
    
    | 
         @@ -70,62 +70,62 @@ 
     | 
|
| 70 | 
         
             
                },
         
     | 
| 71 | 
         
             
                {
         
     | 
| 72 | 
         
             
                  "epoch": 1.167114015637764,
         
     | 
| 73 | 
         
            -
                  "grad_norm": 2. 
     | 
| 74 | 
         
             
                  "learning_rate": 3.915853581228413e-05,
         
     | 
| 75 | 
         
            -
                  "loss": 0. 
     | 
| 76 | 
         
             
                  "step": 1250
         
     | 
| 77 | 
         
             
                },
         
     | 
| 78 | 
         
             
                {
         
     | 
| 79 | 
         
             
                  "epoch": 1.167114015637764,
         
     | 
| 80 | 
         
            -
                  "eval_loss": 0. 
     | 
| 81 | 
         
            -
                  "eval_runtime":  
     | 
| 82 | 
         
            -
                  "eval_samples_per_second": 3. 
     | 
| 83 | 
         
            -
                  "eval_steps_per_second": 3. 
     | 
| 84 | 
         
             
                  "step": 1250
         
     | 
| 85 | 
         
             
                },
         
     | 
| 86 | 
         
             
                {
         
     | 
| 87 | 
         
             
                  "epoch": 1.4005134788189988,
         
     | 
| 88 | 
         
            -
                  "grad_norm": 1. 
     | 
| 89 | 
         
             
                  "learning_rate": 2.1903963223439395e-05,
         
     | 
| 90 | 
         
            -
                  "loss": 0. 
     | 
| 91 | 
         
             
                  "step": 1500
         
     | 
| 92 | 
         
             
                },
         
     | 
| 93 | 
         
             
                {
         
     | 
| 94 | 
         
             
                  "epoch": 1.4005134788189988,
         
     | 
| 95 | 
         
            -
                  "eval_loss": 0. 
     | 
| 96 | 
         
            -
                  "eval_runtime": 22. 
     | 
| 97 | 
         
            -
                  "eval_samples_per_second": 3. 
     | 
| 98 | 
         
            -
                  "eval_steps_per_second": 3. 
     | 
| 99 | 
         
             
                  "step": 1500
         
     | 
| 100 | 
         
             
                },
         
     | 
| 101 | 
         
             
                {
         
     | 
| 102 | 
         
             
                  "epoch": 1.6339129420002334,
         
     | 
| 103 | 
         
            -
                  "grad_norm":  
     | 
| 104 | 
         
             
                  "learning_rate": 8.619209196560924e-06,
         
     | 
| 105 | 
         
            -
                  "loss": 0. 
     | 
| 106 | 
         
             
                  "step": 1750
         
     | 
| 107 | 
         
             
                },
         
     | 
| 108 | 
         
             
                {
         
     | 
| 109 | 
         
             
                  "epoch": 1.6339129420002334,
         
     | 
| 110 | 
         
            -
                  "eval_loss": 0. 
     | 
| 111 | 
         
            -
                  "eval_runtime": 22. 
     | 
| 112 | 
         
            -
                  "eval_samples_per_second": 3. 
     | 
| 113 | 
         
            -
                  "eval_steps_per_second": 3. 
     | 
| 114 | 
         
             
                  "step": 1750
         
     | 
| 115 | 
         
             
                },
         
     | 
| 116 | 
         
             
                {
         
     | 
| 117 | 
         
             
                  "epoch": 1.867312405181468,
         
     | 
| 118 | 
         
            -
                  "grad_norm":  
     | 
| 119 | 
         
             
                  "learning_rate": 1.181337872277094e-06,
         
     | 
| 120 | 
         
            -
                  "loss": 0. 
     | 
| 121 | 
         
             
                  "step": 2000
         
     | 
| 122 | 
         
             
                },
         
     | 
| 123 | 
         
             
                {
         
     | 
| 124 | 
         
             
                  "epoch": 1.867312405181468,
         
     | 
| 125 | 
         
            -
                  "eval_loss": 0. 
     | 
| 126 | 
         
            -
                  "eval_runtime": 22. 
     | 
| 127 | 
         
            -
                  "eval_samples_per_second": 3. 
     | 
| 128 | 
         
            -
                  "eval_steps_per_second": 3. 
     | 
| 129 | 
         
             
                  "step": 2000
         
     | 
| 130 | 
         
             
                }
         
     | 
| 131 | 
         
             
              ],
         
     | 
| 
         @@ -146,7 +146,7 @@ 
     | 
|
| 146 | 
         
             
                  "attributes": {}
         
     | 
| 147 | 
         
             
                }
         
     | 
| 148 | 
         
             
              },
         
     | 
| 149 | 
         
            -
              "total_flos": 1. 
     | 
| 150 | 
         
             
              "train_batch_size": 1,
         
     | 
| 151 | 
         
             
              "trial_name": null,
         
     | 
| 152 | 
         
             
              "trial_params": null
         
     | 
| 
         | 
|
| 70 | 
         
             
                },
         
     | 
| 71 | 
         
             
                {
         
     | 
| 72 | 
         
             
                  "epoch": 1.167114015637764,
         
     | 
| 73 | 
         
            +
                  "grad_norm": 2.2942659854888916,
         
     | 
| 74 | 
         
             
                  "learning_rate": 3.915853581228413e-05,
         
     | 
| 75 | 
         
            +
                  "loss": 0.7993,
         
     | 
| 76 | 
         
             
                  "step": 1250
         
     | 
| 77 | 
         
             
                },
         
     | 
| 78 | 
         
             
                {
         
     | 
| 79 | 
         
             
                  "epoch": 1.167114015637764,
         
     | 
| 80 | 
         
            +
                  "eval_loss": 0.1208883598446846,
         
     | 
| 81 | 
         
            +
                  "eval_runtime": 21.9734,
         
     | 
| 82 | 
         
            +
                  "eval_samples_per_second": 3.959,
         
     | 
| 83 | 
         
            +
                  "eval_steps_per_second": 3.959,
         
     | 
| 84 | 
         
             
                  "step": 1250
         
     | 
| 85 | 
         
             
                },
         
     | 
| 86 | 
         
             
                {
         
     | 
| 87 | 
         
             
                  "epoch": 1.4005134788189988,
         
     | 
| 88 | 
         
            +
                  "grad_norm": 1.147830843925476,
         
     | 
| 89 | 
         
             
                  "learning_rate": 2.1903963223439395e-05,
         
     | 
| 90 | 
         
            +
                  "loss": 0.7032,
         
     | 
| 91 | 
         
             
                  "step": 1500
         
     | 
| 92 | 
         
             
                },
         
     | 
| 93 | 
         
             
                {
         
     | 
| 94 | 
         
             
                  "epoch": 1.4005134788189988,
         
     | 
| 95 | 
         
            +
                  "eval_loss": 0.10965924710035324,
         
     | 
| 96 | 
         
            +
                  "eval_runtime": 22.0058,
         
     | 
| 97 | 
         
            +
                  "eval_samples_per_second": 3.954,
         
     | 
| 98 | 
         
            +
                  "eval_steps_per_second": 3.954,
         
     | 
| 99 | 
         
             
                  "step": 1500
         
     | 
| 100 | 
         
             
                },
         
     | 
| 101 | 
         
             
                {
         
     | 
| 102 | 
         
             
                  "epoch": 1.6339129420002334,
         
     | 
| 103 | 
         
            +
                  "grad_norm": 2.2439823150634766,
         
     | 
| 104 | 
         
             
                  "learning_rate": 8.619209196560924e-06,
         
     | 
| 105 | 
         
            +
                  "loss": 0.6596,
         
     | 
| 106 | 
         
             
                  "step": 1750
         
     | 
| 107 | 
         
             
                },
         
     | 
| 108 | 
         
             
                {
         
     | 
| 109 | 
         
             
                  "epoch": 1.6339129420002334,
         
     | 
| 110 | 
         
            +
                  "eval_loss": 0.10602504760026932,
         
     | 
| 111 | 
         
            +
                  "eval_runtime": 22.0084,
         
     | 
| 112 | 
         
            +
                  "eval_samples_per_second": 3.953,
         
     | 
| 113 | 
         
            +
                  "eval_steps_per_second": 3.953,
         
     | 
| 114 | 
         
             
                  "step": 1750
         
     | 
| 115 | 
         
             
                },
         
     | 
| 116 | 
         
             
                {
         
     | 
| 117 | 
         
             
                  "epoch": 1.867312405181468,
         
     | 
| 118 | 
         
            +
                  "grad_norm": 1.053748369216919,
         
     | 
| 119 | 
         
             
                  "learning_rate": 1.181337872277094e-06,
         
     | 
| 120 | 
         
            +
                  "loss": 0.6323,
         
     | 
| 121 | 
         
             
                  "step": 2000
         
     | 
| 122 | 
         
             
                },
         
     | 
| 123 | 
         
             
                {
         
     | 
| 124 | 
         
             
                  "epoch": 1.867312405181468,
         
     | 
| 125 | 
         
            +
                  "eval_loss": 0.10403568297624588,
         
     | 
| 126 | 
         
            +
                  "eval_runtime": 22.0253,
         
     | 
| 127 | 
         
            +
                  "eval_samples_per_second": 3.95,
         
     | 
| 128 | 
         
            +
                  "eval_steps_per_second": 3.95,
         
     | 
| 129 | 
         
             
                  "step": 2000
         
     | 
| 130 | 
         
             
                }
         
     | 
| 131 | 
         
             
              ],
         
     | 
| 
         | 
|
| 146 | 
         
             
                  "attributes": {}
         
     | 
| 147 | 
         
             
                }
         
     | 
| 148 | 
         
             
              },
         
     | 
| 149 | 
         
            +
              "total_flos": 1.7185280711396352e+17,
         
     | 
| 150 | 
         
             
              "train_batch_size": 1,
         
     | 
| 151 | 
         
             
              "trial_name": null,
         
     | 
| 152 | 
         
             
              "trial_params": null
         
     | 
    	
        checkpoint-2142/training_args.bin
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 5560
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:944c7c22023831a73ee4b0a66805723ecef65f25064cb419b24a8d84b3daee22
         
     | 
| 3 | 
         
             
            size 5560
         
     |