taicheng committed on
Commit
97200bc
·
verified ·
1 Parent(s): 08e31e6

Model save

Browse files
README.md CHANGED
@@ -3,16 +3,10 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: alignment-handbook/zephyr-7b-sft-full
5
  tags:
6
- - alignment-handbook
7
- - trl
8
- - dpo
9
- - generated_from_trainer
10
  - trl
11
  - dpo
12
  - alignment-handbook
13
  - generated_from_trainer
14
- datasets:
15
- - HuggingFaceH4/ultrafeedback_binarized
16
  model-index:
17
  - name: zephyr-7b-align-scan
18
  results: []
@@ -23,17 +17,17 @@ should probably proofread and complete it, then remove this comment. -->
23
 
24
  # zephyr-7b-align-scan
25
 
26
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the HuggingFaceH4/ultrafeedback_binarized dataset.
27
  It achieves the following results on the evaluation set:
28
- - Loss: 0.6700
29
- - Rewards/chosen: 0.0017
30
- - Rewards/rejected: -0.0506
31
- - Rewards/accuracies: 0.3155
32
- - Rewards/margins: 0.0522
33
- - Logps/rejected: -86.1855
34
- - Logps/chosen: -74.3248
35
- - Logits/rejected: -2.4181
36
- - Logits/chosen: -2.4359
37
 
38
  ## Model description
39
 
@@ -68,6 +62,9 @@ The following hyperparameters were used during training:
68
 
69
  ### Training results
70
 
 
 
 
71
 
72
 
73
  ### Framework versions
 
3
  license: apache-2.0
4
  base_model: alignment-handbook/zephyr-7b-sft-full
5
  tags:
 
 
 
 
6
  - trl
7
  - dpo
8
  - alignment-handbook
9
  - generated_from_trainer
 
 
10
  model-index:
11
  - name: zephyr-7b-align-scan
12
  results: []
 
17
 
18
  # zephyr-7b-align-scan
19
 
20
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Logits/chosen: -2.4391
23
+ - Logits/rejected: -2.4211
24
+ - Logps/chosen: -77.9509
25
+ - Logps/rejected: -90.5673
26
+ - Loss: 3.3938
27
+ - Rewards/accuracies: 0.3234
28
+ - Rewards/chosen: -3.1108
29
+ - Rewards/margins: 5.3764
30
+ - Rewards/rejected: -8.4871
31
 
32
  ## Model description
33
 
 
62
 
63
  ### Training results
64
 
65
+ | Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
66
+ |:-------------:|:------:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
67
+ | 1.6705 | 1.0417 | 100 | -2.4391 | -2.4211 | -77.9509 | -90.5673 | 3.3938 | 0.3234 | -3.1108 | 5.3764 | -8.4871 |
68
 
69
 
70
  ### Framework versions
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 1.0,
3
  "eval_logits/chosen": -2.4359169006347656,
4
  "eval_logits/rejected": -2.4180893898010254,
5
  "eval_logps/chosen": -74.32483673095703,
@@ -15,8 +15,8 @@
15
  "eval_steps_per_second": 0.552,
16
  "total_flos": 0.0,
17
  "train_loss": 0.0,
18
- "train_runtime": 0.0351,
19
  "train_samples": 6113,
20
- "train_samples_per_second": 173972.916,
21
- "train_steps_per_second": 2732.112
22
  }
 
1
  {
2
+ "epoch": 1.0416666666666667,
3
  "eval_logits/chosen": -2.4359169006347656,
4
  "eval_logits/rejected": -2.4180893898010254,
5
  "eval_logps/chosen": -74.32483673095703,
 
15
  "eval_steps_per_second": 0.552,
16
  "total_flos": 0.0,
17
  "train_loss": 0.0,
18
+ "train_runtime": 0.0379,
19
  "train_samples": 6113,
20
+ "train_samples_per_second": 161198.692,
21
+ "train_steps_per_second": 2531.502
22
  }
config.json CHANGED
@@ -22,6 +22,6 @@
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.44.2",
25
- "use_cache": true,
26
  "vocab_size": 32000
27
  }
 
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.44.2",
25
+ "use_cache": false,
26
  "vocab_size": 32000
27
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94582ef6c6b515be1bfd492df25f84691ac9f418d9020c68257a2c87fc341059
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b5ac76b6cc07a642f83a417750e35b946bf41af4727ae2e4fc1d1a3fde0389
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:670bd034809ee8e2333e07eedb121a23c2f70abd3462105f685a15bc714b5cce
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1621a6f0e65a4e21ad912dc08ba5cf2a25455a7ea9ccf7eb9d243063ee710c7e
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f5888382b4331aaa5c5f69e91f05f7b73f63b929dda9d3b88142d71451b0e39
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7896c037cdbcbb78d2c6b86cc26d75e858ccacae61ff33f2fb02bdb60226574
3
  size 4540516344
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
  "total_flos": 0.0,
4
  "train_loss": 0.0,
5
- "train_runtime": 0.0351,
6
  "train_samples": 6113,
7
- "train_samples_per_second": 173972.916,
8
- "train_steps_per_second": 2732.112
9
  }
 
1
  {
2
+ "epoch": 1.0416666666666667,
3
  "total_flos": 0.0,
4
  "train_loss": 0.0,
5
+ "train_runtime": 0.0379,
6
  "train_samples": 6113,
7
+ "train_samples_per_second": 161198.692,
8
+ "train_steps_per_second": 2531.502
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 100,
6
- "global_step": 96,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -159,13 +159,44 @@
159
  "step": 90
160
  },
161
  {
162
- "epoch": 1.0,
163
- "step": 96,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  "total_flos": 0.0,
165
  "train_loss": 0.0,
166
- "train_runtime": 0.0351,
167
- "train_samples_per_second": 173972.916,
168
- "train_steps_per_second": 2732.112
169
  }
170
  ],
171
  "logging_steps": 10,
@@ -180,7 +211,7 @@
180
  "should_evaluate": false,
181
  "should_log": false,
182
  "should_save": true,
183
- "should_training_stop": true
184
  },
185
  "attributes": {}
186
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0416666666666667,
5
  "eval_steps": 100,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
159
  "step": 90
160
  },
161
  {
162
+ "epoch": 1.0416666666666667,
163
+ "grad_norm": 737.5513177463773,
164
+ "learning_rate": 2.6744186046511626e-07,
165
+ "logits/chosen": -2.4184558391571045,
166
+ "logits/rejected": -2.387362480163574,
167
+ "logps/chosen": -67.42814636230469,
168
+ "logps/rejected": -77.54327392578125,
169
+ "loss": 1.6705,
170
+ "rewards/accuracies": 0.46875,
171
+ "rewards/chosen": 5.45920467376709,
172
+ "rewards/margins": 15.010032653808594,
173
+ "rewards/rejected": -9.550827980041504,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 1.0416666666666667,
178
+ "eval_logits/chosen": -2.4391345977783203,
179
+ "eval_logits/rejected": -2.421090841293335,
180
+ "eval_logps/chosen": -77.95087432861328,
181
+ "eval_logps/rejected": -90.56730651855469,
182
+ "eval_loss": 3.3937504291534424,
183
+ "eval_rewards/accuracies": 0.32341268658638,
184
+ "eval_rewards/chosen": -3.110783576965332,
185
+ "eval_rewards/margins": 5.376364707946777,
186
+ "eval_rewards/rejected": -8.487147331237793,
187
+ "eval_runtime": 114.9559,
188
+ "eval_samples_per_second": 17.398,
189
+ "eval_steps_per_second": 0.548,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 1.0416666666666667,
194
+ "step": 100,
195
  "total_flos": 0.0,
196
  "train_loss": 0.0,
197
+ "train_runtime": 0.0379,
198
+ "train_samples_per_second": 161198.692,
199
+ "train_steps_per_second": 2531.502
200
  }
201
  ],
202
  "logging_steps": 10,
 
211
  "should_evaluate": false,
212
  "should_log": false,
213
  "should_save": true,
214
+ "should_training_stop": false
215
  },
216
  "attributes": {}
217
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb70819aff50f0f5d14361d26da420675d5f923ff9f30acb45f090d98f2c96fa
3
  size 7544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e6a807ea3e83faaf40677a6b14fc582b840706e660e7fd05eff913967b3f7aa
3
  size 7544