mjleee commited on
Commit
d3820cf
·
verified ·
1 Parent(s): 24b568f

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +974 -0
  2. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  3. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  4. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  5. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
  6. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +974 -0
  7. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  8. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  9. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  10. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
  11. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +1131 -0
  12. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  13. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  14. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  15. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
  16. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +974 -0
  17. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  18. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  19. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  20. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
  21. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +974 -0
  22. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  23. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  24. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  25. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
  26. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +1904 -0
  27. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  28. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  29. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  30. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
  31. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +1904 -0
  32. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  33. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  34. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  35. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
  36. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +2216 -0
  37. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  38. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  39. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  40. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
  41. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +1904 -0
  42. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  43. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  44. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  45. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
  46. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json +1904 -0
  47. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth +3 -0
  48. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth +3 -0
  49. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth +3 -0
  50. new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth +3 -0
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,974 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 314,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.006369426751592357,
14
+ "learning_rate": 2.5834789435204156e-06,
15
+ "loss": 0.0505,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.012738853503184714,
20
+ "learning_rate": 2.73476360561837e-06,
21
+ "loss": 0.0862,
22
+ "step": 4
23
+ },
24
+ {
25
+ "epoch": 0.01910828025477707,
26
+ "learning_rate": 2.889654828892393e-06,
27
+ "loss": 0.2743,
28
+ "step": 6
29
+ },
30
+ {
31
+ "epoch": 0.025477707006369428,
32
+ "learning_rate": 3.0480757232535773e-06,
33
+ "loss": 0.2937,
34
+ "step": 8
35
+ },
36
+ {
37
+ "epoch": 0.03184713375796178,
38
+ "learning_rate": 3.2099476464367486e-06,
39
+ "loss": 0.3973,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.03821656050955414,
44
+ "learning_rate": 3.3751902430395558e-06,
45
+ "loss": 0.4688,
46
+ "step": 12
47
+ },
48
+ {
49
+ "epoch": 0.044585987261146494,
50
+ "learning_rate": 3.5437214844119727e-06,
51
+ "loss": 0.5201,
52
+ "step": 14
53
+ },
54
+ {
55
+ "epoch": 0.050955414012738856,
56
+ "learning_rate": 3.7154577093764287e-06,
57
+ "loss": 0.448,
58
+ "step": 16
59
+ },
60
+ {
61
+ "epoch": 0.05732484076433121,
62
+ "learning_rate": 3.890313665758341e-06,
63
+ "loss": 0.1361,
64
+ "step": 18
65
+ },
66
+ {
67
+ "epoch": 0.06369426751592357,
68
+ "learning_rate": 4.068202552706455e-06,
69
+ "loss": 0.4843,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.07006369426751592,
74
+ "learning_rate": 4.249036063781902e-06,
75
+ "loss": 0.8539,
76
+ "step": 22
77
+ },
78
+ {
79
+ "epoch": 0.07643312101910828,
80
+ "learning_rate": 4.432724430794775e-06,
81
+ "loss": 0.0686,
82
+ "step": 24
83
+ },
84
+ {
85
+ "epoch": 0.08280254777070063,
86
+ "learning_rate": 4.6191764683662625e-06,
87
+ "loss": 0.3056,
88
+ "step": 26
89
+ },
90
+ {
91
+ "epoch": 0.08917197452229299,
92
+ "learning_rate": 4.8082996191942354e-06,
93
+ "loss": 0.1533,
94
+ "step": 28
95
+ },
96
+ {
97
+ "epoch": 0.09554140127388536,
98
+ "learning_rate": 5.000000000000003e-06,
99
+ "loss": 0.1882,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.10191082802547771,
104
+ "learning_rate": 5.194182448133163e-06,
105
+ "loss": 0.4956,
106
+ "step": 32
107
+ },
108
+ {
109
+ "epoch": 0.10828025477707007,
110
+ "learning_rate": 5.39075056881172e-06,
111
+ "loss": 0.0621,
112
+ "step": 34
113
+ },
114
+ {
115
+ "epoch": 0.11464968152866242,
116
+ "learning_rate": 5.589606782973682e-06,
117
+ "loss": 0.2904,
118
+ "step": 36
119
+ },
120
+ {
121
+ "epoch": 0.12101910828025478,
122
+ "learning_rate": 5.7906523757166475e-06,
123
+ "loss": 0.2351,
124
+ "step": 38
125
+ },
126
+ {
127
+ "epoch": 0.12738853503184713,
128
+ "learning_rate": 5.9937875453012e-06,
129
+ "loss": 0.3256,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.1337579617834395,
134
+ "learning_rate": 6.198911452693847e-06,
135
+ "loss": 0.4068,
136
+ "step": 42
137
+ },
138
+ {
139
+ "epoch": 0.14012738853503184,
140
+ "learning_rate": 6.405922271624865e-06,
141
+ "loss": 0.2709,
142
+ "step": 44
143
+ },
144
+ {
145
+ "epoch": 0.1464968152866242,
146
+ "learning_rate": 6.614717239136237e-06,
147
+ "loss": 0.0155,
148
+ "step": 46
149
+ },
150
+ {
151
+ "epoch": 0.15286624203821655,
152
+ "learning_rate": 6.8251927065945815e-06,
153
+ "loss": 0.5768,
154
+ "step": 48
155
+ },
156
+ {
157
+ "epoch": 0.1592356687898089,
158
+ "learning_rate": 7.037244191143648e-06,
159
+ "loss": 0.2028,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.16560509554140126,
164
+ "learning_rate": 7.250766427571185e-06,
165
+ "loss": 0.4226,
166
+ "step": 52
167
+ },
168
+ {
169
+ "epoch": 0.17197452229299362,
170
+ "learning_rate": 7.465653420563828e-06,
171
+ "loss": 0.1874,
172
+ "step": 54
173
+ },
174
+ {
175
+ "epoch": 0.17834394904458598,
176
+ "learning_rate": 7.68179849732472e-06,
177
+ "loss": 0.0486,
178
+ "step": 56
179
+ },
180
+ {
181
+ "epoch": 0.18471337579617833,
182
+ "learning_rate": 7.899094360527221e-06,
183
+ "loss": 0.3334,
184
+ "step": 58
185
+ },
186
+ {
187
+ "epoch": 0.1910828025477707,
188
+ "learning_rate": 8.117433141578865e-06,
189
+ "loss": 0.0569,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.19745222929936307,
194
+ "learning_rate": 8.336706454168698e-06,
195
+ "loss": 0.0103,
196
+ "step": 62
197
+ },
198
+ {
199
+ "epoch": 0.20382165605095542,
200
+ "learning_rate": 8.55680544807173e-06,
201
+ "loss": 0.4509,
202
+ "step": 64
203
+ },
204
+ {
205
+ "epoch": 0.21019108280254778,
206
+ "learning_rate": 8.777620863183652e-06,
207
+ "loss": 0.0635,
208
+ "step": 66
209
+ },
210
+ {
211
+ "epoch": 0.21656050955414013,
212
+ "learning_rate": 8.99904308375901e-06,
213
+ "loss": 0.3395,
214
+ "step": 68
215
+ },
216
+ {
217
+ "epoch": 0.2229299363057325,
218
+ "learning_rate": 9.220962192825959e-06,
219
+ "loss": 0.0262,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.22929936305732485,
224
+ "learning_rate": 9.443268026750509e-06,
225
+ "loss": 0.2095,
226
+ "step": 72
227
+ },
228
+ {
229
+ "epoch": 0.2356687898089172,
230
+ "learning_rate": 9.665850229923262e-06,
231
+ "loss": 0.1242,
232
+ "step": 74
233
+ },
234
+ {
235
+ "epoch": 0.24203821656050956,
236
+ "learning_rate": 9.88859830954135e-06,
237
+ "loss": 0.0032,
238
+ "step": 76
239
+ },
240
+ {
241
+ "epoch": 0.2484076433121019,
242
+ "learning_rate": 1.0111401690458642e-05,
243
+ "loss": 0.3737,
244
+ "step": 78
245
+ },
246
+ {
247
+ "epoch": 0.25477707006369427,
248
+ "learning_rate": 1.0334149770076732e-05,
249
+ "loss": 1.1583,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.2611464968152866,
254
+ "learning_rate": 1.0556731973249482e-05,
255
+ "loss": 0.0546,
256
+ "step": 82
257
+ },
258
+ {
259
+ "epoch": 0.267515923566879,
260
+ "learning_rate": 1.0779037807174032e-05,
261
+ "loss": 0.3756,
262
+ "step": 84
263
+ },
264
+ {
265
+ "epoch": 0.27388535031847133,
266
+ "learning_rate": 1.1000956916240984e-05,
267
+ "loss": 0.0395,
268
+ "step": 86
269
+ },
270
+ {
271
+ "epoch": 0.2802547770700637,
272
+ "learning_rate": 1.1222379136816342e-05,
273
+ "loss": 0.1488,
274
+ "step": 88
275
+ },
276
+ {
277
+ "epoch": 0.28662420382165604,
278
+ "learning_rate": 1.1443194551928264e-05,
279
+ "loss": 0.2701,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.2929936305732484,
284
+ "learning_rate": 1.1663293545831295e-05,
285
+ "loss": 0.7403,
286
+ "step": 92
287
+ },
288
+ {
289
+ "epoch": 0.29936305732484075,
290
+ "learning_rate": 1.188256685842113e-05,
291
+ "loss": 0.1445,
292
+ "step": 94
293
+ },
294
+ {
295
+ "epoch": 0.3057324840764331,
296
+ "learning_rate": 1.210090563947277e-05,
297
+ "loss": 0.3097,
298
+ "step": 96
299
+ },
300
+ {
301
+ "epoch": 0.31210191082802546,
302
+ "learning_rate": 1.2318201502675273e-05,
303
+ "loss": 0.4982,
304
+ "step": 98
305
+ },
306
+ {
307
+ "epoch": 0.3184713375796178,
308
+ "learning_rate": 1.2534346579436164e-05,
309
+ "loss": 0.2402,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.3248407643312102,
314
+ "learning_rate": 1.274923357242881e-05,
315
+ "loss": 0.1077,
316
+ "step": 102
317
+ },
318
+ {
319
+ "epoch": 0.33121019108280253,
320
+ "learning_rate": 1.2962755808856345e-05,
321
+ "loss": 0.6843,
322
+ "step": 104
323
+ },
324
+ {
325
+ "epoch": 0.3375796178343949,
326
+ "learning_rate": 1.3174807293405412e-05,
327
+ "loss": 0.1502,
328
+ "step": 106
329
+ },
330
+ {
331
+ "epoch": 0.34394904458598724,
332
+ "learning_rate": 1.3385282760863758e-05,
333
+ "loss": 0.5122,
334
+ "step": 108
335
+ },
336
+ {
337
+ "epoch": 0.3503184713375796,
338
+ "learning_rate": 1.3594077728375129e-05,
339
+ "loss": 0.4533,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.35668789808917195,
344
+ "learning_rate": 1.3801088547306147e-05,
345
+ "loss": 0.2561,
346
+ "step": 112
347
+ },
348
+ {
349
+ "epoch": 0.3630573248407643,
350
+ "learning_rate": 1.4006212454698793e-05,
351
+ "loss": 0.3514,
352
+ "step": 114
353
+ },
354
+ {
355
+ "epoch": 0.36942675159235666,
356
+ "learning_rate": 1.4209347624283347e-05,
357
+ "loss": 0.2827,
358
+ "step": 116
359
+ },
360
+ {
361
+ "epoch": 0.37579617834394907,
362
+ "learning_rate": 1.441039321702631e-05,
363
+ "loss": 0.0804,
364
+ "step": 118
365
+ },
366
+ {
367
+ "epoch": 0.3821656050955414,
368
+ "learning_rate": 1.4609249431188274e-05,
369
+ "loss": 0.9344,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.3885350318471338,
374
+ "learning_rate": 1.480581755186683e-05,
375
+ "loss": 0.2892,
376
+ "step": 122
377
+ },
378
+ {
379
+ "epoch": 0.39490445859872614,
380
+ "learning_rate": 1.4999999999999992e-05,
381
+ "loss": 0.2521,
382
+ "step": 124
383
+ },
384
+ {
385
+ "epoch": 0.4012738853503185,
386
+ "learning_rate": 1.5191700380805761e-05,
387
+ "loss": 0.3105,
388
+ "step": 126
389
+ },
390
+ {
391
+ "epoch": 0.40764331210191085,
392
+ "learning_rate": 1.538082353163373e-05,
393
+ "loss": 0.3418,
394
+ "step": 128
395
+ },
396
+ {
397
+ "epoch": 0.4140127388535032,
398
+ "learning_rate": 1.556727556920522e-05,
399
+ "loss": 0.0588,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.42038216560509556,
404
+ "learning_rate": 1.5750963936218094e-05,
405
+ "loss": 0.3193,
406
+ "step": 132
407
+ },
408
+ {
409
+ "epoch": 0.4267515923566879,
410
+ "learning_rate": 1.593179744729354e-05,
411
+ "loss": 0.1802,
412
+ "step": 134
413
+ },
414
+ {
415
+ "epoch": 0.43312101910828027,
416
+ "learning_rate": 1.6109686334241655e-05,
417
+ "loss": 0.2122,
418
+ "step": 136
419
+ },
420
+ {
421
+ "epoch": 0.4394904458598726,
422
+ "learning_rate": 1.6284542290623565e-05,
423
+ "loss": 0.8726,
424
+ "step": 138
425
+ },
426
+ {
427
+ "epoch": 0.445859872611465,
428
+ "learning_rate": 1.6456278515588023e-05,
429
+ "loss": 0.1363,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.45222929936305734,
434
+ "learning_rate": 1.662480975696044e-05,
435
+ "loss": 0.0766,
436
+ "step": 142
437
+ },
438
+ {
439
+ "epoch": 0.4585987261146497,
440
+ "learning_rate": 1.6790052353563247e-05,
441
+ "loss": 0.7178,
442
+ "step": 144
443
+ },
444
+ {
445
+ "epoch": 0.46496815286624205,
446
+ "learning_rate": 1.6951924276746418e-05,
447
+ "loss": 0.7837,
448
+ "step": 146
449
+ },
450
+ {
451
+ "epoch": 0.4713375796178344,
452
+ "learning_rate": 1.7110345171107602e-05,
453
+ "loss": 0.5237,
454
+ "step": 148
455
+ },
456
+ {
457
+ "epoch": 0.47770700636942676,
458
+ "learning_rate": 1.7265236394381627e-05,
459
+ "loss": 0.4444,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.4840764331210191,
464
+ "learning_rate": 1.741652105647958e-05,
465
+ "loss": 0.0142,
466
+ "step": 152
467
+ },
468
+ {
469
+ "epoch": 0.49044585987261147,
470
+ "learning_rate": 1.7564124057658057e-05,
471
+ "loss": 0.0969,
472
+ "step": 154
473
+ },
474
+ {
475
+ "epoch": 0.4968152866242038,
476
+ "learning_rate": 1.7707972125799738e-05,
477
+ "loss": 0.3022,
478
+ "step": 156
479
+ },
480
+ {
481
+ "epoch": 0.5031847133757962,
482
+ "learning_rate": 1.7847993852786612e-05,
483
+ "loss": 0.1648,
484
+ "step": 158
485
+ },
486
+ {
487
+ "epoch": 0.5095541401273885,
488
+ "learning_rate": 1.7984119729947937e-05,
489
+ "loss": 0.0168,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.5159235668789809,
494
+ "learning_rate": 1.811628218256531e-05,
495
+ "loss": 0.0575,
496
+ "step": 162
497
+ },
498
+ {
499
+ "epoch": 0.5222929936305732,
500
+ "learning_rate": 1.8244415603417603e-05,
501
+ "loss": 0.6108,
502
+ "step": 164
503
+ },
504
+ {
505
+ "epoch": 0.5286624203821656,
506
+ "learning_rate": 1.836845638534933e-05,
507
+ "loss": 0.2658,
508
+ "step": 166
509
+ },
510
+ {
511
+ "epoch": 0.535031847133758,
512
+ "learning_rate": 1.8488342952846074e-05,
513
+ "loss": 0.3892,
514
+ "step": 168
515
+ },
516
+ {
517
+ "epoch": 0.5414012738853503,
518
+ "learning_rate": 1.860401579260139e-05,
519
+ "loss": 0.4322,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.5477707006369427,
524
+ "learning_rate": 1.8715417483060044e-05,
525
+ "loss": 0.2036,
526
+ "step": 172
527
+ },
528
+ {
529
+ "epoch": 0.554140127388535,
530
+ "learning_rate": 1.8822492722922816e-05,
531
+ "loss": 0.1441,
532
+ "step": 174
533
+ },
534
+ {
535
+ "epoch": 0.5605095541401274,
536
+ "learning_rate": 1.8925188358598808e-05,
537
+ "loss": 0.3341,
538
+ "step": 176
539
+ },
540
+ {
541
+ "epoch": 0.5668789808917197,
542
+ "learning_rate": 1.902345341059163e-05,
543
+ "loss": 0.4069,
544
+ "step": 178
545
+ },
546
+ {
547
+ "epoch": 0.5732484076433121,
548
+ "learning_rate": 1.9117239098806296e-05,
549
+ "loss": 0.0086,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 0.5796178343949044,
554
+ "learning_rate": 1.920649886676429e-05,
555
+ "loss": 0.497,
556
+ "step": 182
557
+ },
558
+ {
559
+ "epoch": 0.5859872611464968,
560
+ "learning_rate": 1.9291188404714876e-05,
561
+ "loss": 0.1694,
562
+ "step": 184
563
+ },
564
+ {
565
+ "epoch": 0.5923566878980892,
566
+ "learning_rate": 1.937126567163103e-05,
567
+ "loss": 0.4397,
568
+ "step": 186
569
+ },
570
+ {
571
+ "epoch": 0.5987261146496815,
572
+ "learning_rate": 1.944669091607919e-05,
573
+ "loss": 0.5807,
574
+ "step": 188
575
+ },
576
+ {
577
+ "epoch": 0.6050955414012739,
578
+ "learning_rate": 1.9517426695952354e-05,
579
+ "loss": 0.128,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 0.6114649681528662,
584
+ "learning_rate": 1.9583437897056915e-05,
585
+ "loss": 0.7845,
586
+ "step": 192
587
+ },
588
+ {
589
+ "epoch": 0.6178343949044586,
590
+ "learning_rate": 1.964469175054377e-05,
591
+ "loss": 0.2964,
592
+ "step": 194
593
+ },
594
+ {
595
+ "epoch": 0.6242038216560509,
596
+ "learning_rate": 1.970115784917523e-05,
597
+ "loss": 0.0337,
598
+ "step": 196
599
+ },
600
+ {
601
+ "epoch": 0.6305732484076433,
602
+ "learning_rate": 1.975280816241959e-05,
603
+ "loss": 0.3463,
604
+ "step": 198
605
+ },
606
+ {
607
+ "epoch": 0.6369426751592356,
608
+ "learning_rate": 1.979961705036587e-05,
609
+ "loss": 0.7921,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 0.643312101910828,
614
+ "learning_rate": 1.9841561276451777e-05,
615
+ "loss": 0.129,
616
+ "step": 202
617
+ },
618
+ {
619
+ "epoch": 0.6496815286624203,
620
+ "learning_rate": 1.9878620018998696e-05,
621
+ "loss": 0.028,
622
+ "step": 204
623
+ },
624
+ {
625
+ "epoch": 0.6560509554140127,
626
+ "learning_rate": 1.9910774881547803e-05,
627
+ "loss": 0.2591,
628
+ "step": 206
629
+ },
630
+ {
631
+ "epoch": 0.6624203821656051,
632
+ "learning_rate": 1.993800990199235e-05,
633
+ "loss": 0.4982,
634
+ "step": 208
635
+ },
636
+ {
637
+ "epoch": 0.6687898089171974,
638
+ "learning_rate": 1.9960311560501457e-05,
639
+ "loss": 0.2502,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 0.6751592356687898,
644
+ "learning_rate": 1.9977668786231536e-05,
645
+ "loss": 0.9019,
646
+ "step": 212
647
+ },
648
+ {
649
+ "epoch": 0.6815286624203821,
650
+ "learning_rate": 1.999007296282201e-05,
651
+ "loss": 0.452,
652
+ "step": 214
653
+ },
654
+ {
655
+ "epoch": 0.6878980891719745,
656
+ "learning_rate": 1.9997517932672592e-05,
657
+ "loss": 0.283,
658
+ "step": 216
659
+ },
660
+ {
661
+ "epoch": 0.6942675159235668,
662
+ "learning_rate": 2e-05,
663
+ "loss": 0.2723,
664
+ "step": 218
665
+ },
666
+ {
667
+ "epoch": 0.7006369426751592,
668
+ "learning_rate": 1.9997517932672592e-05,
669
+ "loss": 0.1013,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 0.7070063694267515,
674
+ "learning_rate": 1.999007296282201e-05,
675
+ "loss": 0.5748,
676
+ "step": 222
677
+ },
678
+ {
679
+ "epoch": 0.7133757961783439,
680
+ "learning_rate": 1.9977668786231536e-05,
681
+ "loss": 0.2457,
682
+ "step": 224
683
+ },
684
+ {
685
+ "epoch": 0.7197452229299363,
686
+ "learning_rate": 1.9960311560501457e-05,
687
+ "loss": 0.4159,
688
+ "step": 226
689
+ },
690
+ {
691
+ "epoch": 0.7261146496815286,
692
+ "learning_rate": 1.993800990199235e-05,
693
+ "loss": 0.2193,
694
+ "step": 228
695
+ },
696
+ {
697
+ "epoch": 0.732484076433121,
698
+ "learning_rate": 1.99107748815478e-05,
699
+ "loss": 0.4867,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 0.7388535031847133,
704
+ "learning_rate": 1.9878620018998696e-05,
705
+ "loss": 0.2393,
706
+ "step": 232
707
+ },
708
+ {
709
+ "epoch": 0.7452229299363057,
710
+ "learning_rate": 1.984156127645178e-05,
711
+ "loss": 0.228,
712
+ "step": 234
713
+ },
714
+ {
715
+ "epoch": 0.7515923566878981,
716
+ "learning_rate": 1.979961705036587e-05,
717
+ "loss": 0.0731,
718
+ "step": 236
719
+ },
720
+ {
721
+ "epoch": 0.7579617834394905,
722
+ "learning_rate": 1.975280816241959e-05,
723
+ "loss": 0.0534,
724
+ "step": 238
725
+ },
726
+ {
727
+ "epoch": 0.7643312101910829,
728
+ "learning_rate": 1.9701157849175232e-05,
729
+ "loss": 0.1819,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 0.7707006369426752,
734
+ "learning_rate": 1.9644691750543772e-05,
735
+ "loss": 0.0205,
736
+ "step": 242
737
+ },
738
+ {
739
+ "epoch": 0.7770700636942676,
740
+ "learning_rate": 1.958343789705692e-05,
741
+ "loss": 0.3531,
742
+ "step": 244
743
+ },
744
+ {
745
+ "epoch": 0.7834394904458599,
746
+ "learning_rate": 1.9517426695952354e-05,
747
+ "loss": 0.3391,
748
+ "step": 246
749
+ },
750
+ {
751
+ "epoch": 0.7898089171974523,
752
+ "learning_rate": 1.9446690916079184e-05,
753
+ "loss": 0.1271,
754
+ "step": 248
755
+ },
756
+ {
757
+ "epoch": 0.7961783439490446,
758
+ "learning_rate": 1.9371265671631034e-05,
759
+ "loss": 0.1924,
760
+ "step": 250
761
+ },
762
+ {
763
+ "epoch": 0.802547770700637,
764
+ "learning_rate": 1.929118840471488e-05,
765
+ "loss": 0.0193,
766
+ "step": 252
767
+ },
768
+ {
769
+ "epoch": 0.8089171974522293,
770
+ "learning_rate": 1.9206498866764293e-05,
771
+ "loss": 0.8572,
772
+ "step": 254
773
+ },
774
+ {
775
+ "epoch": 0.8152866242038217,
776
+ "learning_rate": 1.9117239098806302e-05,
777
+ "loss": 0.0557,
778
+ "step": 256
779
+ },
780
+ {
781
+ "epoch": 0.821656050955414,
782
+ "learning_rate": 1.9023453410591645e-05,
783
+ "loss": 0.2022,
784
+ "step": 258
785
+ },
786
+ {
787
+ "epoch": 0.8280254777070064,
788
+ "learning_rate": 1.8925188358598822e-05,
789
+ "loss": 0.5536,
790
+ "step": 260
791
+ },
792
+ {
793
+ "epoch": 0.8343949044585988,
794
+ "learning_rate": 1.882249272292283e-05,
795
+ "loss": 0.4296,
796
+ "step": 262
797
+ },
798
+ {
799
+ "epoch": 0.8407643312101911,
800
+ "learning_rate": 1.871541748306005e-05,
801
+ "loss": 0.1649,
802
+ "step": 264
803
+ },
804
+ {
805
+ "epoch": 0.8471337579617835,
806
+ "learning_rate": 1.8604015792601395e-05,
807
+ "loss": 0.3044,
808
+ "step": 266
809
+ },
810
+ {
811
+ "epoch": 0.8535031847133758,
812
+ "learning_rate": 1.8488342952846077e-05,
813
+ "loss": 0.3655,
814
+ "step": 268
815
+ },
816
+ {
817
+ "epoch": 0.8598726114649682,
818
+ "learning_rate": 1.8368456385349333e-05,
819
+ "loss": 0.569,
820
+ "step": 270
821
+ },
822
+ {
823
+ "epoch": 0.8662420382165605,
824
+ "learning_rate": 1.824441560341761e-05,
825
+ "loss": 0.3474,
826
+ "step": 272
827
+ },
828
+ {
829
+ "epoch": 0.8726114649681529,
830
+ "learning_rate": 1.811628218256532e-05,
831
+ "loss": 0.366,
832
+ "step": 274
833
+ },
834
+ {
835
+ "epoch": 0.8789808917197452,
836
+ "learning_rate": 1.798411972994795e-05,
837
+ "loss": 2.5077,
838
+ "step": 276
839
+ },
840
+ {
841
+ "epoch": 0.8853503184713376,
842
+ "learning_rate": 1.784799385278662e-05,
843
+ "loss": 0.2863,
844
+ "step": 278
845
+ },
846
+ {
847
+ "epoch": 0.89171974522293,
848
+ "learning_rate": 1.770797212579973e-05,
849
+ "loss": 0.3239,
850
+ "step": 280
851
+ },
852
+ {
853
+ "epoch": 0.8980891719745223,
854
+ "learning_rate": 1.756412405765805e-05,
855
+ "loss": 0.1448,
856
+ "step": 282
857
+ },
858
+ {
859
+ "epoch": 0.9044585987261147,
860
+ "learning_rate": 1.7416521056479573e-05,
861
+ "loss": 0.2393,
862
+ "step": 284
863
+ },
864
+ {
865
+ "epoch": 0.910828025477707,
866
+ "learning_rate": 1.7265236394381634e-05,
867
+ "loss": 0.3568,
868
+ "step": 286
869
+ },
870
+ {
871
+ "epoch": 0.9171974522292994,
872
+ "learning_rate": 1.711034517110761e-05,
873
+ "loss": 0.0124,
874
+ "step": 288
875
+ },
876
+ {
877
+ "epoch": 0.9235668789808917,
878
+ "learning_rate": 1.6951924276746425e-05,
879
+ "loss": 0.4765,
880
+ "step": 290
881
+ },
882
+ {
883
+ "epoch": 0.9299363057324841,
884
+ "learning_rate": 1.6790052353563254e-05,
885
+ "loss": 0.1502,
886
+ "step": 292
887
+ },
888
+ {
889
+ "epoch": 0.9363057324840764,
890
+ "learning_rate": 1.662480975696046e-05,
891
+ "loss": 0.1455,
892
+ "step": 294
893
+ },
894
+ {
895
+ "epoch": 0.9426751592356688,
896
+ "learning_rate": 1.6456278515588044e-05,
897
+ "loss": 0.1501,
898
+ "step": 296
899
+ },
900
+ {
901
+ "epoch": 0.9490445859872612,
902
+ "learning_rate": 1.6284542290623558e-05,
903
+ "loss": 0.4569,
904
+ "step": 298
905
+ },
906
+ {
907
+ "epoch": 0.9554140127388535,
908
+ "learning_rate": 1.6109686334241648e-05,
909
+ "loss": 0.2271,
910
+ "step": 300
911
+ },
912
+ {
913
+ "epoch": 0.9617834394904459,
914
+ "learning_rate": 1.593179744729355e-05,
915
+ "loss": 0.2835,
916
+ "step": 302
917
+ },
918
+ {
919
+ "epoch": 0.9681528662420382,
920
+ "learning_rate": 1.57509639362181e-05,
921
+ "loss": 0.2865,
922
+ "step": 304
923
+ },
924
+ {
925
+ "epoch": 0.9745222929936306,
926
+ "learning_rate": 1.5567275569205227e-05,
927
+ "loss": 0.0294,
928
+ "step": 306
929
+ },
930
+ {
931
+ "epoch": 0.9808917197452229,
932
+ "learning_rate": 1.538082353163374e-05,
933
+ "loss": 0.8354,
934
+ "step": 308
935
+ },
936
+ {
937
+ "epoch": 0.9872611464968153,
938
+ "learning_rate": 1.5191700380805768e-05,
939
+ "loss": 0.9003,
940
+ "step": 310
941
+ },
942
+ {
943
+ "epoch": 0.9936305732484076,
944
+ "learning_rate": 1.5000000000000014e-05,
945
+ "loss": 0.0171,
946
+ "step": 312
947
+ },
948
+ {
949
+ "epoch": 1.0,
950
+ "learning_rate": 1.4805817551866854e-05,
951
+ "loss": 0.0555,
952
+ "step": 314
953
+ },
954
+ {
955
+ "epoch": 1.0,
956
+ "step": 314,
957
+ "total_flos": 1802881866924032.0,
958
+ "train_loss": 0.3193168936469325,
959
+ "train_runtime": 1687.0863,
960
+ "train_samples_per_second": 2.978,
961
+ "train_steps_per_second": 0.186
962
+ }
963
+ ],
964
+ "logging_steps": 2,
965
+ "max_steps": 314,
966
+ "num_input_tokens_seen": 0,
967
+ "num_train_epochs": 1,
968
+ "save_steps": 500,
969
+ "stateful_callbacks": {},
970
+ "total_flos": 1802881866924032.0,
971
+ "train_batch_size": 1,
972
+ "trial_name": null,
973
+ "trial_params": null
974
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a92c1614a8bad7370023fec4431bb5e371fe7e3e508dc6e2af6fc6c14cc6f7a8
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:139666b08b69317031fc1d6dd8bcfe1ed0e09d11b2ef71b517bd03b515a7ae4e
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fcb1daf238aa41b7da5beec79cbf6d306a03c5f033165540036e6280b4c9be5
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cade9800299ef877cbab4fdcccd1273c83f97e8a6696aee6fc4fc332bcaea98f
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,974 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 314,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.006369426751592357,
14
+ "learning_rate": 2.5834789435204156e-06,
15
+ "loss": 0.5361,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.012738853503184714,
20
+ "learning_rate": 2.73476360561837e-06,
21
+ "loss": 0.2018,
22
+ "step": 4
23
+ },
24
+ {
25
+ "epoch": 0.01910828025477707,
26
+ "learning_rate": 2.889654828892393e-06,
27
+ "loss": 0.3738,
28
+ "step": 6
29
+ },
30
+ {
31
+ "epoch": 0.025477707006369428,
32
+ "learning_rate": 3.0480757232535773e-06,
33
+ "loss": 1.0056,
34
+ "step": 8
35
+ },
36
+ {
37
+ "epoch": 0.03184713375796178,
38
+ "learning_rate": 3.2099476464367486e-06,
39
+ "loss": 0.6308,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.03821656050955414,
44
+ "learning_rate": 3.3751902430395558e-06,
45
+ "loss": 0.6796,
46
+ "step": 12
47
+ },
48
+ {
49
+ "epoch": 0.044585987261146494,
50
+ "learning_rate": 3.5437214844119727e-06,
51
+ "loss": 0.43,
52
+ "step": 14
53
+ },
54
+ {
55
+ "epoch": 0.050955414012738856,
56
+ "learning_rate": 3.7154577093764287e-06,
57
+ "loss": 0.5045,
58
+ "step": 16
59
+ },
60
+ {
61
+ "epoch": 0.05732484076433121,
62
+ "learning_rate": 3.890313665758341e-06,
63
+ "loss": 0.2084,
64
+ "step": 18
65
+ },
66
+ {
67
+ "epoch": 0.06369426751592357,
68
+ "learning_rate": 4.068202552706455e-06,
69
+ "loss": 0.3108,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.07006369426751592,
74
+ "learning_rate": 4.249036063781902e-06,
75
+ "loss": 1.0176,
76
+ "step": 22
77
+ },
78
+ {
79
+ "epoch": 0.07643312101910828,
80
+ "learning_rate": 4.432724430794775e-06,
81
+ "loss": 0.6321,
82
+ "step": 24
83
+ },
84
+ {
85
+ "epoch": 0.08280254777070063,
86
+ "learning_rate": 4.6191764683662625e-06,
87
+ "loss": 0.548,
88
+ "step": 26
89
+ },
90
+ {
91
+ "epoch": 0.08917197452229299,
92
+ "learning_rate": 4.8082996191942354e-06,
93
+ "loss": 0.3522,
94
+ "step": 28
95
+ },
96
+ {
97
+ "epoch": 0.09554140127388536,
98
+ "learning_rate": 5.000000000000003e-06,
99
+ "loss": 0.5196,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.10191082802547771,
104
+ "learning_rate": 5.194182448133163e-06,
105
+ "loss": 0.4787,
106
+ "step": 32
107
+ },
108
+ {
109
+ "epoch": 0.10828025477707007,
110
+ "learning_rate": 5.39075056881172e-06,
111
+ "loss": 0.568,
112
+ "step": 34
113
+ },
114
+ {
115
+ "epoch": 0.11464968152866242,
116
+ "learning_rate": 5.589606782973682e-06,
117
+ "loss": 0.5401,
118
+ "step": 36
119
+ },
120
+ {
121
+ "epoch": 0.12101910828025478,
122
+ "learning_rate": 5.7906523757166475e-06,
123
+ "loss": 0.4241,
124
+ "step": 38
125
+ },
126
+ {
127
+ "epoch": 0.12738853503184713,
128
+ "learning_rate": 5.9937875453012e-06,
129
+ "loss": 0.1682,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.1337579617834395,
134
+ "learning_rate": 6.198911452693847e-06,
135
+ "loss": 0.6789,
136
+ "step": 42
137
+ },
138
+ {
139
+ "epoch": 0.14012738853503184,
140
+ "learning_rate": 6.405922271624865e-06,
141
+ "loss": 0.7686,
142
+ "step": 44
143
+ },
144
+ {
145
+ "epoch": 0.1464968152866242,
146
+ "learning_rate": 6.614717239136237e-06,
147
+ "loss": 0.5242,
148
+ "step": 46
149
+ },
150
+ {
151
+ "epoch": 0.15286624203821655,
152
+ "learning_rate": 6.8251927065945815e-06,
153
+ "loss": 0.5326,
154
+ "step": 48
155
+ },
156
+ {
157
+ "epoch": 0.1592356687898089,
158
+ "learning_rate": 7.037244191143648e-06,
159
+ "loss": 0.2171,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.16560509554140126,
164
+ "learning_rate": 7.250766427571185e-06,
165
+ "loss": 0.3231,
166
+ "step": 52
167
+ },
168
+ {
169
+ "epoch": 0.17197452229299362,
170
+ "learning_rate": 7.465653420563828e-06,
171
+ "loss": 0.6905,
172
+ "step": 54
173
+ },
174
+ {
175
+ "epoch": 0.17834394904458598,
176
+ "learning_rate": 7.68179849732472e-06,
177
+ "loss": 0.7773,
178
+ "step": 56
179
+ },
180
+ {
181
+ "epoch": 0.18471337579617833,
182
+ "learning_rate": 7.899094360527221e-06,
183
+ "loss": 0.8143,
184
+ "step": 58
185
+ },
186
+ {
187
+ "epoch": 0.1910828025477707,
188
+ "learning_rate": 8.117433141578865e-06,
189
+ "loss": 0.1776,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.19745222929936307,
194
+ "learning_rate": 8.336706454168698e-06,
195
+ "loss": 0.6091,
196
+ "step": 62
197
+ },
198
+ {
199
+ "epoch": 0.20382165605095542,
200
+ "learning_rate": 8.55680544807173e-06,
201
+ "loss": 0.6649,
202
+ "step": 64
203
+ },
204
+ {
205
+ "epoch": 0.21019108280254778,
206
+ "learning_rate": 8.777620863183652e-06,
207
+ "loss": 0.4914,
208
+ "step": 66
209
+ },
210
+ {
211
+ "epoch": 0.21656050955414013,
212
+ "learning_rate": 8.99904308375901e-06,
213
+ "loss": 0.3618,
214
+ "step": 68
215
+ },
216
+ {
217
+ "epoch": 0.2229299363057325,
218
+ "learning_rate": 9.220962192825959e-06,
219
+ "loss": 0.5064,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.22929936305732485,
224
+ "learning_rate": 9.443268026750509e-06,
225
+ "loss": 0.6748,
226
+ "step": 72
227
+ },
228
+ {
229
+ "epoch": 0.2356687898089172,
230
+ "learning_rate": 9.665850229923262e-06,
231
+ "loss": 0.7058,
232
+ "step": 74
233
+ },
234
+ {
235
+ "epoch": 0.24203821656050956,
236
+ "learning_rate": 9.88859830954135e-06,
237
+ "loss": 0.7704,
238
+ "step": 76
239
+ },
240
+ {
241
+ "epoch": 0.2484076433121019,
242
+ "learning_rate": 1.0111401690458642e-05,
243
+ "loss": 0.9882,
244
+ "step": 78
245
+ },
246
+ {
247
+ "epoch": 0.25477707006369427,
248
+ "learning_rate": 1.0334149770076732e-05,
249
+ "loss": 1.1594,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.2611464968152866,
254
+ "learning_rate": 1.0556731973249482e-05,
255
+ "loss": 0.6744,
256
+ "step": 82
257
+ },
258
+ {
259
+ "epoch": 0.267515923566879,
260
+ "learning_rate": 1.0779037807174032e-05,
261
+ "loss": 0.4816,
262
+ "step": 84
263
+ },
264
+ {
265
+ "epoch": 0.27388535031847133,
266
+ "learning_rate": 1.1000956916240984e-05,
267
+ "loss": 0.2887,
268
+ "step": 86
269
+ },
270
+ {
271
+ "epoch": 0.2802547770700637,
272
+ "learning_rate": 1.1222379136816342e-05,
273
+ "loss": 0.348,
274
+ "step": 88
275
+ },
276
+ {
277
+ "epoch": 0.28662420382165604,
278
+ "learning_rate": 1.1443194551928264e-05,
279
+ "loss": 0.7011,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.2929936305732484,
284
+ "learning_rate": 1.1663293545831295e-05,
285
+ "loss": 0.2513,
286
+ "step": 92
287
+ },
288
+ {
289
+ "epoch": 0.29936305732484075,
290
+ "learning_rate": 1.188256685842113e-05,
291
+ "loss": 0.7644,
292
+ "step": 94
293
+ },
294
+ {
295
+ "epoch": 0.3057324840764331,
296
+ "learning_rate": 1.210090563947277e-05,
297
+ "loss": 0.3947,
298
+ "step": 96
299
+ },
300
+ {
301
+ "epoch": 0.31210191082802546,
302
+ "learning_rate": 1.2318201502675273e-05,
303
+ "loss": 0.3527,
304
+ "step": 98
305
+ },
306
+ {
307
+ "epoch": 0.3184713375796178,
308
+ "learning_rate": 1.2534346579436164e-05,
309
+ "loss": 0.2598,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.3248407643312102,
314
+ "learning_rate": 1.274923357242881e-05,
315
+ "loss": 0.4266,
316
+ "step": 102
317
+ },
318
+ {
319
+ "epoch": 0.33121019108280253,
320
+ "learning_rate": 1.2962755808856345e-05,
321
+ "loss": 0.3997,
322
+ "step": 104
323
+ },
324
+ {
325
+ "epoch": 0.3375796178343949,
326
+ "learning_rate": 1.3174807293405412e-05,
327
+ "loss": 0.3758,
328
+ "step": 106
329
+ },
330
+ {
331
+ "epoch": 0.34394904458598724,
332
+ "learning_rate": 1.3385282760863758e-05,
333
+ "loss": 0.4305,
334
+ "step": 108
335
+ },
336
+ {
337
+ "epoch": 0.3503184713375796,
338
+ "learning_rate": 1.3594077728375129e-05,
339
+ "loss": 0.0364,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.35668789808917195,
344
+ "learning_rate": 1.3801088547306147e-05,
345
+ "loss": 0.6869,
346
+ "step": 112
347
+ },
348
+ {
349
+ "epoch": 0.3630573248407643,
350
+ "learning_rate": 1.4006212454698793e-05,
351
+ "loss": 0.5869,
352
+ "step": 114
353
+ },
354
+ {
355
+ "epoch": 0.36942675159235666,
356
+ "learning_rate": 1.4209347624283347e-05,
357
+ "loss": 0.3358,
358
+ "step": 116
359
+ },
360
+ {
361
+ "epoch": 0.37579617834394907,
362
+ "learning_rate": 1.441039321702631e-05,
363
+ "loss": 0.4655,
364
+ "step": 118
365
+ },
366
+ {
367
+ "epoch": 0.3821656050955414,
368
+ "learning_rate": 1.4609249431188274e-05,
369
+ "loss": 0.4452,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.3885350318471338,
374
+ "learning_rate": 1.480581755186683e-05,
375
+ "loss": 0.7202,
376
+ "step": 122
377
+ },
378
+ {
379
+ "epoch": 0.39490445859872614,
380
+ "learning_rate": 1.4999999999999992e-05,
381
+ "loss": 0.618,
382
+ "step": 124
383
+ },
384
+ {
385
+ "epoch": 0.4012738853503185,
386
+ "learning_rate": 1.5191700380805761e-05,
387
+ "loss": 0.8044,
388
+ "step": 126
389
+ },
390
+ {
391
+ "epoch": 0.40764331210191085,
392
+ "learning_rate": 1.538082353163373e-05,
393
+ "loss": 0.4794,
394
+ "step": 128
395
+ },
396
+ {
397
+ "epoch": 0.4140127388535032,
398
+ "learning_rate": 1.556727556920522e-05,
399
+ "loss": 0.3746,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.42038216560509556,
404
+ "learning_rate": 1.5750963936218094e-05,
405
+ "loss": 0.3161,
406
+ "step": 132
407
+ },
408
+ {
409
+ "epoch": 0.4267515923566879,
410
+ "learning_rate": 1.593179744729354e-05,
411
+ "loss": 0.5384,
412
+ "step": 134
413
+ },
414
+ {
415
+ "epoch": 0.43312101910828027,
416
+ "learning_rate": 1.6109686334241655e-05,
417
+ "loss": 0.8099,
418
+ "step": 136
419
+ },
420
+ {
421
+ "epoch": 0.4394904458598726,
422
+ "learning_rate": 1.6284542290623565e-05,
423
+ "loss": 0.7089,
424
+ "step": 138
425
+ },
426
+ {
427
+ "epoch": 0.445859872611465,
428
+ "learning_rate": 1.6456278515588023e-05,
429
+ "loss": 0.4458,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.45222929936305734,
434
+ "learning_rate": 1.662480975696044e-05,
435
+ "loss": 0.76,
436
+ "step": 142
437
+ },
438
+ {
439
+ "epoch": 0.4585987261146497,
440
+ "learning_rate": 1.6790052353563247e-05,
441
+ "loss": 0.5168,
442
+ "step": 144
443
+ },
444
+ {
445
+ "epoch": 0.46496815286624205,
446
+ "learning_rate": 1.6951924276746418e-05,
447
+ "loss": 0.6594,
448
+ "step": 146
449
+ },
450
+ {
451
+ "epoch": 0.4713375796178344,
452
+ "learning_rate": 1.7110345171107602e-05,
453
+ "loss": 0.5252,
454
+ "step": 148
455
+ },
456
+ {
457
+ "epoch": 0.47770700636942676,
458
+ "learning_rate": 1.7265236394381627e-05,
459
+ "loss": 0.5808,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.4840764331210191,
464
+ "learning_rate": 1.741652105647958e-05,
465
+ "loss": 0.3341,
466
+ "step": 152
467
+ },
468
+ {
469
+ "epoch": 0.49044585987261147,
470
+ "learning_rate": 1.7564124057658057e-05,
471
+ "loss": 0.3744,
472
+ "step": 154
473
+ },
474
+ {
475
+ "epoch": 0.4968152866242038,
476
+ "learning_rate": 1.7707972125799738e-05,
477
+ "loss": 0.4799,
478
+ "step": 156
479
+ },
480
+ {
481
+ "epoch": 0.5031847133757962,
482
+ "learning_rate": 1.7847993852786612e-05,
483
+ "loss": 0.5027,
484
+ "step": 158
485
+ },
486
+ {
487
+ "epoch": 0.5095541401273885,
488
+ "learning_rate": 1.7984119729947937e-05,
489
+ "loss": 0.3931,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.5159235668789809,
494
+ "learning_rate": 1.811628218256531e-05,
495
+ "loss": 0.3879,
496
+ "step": 162
497
+ },
498
+ {
499
+ "epoch": 0.5222929936305732,
500
+ "learning_rate": 1.8244415603417603e-05,
501
+ "loss": 0.4793,
502
+ "step": 164
503
+ },
504
+ {
505
+ "epoch": 0.5286624203821656,
506
+ "learning_rate": 1.836845638534933e-05,
507
+ "loss": 0.5282,
508
+ "step": 166
509
+ },
510
+ {
511
+ "epoch": 0.535031847133758,
512
+ "learning_rate": 1.8488342952846074e-05,
513
+ "loss": 0.5434,
514
+ "step": 168
515
+ },
516
+ {
517
+ "epoch": 0.5414012738853503,
518
+ "learning_rate": 1.860401579260139e-05,
519
+ "loss": 0.5263,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.5477707006369427,
524
+ "learning_rate": 1.8715417483060044e-05,
525
+ "loss": 0.7306,
526
+ "step": 172
527
+ },
528
+ {
529
+ "epoch": 0.554140127388535,
530
+ "learning_rate": 1.8822492722922816e-05,
531
+ "loss": 0.5666,
532
+ "step": 174
533
+ },
534
+ {
535
+ "epoch": 0.5605095541401274,
536
+ "learning_rate": 1.8925188358598808e-05,
537
+ "loss": 0.4222,
538
+ "step": 176
539
+ },
540
+ {
541
+ "epoch": 0.5668789808917197,
542
+ "learning_rate": 1.902345341059163e-05,
543
+ "loss": 0.9838,
544
+ "step": 178
545
+ },
546
+ {
547
+ "epoch": 0.5732484076433121,
548
+ "learning_rate": 1.9117239098806296e-05,
549
+ "loss": 0.6729,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 0.5796178343949044,
554
+ "learning_rate": 1.920649886676429e-05,
555
+ "loss": 0.419,
556
+ "step": 182
557
+ },
558
+ {
559
+ "epoch": 0.5859872611464968,
560
+ "learning_rate": 1.9291188404714876e-05,
561
+ "loss": 0.3516,
562
+ "step": 184
563
+ },
564
+ {
565
+ "epoch": 0.5923566878980892,
566
+ "learning_rate": 1.937126567163103e-05,
567
+ "loss": 0.4074,
568
+ "step": 186
569
+ },
570
+ {
571
+ "epoch": 0.5987261146496815,
572
+ "learning_rate": 1.944669091607919e-05,
573
+ "loss": 0.4973,
574
+ "step": 188
575
+ },
576
+ {
577
+ "epoch": 0.6050955414012739,
578
+ "learning_rate": 1.9517426695952354e-05,
579
+ "loss": 0.3044,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 0.6114649681528662,
584
+ "learning_rate": 1.9583437897056915e-05,
585
+ "loss": 0.7326,
586
+ "step": 192
587
+ },
588
+ {
589
+ "epoch": 0.6178343949044586,
590
+ "learning_rate": 1.964469175054377e-05,
591
+ "loss": 0.309,
592
+ "step": 194
593
+ },
594
+ {
595
+ "epoch": 0.6242038216560509,
596
+ "learning_rate": 1.970115784917523e-05,
597
+ "loss": 1.0457,
598
+ "step": 196
599
+ },
600
+ {
601
+ "epoch": 0.6305732484076433,
602
+ "learning_rate": 1.975280816241959e-05,
603
+ "loss": 0.4953,
604
+ "step": 198
605
+ },
606
+ {
607
+ "epoch": 0.6369426751592356,
608
+ "learning_rate": 1.979961705036587e-05,
609
+ "loss": 0.1983,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 0.643312101910828,
614
+ "learning_rate": 1.9841561276451777e-05,
615
+ "loss": 0.5022,
616
+ "step": 202
617
+ },
618
+ {
619
+ "epoch": 0.6496815286624203,
620
+ "learning_rate": 1.9878620018998696e-05,
621
+ "loss": 0.3723,
622
+ "step": 204
623
+ },
624
+ {
625
+ "epoch": 0.6560509554140127,
626
+ "learning_rate": 1.9910774881547803e-05,
627
+ "loss": 0.3967,
628
+ "step": 206
629
+ },
630
+ {
631
+ "epoch": 0.6624203821656051,
632
+ "learning_rate": 1.993800990199235e-05,
633
+ "loss": 0.9567,
634
+ "step": 208
635
+ },
636
+ {
637
+ "epoch": 0.6687898089171974,
638
+ "learning_rate": 1.9960311560501457e-05,
639
+ "loss": 0.4802,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 0.6751592356687898,
644
+ "learning_rate": 1.9977668786231536e-05,
645
+ "loss": 0.5766,
646
+ "step": 212
647
+ },
648
+ {
649
+ "epoch": 0.6815286624203821,
650
+ "learning_rate": 1.999007296282201e-05,
651
+ "loss": 0.754,
652
+ "step": 214
653
+ },
654
+ {
655
+ "epoch": 0.6878980891719745,
656
+ "learning_rate": 1.9997517932672592e-05,
657
+ "loss": 0.617,
658
+ "step": 216
659
+ },
660
+ {
661
+ "epoch": 0.6942675159235668,
662
+ "learning_rate": 2e-05,
663
+ "loss": 0.299,
664
+ "step": 218
665
+ },
666
+ {
667
+ "epoch": 0.7006369426751592,
668
+ "learning_rate": 1.9997517932672592e-05,
669
+ "loss": 0.3849,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 0.7070063694267515,
674
+ "learning_rate": 1.999007296282201e-05,
675
+ "loss": 0.49,
676
+ "step": 222
677
+ },
678
+ {
679
+ "epoch": 0.7133757961783439,
680
+ "learning_rate": 1.9977668786231536e-05,
681
+ "loss": 0.279,
682
+ "step": 224
683
+ },
684
+ {
685
+ "epoch": 0.7197452229299363,
686
+ "learning_rate": 1.9960311560501457e-05,
687
+ "loss": 0.3816,
688
+ "step": 226
689
+ },
690
+ {
691
+ "epoch": 0.7261146496815286,
692
+ "learning_rate": 1.993800990199235e-05,
693
+ "loss": 0.1561,
694
+ "step": 228
695
+ },
696
+ {
697
+ "epoch": 0.732484076433121,
698
+ "learning_rate": 1.99107748815478e-05,
699
+ "loss": 0.1531,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 0.7388535031847133,
704
+ "learning_rate": 1.9878620018998696e-05,
705
+ "loss": 0.5737,
706
+ "step": 232
707
+ },
708
+ {
709
+ "epoch": 0.7452229299363057,
710
+ "learning_rate": 1.984156127645178e-05,
711
+ "loss": 0.3855,
712
+ "step": 234
713
+ },
714
+ {
715
+ "epoch": 0.7515923566878981,
716
+ "learning_rate": 1.979961705036587e-05,
717
+ "loss": 0.4611,
718
+ "step": 236
719
+ },
720
+ {
721
+ "epoch": 0.7579617834394905,
722
+ "learning_rate": 1.975280816241959e-05,
723
+ "loss": 0.5788,
724
+ "step": 238
725
+ },
726
+ {
727
+ "epoch": 0.7643312101910829,
728
+ "learning_rate": 1.9701157849175232e-05,
729
+ "loss": 0.5932,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 0.7707006369426752,
734
+ "learning_rate": 1.9644691750543772e-05,
735
+ "loss": 0.3169,
736
+ "step": 242
737
+ },
738
+ {
739
+ "epoch": 0.7770700636942676,
740
+ "learning_rate": 1.958343789705692e-05,
741
+ "loss": 0.4777,
742
+ "step": 244
743
+ },
744
+ {
745
+ "epoch": 0.7834394904458599,
746
+ "learning_rate": 1.9517426695952354e-05,
747
+ "loss": 0.5903,
748
+ "step": 246
749
+ },
750
+ {
751
+ "epoch": 0.7898089171974523,
752
+ "learning_rate": 1.9446690916079184e-05,
753
+ "loss": 0.5089,
754
+ "step": 248
755
+ },
756
+ {
757
+ "epoch": 0.7961783439490446,
758
+ "learning_rate": 1.9371265671631034e-05,
759
+ "loss": 0.3621,
760
+ "step": 250
761
+ },
762
+ {
763
+ "epoch": 0.802547770700637,
764
+ "learning_rate": 1.929118840471488e-05,
765
+ "loss": 0.5418,
766
+ "step": 252
767
+ },
768
+ {
769
+ "epoch": 0.8089171974522293,
770
+ "learning_rate": 1.9206498866764293e-05,
771
+ "loss": 0.4291,
772
+ "step": 254
773
+ },
774
+ {
775
+ "epoch": 0.8152866242038217,
776
+ "learning_rate": 1.9117239098806302e-05,
777
+ "loss": 0.1913,
778
+ "step": 256
779
+ },
780
+ {
781
+ "epoch": 0.821656050955414,
782
+ "learning_rate": 1.9023453410591645e-05,
783
+ "loss": 0.7233,
784
+ "step": 258
785
+ },
786
+ {
787
+ "epoch": 0.8280254777070064,
788
+ "learning_rate": 1.8925188358598822e-05,
789
+ "loss": 0.4016,
790
+ "step": 260
791
+ },
792
+ {
793
+ "epoch": 0.8343949044585988,
794
+ "learning_rate": 1.882249272292283e-05,
795
+ "loss": 0.5974,
796
+ "step": 262
797
+ },
798
+ {
799
+ "epoch": 0.8407643312101911,
800
+ "learning_rate": 1.871541748306005e-05,
801
+ "loss": 0.7111,
802
+ "step": 264
803
+ },
804
+ {
805
+ "epoch": 0.8471337579617835,
806
+ "learning_rate": 1.8604015792601395e-05,
807
+ "loss": 0.3797,
808
+ "step": 266
809
+ },
810
+ {
811
+ "epoch": 0.8535031847133758,
812
+ "learning_rate": 1.8488342952846077e-05,
813
+ "loss": 1.1778,
814
+ "step": 268
815
+ },
816
+ {
817
+ "epoch": 0.8598726114649682,
818
+ "learning_rate": 1.8368456385349333e-05,
819
+ "loss": 0.2904,
820
+ "step": 270
821
+ },
822
+ {
823
+ "epoch": 0.8662420382165605,
824
+ "learning_rate": 1.824441560341761e-05,
825
+ "loss": 0.3213,
826
+ "step": 272
827
+ },
828
+ {
829
+ "epoch": 0.8726114649681529,
830
+ "learning_rate": 1.811628218256532e-05,
831
+ "loss": 0.18,
832
+ "step": 274
833
+ },
834
+ {
835
+ "epoch": 0.8789808917197452,
836
+ "learning_rate": 1.798411972994795e-05,
837
+ "loss": 0.6601,
838
+ "step": 276
839
+ },
840
+ {
841
+ "epoch": 0.8853503184713376,
842
+ "learning_rate": 1.784799385278662e-05,
843
+ "loss": 0.4559,
844
+ "step": 278
845
+ },
846
+ {
847
+ "epoch": 0.89171974522293,
848
+ "learning_rate": 1.770797212579973e-05,
849
+ "loss": 0.4516,
850
+ "step": 280
851
+ },
852
+ {
853
+ "epoch": 0.8980891719745223,
854
+ "learning_rate": 1.756412405765805e-05,
855
+ "loss": 0.5078,
856
+ "step": 282
857
+ },
858
+ {
859
+ "epoch": 0.9044585987261147,
860
+ "learning_rate": 1.7416521056479573e-05,
861
+ "loss": 0.6255,
862
+ "step": 284
863
+ },
864
+ {
865
+ "epoch": 0.910828025477707,
866
+ "learning_rate": 1.7265236394381634e-05,
867
+ "loss": 0.3578,
868
+ "step": 286
869
+ },
870
+ {
871
+ "epoch": 0.9171974522292994,
872
+ "learning_rate": 1.711034517110761e-05,
873
+ "loss": 0.685,
874
+ "step": 288
875
+ },
876
+ {
877
+ "epoch": 0.9235668789808917,
878
+ "learning_rate": 1.6951924276746425e-05,
879
+ "loss": 0.3528,
880
+ "step": 290
881
+ },
882
+ {
883
+ "epoch": 0.9299363057324841,
884
+ "learning_rate": 1.6790052353563254e-05,
885
+ "loss": 0.5669,
886
+ "step": 292
887
+ },
888
+ {
889
+ "epoch": 0.9363057324840764,
890
+ "learning_rate": 1.662480975696046e-05,
891
+ "loss": 0.6841,
892
+ "step": 294
893
+ },
894
+ {
895
+ "epoch": 0.9426751592356688,
896
+ "learning_rate": 1.6456278515588044e-05,
897
+ "loss": 0.4325,
898
+ "step": 296
899
+ },
900
+ {
901
+ "epoch": 0.9490445859872612,
902
+ "learning_rate": 1.6284542290623558e-05,
903
+ "loss": 0.4169,
904
+ "step": 298
905
+ },
906
+ {
907
+ "epoch": 0.9554140127388535,
908
+ "learning_rate": 1.6109686334241648e-05,
909
+ "loss": 0.4317,
910
+ "step": 300
911
+ },
912
+ {
913
+ "epoch": 0.9617834394904459,
914
+ "learning_rate": 1.593179744729355e-05,
915
+ "loss": 0.4362,
916
+ "step": 302
917
+ },
918
+ {
919
+ "epoch": 0.9681528662420382,
920
+ "learning_rate": 1.57509639362181e-05,
921
+ "loss": 0.325,
922
+ "step": 304
923
+ },
924
+ {
925
+ "epoch": 0.9745222929936306,
926
+ "learning_rate": 1.5567275569205227e-05,
927
+ "loss": 0.54,
928
+ "step": 306
929
+ },
930
+ {
931
+ "epoch": 0.9808917197452229,
932
+ "learning_rate": 1.538082353163374e-05,
933
+ "loss": 0.5871,
934
+ "step": 308
935
+ },
936
+ {
937
+ "epoch": 0.9872611464968153,
938
+ "learning_rate": 1.5191700380805768e-05,
939
+ "loss": 0.4206,
940
+ "step": 310
941
+ },
942
+ {
943
+ "epoch": 0.9936305732484076,
944
+ "learning_rate": 1.5000000000000014e-05,
945
+ "loss": 0.2635,
946
+ "step": 312
947
+ },
948
+ {
949
+ "epoch": 1.0,
950
+ "learning_rate": 1.4805817551866854e-05,
951
+ "loss": 0.3167,
952
+ "step": 314
953
+ },
954
+ {
955
+ "epoch": 1.0,
956
+ "step": 314,
957
+ "total_flos": 0,
958
+ "train_loss": 0.509083763903873,
959
+ "train_runtime": 1444.3817,
960
+ "train_samples_per_second": 3.478,
961
+ "train_steps_per_second": 0.217
962
+ }
963
+ ],
964
+ "logging_steps": 2,
965
+ "max_steps": 314,
966
+ "num_input_tokens_seen": 0,
967
+ "num_train_epochs": 1,
968
+ "save_steps": 500,
969
+ "stateful_callbacks": {},
970
+ "total_flos": 0,
971
+ "train_batch_size": 1,
972
+ "trial_name": null,
973
+ "trial_params": null
974
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcb1da0d865900a86247a27953a7a0dc5458426437874d62407c4314902d9cf3
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b245521f6ecaab00312ce79cc07193da1fd88f560aabd1fd1ebf5fbc1b629acd
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e48f4f06fb3434c6a8ddf19bb2458cadd666f5af656ea0aa0c3769237d7711b8
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b762a908c575dd87147925241bacb742eb964233f03206a1aabbc0e55fffa5
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,1131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 314,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.006369426751592357,
14
+ "grad_norm": 10.967390060424805,
15
+ "learning_rate": 2.5834789435204156e-06,
16
+ "loss": 0.8083,
17
+ "step": 2
18
+ },
19
+ {
20
+ "epoch": 0.012738853503184714,
21
+ "grad_norm": 4.827531814575195,
22
+ "learning_rate": 2.73476360561837e-06,
23
+ "loss": 0.3808,
24
+ "step": 4
25
+ },
26
+ {
27
+ "epoch": 0.01910828025477707,
28
+ "grad_norm": 10.00290584564209,
29
+ "learning_rate": 2.889654828892393e-06,
30
+ "loss": 0.5577,
31
+ "step": 6
32
+ },
33
+ {
34
+ "epoch": 0.025477707006369428,
35
+ "grad_norm": 7.657961368560791,
36
+ "learning_rate": 3.0480757232535773e-06,
37
+ "loss": 0.7111,
38
+ "step": 8
39
+ },
40
+ {
41
+ "epoch": 0.03184713375796178,
42
+ "grad_norm": 14.12212085723877,
43
+ "learning_rate": 3.2099476464367486e-06,
44
+ "loss": 0.8264,
45
+ "step": 10
46
+ },
47
+ {
48
+ "epoch": 0.03821656050955414,
49
+ "grad_norm": 5.643587112426758,
50
+ "learning_rate": 3.3751902430395558e-06,
51
+ "loss": 0.8057,
52
+ "step": 12
53
+ },
54
+ {
55
+ "epoch": 0.044585987261146494,
56
+ "grad_norm": 13.343512535095215,
57
+ "learning_rate": 3.5437214844119727e-06,
58
+ "loss": 0.6541,
59
+ "step": 14
60
+ },
61
+ {
62
+ "epoch": 0.050955414012738856,
63
+ "grad_norm": 8.84065055847168,
64
+ "learning_rate": 3.7154577093764287e-06,
65
+ "loss": 0.6246,
66
+ "step": 16
67
+ },
68
+ {
69
+ "epoch": 0.05732484076433121,
70
+ "grad_norm": 11.651762962341309,
71
+ "learning_rate": 3.890313665758341e-06,
72
+ "loss": 0.719,
73
+ "step": 18
74
+ },
75
+ {
76
+ "epoch": 0.06369426751592357,
77
+ "grad_norm": 7.249644756317139,
78
+ "learning_rate": 4.068202552706455e-06,
79
+ "loss": 0.7934,
80
+ "step": 20
81
+ },
82
+ {
83
+ "epoch": 0.07006369426751592,
84
+ "grad_norm": 16.207679748535156,
85
+ "learning_rate": 4.249036063781902e-06,
86
+ "loss": 1.0335,
87
+ "step": 22
88
+ },
89
+ {
90
+ "epoch": 0.07643312101910828,
91
+ "grad_norm": 7.816985607147217,
92
+ "learning_rate": 4.432724430794775e-06,
93
+ "loss": 0.383,
94
+ "step": 24
95
+ },
96
+ {
97
+ "epoch": 0.08280254777070063,
98
+ "grad_norm": 3.924746036529541,
99
+ "learning_rate": 4.6191764683662625e-06,
100
+ "loss": 0.2819,
101
+ "step": 26
102
+ },
103
+ {
104
+ "epoch": 0.08917197452229299,
105
+ "grad_norm": 3.802884578704834,
106
+ "learning_rate": 4.8082996191942354e-06,
107
+ "loss": 0.2926,
108
+ "step": 28
109
+ },
110
+ {
111
+ "epoch": 0.09554140127388536,
112
+ "grad_norm": 7.976809978485107,
113
+ "learning_rate": 5.000000000000003e-06,
114
+ "loss": 0.5368,
115
+ "step": 30
116
+ },
117
+ {
118
+ "epoch": 0.10191082802547771,
119
+ "grad_norm": 27.513673782348633,
120
+ "learning_rate": 5.194182448133163e-06,
121
+ "loss": 0.7441,
122
+ "step": 32
123
+ },
124
+ {
125
+ "epoch": 0.10828025477707007,
126
+ "grad_norm": 9.224363327026367,
127
+ "learning_rate": 5.39075056881172e-06,
128
+ "loss": 0.7198,
129
+ "step": 34
130
+ },
131
+ {
132
+ "epoch": 0.11464968152866242,
133
+ "grad_norm": 4.422102928161621,
134
+ "learning_rate": 5.589606782973682e-06,
135
+ "loss": 0.4043,
136
+ "step": 36
137
+ },
138
+ {
139
+ "epoch": 0.12101910828025478,
140
+ "grad_norm": 4.438572406768799,
141
+ "learning_rate": 5.7906523757166475e-06,
142
+ "loss": 0.3523,
143
+ "step": 38
144
+ },
145
+ {
146
+ "epoch": 0.12738853503184713,
147
+ "grad_norm": 7.6988115310668945,
148
+ "learning_rate": 5.9937875453012e-06,
149
+ "loss": 0.442,
150
+ "step": 40
151
+ },
152
+ {
153
+ "epoch": 0.1337579617834395,
154
+ "grad_norm": 11.33991527557373,
155
+ "learning_rate": 6.198911452693847e-06,
156
+ "loss": 0.5794,
157
+ "step": 42
158
+ },
159
+ {
160
+ "epoch": 0.14012738853503184,
161
+ "grad_norm": 11.485501289367676,
162
+ "learning_rate": 6.405922271624865e-06,
163
+ "loss": 0.9043,
164
+ "step": 44
165
+ },
166
+ {
167
+ "epoch": 0.1464968152866242,
168
+ "grad_norm": 4.232797145843506,
169
+ "learning_rate": 6.614717239136237e-06,
170
+ "loss": 0.2157,
171
+ "step": 46
172
+ },
173
+ {
174
+ "epoch": 0.15286624203821655,
175
+ "grad_norm": 9.58480453491211,
176
+ "learning_rate": 6.8251927065945815e-06,
177
+ "loss": 0.6158,
178
+ "step": 48
179
+ },
180
+ {
181
+ "epoch": 0.1592356687898089,
182
+ "grad_norm": 4.837071895599365,
183
+ "learning_rate": 7.037244191143648e-06,
184
+ "loss": 0.3622,
185
+ "step": 50
186
+ },
187
+ {
188
+ "epoch": 0.16560509554140126,
189
+ "grad_norm": 25.650348663330078,
190
+ "learning_rate": 7.250766427571185e-06,
191
+ "loss": 0.8697,
192
+ "step": 52
193
+ },
194
+ {
195
+ "epoch": 0.17197452229299362,
196
+ "grad_norm": 14.62186050415039,
197
+ "learning_rate": 7.465653420563828e-06,
198
+ "loss": 0.8044,
199
+ "step": 54
200
+ },
201
+ {
202
+ "epoch": 0.17834394904458598,
203
+ "grad_norm": 8.525863647460938,
204
+ "learning_rate": 7.68179849732472e-06,
205
+ "loss": 1.0154,
206
+ "step": 56
207
+ },
208
+ {
209
+ "epoch": 0.18471337579617833,
210
+ "grad_norm": 4.199936389923096,
211
+ "learning_rate": 7.899094360527221e-06,
212
+ "loss": 0.6157,
213
+ "step": 58
214
+ },
215
+ {
216
+ "epoch": 0.1910828025477707,
217
+ "grad_norm": 4.920378684997559,
218
+ "learning_rate": 8.117433141578865e-06,
219
+ "loss": 0.4933,
220
+ "step": 60
221
+ },
222
+ {
223
+ "epoch": 0.19745222929936307,
224
+ "grad_norm": 3.483499765396118,
225
+ "learning_rate": 8.336706454168698e-06,
226
+ "loss": 0.3853,
227
+ "step": 62
228
+ },
229
+ {
230
+ "epoch": 0.20382165605095542,
231
+ "grad_norm": 10.102392196655273,
232
+ "learning_rate": 8.55680544807173e-06,
233
+ "loss": 0.7389,
234
+ "step": 64
235
+ },
236
+ {
237
+ "epoch": 0.21019108280254778,
238
+ "grad_norm": 4.874934673309326,
239
+ "learning_rate": 8.777620863183652e-06,
240
+ "loss": 0.7997,
241
+ "step": 66
242
+ },
243
+ {
244
+ "epoch": 0.21656050955414013,
245
+ "grad_norm": 10.369234085083008,
246
+ "learning_rate": 8.99904308375901e-06,
247
+ "loss": 0.4694,
248
+ "step": 68
249
+ },
250
+ {
251
+ "epoch": 0.2229299363057325,
252
+ "grad_norm": 6.755327224731445,
253
+ "learning_rate": 9.220962192825959e-06,
254
+ "loss": 0.643,
255
+ "step": 70
256
+ },
257
+ {
258
+ "epoch": 0.22929936305732485,
259
+ "grad_norm": 9.252140045166016,
260
+ "learning_rate": 9.443268026750509e-06,
261
+ "loss": 0.7274,
262
+ "step": 72
263
+ },
264
+ {
265
+ "epoch": 0.2356687898089172,
266
+ "grad_norm": 0.9168484807014465,
267
+ "learning_rate": 9.665850229923262e-06,
268
+ "loss": 0.2262,
269
+ "step": 74
270
+ },
271
+ {
272
+ "epoch": 0.24203821656050956,
273
+ "grad_norm": 11.39022159576416,
274
+ "learning_rate": 9.88859830954135e-06,
275
+ "loss": 0.8035,
276
+ "step": 76
277
+ },
278
+ {
279
+ "epoch": 0.2484076433121019,
280
+ "grad_norm": 3.0904781818389893,
281
+ "learning_rate": 1.0111401690458642e-05,
282
+ "loss": 0.3005,
283
+ "step": 78
284
+ },
285
+ {
286
+ "epoch": 0.25477707006369427,
287
+ "grad_norm": 13.052651405334473,
288
+ "learning_rate": 1.0334149770076732e-05,
289
+ "loss": 0.8611,
290
+ "step": 80
291
+ },
292
+ {
293
+ "epoch": 0.2611464968152866,
294
+ "grad_norm": 9.630870819091797,
295
+ "learning_rate": 1.0556731973249482e-05,
296
+ "loss": 0.4201,
297
+ "step": 82
298
+ },
299
+ {
300
+ "epoch": 0.267515923566879,
301
+ "grad_norm": 9.032388687133789,
302
+ "learning_rate": 1.0779037807174032e-05,
303
+ "loss": 0.3791,
304
+ "step": 84
305
+ },
306
+ {
307
+ "epoch": 0.27388535031847133,
308
+ "grad_norm": 7.560876369476318,
309
+ "learning_rate": 1.1000956916240984e-05,
310
+ "loss": 0.3656,
311
+ "step": 86
312
+ },
313
+ {
314
+ "epoch": 0.2802547770700637,
315
+ "grad_norm": 7.056580543518066,
316
+ "learning_rate": 1.1222379136816342e-05,
317
+ "loss": 0.5659,
318
+ "step": 88
319
+ },
320
+ {
321
+ "epoch": 0.28662420382165604,
322
+ "grad_norm": 15.845664024353027,
323
+ "learning_rate": 1.1443194551928264e-05,
324
+ "loss": 0.7557,
325
+ "step": 90
326
+ },
327
+ {
328
+ "epoch": 0.2929936305732484,
329
+ "grad_norm": 10.240681648254395,
330
+ "learning_rate": 1.1663293545831295e-05,
331
+ "loss": 0.954,
332
+ "step": 92
333
+ },
334
+ {
335
+ "epoch": 0.29936305732484075,
336
+ "grad_norm": 27.095378875732422,
337
+ "learning_rate": 1.188256685842113e-05,
338
+ "loss": 0.5918,
339
+ "step": 94
340
+ },
341
+ {
342
+ "epoch": 0.3057324840764331,
343
+ "grad_norm": 10.295605659484863,
344
+ "learning_rate": 1.210090563947277e-05,
345
+ "loss": 0.8789,
346
+ "step": 96
347
+ },
348
+ {
349
+ "epoch": 0.31210191082802546,
350
+ "grad_norm": 8.310393333435059,
351
+ "learning_rate": 1.2318201502675273e-05,
352
+ "loss": 0.8859,
353
+ "step": 98
354
+ },
355
+ {
356
+ "epoch": 0.3184713375796178,
357
+ "grad_norm": 13.601262092590332,
358
+ "learning_rate": 1.2534346579436164e-05,
359
+ "loss": 0.5679,
360
+ "step": 100
361
+ },
362
+ {
363
+ "epoch": 0.3248407643312102,
364
+ "grad_norm": 11.069892883300781,
365
+ "learning_rate": 1.274923357242881e-05,
366
+ "loss": 0.9464,
367
+ "step": 102
368
+ },
369
+ {
370
+ "epoch": 0.33121019108280253,
371
+ "grad_norm": 11.5009183883667,
372
+ "learning_rate": 1.2962755808856345e-05,
373
+ "loss": 0.8625,
374
+ "step": 104
375
+ },
376
+ {
377
+ "epoch": 0.3375796178343949,
378
+ "grad_norm": 10.298592567443848,
379
+ "learning_rate": 1.3174807293405412e-05,
380
+ "loss": 0.7258,
381
+ "step": 106
382
+ },
383
+ {
384
+ "epoch": 0.34394904458598724,
385
+ "grad_norm": 6.594852924346924,
386
+ "learning_rate": 1.3385282760863758e-05,
387
+ "loss": 0.708,
388
+ "step": 108
389
+ },
390
+ {
391
+ "epoch": 0.3503184713375796,
392
+ "grad_norm": 3.0060510635375977,
393
+ "learning_rate": 1.3594077728375129e-05,
394
+ "loss": 0.3931,
395
+ "step": 110
396
+ },
397
+ {
398
+ "epoch": 0.35668789808917195,
399
+ "grad_norm": 8.377557754516602,
400
+ "learning_rate": 1.3801088547306147e-05,
401
+ "loss": 0.765,
402
+ "step": 112
403
+ },
404
+ {
405
+ "epoch": 0.3630573248407643,
406
+ "grad_norm": 9.043540000915527,
407
+ "learning_rate": 1.4006212454698793e-05,
408
+ "loss": 0.5778,
409
+ "step": 114
410
+ },
411
+ {
412
+ "epoch": 0.36942675159235666,
413
+ "grad_norm": 9.072078704833984,
414
+ "learning_rate": 1.4209347624283347e-05,
415
+ "loss": 0.5493,
416
+ "step": 116
417
+ },
418
+ {
419
+ "epoch": 0.37579617834394907,
420
+ "grad_norm": 9.102341651916504,
421
+ "learning_rate": 1.441039321702631e-05,
422
+ "loss": 1.1755,
423
+ "step": 118
424
+ },
425
+ {
426
+ "epoch": 0.3821656050955414,
427
+ "grad_norm": 7.582345008850098,
428
+ "learning_rate": 1.4609249431188274e-05,
429
+ "loss": 0.7363,
430
+ "step": 120
431
+ },
432
+ {
433
+ "epoch": 0.3885350318471338,
434
+ "grad_norm": 5.271243095397949,
435
+ "learning_rate": 1.480581755186683e-05,
436
+ "loss": 0.5457,
437
+ "step": 122
438
+ },
439
+ {
440
+ "epoch": 0.39490445859872614,
441
+ "grad_norm": 6.142684459686279,
442
+ "learning_rate": 1.4999999999999992e-05,
443
+ "loss": 0.6939,
444
+ "step": 124
445
+ },
446
+ {
447
+ "epoch": 0.4012738853503185,
448
+ "grad_norm": 8.789036750793457,
449
+ "learning_rate": 1.5191700380805761e-05,
450
+ "loss": 0.7891,
451
+ "step": 126
452
+ },
453
+ {
454
+ "epoch": 0.40764331210191085,
455
+ "grad_norm": 5.271059989929199,
456
+ "learning_rate": 1.538082353163373e-05,
457
+ "loss": 0.3996,
458
+ "step": 128
459
+ },
460
+ {
461
+ "epoch": 0.4140127388535032,
462
+ "grad_norm": 4.013172626495361,
463
+ "learning_rate": 1.556727556920522e-05,
464
+ "loss": 0.5014,
465
+ "step": 130
466
+ },
467
+ {
468
+ "epoch": 0.42038216560509556,
469
+ "grad_norm": 3.9519894123077393,
470
+ "learning_rate": 1.5750963936218094e-05,
471
+ "loss": 0.8364,
472
+ "step": 132
473
+ },
474
+ {
475
+ "epoch": 0.4267515923566879,
476
+ "grad_norm": 7.547327041625977,
477
+ "learning_rate": 1.593179744729354e-05,
478
+ "loss": 0.7579,
479
+ "step": 134
480
+ },
481
+ {
482
+ "epoch": 0.43312101910828027,
483
+ "grad_norm": 4.156999588012695,
484
+ "learning_rate": 1.6109686334241655e-05,
485
+ "loss": 0.4703,
486
+ "step": 136
487
+ },
488
+ {
489
+ "epoch": 0.4394904458598726,
490
+ "grad_norm": 4.95545768737793,
491
+ "learning_rate": 1.6284542290623565e-05,
492
+ "loss": 0.3763,
493
+ "step": 138
494
+ },
495
+ {
496
+ "epoch": 0.445859872611465,
497
+ "grad_norm": 142.92599487304688,
498
+ "learning_rate": 1.6456278515588023e-05,
499
+ "loss": 3.0181,
500
+ "step": 140
501
+ },
502
+ {
503
+ "epoch": 0.45222929936305734,
504
+ "grad_norm": 6.379714488983154,
505
+ "learning_rate": 1.662480975696044e-05,
506
+ "loss": 0.6882,
507
+ "step": 142
508
+ },
509
+ {
510
+ "epoch": 0.4585987261146497,
511
+ "grad_norm": 7.686674118041992,
512
+ "learning_rate": 1.6790052353563247e-05,
513
+ "loss": 0.5241,
514
+ "step": 144
515
+ },
516
+ {
517
+ "epoch": 0.46496815286624205,
518
+ "grad_norm": 4.402493953704834,
519
+ "learning_rate": 1.6951924276746418e-05,
520
+ "loss": 0.5151,
521
+ "step": 146
522
+ },
523
+ {
524
+ "epoch": 0.4713375796178344,
525
+ "grad_norm": 4.853139400482178,
526
+ "learning_rate": 1.7110345171107602e-05,
527
+ "loss": 0.6634,
528
+ "step": 148
529
+ },
530
+ {
531
+ "epoch": 0.47770700636942676,
532
+ "grad_norm": 6.9735260009765625,
533
+ "learning_rate": 1.7265236394381627e-05,
534
+ "loss": 0.8028,
535
+ "step": 150
536
+ },
537
+ {
538
+ "epoch": 0.4840764331210191,
539
+ "grad_norm": 6.749086856842041,
540
+ "learning_rate": 1.741652105647958e-05,
541
+ "loss": 0.6335,
542
+ "step": 152
543
+ },
544
+ {
545
+ "epoch": 0.49044585987261147,
546
+ "grad_norm": 34.68336486816406,
547
+ "learning_rate": 1.7564124057658057e-05,
548
+ "loss": 0.7938,
549
+ "step": 154
550
+ },
551
+ {
552
+ "epoch": 0.4968152866242038,
553
+ "grad_norm": 22.7413387298584,
554
+ "learning_rate": 1.7707972125799738e-05,
555
+ "loss": 0.6283,
556
+ "step": 156
557
+ },
558
+ {
559
+ "epoch": 0.5031847133757962,
560
+ "grad_norm": 9.641555786132812,
561
+ "learning_rate": 1.7847993852786612e-05,
562
+ "loss": 0.9091,
563
+ "step": 158
564
+ },
565
+ {
566
+ "epoch": 0.5095541401273885,
567
+ "grad_norm": 7.344991683959961,
568
+ "learning_rate": 1.7984119729947937e-05,
569
+ "loss": 0.4567,
570
+ "step": 160
571
+ },
572
+ {
573
+ "epoch": 0.5159235668789809,
574
+ "grad_norm": 4.73659610748291,
575
+ "learning_rate": 1.811628218256531e-05,
576
+ "loss": 0.778,
577
+ "step": 162
578
+ },
579
+ {
580
+ "epoch": 0.5222929936305732,
581
+ "grad_norm": 5.34453010559082,
582
+ "learning_rate": 1.8244415603417603e-05,
583
+ "loss": 0.3745,
584
+ "step": 164
585
+ },
586
+ {
587
+ "epoch": 0.5286624203821656,
588
+ "grad_norm": 7.497495651245117,
589
+ "learning_rate": 1.836845638534933e-05,
590
+ "loss": 0.6032,
591
+ "step": 166
592
+ },
593
+ {
594
+ "epoch": 0.535031847133758,
595
+ "grad_norm": 4.8603925704956055,
596
+ "learning_rate": 1.8488342952846074e-05,
597
+ "loss": 0.4958,
598
+ "step": 168
599
+ },
600
+ {
601
+ "epoch": 0.5414012738853503,
602
+ "grad_norm": 10.660771369934082,
603
+ "learning_rate": 1.860401579260139e-05,
604
+ "loss": 0.512,
605
+ "step": 170
606
+ },
607
+ {
608
+ "epoch": 0.5477707006369427,
609
+ "grad_norm": 8.473343849182129,
610
+ "learning_rate": 1.8715417483060044e-05,
611
+ "loss": 0.7816,
612
+ "step": 172
613
+ },
614
+ {
615
+ "epoch": 0.554140127388535,
616
+ "grad_norm": 7.611669540405273,
617
+ "learning_rate": 1.8822492722922816e-05,
618
+ "loss": 0.6086,
619
+ "step": 174
620
+ },
621
+ {
622
+ "epoch": 0.5605095541401274,
623
+ "grad_norm": 14.955540657043457,
624
+ "learning_rate": 1.8925188358598808e-05,
625
+ "loss": 0.6231,
626
+ "step": 176
627
+ },
628
+ {
629
+ "epoch": 0.5668789808917197,
630
+ "grad_norm": 6.111566066741943,
631
+ "learning_rate": 1.902345341059163e-05,
632
+ "loss": 0.3272,
633
+ "step": 178
634
+ },
635
+ {
636
+ "epoch": 0.5732484076433121,
637
+ "grad_norm": 6.98579740524292,
638
+ "learning_rate": 1.9117239098806296e-05,
639
+ "loss": 0.6097,
640
+ "step": 180
641
+ },
642
+ {
643
+ "epoch": 0.5796178343949044,
644
+ "grad_norm": 12.00461196899414,
645
+ "learning_rate": 1.920649886676429e-05,
646
+ "loss": 0.8231,
647
+ "step": 182
648
+ },
649
+ {
650
+ "epoch": 0.5859872611464968,
651
+ "grad_norm": 13.3690767288208,
652
+ "learning_rate": 1.9291188404714876e-05,
653
+ "loss": 0.579,
654
+ "step": 184
655
+ },
656
+ {
657
+ "epoch": 0.5923566878980892,
658
+ "grad_norm": 8.468439102172852,
659
+ "learning_rate": 1.937126567163103e-05,
660
+ "loss": 0.5412,
661
+ "step": 186
662
+ },
663
+ {
664
+ "epoch": 0.5987261146496815,
665
+ "grad_norm": 8.921483039855957,
666
+ "learning_rate": 1.944669091607919e-05,
667
+ "loss": 0.6761,
668
+ "step": 188
669
+ },
670
+ {
671
+ "epoch": 0.6050955414012739,
672
+ "grad_norm": 5.962874412536621,
673
+ "learning_rate": 1.9517426695952354e-05,
674
+ "loss": 0.4777,
675
+ "step": 190
676
+ },
677
+ {
678
+ "epoch": 0.6114649681528662,
679
+ "grad_norm": 6.013513565063477,
680
+ "learning_rate": 1.9583437897056915e-05,
681
+ "loss": 0.3634,
682
+ "step": 192
683
+ },
684
+ {
685
+ "epoch": 0.6178343949044586,
686
+ "grad_norm": 7.779994010925293,
687
+ "learning_rate": 1.964469175054377e-05,
688
+ "loss": 0.7477,
689
+ "step": 194
690
+ },
691
+ {
692
+ "epoch": 0.6242038216560509,
693
+ "grad_norm": 4.3088788986206055,
694
+ "learning_rate": 1.970115784917523e-05,
695
+ "loss": 0.5438,
696
+ "step": 196
697
+ },
698
+ {
699
+ "epoch": 0.6305732484076433,
700
+ "grad_norm": 14.164244651794434,
701
+ "learning_rate": 1.975280816241959e-05,
702
+ "loss": 0.9717,
703
+ "step": 198
704
+ },
705
+ {
706
+ "epoch": 0.6369426751592356,
707
+ "grad_norm": 3.357556104660034,
708
+ "learning_rate": 1.979961705036587e-05,
709
+ "loss": 0.4938,
710
+ "step": 200
711
+ },
712
+ {
713
+ "epoch": 0.643312101910828,
714
+ "grad_norm": 4.60561990737915,
715
+ "learning_rate": 1.9841561276451777e-05,
716
+ "loss": 0.3408,
717
+ "step": 202
718
+ },
719
+ {
720
+ "epoch": 0.6496815286624203,
721
+ "grad_norm": 5.312678337097168,
722
+ "learning_rate": 1.9878620018998696e-05,
723
+ "loss": 0.4405,
724
+ "step": 204
725
+ },
726
+ {
727
+ "epoch": 0.6560509554140127,
728
+ "grad_norm": 5.059690475463867,
729
+ "learning_rate": 1.9910774881547803e-05,
730
+ "loss": 0.4156,
731
+ "step": 206
732
+ },
733
+ {
734
+ "epoch": 0.6624203821656051,
735
+ "grad_norm": 6.9592742919921875,
736
+ "learning_rate": 1.993800990199235e-05,
737
+ "loss": 1.0661,
738
+ "step": 208
739
+ },
740
+ {
741
+ "epoch": 0.6687898089171974,
742
+ "grad_norm": 10.315267562866211,
743
+ "learning_rate": 1.9960311560501457e-05,
744
+ "loss": 0.6111,
745
+ "step": 210
746
+ },
747
+ {
748
+ "epoch": 0.6751592356687898,
749
+ "grad_norm": 6.091007709503174,
750
+ "learning_rate": 1.9977668786231536e-05,
751
+ "loss": 0.6001,
752
+ "step": 212
753
+ },
754
+ {
755
+ "epoch": 0.6815286624203821,
756
+ "grad_norm": 8.963953018188477,
757
+ "learning_rate": 1.999007296282201e-05,
758
+ "loss": 0.8111,
759
+ "step": 214
760
+ },
761
+ {
762
+ "epoch": 0.6878980891719745,
763
+ "grad_norm": 15.050474166870117,
764
+ "learning_rate": 1.9997517932672592e-05,
765
+ "loss": 0.6209,
766
+ "step": 216
767
+ },
768
+ {
769
+ "epoch": 0.6942675159235668,
770
+ "grad_norm": 4.384186744689941,
771
+ "learning_rate": 2e-05,
772
+ "loss": 0.4493,
773
+ "step": 218
774
+ },
775
+ {
776
+ "epoch": 0.7006369426751592,
777
+ "grad_norm": 2.46761417388916,
778
+ "learning_rate": 1.9997517932672592e-05,
779
+ "loss": 0.4532,
780
+ "step": 220
781
+ },
782
+ {
783
+ "epoch": 0.7070063694267515,
784
+ "grad_norm": 7.234741687774658,
785
+ "learning_rate": 1.999007296282201e-05,
786
+ "loss": 0.6705,
787
+ "step": 222
788
+ },
789
+ {
790
+ "epoch": 0.7133757961783439,
791
+ "grad_norm": 11.147261619567871,
792
+ "learning_rate": 1.9977668786231536e-05,
793
+ "loss": 0.7441,
794
+ "step": 224
795
+ },
796
+ {
797
+ "epoch": 0.7197452229299363,
798
+ "grad_norm": 4.694805145263672,
799
+ "learning_rate": 1.9960311560501457e-05,
800
+ "loss": 0.9027,
801
+ "step": 226
802
+ },
803
+ {
804
+ "epoch": 0.7261146496815286,
805
+ "grad_norm": 6.381280899047852,
806
+ "learning_rate": 1.993800990199235e-05,
807
+ "loss": 0.6166,
808
+ "step": 228
809
+ },
810
+ {
811
+ "epoch": 0.732484076433121,
812
+ "grad_norm": 10.952537536621094,
813
+ "learning_rate": 1.99107748815478e-05,
814
+ "loss": 0.8709,
815
+ "step": 230
816
+ },
817
+ {
818
+ "epoch": 0.7388535031847133,
819
+ "grad_norm": 8.05765151977539,
820
+ "learning_rate": 1.9878620018998696e-05,
821
+ "loss": 0.619,
822
+ "step": 232
823
+ },
824
+ {
825
+ "epoch": 0.7452229299363057,
826
+ "grad_norm": 3.9685816764831543,
827
+ "learning_rate": 1.984156127645178e-05,
828
+ "loss": 1.0332,
829
+ "step": 234
830
+ },
831
+ {
832
+ "epoch": 0.7515923566878981,
833
+ "grad_norm": 8.497997283935547,
834
+ "learning_rate": 1.979961705036587e-05,
835
+ "loss": 0.9633,
836
+ "step": 236
837
+ },
838
+ {
839
+ "epoch": 0.7579617834394905,
840
+ "grad_norm": 7.018019676208496,
841
+ "learning_rate": 1.975280816241959e-05,
842
+ "loss": 0.4901,
843
+ "step": 238
844
+ },
845
+ {
846
+ "epoch": 0.7643312101910829,
847
+ "grad_norm": 5.196238994598389,
848
+ "learning_rate": 1.9701157849175232e-05,
849
+ "loss": 0.4994,
850
+ "step": 240
851
+ },
852
+ {
853
+ "epoch": 0.7707006369426752,
854
+ "grad_norm": 5.84513521194458,
855
+ "learning_rate": 1.9644691750543772e-05,
856
+ "loss": 0.5337,
857
+ "step": 242
858
+ },
859
+ {
860
+ "epoch": 0.7770700636942676,
861
+ "grad_norm": 3.9592578411102295,
862
+ "learning_rate": 1.958343789705692e-05,
863
+ "loss": 0.5261,
864
+ "step": 244
865
+ },
866
+ {
867
+ "epoch": 0.7834394904458599,
868
+ "grad_norm": 7.668013572692871,
869
+ "learning_rate": 1.9517426695952354e-05,
870
+ "loss": 0.4313,
871
+ "step": 246
872
+ },
873
+ {
874
+ "epoch": 0.7898089171974523,
875
+ "grad_norm": 6.829677104949951,
876
+ "learning_rate": 1.9446690916079184e-05,
877
+ "loss": 0.9799,
878
+ "step": 248
879
+ },
880
+ {
881
+ "epoch": 0.7961783439490446,
882
+ "grad_norm": 3.195507764816284,
883
+ "learning_rate": 1.9371265671631034e-05,
884
+ "loss": 0.425,
885
+ "step": 250
886
+ },
887
+ {
888
+ "epoch": 0.802547770700637,
889
+ "grad_norm": 2.563486099243164,
890
+ "learning_rate": 1.929118840471488e-05,
891
+ "loss": 0.362,
892
+ "step": 252
893
+ },
894
+ {
895
+ "epoch": 0.8089171974522293,
896
+ "grad_norm": 4.692176818847656,
897
+ "learning_rate": 1.9206498866764293e-05,
898
+ "loss": 0.3505,
899
+ "step": 254
900
+ },
901
+ {
902
+ "epoch": 0.8152866242038217,
903
+ "grad_norm": 3.836381673812866,
904
+ "learning_rate": 1.9117239098806302e-05,
905
+ "loss": 0.4359,
906
+ "step": 256
907
+ },
908
+ {
909
+ "epoch": 0.821656050955414,
910
+ "grad_norm": 5.667571067810059,
911
+ "learning_rate": 1.9023453410591645e-05,
912
+ "loss": 0.7398,
913
+ "step": 258
914
+ },
915
+ {
916
+ "epoch": 0.8280254777070064,
917
+ "grad_norm": 7.975452899932861,
918
+ "learning_rate": 1.8925188358598822e-05,
919
+ "loss": 0.6783,
920
+ "step": 260
921
+ },
922
+ {
923
+ "epoch": 0.8343949044585988,
924
+ "grad_norm": 5.269340991973877,
925
+ "learning_rate": 1.882249272292283e-05,
926
+ "loss": 0.6184,
927
+ "step": 262
928
+ },
929
+ {
930
+ "epoch": 0.8407643312101911,
931
+ "grad_norm": 10.280611038208008,
932
+ "learning_rate": 1.871541748306005e-05,
933
+ "loss": 0.3653,
934
+ "step": 264
935
+ },
936
+ {
937
+ "epoch": 0.8471337579617835,
938
+ "grad_norm": 2.831383466720581,
939
+ "learning_rate": 1.8604015792601395e-05,
940
+ "loss": 0.4349,
941
+ "step": 266
942
+ },
943
+ {
944
+ "epoch": 0.8535031847133758,
945
+ "grad_norm": 9.044788360595703,
946
+ "learning_rate": 1.8488342952846077e-05,
947
+ "loss": 0.4842,
948
+ "step": 268
949
+ },
950
+ {
951
+ "epoch": 0.8598726114649682,
952
+ "grad_norm": 5.392291069030762,
953
+ "learning_rate": 1.8368456385349333e-05,
954
+ "loss": 0.4772,
955
+ "step": 270
956
+ },
957
+ {
958
+ "epoch": 0.8662420382165605,
959
+ "grad_norm": 2.797391176223755,
960
+ "learning_rate": 1.824441560341761e-05,
961
+ "loss": 0.3883,
962
+ "step": 272
963
+ },
964
+ {
965
+ "epoch": 0.8726114649681529,
966
+ "grad_norm": 3.6665701866149902,
967
+ "learning_rate": 1.811628218256532e-05,
968
+ "loss": 0.504,
969
+ "step": 274
970
+ },
971
+ {
972
+ "epoch": 0.8789808917197452,
973
+ "grad_norm": 8.1128511428833,
974
+ "learning_rate": 1.798411972994795e-05,
975
+ "loss": 0.4383,
976
+ "step": 276
977
+ },
978
+ {
979
+ "epoch": 0.8853503184713376,
980
+ "grad_norm": 6.009725093841553,
981
+ "learning_rate": 1.784799385278662e-05,
982
+ "loss": 0.4464,
983
+ "step": 278
984
+ },
985
+ {
986
+ "epoch": 0.89171974522293,
987
+ "grad_norm": 2.803722858428955,
988
+ "learning_rate": 1.770797212579973e-05,
989
+ "loss": 0.6519,
990
+ "step": 280
991
+ },
992
+ {
993
+ "epoch": 0.8980891719745223,
994
+ "grad_norm": 3.778076410293579,
995
+ "learning_rate": 1.756412405765805e-05,
996
+ "loss": 0.6337,
997
+ "step": 282
998
+ },
999
+ {
1000
+ "epoch": 0.9044585987261147,
1001
+ "grad_norm": 5.328536510467529,
1002
+ "learning_rate": 1.7416521056479573e-05,
1003
+ "loss": 0.7336,
1004
+ "step": 284
1005
+ },
1006
+ {
1007
+ "epoch": 0.910828025477707,
1008
+ "grad_norm": 5.951910495758057,
1009
+ "learning_rate": 1.7265236394381634e-05,
1010
+ "loss": 0.4997,
1011
+ "step": 286
1012
+ },
1013
+ {
1014
+ "epoch": 0.9171974522292994,
1015
+ "grad_norm": 6.259306907653809,
1016
+ "learning_rate": 1.711034517110761e-05,
1017
+ "loss": 0.749,
1018
+ "step": 288
1019
+ },
1020
+ {
1021
+ "epoch": 0.9235668789808917,
1022
+ "grad_norm": 2.7894022464752197,
1023
+ "learning_rate": 1.6951924276746425e-05,
1024
+ "loss": 0.3758,
1025
+ "step": 290
1026
+ },
1027
+ {
1028
+ "epoch": 0.9299363057324841,
1029
+ "grad_norm": 5.453607082366943,
1030
+ "learning_rate": 1.6790052353563254e-05,
1031
+ "loss": 0.5871,
1032
+ "step": 292
1033
+ },
1034
+ {
1035
+ "epoch": 0.9363057324840764,
1036
+ "grad_norm": 6.539675235748291,
1037
+ "learning_rate": 1.662480975696046e-05,
1038
+ "loss": 0.4869,
1039
+ "step": 294
1040
+ },
1041
+ {
1042
+ "epoch": 0.9426751592356688,
1043
+ "grad_norm": 4.624678134918213,
1044
+ "learning_rate": 1.6456278515588044e-05,
1045
+ "loss": 0.3576,
1046
+ "step": 296
1047
+ },
1048
+ {
1049
+ "epoch": 0.9490445859872612,
1050
+ "grad_norm": 5.726230621337891,
1051
+ "learning_rate": 1.6284542290623558e-05,
1052
+ "loss": 2.1993,
1053
+ "step": 298
1054
+ },
1055
+ {
1056
+ "epoch": 0.9554140127388535,
1057
+ "grad_norm": 6.0250396728515625,
1058
+ "learning_rate": 1.6109686334241648e-05,
1059
+ "loss": 0.4139,
1060
+ "step": 300
1061
+ },
1062
+ {
1063
+ "epoch": 0.9617834394904459,
1064
+ "grad_norm": 5.527227401733398,
1065
+ "learning_rate": 1.593179744729355e-05,
1066
+ "loss": 0.4961,
1067
+ "step": 302
1068
+ },
1069
+ {
1070
+ "epoch": 0.9681528662420382,
1071
+ "grad_norm": 11.022268295288086,
1072
+ "learning_rate": 1.57509639362181e-05,
1073
+ "loss": 0.5184,
1074
+ "step": 304
1075
+ },
1076
+ {
1077
+ "epoch": 0.9745222929936306,
1078
+ "grad_norm": 9.068928718566895,
1079
+ "learning_rate": 1.5567275569205227e-05,
1080
+ "loss": 0.6846,
1081
+ "step": 306
1082
+ },
1083
+ {
1084
+ "epoch": 0.9808917197452229,
1085
+ "grad_norm": 10.308109283447266,
1086
+ "learning_rate": 1.538082353163374e-05,
1087
+ "loss": 0.5754,
1088
+ "step": 308
1089
+ },
1090
+ {
1091
+ "epoch": 0.9872611464968153,
1092
+ "grad_norm": 7.5352935791015625,
1093
+ "learning_rate": 1.5191700380805768e-05,
1094
+ "loss": 0.5878,
1095
+ "step": 310
1096
+ },
1097
+ {
1098
+ "epoch": 0.9936305732484076,
1099
+ "grad_norm": 5.239936828613281,
1100
+ "learning_rate": 1.5000000000000014e-05,
1101
+ "loss": 0.479,
1102
+ "step": 312
1103
+ },
1104
+ {
1105
+ "epoch": 1.0,
1106
+ "grad_norm": 4.027707576751709,
1107
+ "learning_rate": 1.4805817551866854e-05,
1108
+ "loss": 0.7548,
1109
+ "step": 314
1110
+ },
1111
+ {
1112
+ "epoch": 1.0,
1113
+ "step": 314,
1114
+ "total_flos": 1239016874704896.0,
1115
+ "train_loss": 0.6360755648202957,
1116
+ "train_runtime": 2238.6022,
1117
+ "train_samples_per_second": 2.244,
1118
+ "train_steps_per_second": 0.14
1119
+ }
1120
+ ],
1121
+ "logging_steps": 2,
1122
+ "max_steps": 314,
1123
+ "num_input_tokens_seen": 0,
1124
+ "num_train_epochs": 1,
1125
+ "save_steps": 500,
1126
+ "stateful_callbacks": {},
1127
+ "total_flos": 1239016874704896.0,
1128
+ "train_batch_size": 1,
1129
+ "trial_name": null,
1130
+ "trial_params": null
1131
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb2e81344a610bbb4729cc3bc280f7c12225d1885a5277ec3e35d2e6fc7dec9
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c7fbfc175c8049e53d2818ec913b868a0babc466f31266623cd3504a6410bf
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e725a6a06be969403b94ee3a866f44b60a87b4c4ad06d3145ffd071f2d4ea6
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038e898b0d517ae99c3296ba7932ae5f7707c16d3caa882e4bdea4a3cf8e73c8
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,974 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 314,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.006369426751592357,
14
+ "learning_rate": 2.5834789435204156e-06,
15
+ "loss": 1.3838,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.012738853503184714,
20
+ "learning_rate": 2.73476360561837e-06,
21
+ "loss": 0.4167,
22
+ "step": 4
23
+ },
24
+ {
25
+ "epoch": 0.01910828025477707,
26
+ "learning_rate": 2.889654828892393e-06,
27
+ "loss": 0.7214,
28
+ "step": 6
29
+ },
30
+ {
31
+ "epoch": 0.025477707006369428,
32
+ "learning_rate": 3.0480757232535773e-06,
33
+ "loss": 0.5926,
34
+ "step": 8
35
+ },
36
+ {
37
+ "epoch": 0.03184713375796178,
38
+ "learning_rate": 3.2099476464367486e-06,
39
+ "loss": 0.8231,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.03821656050955414,
44
+ "learning_rate": 3.3751902430395558e-06,
45
+ "loss": 0.6351,
46
+ "step": 12
47
+ },
48
+ {
49
+ "epoch": 0.044585987261146494,
50
+ "learning_rate": 3.5437214844119727e-06,
51
+ "loss": 0.8684,
52
+ "step": 14
53
+ },
54
+ {
55
+ "epoch": 0.050955414012738856,
56
+ "learning_rate": 3.7154577093764287e-06,
57
+ "loss": 0.4996,
58
+ "step": 16
59
+ },
60
+ {
61
+ "epoch": 0.05732484076433121,
62
+ "learning_rate": 3.890313665758341e-06,
63
+ "loss": 0.4307,
64
+ "step": 18
65
+ },
66
+ {
67
+ "epoch": 0.06369426751592357,
68
+ "learning_rate": 4.068202552706455e-06,
69
+ "loss": 0.4412,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.07006369426751592,
74
+ "learning_rate": 4.249036063781902e-06,
75
+ "loss": 0.5067,
76
+ "step": 22
77
+ },
78
+ {
79
+ "epoch": 0.07643312101910828,
80
+ "learning_rate": 4.432724430794775e-06,
81
+ "loss": 0.3978,
82
+ "step": 24
83
+ },
84
+ {
85
+ "epoch": 0.08280254777070063,
86
+ "learning_rate": 4.6191764683662625e-06,
87
+ "loss": 0.4606,
88
+ "step": 26
89
+ },
90
+ {
91
+ "epoch": 0.08917197452229299,
92
+ "learning_rate": 4.8082996191942354e-06,
93
+ "loss": 0.4328,
94
+ "step": 28
95
+ },
96
+ {
97
+ "epoch": 0.09554140127388536,
98
+ "learning_rate": 5.000000000000003e-06,
99
+ "loss": 0.6153,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.10191082802547771,
104
+ "learning_rate": 5.194182448133163e-06,
105
+ "loss": 0.894,
106
+ "step": 32
107
+ },
108
+ {
109
+ "epoch": 0.10828025477707007,
110
+ "learning_rate": 5.39075056881172e-06,
111
+ "loss": 0.512,
112
+ "step": 34
113
+ },
114
+ {
115
+ "epoch": 0.11464968152866242,
116
+ "learning_rate": 5.589606782973682e-06,
117
+ "loss": 0.4806,
118
+ "step": 36
119
+ },
120
+ {
121
+ "epoch": 0.12101910828025478,
122
+ "learning_rate": 5.7906523757166475e-06,
123
+ "loss": 0.3867,
124
+ "step": 38
125
+ },
126
+ {
127
+ "epoch": 0.12738853503184713,
128
+ "learning_rate": 5.9937875453012e-06,
129
+ "loss": 0.515,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.1337579617834395,
134
+ "learning_rate": 6.198911452693847e-06,
135
+ "loss": 0.472,
136
+ "step": 42
137
+ },
138
+ {
139
+ "epoch": 0.14012738853503184,
140
+ "learning_rate": 6.405922271624865e-06,
141
+ "loss": 0.7776,
142
+ "step": 44
143
+ },
144
+ {
145
+ "epoch": 0.1464968152866242,
146
+ "learning_rate": 6.614717239136237e-06,
147
+ "loss": 0.2591,
148
+ "step": 46
149
+ },
150
+ {
151
+ "epoch": 0.15286624203821655,
152
+ "learning_rate": 6.8251927065945815e-06,
153
+ "loss": 0.4697,
154
+ "step": 48
155
+ },
156
+ {
157
+ "epoch": 0.1592356687898089,
158
+ "learning_rate": 7.037244191143648e-06,
159
+ "loss": 0.504,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.16560509554140126,
164
+ "learning_rate": 7.250766427571185e-06,
165
+ "loss": 0.9246,
166
+ "step": 52
167
+ },
168
+ {
169
+ "epoch": 0.17197452229299362,
170
+ "learning_rate": 7.465653420563828e-06,
171
+ "loss": 0.3775,
172
+ "step": 54
173
+ },
174
+ {
175
+ "epoch": 0.17834394904458598,
176
+ "learning_rate": 7.68179849732472e-06,
177
+ "loss": 0.7175,
178
+ "step": 56
179
+ },
180
+ {
181
+ "epoch": 0.18471337579617833,
182
+ "learning_rate": 7.899094360527221e-06,
183
+ "loss": 0.4747,
184
+ "step": 58
185
+ },
186
+ {
187
+ "epoch": 0.1910828025477707,
188
+ "learning_rate": 8.117433141578865e-06,
189
+ "loss": 0.5985,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.19745222929936307,
194
+ "learning_rate": 8.336706454168698e-06,
195
+ "loss": 0.3404,
196
+ "step": 62
197
+ },
198
+ {
199
+ "epoch": 0.20382165605095542,
200
+ "learning_rate": 8.55680544807173e-06,
201
+ "loss": 0.35,
202
+ "step": 64
203
+ },
204
+ {
205
+ "epoch": 0.21019108280254778,
206
+ "learning_rate": 8.777620863183652e-06,
207
+ "loss": 0.7105,
208
+ "step": 66
209
+ },
210
+ {
211
+ "epoch": 0.21656050955414013,
212
+ "learning_rate": 8.99904308375901e-06,
213
+ "loss": 0.3712,
214
+ "step": 68
215
+ },
216
+ {
217
+ "epoch": 0.2229299363057325,
218
+ "learning_rate": 9.220962192825959e-06,
219
+ "loss": 0.4918,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.22929936305732485,
224
+ "learning_rate": 9.443268026750509e-06,
225
+ "loss": 0.6386,
226
+ "step": 72
227
+ },
228
+ {
229
+ "epoch": 0.2356687898089172,
230
+ "learning_rate": 9.665850229923262e-06,
231
+ "loss": 0.5113,
232
+ "step": 74
233
+ },
234
+ {
235
+ "epoch": 0.24203821656050956,
236
+ "learning_rate": 9.88859830954135e-06,
237
+ "loss": 0.5075,
238
+ "step": 76
239
+ },
240
+ {
241
+ "epoch": 0.2484076433121019,
242
+ "learning_rate": 1.0111401690458642e-05,
243
+ "loss": 0.5798,
244
+ "step": 78
245
+ },
246
+ {
247
+ "epoch": 0.25477707006369427,
248
+ "learning_rate": 1.0334149770076732e-05,
249
+ "loss": 0.6072,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.2611464968152866,
254
+ "learning_rate": 1.0556731973249482e-05,
255
+ "loss": 0.4954,
256
+ "step": 82
257
+ },
258
+ {
259
+ "epoch": 0.267515923566879,
260
+ "learning_rate": 1.0779037807174032e-05,
261
+ "loss": 0.653,
262
+ "step": 84
263
+ },
264
+ {
265
+ "epoch": 0.27388535031847133,
266
+ "learning_rate": 1.1000956916240984e-05,
267
+ "loss": 0.5771,
268
+ "step": 86
269
+ },
270
+ {
271
+ "epoch": 0.2802547770700637,
272
+ "learning_rate": 1.1222379136816342e-05,
273
+ "loss": 0.4746,
274
+ "step": 88
275
+ },
276
+ {
277
+ "epoch": 0.28662420382165604,
278
+ "learning_rate": 1.1443194551928264e-05,
279
+ "loss": 0.6626,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.2929936305732484,
284
+ "learning_rate": 1.1663293545831295e-05,
285
+ "loss": 0.8747,
286
+ "step": 92
287
+ },
288
+ {
289
+ "epoch": 0.29936305732484075,
290
+ "learning_rate": 1.188256685842113e-05,
291
+ "loss": 0.3416,
292
+ "step": 94
293
+ },
294
+ {
295
+ "epoch": 0.3057324840764331,
296
+ "learning_rate": 1.210090563947277e-05,
297
+ "loss": 0.4761,
298
+ "step": 96
299
+ },
300
+ {
301
+ "epoch": 0.31210191082802546,
302
+ "learning_rate": 1.2318201502675273e-05,
303
+ "loss": 0.5684,
304
+ "step": 98
305
+ },
306
+ {
307
+ "epoch": 0.3184713375796178,
308
+ "learning_rate": 1.2534346579436164e-05,
309
+ "loss": 0.7771,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.3248407643312102,
314
+ "learning_rate": 1.274923357242881e-05,
315
+ "loss": 0.6231,
316
+ "step": 102
317
+ },
318
+ {
319
+ "epoch": 0.33121019108280253,
320
+ "learning_rate": 1.2962755808856345e-05,
321
+ "loss": 0.3676,
322
+ "step": 104
323
+ },
324
+ {
325
+ "epoch": 0.3375796178343949,
326
+ "learning_rate": 1.3174807293405412e-05,
327
+ "loss": 0.5425,
328
+ "step": 106
329
+ },
330
+ {
331
+ "epoch": 0.34394904458598724,
332
+ "learning_rate": 1.3385282760863758e-05,
333
+ "loss": 0.5188,
334
+ "step": 108
335
+ },
336
+ {
337
+ "epoch": 0.3503184713375796,
338
+ "learning_rate": 1.3594077728375129e-05,
339
+ "loss": 0.4089,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.35668789808917195,
344
+ "learning_rate": 1.3801088547306147e-05,
345
+ "loss": 0.3555,
346
+ "step": 112
347
+ },
348
+ {
349
+ "epoch": 0.3630573248407643,
350
+ "learning_rate": 1.4006212454698793e-05,
351
+ "loss": 0.5001,
352
+ "step": 114
353
+ },
354
+ {
355
+ "epoch": 0.36942675159235666,
356
+ "learning_rate": 1.4209347624283347e-05,
357
+ "loss": 0.336,
358
+ "step": 116
359
+ },
360
+ {
361
+ "epoch": 0.37579617834394907,
362
+ "learning_rate": 1.441039321702631e-05,
363
+ "loss": 0.8143,
364
+ "step": 118
365
+ },
366
+ {
367
+ "epoch": 0.3821656050955414,
368
+ "learning_rate": 1.4609249431188274e-05,
369
+ "loss": 0.346,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.3885350318471338,
374
+ "learning_rate": 1.480581755186683e-05,
375
+ "loss": 0.56,
376
+ "step": 122
377
+ },
378
+ {
379
+ "epoch": 0.39490445859872614,
380
+ "learning_rate": 1.4999999999999992e-05,
381
+ "loss": 0.5652,
382
+ "step": 124
383
+ },
384
+ {
385
+ "epoch": 0.4012738853503185,
386
+ "learning_rate": 1.5191700380805761e-05,
387
+ "loss": 0.32,
388
+ "step": 126
389
+ },
390
+ {
391
+ "epoch": 0.40764331210191085,
392
+ "learning_rate": 1.538082353163373e-05,
393
+ "loss": 0.5876,
394
+ "step": 128
395
+ },
396
+ {
397
+ "epoch": 0.4140127388535032,
398
+ "learning_rate": 1.556727556920522e-05,
399
+ "loss": 0.3075,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.42038216560509556,
404
+ "learning_rate": 1.5750963936218094e-05,
405
+ "loss": 0.5996,
406
+ "step": 132
407
+ },
408
+ {
409
+ "epoch": 0.4267515923566879,
410
+ "learning_rate": 1.593179744729354e-05,
411
+ "loss": 0.4651,
412
+ "step": 134
413
+ },
414
+ {
415
+ "epoch": 0.43312101910828027,
416
+ "learning_rate": 1.6109686334241655e-05,
417
+ "loss": 0.5205,
418
+ "step": 136
419
+ },
420
+ {
421
+ "epoch": 0.4394904458598726,
422
+ "learning_rate": 1.6284542290623565e-05,
423
+ "loss": 0.3501,
424
+ "step": 138
425
+ },
426
+ {
427
+ "epoch": 0.445859872611465,
428
+ "learning_rate": 1.6456278515588023e-05,
429
+ "loss": 0.6194,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.45222929936305734,
434
+ "learning_rate": 1.662480975696044e-05,
435
+ "loss": 0.4967,
436
+ "step": 142
437
+ },
438
+ {
439
+ "epoch": 0.4585987261146497,
440
+ "learning_rate": 1.6790052353563247e-05,
441
+ "loss": 0.3984,
442
+ "step": 144
443
+ },
444
+ {
445
+ "epoch": 0.46496815286624205,
446
+ "learning_rate": 1.6951924276746418e-05,
447
+ "loss": 0.6918,
448
+ "step": 146
449
+ },
450
+ {
451
+ "epoch": 0.4713375796178344,
452
+ "learning_rate": 1.7110345171107602e-05,
453
+ "loss": 0.9902,
454
+ "step": 148
455
+ },
456
+ {
457
+ "epoch": 0.47770700636942676,
458
+ "learning_rate": 1.7265236394381627e-05,
459
+ "loss": 0.4518,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.4840764331210191,
464
+ "learning_rate": 1.741652105647958e-05,
465
+ "loss": 0.4963,
466
+ "step": 152
467
+ },
468
+ {
469
+ "epoch": 0.49044585987261147,
470
+ "learning_rate": 1.7564124057658057e-05,
471
+ "loss": 0.3386,
472
+ "step": 154
473
+ },
474
+ {
475
+ "epoch": 0.4968152866242038,
476
+ "learning_rate": 1.7707972125799738e-05,
477
+ "loss": 0.4369,
478
+ "step": 156
479
+ },
480
+ {
481
+ "epoch": 0.5031847133757962,
482
+ "learning_rate": 1.7847993852786612e-05,
483
+ "loss": 0.55,
484
+ "step": 158
485
+ },
486
+ {
487
+ "epoch": 0.5095541401273885,
488
+ "learning_rate": 1.7984119729947937e-05,
489
+ "loss": 0.5363,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.5159235668789809,
494
+ "learning_rate": 1.811628218256531e-05,
495
+ "loss": 0.3066,
496
+ "step": 162
497
+ },
498
+ {
499
+ "epoch": 0.5222929936305732,
500
+ "learning_rate": 1.8244415603417603e-05,
501
+ "loss": 0.3565,
502
+ "step": 164
503
+ },
504
+ {
505
+ "epoch": 0.5286624203821656,
506
+ "learning_rate": 1.836845638534933e-05,
507
+ "loss": 0.6966,
508
+ "step": 166
509
+ },
510
+ {
511
+ "epoch": 0.535031847133758,
512
+ "learning_rate": 1.8488342952846074e-05,
513
+ "loss": 0.3571,
514
+ "step": 168
515
+ },
516
+ {
517
+ "epoch": 0.5414012738853503,
518
+ "learning_rate": 1.860401579260139e-05,
519
+ "loss": 0.7104,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.5477707006369427,
524
+ "learning_rate": 1.8715417483060044e-05,
525
+ "loss": 0.4933,
526
+ "step": 172
527
+ },
528
+ {
529
+ "epoch": 0.554140127388535,
530
+ "learning_rate": 1.8822492722922816e-05,
531
+ "loss": 0.4154,
532
+ "step": 174
533
+ },
534
+ {
535
+ "epoch": 0.5605095541401274,
536
+ "learning_rate": 1.8925188358598808e-05,
537
+ "loss": 0.4303,
538
+ "step": 176
539
+ },
540
+ {
541
+ "epoch": 0.5668789808917197,
542
+ "learning_rate": 1.902345341059163e-05,
543
+ "loss": 0.4708,
544
+ "step": 178
545
+ },
546
+ {
547
+ "epoch": 0.5732484076433121,
548
+ "learning_rate": 1.9117239098806296e-05,
549
+ "loss": 0.5172,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 0.5796178343949044,
554
+ "learning_rate": 1.920649886676429e-05,
555
+ "loss": 0.6281,
556
+ "step": 182
557
+ },
558
+ {
559
+ "epoch": 0.5859872611464968,
560
+ "learning_rate": 1.9291188404714876e-05,
561
+ "loss": 0.4933,
562
+ "step": 184
563
+ },
564
+ {
565
+ "epoch": 0.5923566878980892,
566
+ "learning_rate": 1.937126567163103e-05,
567
+ "loss": 0.4513,
568
+ "step": 186
569
+ },
570
+ {
571
+ "epoch": 0.5987261146496815,
572
+ "learning_rate": 1.944669091607919e-05,
573
+ "loss": 0.4949,
574
+ "step": 188
575
+ },
576
+ {
577
+ "epoch": 0.6050955414012739,
578
+ "learning_rate": 1.9517426695952354e-05,
579
+ "loss": 0.6284,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 0.6114649681528662,
584
+ "learning_rate": 1.9583437897056915e-05,
585
+ "loss": 0.4047,
586
+ "step": 192
587
+ },
588
+ {
589
+ "epoch": 0.6178343949044586,
590
+ "learning_rate": 1.964469175054377e-05,
591
+ "loss": 0.6866,
592
+ "step": 194
593
+ },
594
+ {
595
+ "epoch": 0.6242038216560509,
596
+ "learning_rate": 1.970115784917523e-05,
597
+ "loss": 0.6026,
598
+ "step": 196
599
+ },
600
+ {
601
+ "epoch": 0.6305732484076433,
602
+ "learning_rate": 1.975280816241959e-05,
603
+ "loss": 0.4448,
604
+ "step": 198
605
+ },
606
+ {
607
+ "epoch": 0.6369426751592356,
608
+ "learning_rate": 1.979961705036587e-05,
609
+ "loss": 0.4261,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 0.643312101910828,
614
+ "learning_rate": 1.9841561276451777e-05,
615
+ "loss": 0.4357,
616
+ "step": 202
617
+ },
618
+ {
619
+ "epoch": 0.6496815286624203,
620
+ "learning_rate": 1.9878620018998696e-05,
621
+ "loss": 0.3806,
622
+ "step": 204
623
+ },
624
+ {
625
+ "epoch": 0.6560509554140127,
626
+ "learning_rate": 1.9910774881547803e-05,
627
+ "loss": 0.3888,
628
+ "step": 206
629
+ },
630
+ {
631
+ "epoch": 0.6624203821656051,
632
+ "learning_rate": 1.993800990199235e-05,
633
+ "loss": 0.9534,
634
+ "step": 208
635
+ },
636
+ {
637
+ "epoch": 0.6687898089171974,
638
+ "learning_rate": 1.9960311560501457e-05,
639
+ "loss": 0.4221,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 0.6751592356687898,
644
+ "learning_rate": 1.9977668786231536e-05,
645
+ "loss": 0.6196,
646
+ "step": 212
647
+ },
648
+ {
649
+ "epoch": 0.6815286624203821,
650
+ "learning_rate": 1.999007296282201e-05,
651
+ "loss": 0.4501,
652
+ "step": 214
653
+ },
654
+ {
655
+ "epoch": 0.6878980891719745,
656
+ "learning_rate": 1.9997517932672592e-05,
657
+ "loss": 0.3531,
658
+ "step": 216
659
+ },
660
+ {
661
+ "epoch": 0.6942675159235668,
662
+ "learning_rate": 2e-05,
663
+ "loss": 0.5846,
664
+ "step": 218
665
+ },
666
+ {
667
+ "epoch": 0.7006369426751592,
668
+ "learning_rate": 1.9997517932672592e-05,
669
+ "loss": 0.4323,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 0.7070063694267515,
674
+ "learning_rate": 1.999007296282201e-05,
675
+ "loss": 0.4081,
676
+ "step": 222
677
+ },
678
+ {
679
+ "epoch": 0.7133757961783439,
680
+ "learning_rate": 1.9977668786231536e-05,
681
+ "loss": 0.7469,
682
+ "step": 224
683
+ },
684
+ {
685
+ "epoch": 0.7197452229299363,
686
+ "learning_rate": 1.9960311560501457e-05,
687
+ "loss": 1.0746,
688
+ "step": 226
689
+ },
690
+ {
691
+ "epoch": 0.7261146496815286,
692
+ "learning_rate": 1.993800990199235e-05,
693
+ "loss": 0.6235,
694
+ "step": 228
695
+ },
696
+ {
697
+ "epoch": 0.732484076433121,
698
+ "learning_rate": 1.99107748815478e-05,
699
+ "loss": 0.5052,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 0.7388535031847133,
704
+ "learning_rate": 1.9878620018998696e-05,
705
+ "loss": 0.4844,
706
+ "step": 232
707
+ },
708
+ {
709
+ "epoch": 0.7452229299363057,
710
+ "learning_rate": 1.984156127645178e-05,
711
+ "loss": 0.4121,
712
+ "step": 234
713
+ },
714
+ {
715
+ "epoch": 0.7515923566878981,
716
+ "learning_rate": 1.979961705036587e-05,
717
+ "loss": 0.3291,
718
+ "step": 236
719
+ },
720
+ {
721
+ "epoch": 0.7579617834394905,
722
+ "learning_rate": 1.975280816241959e-05,
723
+ "loss": 0.4238,
724
+ "step": 238
725
+ },
726
+ {
727
+ "epoch": 0.7643312101910829,
728
+ "learning_rate": 1.9701157849175232e-05,
729
+ "loss": 0.5666,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 0.7707006369426752,
734
+ "learning_rate": 1.9644691750543772e-05,
735
+ "loss": 0.5248,
736
+ "step": 242
737
+ },
738
+ {
739
+ "epoch": 0.7770700636942676,
740
+ "learning_rate": 1.958343789705692e-05,
741
+ "loss": 0.4315,
742
+ "step": 244
743
+ },
744
+ {
745
+ "epoch": 0.7834394904458599,
746
+ "learning_rate": 1.9517426695952354e-05,
747
+ "loss": 0.6065,
748
+ "step": 246
749
+ },
750
+ {
751
+ "epoch": 0.7898089171974523,
752
+ "learning_rate": 1.9446690916079184e-05,
753
+ "loss": 0.391,
754
+ "step": 248
755
+ },
756
+ {
757
+ "epoch": 0.7961783439490446,
758
+ "learning_rate": 1.9371265671631034e-05,
759
+ "loss": 0.6801,
760
+ "step": 250
761
+ },
762
+ {
763
+ "epoch": 0.802547770700637,
764
+ "learning_rate": 1.929118840471488e-05,
765
+ "loss": 0.4783,
766
+ "step": 252
767
+ },
768
+ {
769
+ "epoch": 0.8089171974522293,
770
+ "learning_rate": 1.9206498866764293e-05,
771
+ "loss": 0.4129,
772
+ "step": 254
773
+ },
774
+ {
775
+ "epoch": 0.8152866242038217,
776
+ "learning_rate": 1.9117239098806302e-05,
777
+ "loss": 0.3749,
778
+ "step": 256
779
+ },
780
+ {
781
+ "epoch": 0.821656050955414,
782
+ "learning_rate": 1.9023453410591645e-05,
783
+ "loss": 0.543,
784
+ "step": 258
785
+ },
786
+ {
787
+ "epoch": 0.8280254777070064,
788
+ "learning_rate": 1.8925188358598822e-05,
789
+ "loss": 0.4735,
790
+ "step": 260
791
+ },
792
+ {
793
+ "epoch": 0.8343949044585988,
794
+ "learning_rate": 1.882249272292283e-05,
795
+ "loss": 0.488,
796
+ "step": 262
797
+ },
798
+ {
799
+ "epoch": 0.8407643312101911,
800
+ "learning_rate": 1.871541748306005e-05,
801
+ "loss": 0.3531,
802
+ "step": 264
803
+ },
804
+ {
805
+ "epoch": 0.8471337579617835,
806
+ "learning_rate": 1.8604015792601395e-05,
807
+ "loss": 0.3552,
808
+ "step": 266
809
+ },
810
+ {
811
+ "epoch": 0.8535031847133758,
812
+ "learning_rate": 1.8488342952846077e-05,
813
+ "loss": 1.0282,
814
+ "step": 268
815
+ },
816
+ {
817
+ "epoch": 0.8598726114649682,
818
+ "learning_rate": 1.8368456385349333e-05,
819
+ "loss": 0.5032,
820
+ "step": 270
821
+ },
822
+ {
823
+ "epoch": 0.8662420382165605,
824
+ "learning_rate": 1.824441560341761e-05,
825
+ "loss": 0.4555,
826
+ "step": 272
827
+ },
828
+ {
829
+ "epoch": 0.8726114649681529,
830
+ "learning_rate": 1.811628218256532e-05,
831
+ "loss": 0.7456,
832
+ "step": 274
833
+ },
834
+ {
835
+ "epoch": 0.8789808917197452,
836
+ "learning_rate": 1.798411972994795e-05,
837
+ "loss": 0.5424,
838
+ "step": 276
839
+ },
840
+ {
841
+ "epoch": 0.8853503184713376,
842
+ "learning_rate": 1.784799385278662e-05,
843
+ "loss": 0.7407,
844
+ "step": 278
845
+ },
846
+ {
847
+ "epoch": 0.89171974522293,
848
+ "learning_rate": 1.770797212579973e-05,
849
+ "loss": 0.3791,
850
+ "step": 280
851
+ },
852
+ {
853
+ "epoch": 0.8980891719745223,
854
+ "learning_rate": 1.756412405765805e-05,
855
+ "loss": 0.3953,
856
+ "step": 282
857
+ },
858
+ {
859
+ "epoch": 0.9044585987261147,
860
+ "learning_rate": 1.7416521056479573e-05,
861
+ "loss": 0.5932,
862
+ "step": 284
863
+ },
864
+ {
865
+ "epoch": 0.910828025477707,
866
+ "learning_rate": 1.7265236394381634e-05,
867
+ "loss": 0.4036,
868
+ "step": 286
869
+ },
870
+ {
871
+ "epoch": 0.9171974522292994,
872
+ "learning_rate": 1.711034517110761e-05,
873
+ "loss": 0.3685,
874
+ "step": 288
875
+ },
876
+ {
877
+ "epoch": 0.9235668789808917,
878
+ "learning_rate": 1.6951924276746425e-05,
879
+ "loss": 0.3526,
880
+ "step": 290
881
+ },
882
+ {
883
+ "epoch": 0.9299363057324841,
884
+ "learning_rate": 1.6790052353563254e-05,
885
+ "loss": 0.5022,
886
+ "step": 292
887
+ },
888
+ {
889
+ "epoch": 0.9363057324840764,
890
+ "learning_rate": 1.662480975696046e-05,
891
+ "loss": 0.3848,
892
+ "step": 294
893
+ },
894
+ {
895
+ "epoch": 0.9426751592356688,
896
+ "learning_rate": 1.6456278515588044e-05,
897
+ "loss": 0.3712,
898
+ "step": 296
899
+ },
900
+ {
901
+ "epoch": 0.9490445859872612,
902
+ "learning_rate": 1.6284542290623558e-05,
903
+ "loss": 2.371,
904
+ "step": 298
905
+ },
906
+ {
907
+ "epoch": 0.9554140127388535,
908
+ "learning_rate": 1.6109686334241648e-05,
909
+ "loss": 0.4082,
910
+ "step": 300
911
+ },
912
+ {
913
+ "epoch": 0.9617834394904459,
914
+ "learning_rate": 1.593179744729355e-05,
915
+ "loss": 0.3691,
916
+ "step": 302
917
+ },
918
+ {
919
+ "epoch": 0.9681528662420382,
920
+ "learning_rate": 1.57509639362181e-05,
921
+ "loss": 0.4227,
922
+ "step": 304
923
+ },
924
+ {
925
+ "epoch": 0.9745222929936306,
926
+ "learning_rate": 1.5567275569205227e-05,
927
+ "loss": 0.3981,
928
+ "step": 306
929
+ },
930
+ {
931
+ "epoch": 0.9808917197452229,
932
+ "learning_rate": 1.538082353163374e-05,
933
+ "loss": 0.507,
934
+ "step": 308
935
+ },
936
+ {
937
+ "epoch": 0.9872611464968153,
938
+ "learning_rate": 1.5191700380805768e-05,
939
+ "loss": 0.4625,
940
+ "step": 310
941
+ },
942
+ {
943
+ "epoch": 0.9936305732484076,
944
+ "learning_rate": 1.5000000000000014e-05,
945
+ "loss": 0.509,
946
+ "step": 312
947
+ },
948
+ {
949
+ "epoch": 1.0,
950
+ "learning_rate": 1.4805817551866854e-05,
951
+ "loss": 0.4223,
952
+ "step": 314
953
+ },
954
+ {
955
+ "epoch": 1.0,
956
+ "step": 314,
957
+ "total_flos": 1322375011368960.0,
958
+ "train_loss": 0.5353610082796425,
959
+ "train_runtime": 1276.7534,
960
+ "train_samples_per_second": 3.935,
961
+ "train_steps_per_second": 0.246
962
+ }
963
+ ],
964
+ "logging_steps": 2,
965
+ "max_steps": 314,
966
+ "num_input_tokens_seen": 0,
967
+ "num_train_epochs": 1,
968
+ "save_steps": 500,
969
+ "stateful_callbacks": {},
970
+ "total_flos": 1322375011368960.0,
971
+ "train_batch_size": 1,
972
+ "trial_name": null,
973
+ "trial_params": null
974
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10bb6d107a165c0bac8ee6b44d62bb23c9d77c43e50dce7ea33d1156f7c60ff
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:957890eac44a229447322393018abccdc4c4d0a1d2e7c06a74d6bf3f3269c754
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c728d92eee5b09c526d58b8394a8dce470256e69f80304e4be88e24d6c90d941
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:482fb07cd37f8cce548388c0afd1b8ddef08fb7a8a6b5bf29655d3c58de6d165
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,974 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 314,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.006369426751592357,
14
+ "learning_rate": 2.5834789435204156e-06,
15
+ "loss": 0.0437,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.012738853503184714,
20
+ "learning_rate": 2.73476360561837e-06,
21
+ "loss": 0.0635,
22
+ "step": 4
23
+ },
24
+ {
25
+ "epoch": 0.01910828025477707,
26
+ "learning_rate": 2.889654828892393e-06,
27
+ "loss": 0.3517,
28
+ "step": 6
29
+ },
30
+ {
31
+ "epoch": 0.025477707006369428,
32
+ "learning_rate": 3.0480757232535773e-06,
33
+ "loss": 0.8403,
34
+ "step": 8
35
+ },
36
+ {
37
+ "epoch": 0.03184713375796178,
38
+ "learning_rate": 3.2099476464367486e-06,
39
+ "loss": 0.0856,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.03821656050955414,
44
+ "learning_rate": 3.3751902430395558e-06,
45
+ "loss": 0.2464,
46
+ "step": 12
47
+ },
48
+ {
49
+ "epoch": 0.044585987261146494,
50
+ "learning_rate": 3.5437214844119727e-06,
51
+ "loss": 0.0283,
52
+ "step": 14
53
+ },
54
+ {
55
+ "epoch": 0.050955414012738856,
56
+ "learning_rate": 3.7154577093764287e-06,
57
+ "loss": 0.4218,
58
+ "step": 16
59
+ },
60
+ {
61
+ "epoch": 0.05732484076433121,
62
+ "learning_rate": 3.890313665758341e-06,
63
+ "loss": 0.0523,
64
+ "step": 18
65
+ },
66
+ {
67
+ "epoch": 0.06369426751592357,
68
+ "learning_rate": 4.068202552706455e-06,
69
+ "loss": 0.1286,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.07006369426751592,
74
+ "learning_rate": 4.249036063781902e-06,
75
+ "loss": 0.2258,
76
+ "step": 22
77
+ },
78
+ {
79
+ "epoch": 0.07643312101910828,
80
+ "learning_rate": 4.432724430794775e-06,
81
+ "loss": 0.2625,
82
+ "step": 24
83
+ },
84
+ {
85
+ "epoch": 0.08280254777070063,
86
+ "learning_rate": 4.6191764683662625e-06,
87
+ "loss": 0.0699,
88
+ "step": 26
89
+ },
90
+ {
91
+ "epoch": 0.08917197452229299,
92
+ "learning_rate": 4.8082996191942354e-06,
93
+ "loss": 1.0868,
94
+ "step": 28
95
+ },
96
+ {
97
+ "epoch": 0.09554140127388536,
98
+ "learning_rate": 5.000000000000003e-06,
99
+ "loss": 0.0467,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.10191082802547771,
104
+ "learning_rate": 5.194182448133163e-06,
105
+ "loss": 0.0898,
106
+ "step": 32
107
+ },
108
+ {
109
+ "epoch": 0.10828025477707007,
110
+ "learning_rate": 5.39075056881172e-06,
111
+ "loss": 0.0576,
112
+ "step": 34
113
+ },
114
+ {
115
+ "epoch": 0.11464968152866242,
116
+ "learning_rate": 5.589606782973682e-06,
117
+ "loss": 0.2819,
118
+ "step": 36
119
+ },
120
+ {
121
+ "epoch": 0.12101910828025478,
122
+ "learning_rate": 5.7906523757166475e-06,
123
+ "loss": 0.0205,
124
+ "step": 38
125
+ },
126
+ {
127
+ "epoch": 0.12738853503184713,
128
+ "learning_rate": 5.9937875453012e-06,
129
+ "loss": 0.2175,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.1337579617834395,
134
+ "learning_rate": 6.198911452693847e-06,
135
+ "loss": 0.0992,
136
+ "step": 42
137
+ },
138
+ {
139
+ "epoch": 0.14012738853503184,
140
+ "learning_rate": 6.405922271624865e-06,
141
+ "loss": 0.0549,
142
+ "step": 44
143
+ },
144
+ {
145
+ "epoch": 0.1464968152866242,
146
+ "learning_rate": 6.614717239136237e-06,
147
+ "loss": 0.1491,
148
+ "step": 46
149
+ },
150
+ {
151
+ "epoch": 0.15286624203821655,
152
+ "learning_rate": 6.8251927065945815e-06,
153
+ "loss": 0.8473,
154
+ "step": 48
155
+ },
156
+ {
157
+ "epoch": 0.1592356687898089,
158
+ "learning_rate": 7.037244191143648e-06,
159
+ "loss": 0.0447,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.16560509554140126,
164
+ "learning_rate": 7.250766427571185e-06,
165
+ "loss": 0.0597,
166
+ "step": 52
167
+ },
168
+ {
169
+ "epoch": 0.17197452229299362,
170
+ "learning_rate": 7.465653420563828e-06,
171
+ "loss": 0.0587,
172
+ "step": 54
173
+ },
174
+ {
175
+ "epoch": 0.17834394904458598,
176
+ "learning_rate": 7.68179849732472e-06,
177
+ "loss": 0.167,
178
+ "step": 56
179
+ },
180
+ {
181
+ "epoch": 0.18471337579617833,
182
+ "learning_rate": 7.899094360527221e-06,
183
+ "loss": 0.0063,
184
+ "step": 58
185
+ },
186
+ {
187
+ "epoch": 0.1910828025477707,
188
+ "learning_rate": 8.117433141578865e-06,
189
+ "loss": 0.0427,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.19745222929936307,
194
+ "learning_rate": 8.336706454168698e-06,
195
+ "loss": 0.006,
196
+ "step": 62
197
+ },
198
+ {
199
+ "epoch": 0.20382165605095542,
200
+ "learning_rate": 8.55680544807173e-06,
201
+ "loss": 0.1898,
202
+ "step": 64
203
+ },
204
+ {
205
+ "epoch": 0.21019108280254778,
206
+ "learning_rate": 8.777620863183652e-06,
207
+ "loss": 0.1642,
208
+ "step": 66
209
+ },
210
+ {
211
+ "epoch": 0.21656050955414013,
212
+ "learning_rate": 8.99904308375901e-06,
213
+ "loss": 0.0916,
214
+ "step": 68
215
+ },
216
+ {
217
+ "epoch": 0.2229299363057325,
218
+ "learning_rate": 9.220962192825959e-06,
219
+ "loss": 0.1389,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.22929936305732485,
224
+ "learning_rate": 9.443268026750509e-06,
225
+ "loss": 0.0734,
226
+ "step": 72
227
+ },
228
+ {
229
+ "epoch": 0.2356687898089172,
230
+ "learning_rate": 9.665850229923262e-06,
231
+ "loss": 0.0275,
232
+ "step": 74
233
+ },
234
+ {
235
+ "epoch": 0.24203821656050956,
236
+ "learning_rate": 9.88859830954135e-06,
237
+ "loss": 0.01,
238
+ "step": 76
239
+ },
240
+ {
241
+ "epoch": 0.2484076433121019,
242
+ "learning_rate": 1.0111401690458642e-05,
243
+ "loss": 0.0131,
244
+ "step": 78
245
+ },
246
+ {
247
+ "epoch": 0.25477707006369427,
248
+ "learning_rate": 1.0334149770076732e-05,
249
+ "loss": 0.0065,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.2611464968152866,
254
+ "learning_rate": 1.0556731973249482e-05,
255
+ "loss": 0.0109,
256
+ "step": 82
257
+ },
258
+ {
259
+ "epoch": 0.267515923566879,
260
+ "learning_rate": 1.0779037807174032e-05,
261
+ "loss": 0.4836,
262
+ "step": 84
263
+ },
264
+ {
265
+ "epoch": 0.27388535031847133,
266
+ "learning_rate": 1.1000956916240984e-05,
267
+ "loss": 0.2034,
268
+ "step": 86
269
+ },
270
+ {
271
+ "epoch": 0.2802547770700637,
272
+ "learning_rate": 1.1222379136816342e-05,
273
+ "loss": 0.0553,
274
+ "step": 88
275
+ },
276
+ {
277
+ "epoch": 0.28662420382165604,
278
+ "learning_rate": 1.1443194551928264e-05,
279
+ "loss": 0.5734,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.2929936305732484,
284
+ "learning_rate": 1.1663293545831295e-05,
285
+ "loss": 0.2468,
286
+ "step": 92
287
+ },
288
+ {
289
+ "epoch": 0.29936305732484075,
290
+ "learning_rate": 1.188256685842113e-05,
291
+ "loss": 0.4644,
292
+ "step": 94
293
+ },
294
+ {
295
+ "epoch": 0.3057324840764331,
296
+ "learning_rate": 1.210090563947277e-05,
297
+ "loss": 0.0546,
298
+ "step": 96
299
+ },
300
+ {
301
+ "epoch": 0.31210191082802546,
302
+ "learning_rate": 1.2318201502675273e-05,
303
+ "loss": 0.0156,
304
+ "step": 98
305
+ },
306
+ {
307
+ "epoch": 0.3184713375796178,
308
+ "learning_rate": 1.2534346579436164e-05,
309
+ "loss": 0.0615,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.3248407643312102,
314
+ "learning_rate": 1.274923357242881e-05,
315
+ "loss": 0.0354,
316
+ "step": 102
317
+ },
318
+ {
319
+ "epoch": 0.33121019108280253,
320
+ "learning_rate": 1.2962755808856345e-05,
321
+ "loss": 0.342,
322
+ "step": 104
323
+ },
324
+ {
325
+ "epoch": 0.3375796178343949,
326
+ "learning_rate": 1.3174807293405412e-05,
327
+ "loss": 0.405,
328
+ "step": 106
329
+ },
330
+ {
331
+ "epoch": 0.34394904458598724,
332
+ "learning_rate": 1.3385282760863758e-05,
333
+ "loss": 0.0077,
334
+ "step": 108
335
+ },
336
+ {
337
+ "epoch": 0.3503184713375796,
338
+ "learning_rate": 1.3594077728375129e-05,
339
+ "loss": 0.3975,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.35668789808917195,
344
+ "learning_rate": 1.3801088547306147e-05,
345
+ "loss": 0.3575,
346
+ "step": 112
347
+ },
348
+ {
349
+ "epoch": 0.3630573248407643,
350
+ "learning_rate": 1.4006212454698793e-05,
351
+ "loss": 0.6353,
352
+ "step": 114
353
+ },
354
+ {
355
+ "epoch": 0.36942675159235666,
356
+ "learning_rate": 1.4209347624283347e-05,
357
+ "loss": 0.4914,
358
+ "step": 116
359
+ },
360
+ {
361
+ "epoch": 0.37579617834394907,
362
+ "learning_rate": 1.441039321702631e-05,
363
+ "loss": 0.3155,
364
+ "step": 118
365
+ },
366
+ {
367
+ "epoch": 0.3821656050955414,
368
+ "learning_rate": 1.4609249431188274e-05,
369
+ "loss": 0.5629,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.3885350318471338,
374
+ "learning_rate": 1.480581755186683e-05,
375
+ "loss": 0.022,
376
+ "step": 122
377
+ },
378
+ {
379
+ "epoch": 0.39490445859872614,
380
+ "learning_rate": 1.4999999999999992e-05,
381
+ "loss": 0.0041,
382
+ "step": 124
383
+ },
384
+ {
385
+ "epoch": 0.4012738853503185,
386
+ "learning_rate": 1.5191700380805761e-05,
387
+ "loss": 0.6424,
388
+ "step": 126
389
+ },
390
+ {
391
+ "epoch": 0.40764331210191085,
392
+ "learning_rate": 1.538082353163373e-05,
393
+ "loss": 0.1021,
394
+ "step": 128
395
+ },
396
+ {
397
+ "epoch": 0.4140127388535032,
398
+ "learning_rate": 1.556727556920522e-05,
399
+ "loss": 0.9117,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.42038216560509556,
404
+ "learning_rate": 1.5750963936218094e-05,
405
+ "loss": 0.2738,
406
+ "step": 132
407
+ },
408
+ {
409
+ "epoch": 0.4267515923566879,
410
+ "learning_rate": 1.593179744729354e-05,
411
+ "loss": 0.7301,
412
+ "step": 134
413
+ },
414
+ {
415
+ "epoch": 0.43312101910828027,
416
+ "learning_rate": 1.6109686334241655e-05,
417
+ "loss": 0.3389,
418
+ "step": 136
419
+ },
420
+ {
421
+ "epoch": 0.4394904458598726,
422
+ "learning_rate": 1.6284542290623565e-05,
423
+ "loss": 0.1688,
424
+ "step": 138
425
+ },
426
+ {
427
+ "epoch": 0.445859872611465,
428
+ "learning_rate": 1.6456278515588023e-05,
429
+ "loss": 0.2313,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.45222929936305734,
434
+ "learning_rate": 1.662480975696044e-05,
435
+ "loss": 0.7155,
436
+ "step": 142
437
+ },
438
+ {
439
+ "epoch": 0.4585987261146497,
440
+ "learning_rate": 1.6790052353563247e-05,
441
+ "loss": 0.0314,
442
+ "step": 144
443
+ },
444
+ {
445
+ "epoch": 0.46496815286624205,
446
+ "learning_rate": 1.6951924276746418e-05,
447
+ "loss": 0.0323,
448
+ "step": 146
449
+ },
450
+ {
451
+ "epoch": 0.4713375796178344,
452
+ "learning_rate": 1.7110345171107602e-05,
453
+ "loss": 0.0467,
454
+ "step": 148
455
+ },
456
+ {
457
+ "epoch": 0.47770700636942676,
458
+ "learning_rate": 1.7265236394381627e-05,
459
+ "loss": 0.0883,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.4840764331210191,
464
+ "learning_rate": 1.741652105647958e-05,
465
+ "loss": 0.1867,
466
+ "step": 152
467
+ },
468
+ {
469
+ "epoch": 0.49044585987261147,
470
+ "learning_rate": 1.7564124057658057e-05,
471
+ "loss": 0.2516,
472
+ "step": 154
473
+ },
474
+ {
475
+ "epoch": 0.4968152866242038,
476
+ "learning_rate": 1.7707972125799738e-05,
477
+ "loss": 0.0261,
478
+ "step": 156
479
+ },
480
+ {
481
+ "epoch": 0.5031847133757962,
482
+ "learning_rate": 1.7847993852786612e-05,
483
+ "loss": 0.1774,
484
+ "step": 158
485
+ },
486
+ {
487
+ "epoch": 0.5095541401273885,
488
+ "learning_rate": 1.7984119729947937e-05,
489
+ "loss": 0.1415,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.5159235668789809,
494
+ "learning_rate": 1.811628218256531e-05,
495
+ "loss": 0.4364,
496
+ "step": 162
497
+ },
498
+ {
499
+ "epoch": 0.5222929936305732,
500
+ "learning_rate": 1.8244415603417603e-05,
501
+ "loss": 0.15,
502
+ "step": 164
503
+ },
504
+ {
505
+ "epoch": 0.5286624203821656,
506
+ "learning_rate": 1.836845638534933e-05,
507
+ "loss": 0.1189,
508
+ "step": 166
509
+ },
510
+ {
511
+ "epoch": 0.535031847133758,
512
+ "learning_rate": 1.8488342952846074e-05,
513
+ "loss": 0.0061,
514
+ "step": 168
515
+ },
516
+ {
517
+ "epoch": 0.5414012738853503,
518
+ "learning_rate": 1.860401579260139e-05,
519
+ "loss": 0.3801,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.5477707006369427,
524
+ "learning_rate": 1.8715417483060044e-05,
525
+ "loss": 0.5063,
526
+ "step": 172
527
+ },
528
+ {
529
+ "epoch": 0.554140127388535,
530
+ "learning_rate": 1.8822492722922816e-05,
531
+ "loss": 0.1346,
532
+ "step": 174
533
+ },
534
+ {
535
+ "epoch": 0.5605095541401274,
536
+ "learning_rate": 1.8925188358598808e-05,
537
+ "loss": 0.0054,
538
+ "step": 176
539
+ },
540
+ {
541
+ "epoch": 0.5668789808917197,
542
+ "learning_rate": 1.902345341059163e-05,
543
+ "loss": 0.2782,
544
+ "step": 178
545
+ },
546
+ {
547
+ "epoch": 0.5732484076433121,
548
+ "learning_rate": 1.9117239098806296e-05,
549
+ "loss": 1.3079,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 0.5796178343949044,
554
+ "learning_rate": 1.920649886676429e-05,
555
+ "loss": 0.1065,
556
+ "step": 182
557
+ },
558
+ {
559
+ "epoch": 0.5859872611464968,
560
+ "learning_rate": 1.9291188404714876e-05,
561
+ "loss": 0.1146,
562
+ "step": 184
563
+ },
564
+ {
565
+ "epoch": 0.5923566878980892,
566
+ "learning_rate": 1.937126567163103e-05,
567
+ "loss": 0.1226,
568
+ "step": 186
569
+ },
570
+ {
571
+ "epoch": 0.5987261146496815,
572
+ "learning_rate": 1.944669091607919e-05,
573
+ "loss": 0.032,
574
+ "step": 188
575
+ },
576
+ {
577
+ "epoch": 0.6050955414012739,
578
+ "learning_rate": 1.9517426695952354e-05,
579
+ "loss": 0.185,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 0.6114649681528662,
584
+ "learning_rate": 1.9583437897056915e-05,
585
+ "loss": 1.2277,
586
+ "step": 192
587
+ },
588
+ {
589
+ "epoch": 0.6178343949044586,
590
+ "learning_rate": 1.964469175054377e-05,
591
+ "loss": 0.66,
592
+ "step": 194
593
+ },
594
+ {
595
+ "epoch": 0.6242038216560509,
596
+ "learning_rate": 1.970115784917523e-05,
597
+ "loss": 0.4214,
598
+ "step": 196
599
+ },
600
+ {
601
+ "epoch": 0.6305732484076433,
602
+ "learning_rate": 1.975280816241959e-05,
603
+ "loss": 0.3423,
604
+ "step": 198
605
+ },
606
+ {
607
+ "epoch": 0.6369426751592356,
608
+ "learning_rate": 1.979961705036587e-05,
609
+ "loss": 0.4321,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 0.643312101910828,
614
+ "learning_rate": 1.9841561276451777e-05,
615
+ "loss": 0.0109,
616
+ "step": 202
617
+ },
618
+ {
619
+ "epoch": 0.6496815286624203,
620
+ "learning_rate": 1.9878620018998696e-05,
621
+ "loss": 0.2352,
622
+ "step": 204
623
+ },
624
+ {
625
+ "epoch": 0.6560509554140127,
626
+ "learning_rate": 1.9910774881547803e-05,
627
+ "loss": 0.112,
628
+ "step": 206
629
+ },
630
+ {
631
+ "epoch": 0.6624203821656051,
632
+ "learning_rate": 1.993800990199235e-05,
633
+ "loss": 0.3111,
634
+ "step": 208
635
+ },
636
+ {
637
+ "epoch": 0.6687898089171974,
638
+ "learning_rate": 1.9960311560501457e-05,
639
+ "loss": 0.5812,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 0.6751592356687898,
644
+ "learning_rate": 1.9977668786231536e-05,
645
+ "loss": 0.2126,
646
+ "step": 212
647
+ },
648
+ {
649
+ "epoch": 0.6815286624203821,
650
+ "learning_rate": 1.999007296282201e-05,
651
+ "loss": 0.0831,
652
+ "step": 214
653
+ },
654
+ {
655
+ "epoch": 0.6878980891719745,
656
+ "learning_rate": 1.9997517932672592e-05,
657
+ "loss": 0.1763,
658
+ "step": 216
659
+ },
660
+ {
661
+ "epoch": 0.6942675159235668,
662
+ "learning_rate": 2e-05,
663
+ "loss": 0.3726,
664
+ "step": 218
665
+ },
666
+ {
667
+ "epoch": 0.7006369426751592,
668
+ "learning_rate": 1.9997517932672592e-05,
669
+ "loss": 0.141,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 0.7070063694267515,
674
+ "learning_rate": 1.999007296282201e-05,
675
+ "loss": 0.1182,
676
+ "step": 222
677
+ },
678
+ {
679
+ "epoch": 0.7133757961783439,
680
+ "learning_rate": 1.9977668786231536e-05,
681
+ "loss": 0.1174,
682
+ "step": 224
683
+ },
684
+ {
685
+ "epoch": 0.7197452229299363,
686
+ "learning_rate": 1.9960311560501457e-05,
687
+ "loss": 0.1507,
688
+ "step": 226
689
+ },
690
+ {
691
+ "epoch": 0.7261146496815286,
692
+ "learning_rate": 1.993800990199235e-05,
693
+ "loss": 0.0347,
694
+ "step": 228
695
+ },
696
+ {
697
+ "epoch": 0.732484076433121,
698
+ "learning_rate": 1.99107748815478e-05,
699
+ "loss": 0.2476,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 0.7388535031847133,
704
+ "learning_rate": 1.9878620018998696e-05,
705
+ "loss": 0.095,
706
+ "step": 232
707
+ },
708
+ {
709
+ "epoch": 0.7452229299363057,
710
+ "learning_rate": 1.984156127645178e-05,
711
+ "loss": 0.1634,
712
+ "step": 234
713
+ },
714
+ {
715
+ "epoch": 0.7515923566878981,
716
+ "learning_rate": 1.979961705036587e-05,
717
+ "loss": 0.0125,
718
+ "step": 236
719
+ },
720
+ {
721
+ "epoch": 0.7579617834394905,
722
+ "learning_rate": 1.975280816241959e-05,
723
+ "loss": 0.3666,
724
+ "step": 238
725
+ },
726
+ {
727
+ "epoch": 0.7643312101910829,
728
+ "learning_rate": 1.9701157849175232e-05,
729
+ "loss": 0.1501,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 0.7707006369426752,
734
+ "learning_rate": 1.9644691750543772e-05,
735
+ "loss": 0.2974,
736
+ "step": 242
737
+ },
738
+ {
739
+ "epoch": 0.7770700636942676,
740
+ "learning_rate": 1.958343789705692e-05,
741
+ "loss": 0.1029,
742
+ "step": 244
743
+ },
744
+ {
745
+ "epoch": 0.7834394904458599,
746
+ "learning_rate": 1.9517426695952354e-05,
747
+ "loss": 0.0726,
748
+ "step": 246
749
+ },
750
+ {
751
+ "epoch": 0.7898089171974523,
752
+ "learning_rate": 1.9446690916079184e-05,
753
+ "loss": 0.0602,
754
+ "step": 248
755
+ },
756
+ {
757
+ "epoch": 0.7961783439490446,
758
+ "learning_rate": 1.9371265671631034e-05,
759
+ "loss": 2.234,
760
+ "step": 250
761
+ },
762
+ {
763
+ "epoch": 0.802547770700637,
764
+ "learning_rate": 1.929118840471488e-05,
765
+ "loss": 1.4173,
766
+ "step": 252
767
+ },
768
+ {
769
+ "epoch": 0.8089171974522293,
770
+ "learning_rate": 1.9206498866764293e-05,
771
+ "loss": 1.0029,
772
+ "step": 254
773
+ },
774
+ {
775
+ "epoch": 0.8152866242038217,
776
+ "learning_rate": 1.9117239098806302e-05,
777
+ "loss": 0.5414,
778
+ "step": 256
779
+ },
780
+ {
781
+ "epoch": 0.821656050955414,
782
+ "learning_rate": 1.9023453410591645e-05,
783
+ "loss": 0.0349,
784
+ "step": 258
785
+ },
786
+ {
787
+ "epoch": 0.8280254777070064,
788
+ "learning_rate": 1.8925188358598822e-05,
789
+ "loss": 0.3559,
790
+ "step": 260
791
+ },
792
+ {
793
+ "epoch": 0.8343949044585988,
794
+ "learning_rate": 1.882249272292283e-05,
795
+ "loss": 0.4071,
796
+ "step": 262
797
+ },
798
+ {
799
+ "epoch": 0.8407643312101911,
800
+ "learning_rate": 1.871541748306005e-05,
801
+ "loss": 0.0814,
802
+ "step": 264
803
+ },
804
+ {
805
+ "epoch": 0.8471337579617835,
806
+ "learning_rate": 1.8604015792601395e-05,
807
+ "loss": 0.8049,
808
+ "step": 266
809
+ },
810
+ {
811
+ "epoch": 0.8535031847133758,
812
+ "learning_rate": 1.8488342952846077e-05,
813
+ "loss": 0.5639,
814
+ "step": 268
815
+ },
816
+ {
817
+ "epoch": 0.8598726114649682,
818
+ "learning_rate": 1.8368456385349333e-05,
819
+ "loss": 0.5041,
820
+ "step": 270
821
+ },
822
+ {
823
+ "epoch": 0.8662420382165605,
824
+ "learning_rate": 1.824441560341761e-05,
825
+ "loss": 0.546,
826
+ "step": 272
827
+ },
828
+ {
829
+ "epoch": 0.8726114649681529,
830
+ "learning_rate": 1.811628218256532e-05,
831
+ "loss": 0.0711,
832
+ "step": 274
833
+ },
834
+ {
835
+ "epoch": 0.8789808917197452,
836
+ "learning_rate": 1.798411972994795e-05,
837
+ "loss": 1.1661,
838
+ "step": 276
839
+ },
840
+ {
841
+ "epoch": 0.8853503184713376,
842
+ "learning_rate": 1.784799385278662e-05,
843
+ "loss": 0.7112,
844
+ "step": 278
845
+ },
846
+ {
847
+ "epoch": 0.89171974522293,
848
+ "learning_rate": 1.770797212579973e-05,
849
+ "loss": 0.207,
850
+ "step": 280
851
+ },
852
+ {
853
+ "epoch": 0.8980891719745223,
854
+ "learning_rate": 1.756412405765805e-05,
855
+ "loss": 0.5363,
856
+ "step": 282
857
+ },
858
+ {
859
+ "epoch": 0.9044585987261147,
860
+ "learning_rate": 1.7416521056479573e-05,
861
+ "loss": 0.8558,
862
+ "step": 284
863
+ },
864
+ {
865
+ "epoch": 0.910828025477707,
866
+ "learning_rate": 1.7265236394381634e-05,
867
+ "loss": 0.2549,
868
+ "step": 286
869
+ },
870
+ {
871
+ "epoch": 0.9171974522292994,
872
+ "learning_rate": 1.711034517110761e-05,
873
+ "loss": 0.0656,
874
+ "step": 288
875
+ },
876
+ {
877
+ "epoch": 0.9235668789808917,
878
+ "learning_rate": 1.6951924276746425e-05,
879
+ "loss": 0.1805,
880
+ "step": 290
881
+ },
882
+ {
883
+ "epoch": 0.9299363057324841,
884
+ "learning_rate": 1.6790052353563254e-05,
885
+ "loss": 0.234,
886
+ "step": 292
887
+ },
888
+ {
889
+ "epoch": 0.9363057324840764,
890
+ "learning_rate": 1.662480975696046e-05,
891
+ "loss": 0.0778,
892
+ "step": 294
893
+ },
894
+ {
895
+ "epoch": 0.9426751592356688,
896
+ "learning_rate": 1.6456278515588044e-05,
897
+ "loss": 0.0258,
898
+ "step": 296
899
+ },
900
+ {
901
+ "epoch": 0.9490445859872612,
902
+ "learning_rate": 1.6284542290623558e-05,
903
+ "loss": 0.1791,
904
+ "step": 298
905
+ },
906
+ {
907
+ "epoch": 0.9554140127388535,
908
+ "learning_rate": 1.6109686334241648e-05,
909
+ "loss": 0.3059,
910
+ "step": 300
911
+ },
912
+ {
913
+ "epoch": 0.9617834394904459,
914
+ "learning_rate": 1.593179744729355e-05,
915
+ "loss": 0.4586,
916
+ "step": 302
917
+ },
918
+ {
919
+ "epoch": 0.9681528662420382,
920
+ "learning_rate": 1.57509639362181e-05,
921
+ "loss": 0.1173,
922
+ "step": 304
923
+ },
924
+ {
925
+ "epoch": 0.9745222929936306,
926
+ "learning_rate": 1.5567275569205227e-05,
927
+ "loss": 0.0107,
928
+ "step": 306
929
+ },
930
+ {
931
+ "epoch": 0.9808917197452229,
932
+ "learning_rate": 1.538082353163374e-05,
933
+ "loss": 1.0589,
934
+ "step": 308
935
+ },
936
+ {
937
+ "epoch": 0.9872611464968153,
938
+ "learning_rate": 1.5191700380805768e-05,
939
+ "loss": 0.5083,
940
+ "step": 310
941
+ },
942
+ {
943
+ "epoch": 0.9936305732484076,
944
+ "learning_rate": 1.5000000000000014e-05,
945
+ "loss": 0.3787,
946
+ "step": 312
947
+ },
948
+ {
949
+ "epoch": 1.0,
950
+ "learning_rate": 1.4805817551866854e-05,
951
+ "loss": 0.1243,
952
+ "step": 314
953
+ },
954
+ {
955
+ "epoch": 1.0,
956
+ "step": 314,
957
+ "total_flos": 1384288393625600.0,
958
+ "train_loss": 0.28617319338332126,
959
+ "train_runtime": 1575.6528,
960
+ "train_samples_per_second": 3.189,
961
+ "train_steps_per_second": 0.199
962
+ }
963
+ ],
964
+ "logging_steps": 2,
965
+ "max_steps": 314,
966
+ "num_input_tokens_seen": 0,
967
+ "num_train_epochs": 1,
968
+ "save_steps": 500,
969
+ "stateful_callbacks": {},
970
+ "total_flos": 1384288393625600.0,
971
+ "train_batch_size": 1,
972
+ "trial_name": null,
973
+ "trial_params": null
974
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3b6809f72af3459a8bfb00453fa0fd9ca5d1cde65ac1977013093efde04fb94
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8831935745a767aff6567bd96b61c8084b4afc2327500e40a130fca7e024012c
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84cc3307cc3cba1b296b0da6d7a76813d8100d33676a8752931ef762e37c90ce
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_03125_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a07d3ce27b76176ebf16341ed50dd4062f55c99df2d42d2db6bd9474018113e
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,1904 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 625,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0032,
14
+ "learning_rate": 2.4524967251364995e-06,
15
+ "loss": 0.1856,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.0064,
20
+ "learning_rate": 2.5263093403840022e-06,
21
+ "loss": 0.2075,
22
+ "step": 4
23
+ },
24
+ {
25
+ "epoch": 0.0096,
26
+ "learning_rate": 2.6010561079587694e-06,
27
+ "loss": 0.2234,
28
+ "step": 6
29
+ },
30
+ {
31
+ "epoch": 0.0128,
32
+ "learning_rate": 2.6767276851049716e-06,
33
+ "loss": 0.2668,
34
+ "step": 8
35
+ },
36
+ {
37
+ "epoch": 0.016,
38
+ "learning_rate": 2.7533146134728993e-06,
39
+ "loss": 0.1002,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.0192,
44
+ "learning_rate": 2.8308073203011634e-06,
45
+ "loss": 0.0876,
46
+ "step": 12
47
+ },
48
+ {
49
+ "epoch": 0.0224,
50
+ "learning_rate": 2.909196119613218e-06,
51
+ "loss": 0.0139,
52
+ "step": 14
53
+ },
54
+ {
55
+ "epoch": 0.0256,
56
+ "learning_rate": 2.988471213428035e-06,
57
+ "loss": 0.0248,
58
+ "step": 16
59
+ },
60
+ {
61
+ "epoch": 0.0288,
62
+ "learning_rate": 3.068622692984767e-06,
63
+ "loss": 0.0242,
64
+ "step": 18
65
+ },
66
+ {
67
+ "epoch": 0.032,
68
+ "learning_rate": 3.1496405399812602e-06,
69
+ "loss": 0.3101,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.0352,
74
+ "learning_rate": 3.231514627826302e-06,
75
+ "loss": 0.1165,
76
+ "step": 22
77
+ },
78
+ {
79
+ "epoch": 0.0384,
80
+ "learning_rate": 3.314234722905302e-06,
81
+ "loss": 0.5796,
82
+ "step": 24
83
+ },
84
+ {
85
+ "epoch": 0.0416,
86
+ "learning_rate": 3.3977904858594534e-06,
87
+ "loss": 0.0059,
88
+ "step": 26
89
+ },
90
+ {
91
+ "epoch": 0.0448,
92
+ "learning_rate": 3.4821714728780654e-06,
93
+ "loss": 0.1573,
94
+ "step": 28
95
+ },
96
+ {
97
+ "epoch": 0.048,
98
+ "learning_rate": 3.567367137003953e-06,
99
+ "loss": 0.2623,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.0512,
104
+ "learning_rate": 3.653366829451711e-06,
105
+ "loss": 0.3131,
106
+ "step": 32
107
+ },
108
+ {
109
+ "epoch": 0.0544,
110
+ "learning_rate": 3.740159800938784e-06,
111
+ "loss": 0.8242,
112
+ "step": 34
113
+ },
114
+ {
115
+ "epoch": 0.0576,
116
+ "learning_rate": 3.827735203028956e-06,
117
+ "loss": 0.0827,
118
+ "step": 36
119
+ },
120
+ {
121
+ "epoch": 0.0608,
122
+ "learning_rate": 3.916082089488379e-06,
123
+ "loss": 0.2051,
124
+ "step": 38
125
+ },
126
+ {
127
+ "epoch": 0.064,
128
+ "learning_rate": 4.005189417653737e-06,
129
+ "loss": 0.1269,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.0672,
134
+ "learning_rate": 4.095046049812541e-06,
135
+ "loss": 0.2132,
136
+ "step": 42
137
+ },
138
+ {
139
+ "epoch": 0.0704,
140
+ "learning_rate": 4.1856407545951825e-06,
141
+ "loss": 0.333,
142
+ "step": 44
143
+ },
144
+ {
145
+ "epoch": 0.0736,
146
+ "learning_rate": 4.276962208378814e-06,
147
+ "loss": 0.0089,
148
+ "step": 46
149
+ },
150
+ {
151
+ "epoch": 0.0768,
152
+ "learning_rate": 4.368998996702686e-06,
153
+ "loss": 0.4542,
154
+ "step": 48
155
+ },
156
+ {
157
+ "epoch": 0.08,
158
+ "learning_rate": 4.461739615694921e-06,
159
+ "loss": 0.5164,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.0832,
164
+ "learning_rate": 4.555172473510324e-06,
165
+ "loss": 0.3665,
166
+ "step": 52
167
+ },
168
+ {
169
+ "epoch": 0.0864,
170
+ "learning_rate": 4.649285891779326e-06,
171
+ "loss": 0.1527,
172
+ "step": 54
173
+ },
174
+ {
175
+ "epoch": 0.0896,
176
+ "learning_rate": 4.744068107067673e-06,
177
+ "loss": 0.1667,
178
+ "step": 56
179
+ },
180
+ {
181
+ "epoch": 0.0928,
182
+ "learning_rate": 4.839507272346751e-06,
183
+ "loss": 0.1369,
184
+ "step": 58
185
+ },
186
+ {
187
+ "epoch": 0.096,
188
+ "learning_rate": 4.935591458474425e-06,
189
+ "loss": 0.2281,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.0992,
194
+ "learning_rate": 5.032308655686007e-06,
195
+ "loss": 0.1405,
196
+ "step": 62
197
+ },
198
+ {
199
+ "epoch": 0.1024,
200
+ "learning_rate": 5.129646775095432e-06,
201
+ "loss": 0.0045,
202
+ "step": 64
203
+ },
204
+ {
205
+ "epoch": 0.1056,
206
+ "learning_rate": 5.227593650206246e-06,
207
+ "loss": 0.7821,
208
+ "step": 66
209
+ },
210
+ {
211
+ "epoch": 0.1088,
212
+ "learning_rate": 5.3261370384323904e-06,
213
+ "loss": 0.4551,
214
+ "step": 68
215
+ },
216
+ {
217
+ "epoch": 0.112,
218
+ "learning_rate": 5.425264622628326e-06,
219
+ "loss": 0.0533,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.1152,
224
+ "learning_rate": 5.524964012628644e-06,
225
+ "loss": 0.0006,
226
+ "step": 72
227
+ },
228
+ {
229
+ "epoch": 0.1184,
230
+ "learning_rate": 5.62522274679673e-06,
231
+ "loss": 0.0159,
232
+ "step": 74
233
+ },
234
+ {
235
+ "epoch": 0.1216,
236
+ "learning_rate": 5.726028293582342e-06,
237
+ "loss": 0.1656,
238
+ "step": 76
239
+ },
240
+ {
241
+ "epoch": 0.1248,
242
+ "learning_rate": 5.827368053088032e-06,
243
+ "loss": 0.7154,
244
+ "step": 78
245
+ },
246
+ {
247
+ "epoch": 0.128,
248
+ "learning_rate": 5.929229358643925e-06,
249
+ "loss": 0.1261,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.1312,
254
+ "learning_rate": 6.03159947839103e-06,
255
+ "loss": 0.5616,
256
+ "step": 82
257
+ },
258
+ {
259
+ "epoch": 0.1344,
260
+ "learning_rate": 6.13446561687258e-06,
261
+ "loss": 0.3386,
262
+ "step": 84
263
+ },
264
+ {
265
+ "epoch": 0.1376,
266
+ "learning_rate": 6.237814916633431e-06,
267
+ "loss": 0.4369,
268
+ "step": 86
269
+ },
270
+ {
271
+ "epoch": 0.1408,
272
+ "learning_rate": 6.341634459827044e-06,
273
+ "loss": 0.3947,
274
+ "step": 88
275
+ },
276
+ {
277
+ "epoch": 0.144,
278
+ "learning_rate": 6.445911269830183e-06,
279
+ "loss": 0.0982,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.1472,
284
+ "learning_rate": 6.5506323128648654e-06,
285
+ "loss": 0.3838,
286
+ "step": 92
287
+ },
288
+ {
289
+ "epoch": 0.1504,
290
+ "learning_rate": 6.655784499627476e-06,
291
+ "loss": 0.9755,
292
+ "step": 94
293
+ },
294
+ {
295
+ "epoch": 0.1536,
296
+ "learning_rate": 6.761354686924883e-06,
297
+ "loss": 0.0549,
298
+ "step": 96
299
+ },
300
+ {
301
+ "epoch": 0.1568,
302
+ "learning_rate": 6.867329679317144e-06,
303
+ "loss": 0.4365,
304
+ "step": 98
305
+ },
306
+ {
307
+ "epoch": 0.16,
308
+ "learning_rate": 6.973696230766884e-06,
309
+ "loss": 0.4253,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.1632,
314
+ "learning_rate": 7.080441046294945e-06,
315
+ "loss": 0.06,
316
+ "step": 102
317
+ },
318
+ {
319
+ "epoch": 0.1664,
320
+ "learning_rate": 7.18755078364214e-06,
321
+ "loss": 0.4592,
322
+ "step": 104
323
+ },
324
+ {
325
+ "epoch": 0.1696,
326
+ "learning_rate": 7.2950120549369204e-06,
327
+ "loss": 0.2125,
328
+ "step": 106
329
+ },
330
+ {
331
+ "epoch": 0.1728,
332
+ "learning_rate": 7.402811428368824e-06,
333
+ "loss": 0.3792,
334
+ "step": 108
335
+ },
336
+ {
337
+ "epoch": 0.176,
338
+ "learning_rate": 7.510935429867233e-06,
339
+ "loss": 0.1535,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.1792,
344
+ "learning_rate": 7.619370544785608e-06,
345
+ "loss": 0.1375,
346
+ "step": 112
347
+ },
348
+ {
349
+ "epoch": 0.1824,
350
+ "learning_rate": 7.728103219590684e-06,
351
+ "loss": 0.1129,
352
+ "step": 114
353
+ },
354
+ {
355
+ "epoch": 0.1856,
356
+ "learning_rate": 7.83711986355656e-06,
357
+ "loss": 0.1558,
358
+ "step": 116
359
+ },
360
+ {
361
+ "epoch": 0.1888,
362
+ "learning_rate": 7.946406850463435e-06,
363
+ "loss": 0.4794,
364
+ "step": 118
365
+ },
366
+ {
367
+ "epoch": 0.192,
368
+ "learning_rate": 8.055950520300756e-06,
369
+ "loss": 0.0791,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.1952,
374
+ "learning_rate": 8.165737180974676e-06,
375
+ "loss": 0.0991,
376
+ "step": 122
377
+ },
378
+ {
379
+ "epoch": 0.1984,
380
+ "learning_rate": 8.275753110019367e-06,
381
+ "loss": 0.1581,
382
+ "step": 124
383
+ },
384
+ {
385
+ "epoch": 0.2016,
386
+ "learning_rate": 8.385984556312285e-06,
387
+ "loss": 0.4554,
388
+ "step": 126
389
+ },
390
+ {
391
+ "epoch": 0.2048,
392
+ "learning_rate": 8.496417741792922e-06,
393
+ "loss": 0.0035,
394
+ "step": 128
395
+ },
396
+ {
397
+ "epoch": 0.208,
398
+ "learning_rate": 8.607038863184952e-06,
399
+ "loss": 0.1598,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.2112,
404
+ "learning_rate": 8.717834093721598e-06,
405
+ "loss": 0.0816,
406
+ "step": 132
407
+ },
408
+ {
409
+ "epoch": 0.2144,
410
+ "learning_rate": 8.828789584873757e-06,
411
+ "loss": 0.2173,
412
+ "step": 134
413
+ },
414
+ {
415
+ "epoch": 0.2176,
416
+ "learning_rate": 8.939891468081036e-06,
417
+ "loss": 0.5008,
418
+ "step": 136
419
+ },
420
+ {
421
+ "epoch": 0.2208,
422
+ "learning_rate": 9.051125856485175e-06,
423
+ "loss": 0.2191,
424
+ "step": 138
425
+ },
426
+ {
427
+ "epoch": 0.224,
428
+ "learning_rate": 9.162478846665854e-06,
429
+ "loss": 0.0324,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.2272,
434
+ "learning_rate": 9.273936520378426e-06,
435
+ "loss": 0.1816,
436
+ "step": 142
437
+ },
438
+ {
439
+ "epoch": 0.2304,
440
+ "learning_rate": 9.38548494629364e-06,
441
+ "loss": 0.3324,
442
+ "step": 144
443
+ },
444
+ {
445
+ "epoch": 0.2336,
446
+ "learning_rate": 9.497110181738935e-06,
447
+ "loss": 0.0182,
448
+ "step": 146
449
+ },
450
+ {
451
+ "epoch": 0.2368,
452
+ "learning_rate": 9.608798274441153e-06,
453
+ "loss": 0.0734,
454
+ "step": 148
455
+ },
456
+ {
457
+ "epoch": 0.24,
458
+ "learning_rate": 9.720535264270526e-06,
459
+ "loss": 0.0685,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.2432,
464
+ "learning_rate": 9.832307184985473e-06,
465
+ "loss": 0.0739,
466
+ "step": 152
467
+ },
468
+ {
469
+ "epoch": 0.2464,
470
+ "learning_rate": 9.944100065978354e-06,
471
+ "loss": 0.1421,
472
+ "step": 154
473
+ },
474
+ {
475
+ "epoch": 0.2496,
476
+ "learning_rate": 1.0055899934021637e-05,
477
+ "loss": 0.8392,
478
+ "step": 156
479
+ },
480
+ {
481
+ "epoch": 0.2528,
482
+ "learning_rate": 1.016769281501452e-05,
483
+ "loss": 0.043,
484
+ "step": 158
485
+ },
486
+ {
487
+ "epoch": 0.256,
488
+ "learning_rate": 1.0279464735729467e-05,
489
+ "loss": 0.0865,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.2592,
494
+ "learning_rate": 1.039120172555884e-05,
495
+ "loss": 0.0151,
496
+ "step": 162
497
+ },
498
+ {
499
+ "epoch": 0.2624,
500
+ "learning_rate": 1.0502889818261058e-05,
501
+ "loss": 0.1952,
502
+ "step": 164
503
+ },
504
+ {
505
+ "epoch": 0.2656,
506
+ "learning_rate": 1.0614515053706354e-05,
507
+ "loss": 0.4652,
508
+ "step": 166
509
+ },
510
+ {
511
+ "epoch": 0.2688,
512
+ "learning_rate": 1.0726063479621567e-05,
513
+ "loss": 0.3148,
514
+ "step": 168
515
+ },
516
+ {
517
+ "epoch": 0.272,
518
+ "learning_rate": 1.083752115333414e-05,
519
+ "loss": 0.2418,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.2752,
524
+ "learning_rate": 1.0948874143514818e-05,
525
+ "loss": 0.1,
526
+ "step": 172
527
+ },
528
+ {
529
+ "epoch": 0.2784,
530
+ "learning_rate": 1.1060108531918955e-05,
531
+ "loss": 0.431,
532
+ "step": 174
533
+ },
534
+ {
535
+ "epoch": 0.2816,
536
+ "learning_rate": 1.1171210415126238e-05,
537
+ "loss": 0.3789,
538
+ "step": 176
539
+ },
540
+ {
541
+ "epoch": 0.2848,
542
+ "learning_rate": 1.1282165906278395e-05,
543
+ "loss": 0.2438,
544
+ "step": 178
545
+ },
546
+ {
547
+ "epoch": 0.288,
548
+ "learning_rate": 1.1392961136815041e-05,
549
+ "loss": 0.087,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 0.2912,
554
+ "learning_rate": 1.150358225820707e-05,
555
+ "loss": 0.7962,
556
+ "step": 182
557
+ },
558
+ {
559
+ "epoch": 0.2944,
560
+ "learning_rate": 1.1614015443687708e-05,
561
+ "loss": 0.3676,
562
+ "step": 184
563
+ },
564
+ {
565
+ "epoch": 0.2976,
566
+ "learning_rate": 1.1724246889980626e-05,
567
+ "loss": 0.0204,
568
+ "step": 186
569
+ },
570
+ {
571
+ "epoch": 0.3008,
572
+ "learning_rate": 1.1834262819025317e-05,
573
+ "loss": 0.0862,
574
+ "step": 188
575
+ },
576
+ {
577
+ "epoch": 0.304,
578
+ "learning_rate": 1.1944049479699241e-05,
579
+ "loss": 0.1435,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 0.3072,
584
+ "learning_rate": 1.2053593149536557e-05,
585
+ "loss": 0.4801,
586
+ "step": 192
587
+ },
588
+ {
589
+ "epoch": 0.3104,
590
+ "learning_rate": 1.2162880136443434e-05,
591
+ "loss": 0.0117,
592
+ "step": 194
593
+ },
594
+ {
595
+ "epoch": 0.3136,
596
+ "learning_rate": 1.2271896780409309e-05,
597
+ "loss": 0.1483,
598
+ "step": 196
599
+ },
600
+ {
601
+ "epoch": 0.3168,
602
+ "learning_rate": 1.2380629455214385e-05,
603
+ "loss": 0.5002,
604
+ "step": 198
605
+ },
606
+ {
607
+ "epoch": 0.32,
608
+ "learning_rate": 1.2489064570132761e-05,
609
+ "loss": 0.2566,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 0.3232,
614
+ "learning_rate": 1.259718857163117e-05,
615
+ "loss": 0.0124,
616
+ "step": 202
617
+ },
618
+ {
619
+ "epoch": 0.3264,
620
+ "learning_rate": 1.2704987945063073e-05,
621
+ "loss": 0.4494,
622
+ "step": 204
623
+ },
624
+ {
625
+ "epoch": 0.3296,
626
+ "learning_rate": 1.2812449216357855e-05,
627
+ "loss": 0.2693,
628
+ "step": 206
629
+ },
630
+ {
631
+ "epoch": 0.3328,
632
+ "learning_rate": 1.2919558953705047e-05,
633
+ "loss": 0.2243,
634
+ "step": 208
635
+ },
636
+ {
637
+ "epoch": 0.336,
638
+ "learning_rate": 1.3026303769233109e-05,
639
+ "loss": 0.0728,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 0.3392,
644
+ "learning_rate": 1.313267032068285e-05,
645
+ "loss": 0.0464,
646
+ "step": 212
647
+ },
648
+ {
649
+ "epoch": 0.3424,
650
+ "learning_rate": 1.3238645313075109e-05,
651
+ "loss": 0.5131,
652
+ "step": 214
653
+ },
654
+ {
655
+ "epoch": 0.3456,
656
+ "learning_rate": 1.3344215500372517e-05,
657
+ "loss": 0.109,
658
+ "step": 216
659
+ },
660
+ {
661
+ "epoch": 0.3488,
662
+ "learning_rate": 1.344936768713513e-05,
663
+ "loss": 0.3551,
664
+ "step": 218
665
+ },
666
+ {
667
+ "epoch": 0.352,
668
+ "learning_rate": 1.3554088730169812e-05,
669
+ "loss": 0.2274,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 0.3552,
674
+ "learning_rate": 1.3658365540172948e-05,
675
+ "loss": 0.3262,
676
+ "step": 222
677
+ },
678
+ {
679
+ "epoch": 0.3584,
680
+ "learning_rate": 1.3762185083366562e-05,
681
+ "loss": 0.0089,
682
+ "step": 224
683
+ },
684
+ {
685
+ "epoch": 0.3616,
686
+ "learning_rate": 1.3865534383127413e-05,
687
+ "loss": 0.2882,
688
+ "step": 226
689
+ },
690
+ {
691
+ "epoch": 0.3648,
692
+ "learning_rate": 1.3968400521608962e-05,
693
+ "loss": 0.1762,
694
+ "step": 228
695
+ },
696
+ {
697
+ "epoch": 0.368,
698
+ "learning_rate": 1.4070770641356069e-05,
699
+ "loss": 0.0952,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 0.3712,
704
+ "learning_rate": 1.4172631946911964e-05,
705
+ "loss": 0.519,
706
+ "step": 232
707
+ },
708
+ {
709
+ "epoch": 0.3744,
710
+ "learning_rate": 1.4273971706417653e-05,
711
+ "loss": 0.0641,
712
+ "step": 234
713
+ },
714
+ {
715
+ "epoch": 0.3776,
716
+ "learning_rate": 1.4374777253203265e-05,
717
+ "loss": 0.0274,
718
+ "step": 236
719
+ },
720
+ {
721
+ "epoch": 0.3808,
722
+ "learning_rate": 1.4475035987371348e-05,
723
+ "loss": 0.5089,
724
+ "step": 238
725
+ },
726
+ {
727
+ "epoch": 0.384,
728
+ "learning_rate": 1.4574735377371669e-05,
729
+ "loss": 0.3623,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 0.3872,
734
+ "learning_rate": 1.4673862961567604e-05,
735
+ "loss": 0.2898,
736
+ "step": 242
737
+ },
738
+ {
739
+ "epoch": 0.3904,
740
+ "learning_rate": 1.4772406349793749e-05,
741
+ "loss": 0.9582,
742
+ "step": 244
743
+ },
744
+ {
745
+ "epoch": 0.3936,
746
+ "learning_rate": 1.4870353224904563e-05,
747
+ "loss": 0.1007,
748
+ "step": 246
749
+ },
750
+ {
751
+ "epoch": 0.3968,
752
+ "learning_rate": 1.4967691344313988e-05,
753
+ "loss": 0.39,
754
+ "step": 248
755
+ },
756
+ {
757
+ "epoch": 0.4,
758
+ "learning_rate": 1.5064408541525568e-05,
759
+ "loss": 0.0082,
760
+ "step": 250
761
+ },
762
+ {
763
+ "epoch": 0.4032,
764
+ "learning_rate": 1.5160492727653245e-05,
765
+ "loss": 0.2143,
766
+ "step": 252
767
+ },
768
+ {
769
+ "epoch": 0.4064,
770
+ "learning_rate": 1.5255931892932322e-05,
771
+ "loss": 0.6342,
772
+ "step": 254
773
+ },
774
+ {
775
+ "epoch": 0.4096,
776
+ "learning_rate": 1.5350714108220667e-05,
777
+ "loss": 0.5303,
778
+ "step": 256
779
+ },
780
+ {
781
+ "epoch": 0.4128,
782
+ "learning_rate": 1.5444827526489668e-05,
783
+ "loss": 0.459,
784
+ "step": 258
785
+ },
786
+ {
787
+ "epoch": 0.416,
788
+ "learning_rate": 1.5538260384305073e-05,
789
+ "loss": 0.434,
790
+ "step": 260
791
+ },
792
+ {
793
+ "epoch": 0.4192,
794
+ "learning_rate": 1.563100100329731e-05,
795
+ "loss": 0.0634,
796
+ "step": 262
797
+ },
798
+ {
799
+ "epoch": 0.4224,
800
+ "learning_rate": 1.572303779162118e-05,
801
+ "loss": 0.0668,
802
+ "step": 264
803
+ },
804
+ {
805
+ "epoch": 0.4256,
806
+ "learning_rate": 1.581435924540481e-05,
807
+ "loss": 0.2634,
808
+ "step": 266
809
+ },
810
+ {
811
+ "epoch": 0.4288,
812
+ "learning_rate": 1.5904953950187455e-05,
813
+ "loss": 0.4033,
814
+ "step": 268
815
+ },
816
+ {
817
+ "epoch": 0.432,
818
+ "learning_rate": 1.599481058234626e-05,
819
+ "loss": 0.0395,
820
+ "step": 270
821
+ },
822
+ {
823
+ "epoch": 0.4352,
824
+ "learning_rate": 1.6083917910511616e-05,
825
+ "loss": 0.2896,
826
+ "step": 272
827
+ },
828
+ {
829
+ "epoch": 0.4384,
830
+ "learning_rate": 1.617226479697104e-05,
831
+ "loss": 0.3215,
832
+ "step": 274
833
+ },
834
+ {
835
+ "epoch": 0.4416,
836
+ "learning_rate": 1.6259840199061212e-05,
837
+ "loss": 0.1937,
838
+ "step": 276
839
+ },
840
+ {
841
+ "epoch": 0.4448,
842
+ "learning_rate": 1.6346633170548285e-05,
843
+ "loss": 0.4305,
844
+ "step": 278
845
+ },
846
+ {
847
+ "epoch": 0.448,
848
+ "learning_rate": 1.6432632862996042e-05,
849
+ "loss": 0.019,
850
+ "step": 280
851
+ },
852
+ {
853
+ "epoch": 0.4512,
854
+ "learning_rate": 1.6517828527121928e-05,
855
+ "loss": 0.548,
856
+ "step": 282
857
+ },
858
+ {
859
+ "epoch": 0.4544,
860
+ "learning_rate": 1.6602209514140542e-05,
861
+ "loss": 0.4799,
862
+ "step": 284
863
+ },
864
+ {
865
+ "epoch": 0.4576,
866
+ "learning_rate": 1.6685765277094695e-05,
867
+ "loss": 0.3917,
868
+ "step": 286
869
+ },
870
+ {
871
+ "epoch": 0.4608,
872
+ "learning_rate": 1.6768485372173696e-05,
873
+ "loss": 0.6199,
874
+ "step": 288
875
+ },
876
+ {
877
+ "epoch": 0.464,
878
+ "learning_rate": 1.6850359460018733e-05,
879
+ "loss": 0.8202,
880
+ "step": 290
881
+ },
882
+ {
883
+ "epoch": 0.4672,
884
+ "learning_rate": 1.6931377307015226e-05,
885
+ "loss": 0.0064,
886
+ "step": 292
887
+ },
888
+ {
889
+ "epoch": 0.4704,
890
+ "learning_rate": 1.701152878657196e-05,
891
+ "loss": 0.13,
892
+ "step": 294
893
+ },
894
+ {
895
+ "epoch": 0.4736,
896
+ "learning_rate": 1.7090803880386778e-05,
897
+ "loss": 0.9176,
898
+ "step": 296
899
+ },
900
+ {
901
+ "epoch": 0.4768,
902
+ "learning_rate": 1.716919267969883e-05,
903
+ "loss": 0.6146,
904
+ "step": 298
905
+ },
906
+ {
907
+ "epoch": 0.48,
908
+ "learning_rate": 1.7246685386527095e-05,
909
+ "loss": 0.142,
910
+ "step": 300
911
+ },
912
+ {
913
+ "epoch": 0.4832,
914
+ "learning_rate": 1.7323272314895022e-05,
915
+ "loss": 0.613,
916
+ "step": 302
917
+ },
918
+ {
919
+ "epoch": 0.4864,
920
+ "learning_rate": 1.7398943892041227e-05,
921
+ "loss": 0.1759,
922
+ "step": 304
923
+ },
924
+ {
925
+ "epoch": 0.4896,
926
+ "learning_rate": 1.7473690659615992e-05,
927
+ "loss": 0.3598,
928
+ "step": 306
929
+ },
930
+ {
931
+ "epoch": 0.4928,
932
+ "learning_rate": 1.7547503274863495e-05,
933
+ "loss": 0.7282,
934
+ "step": 308
935
+ },
936
+ {
937
+ "epoch": 0.496,
938
+ "learning_rate": 1.7620372511789604e-05,
939
+ "loss": 0.0885,
940
+ "step": 310
941
+ },
942
+ {
943
+ "epoch": 0.4992,
944
+ "learning_rate": 1.7692289262315e-05,
945
+ "loss": 0.3294,
946
+ "step": 312
947
+ },
948
+ {
949
+ "epoch": 0.5024,
950
+ "learning_rate": 1.7763244537413657e-05,
951
+ "loss": 0.0262,
952
+ "step": 314
953
+ },
954
+ {
955
+ "epoch": 0.5056,
956
+ "learning_rate": 1.7833229468236364e-05,
957
+ "loss": 0.1026,
958
+ "step": 316
959
+ },
960
+ {
961
+ "epoch": 0.5088,
962
+ "learning_rate": 1.790223530721933e-05,
963
+ "loss": 0.1589,
964
+ "step": 318
965
+ },
966
+ {
967
+ "epoch": 0.512,
968
+ "learning_rate": 1.7970253429177477e-05,
969
+ "loss": 0.7074,
970
+ "step": 320
971
+ },
972
+ {
973
+ "epoch": 0.5152,
974
+ "learning_rate": 1.803727533238257e-05,
975
+ "loss": 0.4914,
976
+ "step": 322
977
+ },
978
+ {
979
+ "epoch": 0.5184,
980
+ "learning_rate": 1.8103292639625842e-05,
981
+ "loss": 0.1835,
982
+ "step": 324
983
+ },
984
+ {
985
+ "epoch": 0.5216,
986
+ "learning_rate": 1.816829709926509e-05,
987
+ "loss": 0.0233,
988
+ "step": 326
989
+ },
990
+ {
991
+ "epoch": 0.5248,
992
+ "learning_rate": 1.8232280586256097e-05,
993
+ "loss": 0.0566,
994
+ "step": 328
995
+ },
996
+ {
997
+ "epoch": 0.528,
998
+ "learning_rate": 1.829523510316813e-05,
999
+ "loss": 0.6869,
1000
+ "step": 330
1001
+ },
1002
+ {
1003
+ "epoch": 0.5312,
1004
+ "learning_rate": 1.8357152781183606e-05,
1005
+ "loss": 0.1445,
1006
+ "step": 332
1007
+ },
1008
+ {
1009
+ "epoch": 0.5344,
1010
+ "learning_rate": 1.8418025881081606e-05,
1011
+ "loss": 0.7789,
1012
+ "step": 334
1013
+ },
1014
+ {
1015
+ "epoch": 0.5376,
1016
+ "learning_rate": 1.8477846794205258e-05,
1017
+ "loss": 0.5204,
1018
+ "step": 336
1019
+ },
1020
+ {
1021
+ "epoch": 0.5408,
1022
+ "learning_rate": 1.8536608043412695e-05,
1023
+ "loss": 0.1039,
1024
+ "step": 338
1025
+ },
1026
+ {
1027
+ "epoch": 0.544,
1028
+ "learning_rate": 1.85943022840117e-05,
1029
+ "loss": 0.4561,
1030
+ "step": 340
1031
+ },
1032
+ {
1033
+ "epoch": 0.5472,
1034
+ "learning_rate": 1.865092230467769e-05,
1035
+ "loss": 0.0233,
1036
+ "step": 342
1037
+ },
1038
+ {
1039
+ "epoch": 0.5504,
1040
+ "learning_rate": 1.87064610283551e-05,
1041
+ "loss": 0.1351,
1042
+ "step": 344
1043
+ },
1044
+ {
1045
+ "epoch": 0.5536,
1046
+ "learning_rate": 1.876091151314196e-05,
1047
+ "loss": 0.0252,
1048
+ "step": 346
1049
+ },
1050
+ {
1051
+ "epoch": 0.5568,
1052
+ "learning_rate": 1.8814266953157557e-05,
1053
+ "loss": 0.3327,
1054
+ "step": 348
1055
+ },
1056
+ {
1057
+ "epoch": 0.56,
1058
+ "learning_rate": 1.8866520679393127e-05,
1059
+ "loss": 0.3069,
1060
+ "step": 350
1061
+ },
1062
+ {
1063
+ "epoch": 0.5632,
1064
+ "learning_rate": 1.8917666160545436e-05,
1065
+ "loss": 0.4257,
1066
+ "step": 352
1067
+ },
1068
+ {
1069
+ "epoch": 0.5664,
1070
+ "learning_rate": 1.896769700383315e-05,
1071
+ "loss": 0.5447,
1072
+ "step": 354
1073
+ },
1074
+ {
1075
+ "epoch": 0.5696,
1076
+ "learning_rate": 1.901660695579585e-05,
1077
+ "loss": 0.024,
1078
+ "step": 356
1079
+ },
1080
+ {
1081
+ "epoch": 0.5728,
1082
+ "learning_rate": 1.9064389903075676e-05,
1083
+ "loss": 0.5923,
1084
+ "step": 358
1085
+ },
1086
+ {
1087
+ "epoch": 0.576,
1088
+ "learning_rate": 1.911103987318148e-05,
1089
+ "loss": 0.8732,
1090
+ "step": 360
1091
+ },
1092
+ {
1093
+ "epoch": 0.5792,
1094
+ "learning_rate": 1.9156551035235288e-05,
1095
+ "loss": 0.4092,
1096
+ "step": 362
1097
+ },
1098
+ {
1099
+ "epoch": 0.5824,
1100
+ "learning_rate": 1.9200917700701173e-05,
1101
+ "loss": 0.4093,
1102
+ "step": 364
1103
+ },
1104
+ {
1105
+ "epoch": 0.5856,
1106
+ "learning_rate": 1.924413432409622e-05,
1107
+ "loss": 0.0291,
1108
+ "step": 366
1109
+ },
1110
+ {
1111
+ "epoch": 0.5888,
1112
+ "learning_rate": 1.9286195503683705e-05,
1113
+ "loss": 0.3259,
1114
+ "step": 368
1115
+ },
1116
+ {
1117
+ "epoch": 0.592,
1118
+ "learning_rate": 1.932709598214825e-05,
1119
+ "loss": 0.3957,
1120
+ "step": 370
1121
+ },
1122
+ {
1123
+ "epoch": 0.5952,
1124
+ "learning_rate": 1.9366830647252967e-05,
1125
+ "loss": 0.5752,
1126
+ "step": 372
1127
+ },
1128
+ {
1129
+ "epoch": 0.5984,
1130
+ "learning_rate": 1.940539453247842e-05,
1131
+ "loss": 0.4475,
1132
+ "step": 374
1133
+ },
1134
+ {
1135
+ "epoch": 0.6016,
1136
+ "learning_rate": 1.944278281764342e-05,
1137
+ "loss": 0.004,
1138
+ "step": 376
1139
+ },
1140
+ {
1141
+ "epoch": 0.6048,
1142
+ "learning_rate": 1.9478990829507504e-05,
1143
+ "loss": 0.1485,
1144
+ "step": 378
1145
+ },
1146
+ {
1147
+ "epoch": 0.608,
1148
+ "learning_rate": 1.951401404235505e-05,
1149
+ "loss": 0.035,
1150
+ "step": 380
1151
+ },
1152
+ {
1153
+ "epoch": 0.6112,
1154
+ "learning_rate": 1.9547848078560975e-05,
1155
+ "loss": 0.2546,
1156
+ "step": 382
1157
+ },
1158
+ {
1159
+ "epoch": 0.6144,
1160
+ "learning_rate": 1.9580488709137858e-05,
1161
+ "loss": 0.0937,
1162
+ "step": 384
1163
+ },
1164
+ {
1165
+ "epoch": 0.6176,
1166
+ "learning_rate": 1.961193185426459e-05,
1167
+ "loss": 0.2251,
1168
+ "step": 386
1169
+ },
1170
+ {
1171
+ "epoch": 0.6208,
1172
+ "learning_rate": 1.9642173583796265e-05,
1173
+ "loss": 0.6789,
1174
+ "step": 388
1175
+ },
1176
+ {
1177
+ "epoch": 0.624,
1178
+ "learning_rate": 1.967121011775546e-05,
1179
+ "loss": 0.06,
1180
+ "step": 390
1181
+ },
1182
+ {
1183
+ "epoch": 0.6272,
1184
+ "learning_rate": 1.969903782680467e-05,
1185
+ "loss": 0.0109,
1186
+ "step": 392
1187
+ },
1188
+ {
1189
+ "epoch": 0.6304,
1190
+ "learning_rate": 1.9725653232699962e-05,
1191
+ "loss": 0.1111,
1192
+ "step": 394
1193
+ },
1194
+ {
1195
+ "epoch": 0.6336,
1196
+ "learning_rate": 1.9751053008725736e-05,
1197
+ "loss": 0.1314,
1198
+ "step": 396
1199
+ },
1200
+ {
1201
+ "epoch": 0.6368,
1202
+ "learning_rate": 1.9775233980110524e-05,
1203
+ "loss": 0.57,
1204
+ "step": 398
1205
+ },
1206
+ {
1207
+ "epoch": 0.64,
1208
+ "learning_rate": 1.9798193124423804e-05,
1209
+ "loss": 0.5711,
1210
+ "step": 400
1211
+ },
1212
+ {
1213
+ "epoch": 0.6432,
1214
+ "learning_rate": 1.9819927571953807e-05,
1215
+ "loss": 0.1568,
1216
+ "step": 402
1217
+ },
1218
+ {
1219
+ "epoch": 0.6464,
1220
+ "learning_rate": 1.9840434606066182e-05,
1221
+ "loss": 0.5302,
1222
+ "step": 404
1223
+ },
1224
+ {
1225
+ "epoch": 0.6496,
1226
+ "learning_rate": 1.985971166354357e-05,
1227
+ "loss": 0.0343,
1228
+ "step": 406
1229
+ },
1230
+ {
1231
+ "epoch": 0.6528,
1232
+ "learning_rate": 1.9877756334905983e-05,
1233
+ "loss": 0.0959,
1234
+ "step": 408
1235
+ },
1236
+ {
1237
+ "epoch": 0.656,
1238
+ "learning_rate": 1.9894566364711965e-05,
1239
+ "loss": 0.0006,
1240
+ "step": 410
1241
+ },
1242
+ {
1243
+ "epoch": 0.6592,
1244
+ "learning_rate": 1.99101396518405e-05,
1245
+ "loss": 0.3317,
1246
+ "step": 412
1247
+ },
1248
+ {
1249
+ "epoch": 0.6624,
1250
+ "learning_rate": 1.9924474249753652e-05,
1251
+ "loss": 0.1517,
1252
+ "step": 414
1253
+ },
1254
+ {
1255
+ "epoch": 0.6656,
1256
+ "learning_rate": 1.9937568366739858e-05,
1257
+ "loss": 0.2894,
1258
+ "step": 416
1259
+ },
1260
+ {
1261
+ "epoch": 0.6688,
1262
+ "learning_rate": 1.994942036613787e-05,
1263
+ "loss": 0.479,
1264
+ "step": 418
1265
+ },
1266
+ {
1267
+ "epoch": 0.672,
1268
+ "learning_rate": 1.9960028766541336e-05,
1269
+ "loss": 0.8059,
1270
+ "step": 420
1271
+ },
1272
+ {
1273
+ "epoch": 0.6752,
1274
+ "learning_rate": 1.9969392241983957e-05,
1275
+ "loss": 0.6762,
1276
+ "step": 422
1277
+ },
1278
+ {
1279
+ "epoch": 0.6784,
1280
+ "learning_rate": 1.9977509622105233e-05,
1281
+ "loss": 0.0211,
1282
+ "step": 424
1283
+ },
1284
+ {
1285
+ "epoch": 0.6816,
1286
+ "learning_rate": 1.998437989229673e-05,
1287
+ "loss": 0.1284,
1288
+ "step": 426
1289
+ },
1290
+ {
1291
+ "epoch": 0.6848,
1292
+ "learning_rate": 1.9990002193828923e-05,
1293
+ "loss": 0.4959,
1294
+ "step": 428
1295
+ },
1296
+ {
1297
+ "epoch": 0.688,
1298
+ "learning_rate": 1.9994375823958504e-05,
1299
+ "loss": 0.9201,
1300
+ "step": 430
1301
+ },
1302
+ {
1303
+ "epoch": 0.6912,
1304
+ "learning_rate": 1.9997500236016233e-05,
1305
+ "loss": 0.4045,
1306
+ "step": 432
1307
+ },
1308
+ {
1309
+ "epoch": 0.6944,
1310
+ "learning_rate": 1.9999375039475275e-05,
1311
+ "loss": 0.6011,
1312
+ "step": 434
1313
+ },
1314
+ {
1315
+ "epoch": 0.6976,
1316
+ "learning_rate": 2e-05,
1317
+ "loss": 0.0079,
1318
+ "step": 436
1319
+ },
1320
+ {
1321
+ "epoch": 0.7008,
1322
+ "learning_rate": 1.9999375039475278e-05,
1323
+ "loss": 0.2923,
1324
+ "step": 438
1325
+ },
1326
+ {
1327
+ "epoch": 0.704,
1328
+ "learning_rate": 1.9997500236016233e-05,
1329
+ "loss": 0.3311,
1330
+ "step": 440
1331
+ },
1332
+ {
1333
+ "epoch": 0.7072,
1334
+ "learning_rate": 1.9994375823958504e-05,
1335
+ "loss": 0.475,
1336
+ "step": 442
1337
+ },
1338
+ {
1339
+ "epoch": 0.7104,
1340
+ "learning_rate": 1.9990002193828923e-05,
1341
+ "loss": 0.5985,
1342
+ "step": 444
1343
+ },
1344
+ {
1345
+ "epoch": 0.7136,
1346
+ "learning_rate": 1.9984379892296735e-05,
1347
+ "loss": 0.2905,
1348
+ "step": 446
1349
+ },
1350
+ {
1351
+ "epoch": 0.7168,
1352
+ "learning_rate": 1.9977509622105236e-05,
1353
+ "loss": 0.41,
1354
+ "step": 448
1355
+ },
1356
+ {
1357
+ "epoch": 0.72,
1358
+ "learning_rate": 1.9969392241983957e-05,
1359
+ "loss": 0.0997,
1360
+ "step": 450
1361
+ },
1362
+ {
1363
+ "epoch": 0.7232,
1364
+ "learning_rate": 1.9960028766541336e-05,
1365
+ "loss": 0.4197,
1366
+ "step": 452
1367
+ },
1368
+ {
1369
+ "epoch": 0.7264,
1370
+ "learning_rate": 1.9949420366137873e-05,
1371
+ "loss": 0.531,
1372
+ "step": 454
1373
+ },
1374
+ {
1375
+ "epoch": 0.7296,
1376
+ "learning_rate": 1.993756836673986e-05,
1377
+ "loss": 0.1413,
1378
+ "step": 456
1379
+ },
1380
+ {
1381
+ "epoch": 0.7328,
1382
+ "learning_rate": 1.9924474249753656e-05,
1383
+ "loss": 0.3965,
1384
+ "step": 458
1385
+ },
1386
+ {
1387
+ "epoch": 0.736,
1388
+ "learning_rate": 1.9910139651840497e-05,
1389
+ "loss": 0.6252,
1390
+ "step": 460
1391
+ },
1392
+ {
1393
+ "epoch": 0.7392,
1394
+ "learning_rate": 1.9894566364711965e-05,
1395
+ "loss": 0.0171,
1396
+ "step": 462
1397
+ },
1398
+ {
1399
+ "epoch": 0.7424,
1400
+ "learning_rate": 1.987775633490599e-05,
1401
+ "loss": 0.0143,
1402
+ "step": 464
1403
+ },
1404
+ {
1405
+ "epoch": 0.7456,
1406
+ "learning_rate": 1.9859711663543573e-05,
1407
+ "loss": 0.0024,
1408
+ "step": 466
1409
+ },
1410
+ {
1411
+ "epoch": 0.7488,
1412
+ "learning_rate": 1.9840434606066186e-05,
1413
+ "loss": 0.0078,
1414
+ "step": 468
1415
+ },
1416
+ {
1417
+ "epoch": 0.752,
1418
+ "learning_rate": 1.9819927571953804e-05,
1419
+ "loss": 0.3243,
1420
+ "step": 470
1421
+ },
1422
+ {
1423
+ "epoch": 0.7552,
1424
+ "learning_rate": 1.9798193124423804e-05,
1425
+ "loss": 0.172,
1426
+ "step": 472
1427
+ },
1428
+ {
1429
+ "epoch": 0.7584,
1430
+ "learning_rate": 1.9775233980110524e-05,
1431
+ "loss": 0.2799,
1432
+ "step": 474
1433
+ },
1434
+ {
1435
+ "epoch": 0.7616,
1436
+ "learning_rate": 1.9751053008725736e-05,
1437
+ "loss": 0.5695,
1438
+ "step": 476
1439
+ },
1440
+ {
1441
+ "epoch": 0.7648,
1442
+ "learning_rate": 1.9725653232699962e-05,
1443
+ "loss": 0.0014,
1444
+ "step": 478
1445
+ },
1446
+ {
1447
+ "epoch": 0.768,
1448
+ "learning_rate": 1.969903782680467e-05,
1449
+ "loss": 0.339,
1450
+ "step": 480
1451
+ },
1452
+ {
1453
+ "epoch": 0.7712,
1454
+ "learning_rate": 1.9671210117755462e-05,
1455
+ "loss": 0.1612,
1456
+ "step": 482
1457
+ },
1458
+ {
1459
+ "epoch": 0.7744,
1460
+ "learning_rate": 1.9642173583796265e-05,
1461
+ "loss": 0.5288,
1462
+ "step": 484
1463
+ },
1464
+ {
1465
+ "epoch": 0.7776,
1466
+ "learning_rate": 1.961193185426459e-05,
1467
+ "loss": 0.0344,
1468
+ "step": 486
1469
+ },
1470
+ {
1471
+ "epoch": 0.7808,
1472
+ "learning_rate": 1.958048870913786e-05,
1473
+ "loss": 0.2335,
1474
+ "step": 488
1475
+ },
1476
+ {
1477
+ "epoch": 0.784,
1478
+ "learning_rate": 1.9547848078560982e-05,
1479
+ "loss": 0.1126,
1480
+ "step": 490
1481
+ },
1482
+ {
1483
+ "epoch": 0.7872,
1484
+ "learning_rate": 1.9514014042355054e-05,
1485
+ "loss": 0.3188,
1486
+ "step": 492
1487
+ },
1488
+ {
1489
+ "epoch": 0.7904,
1490
+ "learning_rate": 1.947899082950751e-05,
1491
+ "loss": 0.1649,
1492
+ "step": 494
1493
+ },
1494
+ {
1495
+ "epoch": 0.7936,
1496
+ "learning_rate": 1.9442782817643425e-05,
1497
+ "loss": 0.113,
1498
+ "step": 496
1499
+ },
1500
+ {
1501
+ "epoch": 0.7968,
1502
+ "learning_rate": 1.9405394532478422e-05,
1503
+ "loss": 0.0208,
1504
+ "step": 498
1505
+ },
1506
+ {
1507
+ "epoch": 0.8,
1508
+ "learning_rate": 1.9366830647252977e-05,
1509
+ "loss": 0.9054,
1510
+ "step": 500
1511
+ },
1512
+ {
1513
+ "epoch": 0.8032,
1514
+ "learning_rate": 1.9327095982148255e-05,
1515
+ "loss": 0.8002,
1516
+ "step": 502
1517
+ },
1518
+ {
1519
+ "epoch": 0.8064,
1520
+ "learning_rate": 1.928619550368371e-05,
1521
+ "loss": 0.5833,
1522
+ "step": 504
1523
+ },
1524
+ {
1525
+ "epoch": 0.8096,
1526
+ "learning_rate": 1.9244134324096216e-05,
1527
+ "loss": 0.3013,
1528
+ "step": 506
1529
+ },
1530
+ {
1531
+ "epoch": 0.8128,
1532
+ "learning_rate": 1.9200917700701176e-05,
1533
+ "loss": 0.1436,
1534
+ "step": 508
1535
+ },
1536
+ {
1537
+ "epoch": 0.816,
1538
+ "learning_rate": 1.9156551035235298e-05,
1539
+ "loss": 0.0074,
1540
+ "step": 510
1541
+ },
1542
+ {
1543
+ "epoch": 0.8192,
1544
+ "learning_rate": 1.9111039873181475e-05,
1545
+ "loss": 0.3746,
1546
+ "step": 512
1547
+ },
1548
+ {
1549
+ "epoch": 0.8224,
1550
+ "learning_rate": 1.9064389903075683e-05,
1551
+ "loss": 0.333,
1552
+ "step": 514
1553
+ },
1554
+ {
1555
+ "epoch": 0.8256,
1556
+ "learning_rate": 1.9016606955795843e-05,
1557
+ "loss": 0.3691,
1558
+ "step": 516
1559
+ },
1560
+ {
1561
+ "epoch": 0.8288,
1562
+ "learning_rate": 1.8967697003833156e-05,
1563
+ "loss": 0.0542,
1564
+ "step": 518
1565
+ },
1566
+ {
1567
+ "epoch": 0.832,
1568
+ "learning_rate": 1.891766616054545e-05,
1569
+ "loss": 0.1936,
1570
+ "step": 520
1571
+ },
1572
+ {
1573
+ "epoch": 0.8352,
1574
+ "learning_rate": 1.8866520679393124e-05,
1575
+ "loss": 0.409,
1576
+ "step": 522
1577
+ },
1578
+ {
1579
+ "epoch": 0.8384,
1580
+ "learning_rate": 1.881426695315756e-05,
1581
+ "loss": 0.4607,
1582
+ "step": 524
1583
+ },
1584
+ {
1585
+ "epoch": 0.8416,
1586
+ "learning_rate": 1.8760911513141974e-05,
1587
+ "loss": 0.2343,
1588
+ "step": 526
1589
+ },
1590
+ {
1591
+ "epoch": 0.8448,
1592
+ "learning_rate": 1.8706461028355107e-05,
1593
+ "loss": 0.0826,
1594
+ "step": 528
1595
+ },
1596
+ {
1597
+ "epoch": 0.848,
1598
+ "learning_rate": 1.86509223046777e-05,
1599
+ "loss": 0.2732,
1600
+ "step": 530
1601
+ },
1602
+ {
1603
+ "epoch": 0.8512,
1604
+ "learning_rate": 1.8594302284011697e-05,
1605
+ "loss": 0.1587,
1606
+ "step": 532
1607
+ },
1608
+ {
1609
+ "epoch": 0.8544,
1610
+ "learning_rate": 1.8536608043412702e-05,
1611
+ "loss": 0.1673,
1612
+ "step": 534
1613
+ },
1614
+ {
1615
+ "epoch": 0.8576,
1616
+ "learning_rate": 1.847784679420527e-05,
1617
+ "loss": 0.418,
1618
+ "step": 536
1619
+ },
1620
+ {
1621
+ "epoch": 0.8608,
1622
+ "learning_rate": 1.841802588108161e-05,
1623
+ "loss": 0.0498,
1624
+ "step": 538
1625
+ },
1626
+ {
1627
+ "epoch": 0.864,
1628
+ "learning_rate": 1.8357152781183613e-05,
1629
+ "loss": 0.1067,
1630
+ "step": 540
1631
+ },
1632
+ {
1633
+ "epoch": 0.8672,
1634
+ "learning_rate": 1.8295235103168128e-05,
1635
+ "loss": 0.4733,
1636
+ "step": 542
1637
+ },
1638
+ {
1639
+ "epoch": 0.8704,
1640
+ "learning_rate": 1.8232280586256104e-05,
1641
+ "loss": 0.3421,
1642
+ "step": 544
1643
+ },
1644
+ {
1645
+ "epoch": 0.8736,
1646
+ "learning_rate": 1.8168297099265108e-05,
1647
+ "loss": 0.0142,
1648
+ "step": 546
1649
+ },
1650
+ {
1651
+ "epoch": 0.8768,
1652
+ "learning_rate": 1.8103292639625835e-05,
1653
+ "loss": 0.4063,
1654
+ "step": 548
1655
+ },
1656
+ {
1657
+ "epoch": 0.88,
1658
+ "learning_rate": 1.8037275332382575e-05,
1659
+ "loss": 0.1884,
1660
+ "step": 550
1661
+ },
1662
+ {
1663
+ "epoch": 0.8832,
1664
+ "learning_rate": 1.7970253429177494e-05,
1665
+ "loss": 0.1479,
1666
+ "step": 552
1667
+ },
1668
+ {
1669
+ "epoch": 0.8864,
1670
+ "learning_rate": 1.7902235307219336e-05,
1671
+ "loss": 0.8131,
1672
+ "step": 554
1673
+ },
1674
+ {
1675
+ "epoch": 0.8896,
1676
+ "learning_rate": 1.783322946823638e-05,
1677
+ "loss": 1.4739,
1678
+ "step": 556
1679
+ },
1680
+ {
1681
+ "epoch": 0.8928,
1682
+ "learning_rate": 1.776324453741365e-05,
1683
+ "loss": 0.0167,
1684
+ "step": 558
1685
+ },
1686
+ {
1687
+ "epoch": 0.896,
1688
+ "learning_rate": 1.7692289262315008e-05,
1689
+ "loss": 0.1035,
1690
+ "step": 560
1691
+ },
1692
+ {
1693
+ "epoch": 0.8992,
1694
+ "learning_rate": 1.762037251178961e-05,
1695
+ "loss": 0.0091,
1696
+ "step": 562
1697
+ },
1698
+ {
1699
+ "epoch": 0.9024,
1700
+ "learning_rate": 1.7547503274863502e-05,
1701
+ "loss": 0.5114,
1702
+ "step": 564
1703
+ },
1704
+ {
1705
+ "epoch": 0.9056,
1706
+ "learning_rate": 1.7473690659616e-05,
1707
+ "loss": 0.1832,
1708
+ "step": 566
1709
+ },
1710
+ {
1711
+ "epoch": 0.9088,
1712
+ "learning_rate": 1.739894389204122e-05,
1713
+ "loss": 0.1871,
1714
+ "step": 568
1715
+ },
1716
+ {
1717
+ "epoch": 0.912,
1718
+ "learning_rate": 1.732327231489503e-05,
1719
+ "loss": 0.4769,
1720
+ "step": 570
1721
+ },
1722
+ {
1723
+ "epoch": 0.9152,
1724
+ "learning_rate": 1.7246685386527105e-05,
1725
+ "loss": 0.0114,
1726
+ "step": 572
1727
+ },
1728
+ {
1729
+ "epoch": 0.9184,
1730
+ "learning_rate": 1.716919267969884e-05,
1731
+ "loss": 0.1715,
1732
+ "step": 574
1733
+ },
1734
+ {
1735
+ "epoch": 0.9216,
1736
+ "learning_rate": 1.7090803880386784e-05,
1737
+ "loss": 0.0107,
1738
+ "step": 576
1739
+ },
1740
+ {
1741
+ "epoch": 0.9248,
1742
+ "learning_rate": 1.701152878657197e-05,
1743
+ "loss": 0.3016,
1744
+ "step": 578
1745
+ },
1746
+ {
1747
+ "epoch": 0.928,
1748
+ "learning_rate": 1.6931377307015236e-05,
1749
+ "loss": 0.0272,
1750
+ "step": 580
1751
+ },
1752
+ {
1753
+ "epoch": 0.9312,
1754
+ "learning_rate": 1.6850359460018744e-05,
1755
+ "loss": 0.3739,
1756
+ "step": 582
1757
+ },
1758
+ {
1759
+ "epoch": 0.9344,
1760
+ "learning_rate": 1.67684853721737e-05,
1761
+ "loss": 0.9064,
1762
+ "step": 584
1763
+ },
1764
+ {
1765
+ "epoch": 0.9376,
1766
+ "learning_rate": 1.6685765277094702e-05,
1767
+ "loss": 0.2249,
1768
+ "step": 586
1769
+ },
1770
+ {
1771
+ "epoch": 0.9408,
1772
+ "learning_rate": 1.6602209514140562e-05,
1773
+ "loss": 0.6704,
1774
+ "step": 588
1775
+ },
1776
+ {
1777
+ "epoch": 0.944,
1778
+ "learning_rate": 1.651782852712194e-05,
1779
+ "loss": 0.1726,
1780
+ "step": 590
1781
+ },
1782
+ {
1783
+ "epoch": 0.9472,
1784
+ "learning_rate": 1.6432632862996062e-05,
1785
+ "loss": 0.2114,
1786
+ "step": 592
1787
+ },
1788
+ {
1789
+ "epoch": 0.9504,
1790
+ "learning_rate": 1.6346633170548275e-05,
1791
+ "loss": 0.0606,
1792
+ "step": 594
1793
+ },
1794
+ {
1795
+ "epoch": 0.9536,
1796
+ "learning_rate": 1.625984019906122e-05,
1797
+ "loss": 0.184,
1798
+ "step": 596
1799
+ },
1800
+ {
1801
+ "epoch": 0.9568,
1802
+ "learning_rate": 1.6172264796971063e-05,
1803
+ "loss": 0.516,
1804
+ "step": 598
1805
+ },
1806
+ {
1807
+ "epoch": 0.96,
1808
+ "learning_rate": 1.6083917910511623e-05,
1809
+ "loss": 0.0286,
1810
+ "step": 600
1811
+ },
1812
+ {
1813
+ "epoch": 0.9632,
1814
+ "learning_rate": 1.5994810582346266e-05,
1815
+ "loss": 0.2475,
1816
+ "step": 602
1817
+ },
1818
+ {
1819
+ "epoch": 0.9664,
1820
+ "learning_rate": 1.5904953950187448e-05,
1821
+ "loss": 0.2101,
1822
+ "step": 604
1823
+ },
1824
+ {
1825
+ "epoch": 0.9696,
1826
+ "learning_rate": 1.581435924540482e-05,
1827
+ "loss": 0.2451,
1828
+ "step": 606
1829
+ },
1830
+ {
1831
+ "epoch": 0.9728,
1832
+ "learning_rate": 1.5723037791621203e-05,
1833
+ "loss": 0.3071,
1834
+ "step": 608
1835
+ },
1836
+ {
1837
+ "epoch": 0.976,
1838
+ "learning_rate": 1.5631001003297302e-05,
1839
+ "loss": 0.223,
1840
+ "step": 610
1841
+ },
1842
+ {
1843
+ "epoch": 0.9792,
1844
+ "learning_rate": 1.5538260384305083e-05,
1845
+ "loss": 0.1875,
1846
+ "step": 612
1847
+ },
1848
+ {
1849
+ "epoch": 0.9824,
1850
+ "learning_rate": 1.544482752648966e-05,
1851
+ "loss": 0.1216,
1852
+ "step": 614
1853
+ },
1854
+ {
1855
+ "epoch": 0.9856,
1856
+ "learning_rate": 1.5350714108220677e-05,
1857
+ "loss": 0.0796,
1858
+ "step": 616
1859
+ },
1860
+ {
1861
+ "epoch": 0.9888,
1862
+ "learning_rate": 1.5255931892932344e-05,
1863
+ "loss": 0.1365,
1864
+ "step": 618
1865
+ },
1866
+ {
1867
+ "epoch": 0.992,
1868
+ "learning_rate": 1.5160492727653238e-05,
1869
+ "loss": 0.5363,
1870
+ "step": 620
1871
+ },
1872
+ {
1873
+ "epoch": 0.9952,
1874
+ "learning_rate": 1.5064408541525578e-05,
1875
+ "loss": 0.3338,
1876
+ "step": 622
1877
+ },
1878
+ {
1879
+ "epoch": 0.9984,
1880
+ "learning_rate": 1.4967691344314012e-05,
1881
+ "loss": 0.3644,
1882
+ "step": 624
1883
+ },
1884
+ {
1885
+ "epoch": 1.0,
1886
+ "step": 625,
1887
+ "total_flos": 3523135233327104.0,
1888
+ "train_loss": 0.28257334279920904,
1889
+ "train_runtime": 3413.6411,
1890
+ "train_samples_per_second": 2.929,
1891
+ "train_steps_per_second": 0.183
1892
+ }
1893
+ ],
1894
+ "logging_steps": 2,
1895
+ "max_steps": 625,
1896
+ "num_input_tokens_seen": 0,
1897
+ "num_train_epochs": 1,
1898
+ "save_steps": 500,
1899
+ "stateful_callbacks": {},
1900
+ "total_flos": 3523135233327104.0,
1901
+ "train_batch_size": 1,
1902
+ "trial_name": null,
1903
+ "trial_params": null
1904
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8edd93d657e7e9d35486e01f5d3766e956ff7856de7b1bac69a0a2a33707ab8b
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c34a96a6e506cef0d9f29efce3eaec9754d717ad62ad5333cd8ea1994f502701
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8298b28044f650bdff76a13efded749a58dc9a6552887937bbde7a04397441e8
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_coincide_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9347c433b5ebd3b5020b1f157bc71e4067e52fba6ab26b446af171ab1d6060a1
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,1904 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 625,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0032,
14
+ "learning_rate": 2.4524967251364995e-06,
15
+ "loss": 0.6146,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.0064,
20
+ "learning_rate": 2.5263093403840022e-06,
21
+ "loss": 0.2054,
22
+ "step": 4
23
+ },
24
+ {
25
+ "epoch": 0.0096,
26
+ "learning_rate": 2.6010561079587694e-06,
27
+ "loss": 0.4025,
28
+ "step": 6
29
+ },
30
+ {
31
+ "epoch": 0.0128,
32
+ "learning_rate": 2.6767276851049716e-06,
33
+ "loss": 0.5981,
34
+ "step": 8
35
+ },
36
+ {
37
+ "epoch": 0.016,
38
+ "learning_rate": 2.7533146134728993e-06,
39
+ "loss": 0.6929,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.0192,
44
+ "learning_rate": 2.8308073203011634e-06,
45
+ "loss": 0.7516,
46
+ "step": 12
47
+ },
48
+ {
49
+ "epoch": 0.0224,
50
+ "learning_rate": 2.909196119613218e-06,
51
+ "loss": 0.4648,
52
+ "step": 14
53
+ },
54
+ {
55
+ "epoch": 0.0256,
56
+ "learning_rate": 2.988471213428035e-06,
57
+ "loss": 0.3468,
58
+ "step": 16
59
+ },
60
+ {
61
+ "epoch": 0.0288,
62
+ "learning_rate": 3.068622692984767e-06,
63
+ "loss": 0.379,
64
+ "step": 18
65
+ },
66
+ {
67
+ "epoch": 0.032,
68
+ "learning_rate": 3.1496405399812602e-06,
69
+ "loss": 0.3141,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.0352,
74
+ "learning_rate": 3.231514627826302e-06,
75
+ "loss": 0.3576,
76
+ "step": 22
77
+ },
78
+ {
79
+ "epoch": 0.0384,
80
+ "learning_rate": 3.314234722905302e-06,
81
+ "loss": 0.4298,
82
+ "step": 24
83
+ },
84
+ {
85
+ "epoch": 0.0416,
86
+ "learning_rate": 3.3977904858594534e-06,
87
+ "loss": 0.2964,
88
+ "step": 26
89
+ },
90
+ {
91
+ "epoch": 0.0448,
92
+ "learning_rate": 3.4821714728780654e-06,
93
+ "loss": 0.0854,
94
+ "step": 28
95
+ },
96
+ {
97
+ "epoch": 0.048,
98
+ "learning_rate": 3.567367137003953e-06,
99
+ "loss": 0.9886,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.0512,
104
+ "learning_rate": 3.653366829451711e-06,
105
+ "loss": 0.2087,
106
+ "step": 32
107
+ },
108
+ {
109
+ "epoch": 0.0544,
110
+ "learning_rate": 3.740159800938784e-06,
111
+ "loss": 0.1179,
112
+ "step": 34
113
+ },
114
+ {
115
+ "epoch": 0.0576,
116
+ "learning_rate": 3.827735203028956e-06,
117
+ "loss": 0.5781,
118
+ "step": 36
119
+ },
120
+ {
121
+ "epoch": 0.0608,
122
+ "learning_rate": 3.916082089488379e-06,
123
+ "loss": 0.3181,
124
+ "step": 38
125
+ },
126
+ {
127
+ "epoch": 0.064,
128
+ "learning_rate": 4.005189417653737e-06,
129
+ "loss": 0.7001,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.0672,
134
+ "learning_rate": 4.095046049812541e-06,
135
+ "loss": 0.534,
136
+ "step": 42
137
+ },
138
+ {
139
+ "epoch": 0.0704,
140
+ "learning_rate": 4.1856407545951825e-06,
141
+ "loss": 1.0717,
142
+ "step": 44
143
+ },
144
+ {
145
+ "epoch": 0.0736,
146
+ "learning_rate": 4.276962208378814e-06,
147
+ "loss": 0.5274,
148
+ "step": 46
149
+ },
150
+ {
151
+ "epoch": 0.0768,
152
+ "learning_rate": 4.368998996702686e-06,
153
+ "loss": 0.5314,
154
+ "step": 48
155
+ },
156
+ {
157
+ "epoch": 0.08,
158
+ "learning_rate": 4.461739615694921e-06,
159
+ "loss": 0.4112,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.0832,
164
+ "learning_rate": 4.555172473510324e-06,
165
+ "loss": 0.8289,
166
+ "step": 52
167
+ },
168
+ {
169
+ "epoch": 0.0864,
170
+ "learning_rate": 4.649285891779326e-06,
171
+ "loss": 0.516,
172
+ "step": 54
173
+ },
174
+ {
175
+ "epoch": 0.0896,
176
+ "learning_rate": 4.744068107067673e-06,
177
+ "loss": 0.2971,
178
+ "step": 56
179
+ },
180
+ {
181
+ "epoch": 0.0928,
182
+ "learning_rate": 4.839507272346751e-06,
183
+ "loss": 0.2742,
184
+ "step": 58
185
+ },
186
+ {
187
+ "epoch": 0.096,
188
+ "learning_rate": 4.935591458474425e-06,
189
+ "loss": 0.491,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.0992,
194
+ "learning_rate": 5.032308655686007e-06,
195
+ "loss": 0.237,
196
+ "step": 62
197
+ },
198
+ {
199
+ "epoch": 0.1024,
200
+ "learning_rate": 5.129646775095432e-06,
201
+ "loss": 0.7654,
202
+ "step": 64
203
+ },
204
+ {
205
+ "epoch": 0.1056,
206
+ "learning_rate": 5.227593650206246e-06,
207
+ "loss": 0.1849,
208
+ "step": 66
209
+ },
210
+ {
211
+ "epoch": 0.1088,
212
+ "learning_rate": 5.3261370384323904e-06,
213
+ "loss": 0.1932,
214
+ "step": 68
215
+ },
216
+ {
217
+ "epoch": 0.112,
218
+ "learning_rate": 5.425264622628326e-06,
219
+ "loss": 0.8049,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.1152,
224
+ "learning_rate": 5.524964012628644e-06,
225
+ "loss": 0.3282,
226
+ "step": 72
227
+ },
228
+ {
229
+ "epoch": 0.1184,
230
+ "learning_rate": 5.62522274679673e-06,
231
+ "loss": 0.243,
232
+ "step": 74
233
+ },
234
+ {
235
+ "epoch": 0.1216,
236
+ "learning_rate": 5.726028293582342e-06,
237
+ "loss": 0.7267,
238
+ "step": 76
239
+ },
240
+ {
241
+ "epoch": 0.1248,
242
+ "learning_rate": 5.827368053088032e-06,
243
+ "loss": 0.3372,
244
+ "step": 78
245
+ },
246
+ {
247
+ "epoch": 0.128,
248
+ "learning_rate": 5.929229358643925e-06,
249
+ "loss": 0.5171,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.1312,
254
+ "learning_rate": 6.03159947839103e-06,
255
+ "loss": 0.4375,
256
+ "step": 82
257
+ },
258
+ {
259
+ "epoch": 0.1344,
260
+ "learning_rate": 6.13446561687258e-06,
261
+ "loss": 0.4506,
262
+ "step": 84
263
+ },
264
+ {
265
+ "epoch": 0.1376,
266
+ "learning_rate": 6.237814916633431e-06,
267
+ "loss": 0.651,
268
+ "step": 86
269
+ },
270
+ {
271
+ "epoch": 0.1408,
272
+ "learning_rate": 6.341634459827044e-06,
273
+ "loss": 0.8014,
274
+ "step": 88
275
+ },
276
+ {
277
+ "epoch": 0.144,
278
+ "learning_rate": 6.445911269830183e-06,
279
+ "loss": 0.2773,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.1472,
284
+ "learning_rate": 6.5506323128648654e-06,
285
+ "loss": 0.5722,
286
+ "step": 92
287
+ },
288
+ {
289
+ "epoch": 0.1504,
290
+ "learning_rate": 6.655784499627476e-06,
291
+ "loss": 0.5714,
292
+ "step": 94
293
+ },
294
+ {
295
+ "epoch": 0.1536,
296
+ "learning_rate": 6.761354686924883e-06,
297
+ "loss": 1.4643,
298
+ "step": 96
299
+ },
300
+ {
301
+ "epoch": 0.1568,
302
+ "learning_rate": 6.867329679317144e-06,
303
+ "loss": 0.6916,
304
+ "step": 98
305
+ },
306
+ {
307
+ "epoch": 0.16,
308
+ "learning_rate": 6.973696230766884e-06,
309
+ "loss": 0.5161,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.1632,
314
+ "learning_rate": 7.080441046294945e-06,
315
+ "loss": 0.4103,
316
+ "step": 102
317
+ },
318
+ {
319
+ "epoch": 0.1664,
320
+ "learning_rate": 7.18755078364214e-06,
321
+ "loss": 0.1365,
322
+ "step": 104
323
+ },
324
+ {
325
+ "epoch": 0.1696,
326
+ "learning_rate": 7.2950120549369204e-06,
327
+ "loss": 0.6711,
328
+ "step": 106
329
+ },
330
+ {
331
+ "epoch": 0.1728,
332
+ "learning_rate": 7.402811428368824e-06,
333
+ "loss": 0.8708,
334
+ "step": 108
335
+ },
336
+ {
337
+ "epoch": 0.176,
338
+ "learning_rate": 7.510935429867233e-06,
339
+ "loss": 0.4039,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.1792,
344
+ "learning_rate": 7.619370544785608e-06,
345
+ "loss": 0.6915,
346
+ "step": 112
347
+ },
348
+ {
349
+ "epoch": 0.1824,
350
+ "learning_rate": 7.728103219590684e-06,
351
+ "loss": 0.7916,
352
+ "step": 114
353
+ },
354
+ {
355
+ "epoch": 0.1856,
356
+ "learning_rate": 7.83711986355656e-06,
357
+ "loss": 0.6757,
358
+ "step": 116
359
+ },
360
+ {
361
+ "epoch": 0.1888,
362
+ "learning_rate": 7.946406850463435e-06,
363
+ "loss": 0.4944,
364
+ "step": 118
365
+ },
366
+ {
367
+ "epoch": 0.192,
368
+ "learning_rate": 8.055950520300756e-06,
369
+ "loss": 0.3472,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.1952,
374
+ "learning_rate": 8.165737180974676e-06,
375
+ "loss": 0.6433,
376
+ "step": 122
377
+ },
378
+ {
379
+ "epoch": 0.1984,
380
+ "learning_rate": 8.275753110019367e-06,
381
+ "loss": 0.9815,
382
+ "step": 124
383
+ },
384
+ {
385
+ "epoch": 0.2016,
386
+ "learning_rate": 8.385984556312285e-06,
387
+ "loss": 0.6509,
388
+ "step": 126
389
+ },
390
+ {
391
+ "epoch": 0.2048,
392
+ "learning_rate": 8.496417741792922e-06,
393
+ "loss": 0.5941,
394
+ "step": 128
395
+ },
396
+ {
397
+ "epoch": 0.208,
398
+ "learning_rate": 8.607038863184952e-06,
399
+ "loss": 0.5394,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.2112,
404
+ "learning_rate": 8.717834093721598e-06,
405
+ "loss": 0.3444,
406
+ "step": 132
407
+ },
408
+ {
409
+ "epoch": 0.2144,
410
+ "learning_rate": 8.828789584873757e-06,
411
+ "loss": 0.4633,
412
+ "step": 134
413
+ },
414
+ {
415
+ "epoch": 0.2176,
416
+ "learning_rate": 8.939891468081036e-06,
417
+ "loss": 0.6362,
418
+ "step": 136
419
+ },
420
+ {
421
+ "epoch": 0.2208,
422
+ "learning_rate": 9.051125856485175e-06,
423
+ "loss": 0.2793,
424
+ "step": 138
425
+ },
426
+ {
427
+ "epoch": 0.224,
428
+ "learning_rate": 9.162478846665854e-06,
429
+ "loss": 1.0965,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.2272,
434
+ "learning_rate": 9.273936520378426e-06,
435
+ "loss": 0.5318,
436
+ "step": 142
437
+ },
438
+ {
439
+ "epoch": 0.2304,
440
+ "learning_rate": 9.38548494629364e-06,
441
+ "loss": 0.6123,
442
+ "step": 144
443
+ },
444
+ {
445
+ "epoch": 0.2336,
446
+ "learning_rate": 9.497110181738935e-06,
447
+ "loss": 0.2697,
448
+ "step": 146
449
+ },
450
+ {
451
+ "epoch": 0.2368,
452
+ "learning_rate": 9.608798274441153e-06,
453
+ "loss": 0.4086,
454
+ "step": 148
455
+ },
456
+ {
457
+ "epoch": 0.24,
458
+ "learning_rate": 9.720535264270526e-06,
459
+ "loss": 0.4048,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.2432,
464
+ "learning_rate": 9.832307184985473e-06,
465
+ "loss": 0.1204,
466
+ "step": 152
467
+ },
468
+ {
469
+ "epoch": 0.2464,
470
+ "learning_rate": 9.944100065978354e-06,
471
+ "loss": 0.4174,
472
+ "step": 154
473
+ },
474
+ {
475
+ "epoch": 0.2496,
476
+ "learning_rate": 1.0055899934021637e-05,
477
+ "loss": 0.374,
478
+ "step": 156
479
+ },
480
+ {
481
+ "epoch": 0.2528,
482
+ "learning_rate": 1.016769281501452e-05,
483
+ "loss": 0.4033,
484
+ "step": 158
485
+ },
486
+ {
487
+ "epoch": 0.256,
488
+ "learning_rate": 1.0279464735729467e-05,
489
+ "loss": 0.4257,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.2592,
494
+ "learning_rate": 1.039120172555884e-05,
495
+ "loss": 0.3176,
496
+ "step": 162
497
+ },
498
+ {
499
+ "epoch": 0.2624,
500
+ "learning_rate": 1.0502889818261058e-05,
501
+ "loss": 0.489,
502
+ "step": 164
503
+ },
504
+ {
505
+ "epoch": 0.2656,
506
+ "learning_rate": 1.0614515053706354e-05,
507
+ "loss": 0.6562,
508
+ "step": 166
509
+ },
510
+ {
511
+ "epoch": 0.2688,
512
+ "learning_rate": 1.0726063479621567e-05,
513
+ "loss": 0.6438,
514
+ "step": 168
515
+ },
516
+ {
517
+ "epoch": 0.272,
518
+ "learning_rate": 1.083752115333414e-05,
519
+ "loss": 0.6896,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.2752,
524
+ "learning_rate": 1.0948874143514818e-05,
525
+ "loss": 0.3064,
526
+ "step": 172
527
+ },
528
+ {
529
+ "epoch": 0.2784,
530
+ "learning_rate": 1.1060108531918955e-05,
531
+ "loss": 0.3117,
532
+ "step": 174
533
+ },
534
+ {
535
+ "epoch": 0.2816,
536
+ "learning_rate": 1.1171210415126238e-05,
537
+ "loss": 0.4046,
538
+ "step": 176
539
+ },
540
+ {
541
+ "epoch": 0.2848,
542
+ "learning_rate": 1.1282165906278395e-05,
543
+ "loss": 0.479,
544
+ "step": 178
545
+ },
546
+ {
547
+ "epoch": 0.288,
548
+ "learning_rate": 1.1392961136815041e-05,
549
+ "loss": 3.3903,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 0.2912,
554
+ "learning_rate": 1.150358225820707e-05,
555
+ "loss": 0.475,
556
+ "step": 182
557
+ },
558
+ {
559
+ "epoch": 0.2944,
560
+ "learning_rate": 1.1614015443687708e-05,
561
+ "loss": 0.6139,
562
+ "step": 184
563
+ },
564
+ {
565
+ "epoch": 0.2976,
566
+ "learning_rate": 1.1724246889980626e-05,
567
+ "loss": 0.3805,
568
+ "step": 186
569
+ },
570
+ {
571
+ "epoch": 0.3008,
572
+ "learning_rate": 1.1834262819025317e-05,
573
+ "loss": 0.8042,
574
+ "step": 188
575
+ },
576
+ {
577
+ "epoch": 0.304,
578
+ "learning_rate": 1.1944049479699241e-05,
579
+ "loss": 0.7214,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 0.3072,
584
+ "learning_rate": 1.2053593149536557e-05,
585
+ "loss": 0.1696,
586
+ "step": 192
587
+ },
588
+ {
589
+ "epoch": 0.3104,
590
+ "learning_rate": 1.2162880136443434e-05,
591
+ "loss": 0.2186,
592
+ "step": 194
593
+ },
594
+ {
595
+ "epoch": 0.3136,
596
+ "learning_rate": 1.2271896780409309e-05,
597
+ "loss": 0.7518,
598
+ "step": 196
599
+ },
600
+ {
601
+ "epoch": 0.3168,
602
+ "learning_rate": 1.2380629455214385e-05,
603
+ "loss": 1.0384,
604
+ "step": 198
605
+ },
606
+ {
607
+ "epoch": 0.32,
608
+ "learning_rate": 1.2489064570132761e-05,
609
+ "loss": 0.2304,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 0.3232,
614
+ "learning_rate": 1.259718857163117e-05,
615
+ "loss": 0.7747,
616
+ "step": 202
617
+ },
618
+ {
619
+ "epoch": 0.3264,
620
+ "learning_rate": 1.2704987945063073e-05,
621
+ "loss": 0.5036,
622
+ "step": 204
623
+ },
624
+ {
625
+ "epoch": 0.3296,
626
+ "learning_rate": 1.2812449216357855e-05,
627
+ "loss": 0.2968,
628
+ "step": 206
629
+ },
630
+ {
631
+ "epoch": 0.3328,
632
+ "learning_rate": 1.2919558953705047e-05,
633
+ "loss": 0.4474,
634
+ "step": 208
635
+ },
636
+ {
637
+ "epoch": 0.336,
638
+ "learning_rate": 1.3026303769233109e-05,
639
+ "loss": 0.4951,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 0.3392,
644
+ "learning_rate": 1.313267032068285e-05,
645
+ "loss": 0.3272,
646
+ "step": 212
647
+ },
648
+ {
649
+ "epoch": 0.3424,
650
+ "learning_rate": 1.3238645313075109e-05,
651
+ "loss": 0.7141,
652
+ "step": 214
653
+ },
654
+ {
655
+ "epoch": 0.3456,
656
+ "learning_rate": 1.3344215500372517e-05,
657
+ "loss": 0.4329,
658
+ "step": 216
659
+ },
660
+ {
661
+ "epoch": 0.3488,
662
+ "learning_rate": 1.344936768713513e-05,
663
+ "loss": 0.3164,
664
+ "step": 218
665
+ },
666
+ {
667
+ "epoch": 0.352,
668
+ "learning_rate": 1.3554088730169812e-05,
669
+ "loss": 0.2702,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 0.3552,
674
+ "learning_rate": 1.3658365540172948e-05,
675
+ "loss": 0.4299,
676
+ "step": 222
677
+ },
678
+ {
679
+ "epoch": 0.3584,
680
+ "learning_rate": 1.3762185083366562e-05,
681
+ "loss": 0.4197,
682
+ "step": 224
683
+ },
684
+ {
685
+ "epoch": 0.3616,
686
+ "learning_rate": 1.3865534383127413e-05,
687
+ "loss": 0.4456,
688
+ "step": 226
689
+ },
690
+ {
691
+ "epoch": 0.3648,
692
+ "learning_rate": 1.3968400521608962e-05,
693
+ "loss": 0.3374,
694
+ "step": 228
695
+ },
696
+ {
697
+ "epoch": 0.368,
698
+ "learning_rate": 1.4070770641356069e-05,
699
+ "loss": 0.3931,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 0.3712,
704
+ "learning_rate": 1.4172631946911964e-05,
705
+ "loss": 0.4724,
706
+ "step": 232
707
+ },
708
+ {
709
+ "epoch": 0.3744,
710
+ "learning_rate": 1.4273971706417653e-05,
711
+ "loss": 0.5238,
712
+ "step": 234
713
+ },
714
+ {
715
+ "epoch": 0.3776,
716
+ "learning_rate": 1.4374777253203265e-05,
717
+ "loss": 0.4868,
718
+ "step": 236
719
+ },
720
+ {
721
+ "epoch": 0.3808,
722
+ "learning_rate": 1.4475035987371348e-05,
723
+ "loss": 0.4074,
724
+ "step": 238
725
+ },
726
+ {
727
+ "epoch": 0.384,
728
+ "learning_rate": 1.4574735377371669e-05,
729
+ "loss": 0.6099,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 0.3872,
734
+ "learning_rate": 1.4673862961567604e-05,
735
+ "loss": 0.2957,
736
+ "step": 242
737
+ },
738
+ {
739
+ "epoch": 0.3904,
740
+ "learning_rate": 1.4772406349793749e-05,
741
+ "loss": 1.0299,
742
+ "step": 244
743
+ },
744
+ {
745
+ "epoch": 0.3936,
746
+ "learning_rate": 1.4870353224904563e-05,
747
+ "loss": 0.6435,
748
+ "step": 246
749
+ },
750
+ {
751
+ "epoch": 0.3968,
752
+ "learning_rate": 1.4967691344313988e-05,
753
+ "loss": 0.4243,
754
+ "step": 248
755
+ },
756
+ {
757
+ "epoch": 0.4,
758
+ "learning_rate": 1.5064408541525568e-05,
759
+ "loss": 0.6385,
760
+ "step": 250
761
+ },
762
+ {
763
+ "epoch": 0.4032,
764
+ "learning_rate": 1.5160492727653245e-05,
765
+ "loss": 0.3106,
766
+ "step": 252
767
+ },
768
+ {
769
+ "epoch": 0.4064,
770
+ "learning_rate": 1.5255931892932322e-05,
771
+ "loss": 0.1259,
772
+ "step": 254
773
+ },
774
+ {
775
+ "epoch": 0.4096,
776
+ "learning_rate": 1.5350714108220667e-05,
777
+ "loss": 0.7569,
778
+ "step": 256
779
+ },
780
+ {
781
+ "epoch": 0.4128,
782
+ "learning_rate": 1.5444827526489668e-05,
783
+ "loss": 0.6572,
784
+ "step": 258
785
+ },
786
+ {
787
+ "epoch": 0.416,
788
+ "learning_rate": 1.5538260384305073e-05,
789
+ "loss": 0.2119,
790
+ "step": 260
791
+ },
792
+ {
793
+ "epoch": 0.4192,
794
+ "learning_rate": 1.563100100329731e-05,
795
+ "loss": 0.6382,
796
+ "step": 262
797
+ },
798
+ {
799
+ "epoch": 0.4224,
800
+ "learning_rate": 1.572303779162118e-05,
801
+ "loss": 0.5905,
802
+ "step": 264
803
+ },
804
+ {
805
+ "epoch": 0.4256,
806
+ "learning_rate": 1.581435924540481e-05,
807
+ "loss": 0.3332,
808
+ "step": 266
809
+ },
810
+ {
811
+ "epoch": 0.4288,
812
+ "learning_rate": 1.5904953950187455e-05,
813
+ "loss": 0.2212,
814
+ "step": 268
815
+ },
816
+ {
817
+ "epoch": 0.432,
818
+ "learning_rate": 1.599481058234626e-05,
819
+ "loss": 0.5301,
820
+ "step": 270
821
+ },
822
+ {
823
+ "epoch": 0.4352,
824
+ "learning_rate": 1.6083917910511616e-05,
825
+ "loss": 0.4882,
826
+ "step": 272
827
+ },
828
+ {
829
+ "epoch": 0.4384,
830
+ "learning_rate": 1.617226479697104e-05,
831
+ "loss": 0.3687,
832
+ "step": 274
833
+ },
834
+ {
835
+ "epoch": 0.4416,
836
+ "learning_rate": 1.6259840199061212e-05,
837
+ "loss": 0.7489,
838
+ "step": 276
839
+ },
840
+ {
841
+ "epoch": 0.4448,
842
+ "learning_rate": 1.6346633170548285e-05,
843
+ "loss": 0.3595,
844
+ "step": 278
845
+ },
846
+ {
847
+ "epoch": 0.448,
848
+ "learning_rate": 1.6432632862996042e-05,
849
+ "loss": 0.3554,
850
+ "step": 280
851
+ },
852
+ {
853
+ "epoch": 0.4512,
854
+ "learning_rate": 1.6517828527121928e-05,
855
+ "loss": 0.3562,
856
+ "step": 282
857
+ },
858
+ {
859
+ "epoch": 0.4544,
860
+ "learning_rate": 1.6602209514140542e-05,
861
+ "loss": 0.7977,
862
+ "step": 284
863
+ },
864
+ {
865
+ "epoch": 0.4576,
866
+ "learning_rate": 1.6685765277094695e-05,
867
+ "loss": 0.2954,
868
+ "step": 286
869
+ },
870
+ {
871
+ "epoch": 0.4608,
872
+ "learning_rate": 1.6768485372173696e-05,
873
+ "loss": 0.3499,
874
+ "step": 288
875
+ },
876
+ {
877
+ "epoch": 0.464,
878
+ "learning_rate": 1.6850359460018733e-05,
879
+ "loss": 0.2234,
880
+ "step": 290
881
+ },
882
+ {
883
+ "epoch": 0.4672,
884
+ "learning_rate": 1.6931377307015226e-05,
885
+ "loss": 0.34,
886
+ "step": 292
887
+ },
888
+ {
889
+ "epoch": 0.4704,
890
+ "learning_rate": 1.701152878657196e-05,
891
+ "loss": 0.395,
892
+ "step": 294
893
+ },
894
+ {
895
+ "epoch": 0.4736,
896
+ "learning_rate": 1.7090803880386778e-05,
897
+ "loss": 0.3482,
898
+ "step": 296
899
+ },
900
+ {
901
+ "epoch": 0.4768,
902
+ "learning_rate": 1.716919267969883e-05,
903
+ "loss": 0.1296,
904
+ "step": 298
905
+ },
906
+ {
907
+ "epoch": 0.48,
908
+ "learning_rate": 1.7246685386527095e-05,
909
+ "loss": 0.425,
910
+ "step": 300
911
+ },
912
+ {
913
+ "epoch": 0.4832,
914
+ "learning_rate": 1.7323272314895022e-05,
915
+ "loss": 0.4906,
916
+ "step": 302
917
+ },
918
+ {
919
+ "epoch": 0.4864,
920
+ "learning_rate": 1.7398943892041227e-05,
921
+ "loss": 0.377,
922
+ "step": 304
923
+ },
924
+ {
925
+ "epoch": 0.4896,
926
+ "learning_rate": 1.7473690659615992e-05,
927
+ "loss": 0.7725,
928
+ "step": 306
929
+ },
930
+ {
931
+ "epoch": 0.4928,
932
+ "learning_rate": 1.7547503274863495e-05,
933
+ "loss": 0.8413,
934
+ "step": 308
935
+ },
936
+ {
937
+ "epoch": 0.496,
938
+ "learning_rate": 1.7620372511789604e-05,
939
+ "loss": 0.2823,
940
+ "step": 310
941
+ },
942
+ {
943
+ "epoch": 0.4992,
944
+ "learning_rate": 1.7692289262315e-05,
945
+ "loss": 0.1789,
946
+ "step": 312
947
+ },
948
+ {
949
+ "epoch": 0.5024,
950
+ "learning_rate": 1.7763244537413657e-05,
951
+ "loss": 0.3746,
952
+ "step": 314
953
+ },
954
+ {
955
+ "epoch": 0.5056,
956
+ "learning_rate": 1.7833229468236364e-05,
957
+ "loss": 0.4935,
958
+ "step": 316
959
+ },
960
+ {
961
+ "epoch": 0.5088,
962
+ "learning_rate": 1.790223530721933e-05,
963
+ "loss": 0.7973,
964
+ "step": 318
965
+ },
966
+ {
967
+ "epoch": 0.512,
968
+ "learning_rate": 1.7970253429177477e-05,
969
+ "loss": 0.6278,
970
+ "step": 320
971
+ },
972
+ {
973
+ "epoch": 0.5152,
974
+ "learning_rate": 1.803727533238257e-05,
975
+ "loss": 0.969,
976
+ "step": 322
977
+ },
978
+ {
979
+ "epoch": 0.5184,
980
+ "learning_rate": 1.8103292639625842e-05,
981
+ "loss": 0.4981,
982
+ "step": 324
983
+ },
984
+ {
985
+ "epoch": 0.5216,
986
+ "learning_rate": 1.816829709926509e-05,
987
+ "loss": 1.3557,
988
+ "step": 326
989
+ },
990
+ {
991
+ "epoch": 0.5248,
992
+ "learning_rate": 1.8232280586256097e-05,
993
+ "loss": 0.267,
994
+ "step": 328
995
+ },
996
+ {
997
+ "epoch": 0.528,
998
+ "learning_rate": 1.829523510316813e-05,
999
+ "loss": 0.4215,
1000
+ "step": 330
1001
+ },
1002
+ {
1003
+ "epoch": 0.5312,
1004
+ "learning_rate": 1.8357152781183606e-05,
1005
+ "loss": 0.4732,
1006
+ "step": 332
1007
+ },
1008
+ {
1009
+ "epoch": 0.5344,
1010
+ "learning_rate": 1.8418025881081606e-05,
1011
+ "loss": 0.6734,
1012
+ "step": 334
1013
+ },
1014
+ {
1015
+ "epoch": 0.5376,
1016
+ "learning_rate": 1.8477846794205258e-05,
1017
+ "loss": 0.2478,
1018
+ "step": 336
1019
+ },
1020
+ {
1021
+ "epoch": 0.5408,
1022
+ "learning_rate": 1.8536608043412695e-05,
1023
+ "loss": 0.6252,
1024
+ "step": 338
1025
+ },
1026
+ {
1027
+ "epoch": 0.544,
1028
+ "learning_rate": 1.85943022840117e-05,
1029
+ "loss": 0.9087,
1030
+ "step": 340
1031
+ },
1032
+ {
1033
+ "epoch": 0.5472,
1034
+ "learning_rate": 1.865092230467769e-05,
1035
+ "loss": 0.6885,
1036
+ "step": 342
1037
+ },
1038
+ {
1039
+ "epoch": 0.5504,
1040
+ "learning_rate": 1.87064610283551e-05,
1041
+ "loss": 0.447,
1042
+ "step": 344
1043
+ },
1044
+ {
1045
+ "epoch": 0.5536,
1046
+ "learning_rate": 1.876091151314196e-05,
1047
+ "loss": 0.2639,
1048
+ "step": 346
1049
+ },
1050
+ {
1051
+ "epoch": 0.5568,
1052
+ "learning_rate": 1.8814266953157557e-05,
1053
+ "loss": 0.3608,
1054
+ "step": 348
1055
+ },
1056
+ {
1057
+ "epoch": 0.56,
1058
+ "learning_rate": 1.8866520679393127e-05,
1059
+ "loss": 0.1993,
1060
+ "step": 350
1061
+ },
1062
+ {
1063
+ "epoch": 0.5632,
1064
+ "learning_rate": 1.8917666160545436e-05,
1065
+ "loss": 0.432,
1066
+ "step": 352
1067
+ },
1068
+ {
1069
+ "epoch": 0.5664,
1070
+ "learning_rate": 1.896769700383315e-05,
1071
+ "loss": 0.2783,
1072
+ "step": 354
1073
+ },
1074
+ {
1075
+ "epoch": 0.5696,
1076
+ "learning_rate": 1.901660695579585e-05,
1077
+ "loss": 0.4735,
1078
+ "step": 356
1079
+ },
1080
+ {
1081
+ "epoch": 0.5728,
1082
+ "learning_rate": 1.9064389903075676e-05,
1083
+ "loss": 0.2125,
1084
+ "step": 358
1085
+ },
1086
+ {
1087
+ "epoch": 0.576,
1088
+ "learning_rate": 1.911103987318148e-05,
1089
+ "loss": 0.2939,
1090
+ "step": 360
1091
+ },
1092
+ {
1093
+ "epoch": 0.5792,
1094
+ "learning_rate": 1.9156551035235288e-05,
1095
+ "loss": 0.7765,
1096
+ "step": 362
1097
+ },
1098
+ {
1099
+ "epoch": 0.5824,
1100
+ "learning_rate": 1.9200917700701173e-05,
1101
+ "loss": 0.2491,
1102
+ "step": 364
1103
+ },
1104
+ {
1105
+ "epoch": 0.5856,
1106
+ "learning_rate": 1.924413432409622e-05,
1107
+ "loss": 0.7522,
1108
+ "step": 366
1109
+ },
1110
+ {
1111
+ "epoch": 0.5888,
1112
+ "learning_rate": 1.9286195503683705e-05,
1113
+ "loss": 0.4949,
1114
+ "step": 368
1115
+ },
1116
+ {
1117
+ "epoch": 0.592,
1118
+ "learning_rate": 1.932709598214825e-05,
1119
+ "loss": 0.2942,
1120
+ "step": 370
1121
+ },
1122
+ {
1123
+ "epoch": 0.5952,
1124
+ "learning_rate": 1.9366830647252967e-05,
1125
+ "loss": 0.4678,
1126
+ "step": 372
1127
+ },
1128
+ {
1129
+ "epoch": 0.5984,
1130
+ "learning_rate": 1.940539453247842e-05,
1131
+ "loss": 0.3907,
1132
+ "step": 374
1133
+ },
1134
+ {
1135
+ "epoch": 0.6016,
1136
+ "learning_rate": 1.944278281764342e-05,
1137
+ "loss": 0.3525,
1138
+ "step": 376
1139
+ },
1140
+ {
1141
+ "epoch": 0.6048,
1142
+ "learning_rate": 1.9478990829507504e-05,
1143
+ "loss": 0.3866,
1144
+ "step": 378
1145
+ },
1146
+ {
1147
+ "epoch": 0.608,
1148
+ "learning_rate": 1.951401404235505e-05,
1149
+ "loss": 0.4149,
1150
+ "step": 380
1151
+ },
1152
+ {
1153
+ "epoch": 0.6112,
1154
+ "learning_rate": 1.9547848078560975e-05,
1155
+ "loss": 0.216,
1156
+ "step": 382
1157
+ },
1158
+ {
1159
+ "epoch": 0.6144,
1160
+ "learning_rate": 1.9580488709137858e-05,
1161
+ "loss": 0.8168,
1162
+ "step": 384
1163
+ },
1164
+ {
1165
+ "epoch": 0.6176,
1166
+ "learning_rate": 1.961193185426459e-05,
1167
+ "loss": 0.5152,
1168
+ "step": 386
1169
+ },
1170
+ {
1171
+ "epoch": 0.6208,
1172
+ "learning_rate": 1.9642173583796265e-05,
1173
+ "loss": 0.4164,
1174
+ "step": 388
1175
+ },
1176
+ {
1177
+ "epoch": 0.624,
1178
+ "learning_rate": 1.967121011775546e-05,
1179
+ "loss": 0.3975,
1180
+ "step": 390
1181
+ },
1182
+ {
1183
+ "epoch": 0.6272,
1184
+ "learning_rate": 1.969903782680467e-05,
1185
+ "loss": 0.722,
1186
+ "step": 392
1187
+ },
1188
+ {
1189
+ "epoch": 0.6304,
1190
+ "learning_rate": 1.9725653232699962e-05,
1191
+ "loss": 0.581,
1192
+ "step": 394
1193
+ },
1194
+ {
1195
+ "epoch": 0.6336,
1196
+ "learning_rate": 1.9751053008725736e-05,
1197
+ "loss": 0.4199,
1198
+ "step": 396
1199
+ },
1200
+ {
1201
+ "epoch": 0.6368,
1202
+ "learning_rate": 1.9775233980110524e-05,
1203
+ "loss": 0.2482,
1204
+ "step": 398
1205
+ },
1206
+ {
1207
+ "epoch": 0.64,
1208
+ "learning_rate": 1.9798193124423804e-05,
1209
+ "loss": 0.5844,
1210
+ "step": 400
1211
+ },
1212
+ {
1213
+ "epoch": 0.6432,
1214
+ "learning_rate": 1.9819927571953807e-05,
1215
+ "loss": 0.6136,
1216
+ "step": 402
1217
+ },
1218
+ {
1219
+ "epoch": 0.6464,
1220
+ "learning_rate": 1.9840434606066182e-05,
1221
+ "loss": 0.3273,
1222
+ "step": 404
1223
+ },
1224
+ {
1225
+ "epoch": 0.6496,
1226
+ "learning_rate": 1.985971166354357e-05,
1227
+ "loss": 0.38,
1228
+ "step": 406
1229
+ },
1230
+ {
1231
+ "epoch": 0.6528,
1232
+ "learning_rate": 1.9877756334905983e-05,
1233
+ "loss": 0.4104,
1234
+ "step": 408
1235
+ },
1236
+ {
1237
+ "epoch": 0.656,
1238
+ "learning_rate": 1.9894566364711965e-05,
1239
+ "loss": 0.553,
1240
+ "step": 410
1241
+ },
1242
+ {
1243
+ "epoch": 0.6592,
1244
+ "learning_rate": 1.99101396518405e-05,
1245
+ "loss": 0.2728,
1246
+ "step": 412
1247
+ },
1248
+ {
1249
+ "epoch": 0.6624,
1250
+ "learning_rate": 1.9924474249753652e-05,
1251
+ "loss": 0.3134,
1252
+ "step": 414
1253
+ },
1254
+ {
1255
+ "epoch": 0.6656,
1256
+ "learning_rate": 1.9937568366739858e-05,
1257
+ "loss": 0.376,
1258
+ "step": 416
1259
+ },
1260
+ {
1261
+ "epoch": 0.6688,
1262
+ "learning_rate": 1.994942036613787e-05,
1263
+ "loss": 0.4006,
1264
+ "step": 418
1265
+ },
1266
+ {
1267
+ "epoch": 0.672,
1268
+ "learning_rate": 1.9960028766541336e-05,
1269
+ "loss": 0.3528,
1270
+ "step": 420
1271
+ },
1272
+ {
1273
+ "epoch": 0.6752,
1274
+ "learning_rate": 1.9969392241983957e-05,
1275
+ "loss": 0.358,
1276
+ "step": 422
1277
+ },
1278
+ {
1279
+ "epoch": 0.6784,
1280
+ "learning_rate": 1.9977509622105233e-05,
1281
+ "loss": 0.5557,
1282
+ "step": 424
1283
+ },
1284
+ {
1285
+ "epoch": 0.6816,
1286
+ "learning_rate": 1.998437989229673e-05,
1287
+ "loss": 0.3374,
1288
+ "step": 426
1289
+ },
1290
+ {
1291
+ "epoch": 0.6848,
1292
+ "learning_rate": 1.9990002193828923e-05,
1293
+ "loss": 0.5765,
1294
+ "step": 428
1295
+ },
1296
+ {
1297
+ "epoch": 0.688,
1298
+ "learning_rate": 1.9994375823958504e-05,
1299
+ "loss": 0.3994,
1300
+ "step": 430
1301
+ },
1302
+ {
1303
+ "epoch": 0.6912,
1304
+ "learning_rate": 1.9997500236016233e-05,
1305
+ "loss": 0.5955,
1306
+ "step": 432
1307
+ },
1308
+ {
1309
+ "epoch": 0.6944,
1310
+ "learning_rate": 1.9999375039475275e-05,
1311
+ "loss": 0.491,
1312
+ "step": 434
1313
+ },
1314
+ {
1315
+ "epoch": 0.6976,
1316
+ "learning_rate": 2e-05,
1317
+ "loss": 0.4032,
1318
+ "step": 436
1319
+ },
1320
+ {
1321
+ "epoch": 0.7008,
1322
+ "learning_rate": 1.9999375039475278e-05,
1323
+ "loss": 0.1566,
1324
+ "step": 438
1325
+ },
1326
+ {
1327
+ "epoch": 0.704,
1328
+ "learning_rate": 1.9997500236016233e-05,
1329
+ "loss": 0.3797,
1330
+ "step": 440
1331
+ },
1332
+ {
1333
+ "epoch": 0.7072,
1334
+ "learning_rate": 1.9994375823958504e-05,
1335
+ "loss": 0.3815,
1336
+ "step": 442
1337
+ },
1338
+ {
1339
+ "epoch": 0.7104,
1340
+ "learning_rate": 1.9990002193828923e-05,
1341
+ "loss": 0.3353,
1342
+ "step": 444
1343
+ },
1344
+ {
1345
+ "epoch": 0.7136,
1346
+ "learning_rate": 1.9984379892296735e-05,
1347
+ "loss": 0.2807,
1348
+ "step": 446
1349
+ },
1350
+ {
1351
+ "epoch": 0.7168,
1352
+ "learning_rate": 1.9977509622105236e-05,
1353
+ "loss": 0.2739,
1354
+ "step": 448
1355
+ },
1356
+ {
1357
+ "epoch": 0.72,
1358
+ "learning_rate": 1.9969392241983957e-05,
1359
+ "loss": 0.313,
1360
+ "step": 450
1361
+ },
1362
+ {
1363
+ "epoch": 0.7232,
1364
+ "learning_rate": 1.9960028766541336e-05,
1365
+ "loss": 0.6094,
1366
+ "step": 452
1367
+ },
1368
+ {
1369
+ "epoch": 0.7264,
1370
+ "learning_rate": 1.9949420366137873e-05,
1371
+ "loss": 0.4315,
1372
+ "step": 454
1373
+ },
1374
+ {
1375
+ "epoch": 0.7296,
1376
+ "learning_rate": 1.993756836673986e-05,
1377
+ "loss": 0.418,
1378
+ "step": 456
1379
+ },
1380
+ {
1381
+ "epoch": 0.7328,
1382
+ "learning_rate": 1.9924474249753656e-05,
1383
+ "loss": 0.1987,
1384
+ "step": 458
1385
+ },
1386
+ {
1387
+ "epoch": 0.736,
1388
+ "learning_rate": 1.9910139651840497e-05,
1389
+ "loss": 0.1957,
1390
+ "step": 460
1391
+ },
1392
+ {
1393
+ "epoch": 0.7392,
1394
+ "learning_rate": 1.9894566364711965e-05,
1395
+ "loss": 0.3727,
1396
+ "step": 462
1397
+ },
1398
+ {
1399
+ "epoch": 0.7424,
1400
+ "learning_rate": 1.987775633490599e-05,
1401
+ "loss": 0.3998,
1402
+ "step": 464
1403
+ },
1404
+ {
1405
+ "epoch": 0.7456,
1406
+ "learning_rate": 1.9859711663543573e-05,
1407
+ "loss": 0.2162,
1408
+ "step": 466
1409
+ },
1410
+ {
1411
+ "epoch": 0.7488,
1412
+ "learning_rate": 1.9840434606066186e-05,
1413
+ "loss": 0.9561,
1414
+ "step": 468
1415
+ },
1416
+ {
1417
+ "epoch": 0.752,
1418
+ "learning_rate": 1.9819927571953804e-05,
1419
+ "loss": 0.2485,
1420
+ "step": 470
1421
+ },
1422
+ {
1423
+ "epoch": 0.7552,
1424
+ "learning_rate": 1.9798193124423804e-05,
1425
+ "loss": 0.464,
1426
+ "step": 472
1427
+ },
1428
+ {
1429
+ "epoch": 0.7584,
1430
+ "learning_rate": 1.9775233980110524e-05,
1431
+ "loss": 0.5935,
1432
+ "step": 474
1433
+ },
1434
+ {
1435
+ "epoch": 0.7616,
1436
+ "learning_rate": 1.9751053008725736e-05,
1437
+ "loss": 0.4563,
1438
+ "step": 476
1439
+ },
1440
+ {
1441
+ "epoch": 0.7648,
1442
+ "learning_rate": 1.9725653232699962e-05,
1443
+ "loss": 0.6052,
1444
+ "step": 478
1445
+ },
1446
+ {
1447
+ "epoch": 0.768,
1448
+ "learning_rate": 1.969903782680467e-05,
1449
+ "loss": 0.6623,
1450
+ "step": 480
1451
+ },
1452
+ {
1453
+ "epoch": 0.7712,
1454
+ "learning_rate": 1.9671210117755462e-05,
1455
+ "loss": 0.5662,
1456
+ "step": 482
1457
+ },
1458
+ {
1459
+ "epoch": 0.7744,
1460
+ "learning_rate": 1.9642173583796265e-05,
1461
+ "loss": 0.5735,
1462
+ "step": 484
1463
+ },
1464
+ {
1465
+ "epoch": 0.7776,
1466
+ "learning_rate": 1.961193185426459e-05,
1467
+ "loss": 0.4004,
1468
+ "step": 486
1469
+ },
1470
+ {
1471
+ "epoch": 0.7808,
1472
+ "learning_rate": 1.958048870913786e-05,
1473
+ "loss": 0.2139,
1474
+ "step": 488
1475
+ },
1476
+ {
1477
+ "epoch": 0.784,
1478
+ "learning_rate": 1.9547848078560982e-05,
1479
+ "loss": 0.8248,
1480
+ "step": 490
1481
+ },
1482
+ {
1483
+ "epoch": 0.7872,
1484
+ "learning_rate": 1.9514014042355054e-05,
1485
+ "loss": 0.3159,
1486
+ "step": 492
1487
+ },
1488
+ {
1489
+ "epoch": 0.7904,
1490
+ "learning_rate": 1.947899082950751e-05,
1491
+ "loss": 0.2233,
1492
+ "step": 494
1493
+ },
1494
+ {
1495
+ "epoch": 0.7936,
1496
+ "learning_rate": 1.9442782817643425e-05,
1497
+ "loss": 0.7077,
1498
+ "step": 496
1499
+ },
1500
+ {
1501
+ "epoch": 0.7968,
1502
+ "learning_rate": 1.9405394532478422e-05,
1503
+ "loss": 0.3517,
1504
+ "step": 498
1505
+ },
1506
+ {
1507
+ "epoch": 0.8,
1508
+ "learning_rate": 1.9366830647252977e-05,
1509
+ "loss": 0.4569,
1510
+ "step": 500
1511
+ },
1512
+ {
1513
+ "epoch": 0.8032,
1514
+ "learning_rate": 1.9327095982148255e-05,
1515
+ "loss": 0.3031,
1516
+ "step": 502
1517
+ },
1518
+ {
1519
+ "epoch": 0.8064,
1520
+ "learning_rate": 1.928619550368371e-05,
1521
+ "loss": 0.4575,
1522
+ "step": 504
1523
+ },
1524
+ {
1525
+ "epoch": 0.8096,
1526
+ "learning_rate": 1.9244134324096216e-05,
1527
+ "loss": 0.8606,
1528
+ "step": 506
1529
+ },
1530
+ {
1531
+ "epoch": 0.8128,
1532
+ "learning_rate": 1.9200917700701176e-05,
1533
+ "loss": 0.1693,
1534
+ "step": 508
1535
+ },
1536
+ {
1537
+ "epoch": 0.816,
1538
+ "learning_rate": 1.9156551035235298e-05,
1539
+ "loss": 0.8113,
1540
+ "step": 510
1541
+ },
1542
+ {
1543
+ "epoch": 0.8192,
1544
+ "learning_rate": 1.9111039873181475e-05,
1545
+ "loss": 0.926,
1546
+ "step": 512
1547
+ },
1548
+ {
1549
+ "epoch": 0.8224,
1550
+ "learning_rate": 1.9064389903075683e-05,
1551
+ "loss": 0.6971,
1552
+ "step": 514
1553
+ },
1554
+ {
1555
+ "epoch": 0.8256,
1556
+ "learning_rate": 1.9016606955795843e-05,
1557
+ "loss": 0.3265,
1558
+ "step": 516
1559
+ },
1560
+ {
1561
+ "epoch": 0.8288,
1562
+ "learning_rate": 1.8967697003833156e-05,
1563
+ "loss": 0.4668,
1564
+ "step": 518
1565
+ },
1566
+ {
1567
+ "epoch": 0.832,
1568
+ "learning_rate": 1.891766616054545e-05,
1569
+ "loss": 0.5266,
1570
+ "step": 520
1571
+ },
1572
+ {
1573
+ "epoch": 0.8352,
1574
+ "learning_rate": 1.8866520679393124e-05,
1575
+ "loss": 0.3557,
1576
+ "step": 522
1577
+ },
1578
+ {
1579
+ "epoch": 0.8384,
1580
+ "learning_rate": 1.881426695315756e-05,
1581
+ "loss": 0.619,
1582
+ "step": 524
1583
+ },
1584
+ {
1585
+ "epoch": 0.8416,
1586
+ "learning_rate": 1.8760911513141974e-05,
1587
+ "loss": 0.3365,
1588
+ "step": 526
1589
+ },
1590
+ {
1591
+ "epoch": 0.8448,
1592
+ "learning_rate": 1.8706461028355107e-05,
1593
+ "loss": 0.5723,
1594
+ "step": 528
1595
+ },
1596
+ {
1597
+ "epoch": 0.848,
1598
+ "learning_rate": 1.86509223046777e-05,
1599
+ "loss": 0.438,
1600
+ "step": 530
1601
+ },
1602
+ {
1603
+ "epoch": 0.8512,
1604
+ "learning_rate": 1.8594302284011697e-05,
1605
+ "loss": 0.4045,
1606
+ "step": 532
1607
+ },
1608
+ {
1609
+ "epoch": 0.8544,
1610
+ "learning_rate": 1.8536608043412702e-05,
1611
+ "loss": 0.2484,
1612
+ "step": 534
1613
+ },
1614
+ {
1615
+ "epoch": 0.8576,
1616
+ "learning_rate": 1.847784679420527e-05,
1617
+ "loss": 0.2137,
1618
+ "step": 536
1619
+ },
1620
+ {
1621
+ "epoch": 0.8608,
1622
+ "learning_rate": 1.841802588108161e-05,
1623
+ "loss": 0.3481,
1624
+ "step": 538
1625
+ },
1626
+ {
1627
+ "epoch": 0.864,
1628
+ "learning_rate": 1.8357152781183613e-05,
1629
+ "loss": 0.7572,
1630
+ "step": 540
1631
+ },
1632
+ {
1633
+ "epoch": 0.8672,
1634
+ "learning_rate": 1.8295235103168128e-05,
1635
+ "loss": 0.2932,
1636
+ "step": 542
1637
+ },
1638
+ {
1639
+ "epoch": 0.8704,
1640
+ "learning_rate": 1.8232280586256104e-05,
1641
+ "loss": 0.4697,
1642
+ "step": 544
1643
+ },
1644
+ {
1645
+ "epoch": 0.8736,
1646
+ "learning_rate": 1.8168297099265108e-05,
1647
+ "loss": 0.5688,
1648
+ "step": 546
1649
+ },
1650
+ {
1651
+ "epoch": 0.8768,
1652
+ "learning_rate": 1.8103292639625835e-05,
1653
+ "loss": 0.136,
1654
+ "step": 548
1655
+ },
1656
+ {
1657
+ "epoch": 0.88,
1658
+ "learning_rate": 1.8037275332382575e-05,
1659
+ "loss": 0.4427,
1660
+ "step": 550
1661
+ },
1662
+ {
1663
+ "epoch": 0.8832,
1664
+ "learning_rate": 1.7970253429177494e-05,
1665
+ "loss": 0.3046,
1666
+ "step": 552
1667
+ },
1668
+ {
1669
+ "epoch": 0.8864,
1670
+ "learning_rate": 1.7902235307219336e-05,
1671
+ "loss": 0.4759,
1672
+ "step": 554
1673
+ },
1674
+ {
1675
+ "epoch": 0.8896,
1676
+ "learning_rate": 1.783322946823638e-05,
1677
+ "loss": 0.4999,
1678
+ "step": 556
1679
+ },
1680
+ {
1681
+ "epoch": 0.8928,
1682
+ "learning_rate": 1.776324453741365e-05,
1683
+ "loss": 0.2137,
1684
+ "step": 558
1685
+ },
1686
+ {
1687
+ "epoch": 0.896,
1688
+ "learning_rate": 1.7692289262315008e-05,
1689
+ "loss": 0.4347,
1690
+ "step": 560
1691
+ },
1692
+ {
1693
+ "epoch": 0.8992,
1694
+ "learning_rate": 1.762037251178961e-05,
1695
+ "loss": 0.5361,
1696
+ "step": 562
1697
+ },
1698
+ {
1699
+ "epoch": 0.9024,
1700
+ "learning_rate": 1.7547503274863502e-05,
1701
+ "loss": 0.482,
1702
+ "step": 564
1703
+ },
1704
+ {
1705
+ "epoch": 0.9056,
1706
+ "learning_rate": 1.7473690659616e-05,
1707
+ "loss": 0.2961,
1708
+ "step": 566
1709
+ },
1710
+ {
1711
+ "epoch": 0.9088,
1712
+ "learning_rate": 1.739894389204122e-05,
1713
+ "loss": 0.3957,
1714
+ "step": 568
1715
+ },
1716
+ {
1717
+ "epoch": 0.912,
1718
+ "learning_rate": 1.732327231489503e-05,
1719
+ "loss": 0.6299,
1720
+ "step": 570
1721
+ },
1722
+ {
1723
+ "epoch": 0.9152,
1724
+ "learning_rate": 1.7246685386527105e-05,
1725
+ "loss": 0.4689,
1726
+ "step": 572
1727
+ },
1728
+ {
1729
+ "epoch": 0.9184,
1730
+ "learning_rate": 1.716919267969884e-05,
1731
+ "loss": 0.2789,
1732
+ "step": 574
1733
+ },
1734
+ {
1735
+ "epoch": 0.9216,
1736
+ "learning_rate": 1.7090803880386784e-05,
1737
+ "loss": 0.4365,
1738
+ "step": 576
1739
+ },
1740
+ {
1741
+ "epoch": 0.9248,
1742
+ "learning_rate": 1.701152878657197e-05,
1743
+ "loss": 0.605,
1744
+ "step": 578
1745
+ },
1746
+ {
1747
+ "epoch": 0.928,
1748
+ "learning_rate": 1.6931377307015236e-05,
1749
+ "loss": 0.343,
1750
+ "step": 580
1751
+ },
1752
+ {
1753
+ "epoch": 0.9312,
1754
+ "learning_rate": 1.6850359460018744e-05,
1755
+ "loss": 0.2095,
1756
+ "step": 582
1757
+ },
1758
+ {
1759
+ "epoch": 0.9344,
1760
+ "learning_rate": 1.67684853721737e-05,
1761
+ "loss": 0.2724,
1762
+ "step": 584
1763
+ },
1764
+ {
1765
+ "epoch": 0.9376,
1766
+ "learning_rate": 1.6685765277094702e-05,
1767
+ "loss": 0.1679,
1768
+ "step": 586
1769
+ },
1770
+ {
1771
+ "epoch": 0.9408,
1772
+ "learning_rate": 1.6602209514140562e-05,
1773
+ "loss": 0.5185,
1774
+ "step": 588
1775
+ },
1776
+ {
1777
+ "epoch": 0.944,
1778
+ "learning_rate": 1.651782852712194e-05,
1779
+ "loss": 0.2153,
1780
+ "step": 590
1781
+ },
1782
+ {
1783
+ "epoch": 0.9472,
1784
+ "learning_rate": 1.6432632862996062e-05,
1785
+ "loss": 0.139,
1786
+ "step": 592
1787
+ },
1788
+ {
1789
+ "epoch": 0.9504,
1790
+ "learning_rate": 1.6346633170548275e-05,
1791
+ "loss": 0.4417,
1792
+ "step": 594
1793
+ },
1794
+ {
1795
+ "epoch": 0.9536,
1796
+ "learning_rate": 1.625984019906122e-05,
1797
+ "loss": 0.8611,
1798
+ "step": 596
1799
+ },
1800
+ {
1801
+ "epoch": 0.9568,
1802
+ "learning_rate": 1.6172264796971063e-05,
1803
+ "loss": 0.1911,
1804
+ "step": 598
1805
+ },
1806
+ {
1807
+ "epoch": 0.96,
1808
+ "learning_rate": 1.6083917910511623e-05,
1809
+ "loss": 0.2645,
1810
+ "step": 600
1811
+ },
1812
+ {
1813
+ "epoch": 0.9632,
1814
+ "learning_rate": 1.5994810582346266e-05,
1815
+ "loss": 0.5107,
1816
+ "step": 602
1817
+ },
1818
+ {
1819
+ "epoch": 0.9664,
1820
+ "learning_rate": 1.5904953950187448e-05,
1821
+ "loss": 0.4218,
1822
+ "step": 604
1823
+ },
1824
+ {
1825
+ "epoch": 0.9696,
1826
+ "learning_rate": 1.581435924540482e-05,
1827
+ "loss": 0.5054,
1828
+ "step": 606
1829
+ },
1830
+ {
1831
+ "epoch": 0.9728,
1832
+ "learning_rate": 1.5723037791621203e-05,
1833
+ "loss": 0.5874,
1834
+ "step": 608
1835
+ },
1836
+ {
1837
+ "epoch": 0.976,
1838
+ "learning_rate": 1.5631001003297302e-05,
1839
+ "loss": 0.4475,
1840
+ "step": 610
1841
+ },
1842
+ {
1843
+ "epoch": 0.9792,
1844
+ "learning_rate": 1.5538260384305083e-05,
1845
+ "loss": 0.5059,
1846
+ "step": 612
1847
+ },
1848
+ {
1849
+ "epoch": 0.9824,
1850
+ "learning_rate": 1.544482752648966e-05,
1851
+ "loss": 0.17,
1852
+ "step": 614
1853
+ },
1854
+ {
1855
+ "epoch": 0.9856,
1856
+ "learning_rate": 1.5350714108220677e-05,
1857
+ "loss": 0.0608,
1858
+ "step": 616
1859
+ },
1860
+ {
1861
+ "epoch": 0.9888,
1862
+ "learning_rate": 1.5255931892932344e-05,
1863
+ "loss": 0.1834,
1864
+ "step": 618
1865
+ },
1866
+ {
1867
+ "epoch": 0.992,
1868
+ "learning_rate": 1.5160492727653238e-05,
1869
+ "loss": 0.2795,
1870
+ "step": 620
1871
+ },
1872
+ {
1873
+ "epoch": 0.9952,
1874
+ "learning_rate": 1.5064408541525578e-05,
1875
+ "loss": 0.3034,
1876
+ "step": 622
1877
+ },
1878
+ {
1879
+ "epoch": 0.9984,
1880
+ "learning_rate": 1.4967691344314012e-05,
1881
+ "loss": 0.472,
1882
+ "step": 624
1883
+ },
1884
+ {
1885
+ "epoch": 1.0,
1886
+ "step": 625,
1887
+ "total_flos": 0,
1888
+ "train_loss": 0.4759340593457222,
1889
+ "train_runtime": 2952.9029,
1890
+ "train_samples_per_second": 3.386,
1891
+ "train_steps_per_second": 0.212
1892
+ }
1893
+ ],
1894
+ "logging_steps": 2,
1895
+ "max_steps": 625,
1896
+ "num_input_tokens_seen": 0,
1897
+ "num_train_epochs": 1,
1898
+ "save_steps": 500,
1899
+ "stateful_callbacks": {},
1900
+ "total_flos": 0,
1901
+ "train_batch_size": 1,
1902
+ "trial_name": null,
1903
+ "trial_params": null
1904
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dffd9bd42f34d1827469635d24f43b03b31536ad164f381a1f65164e5470321a
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce6d5dae7ddf8fca1ca15ffb674f343835b36809501b52e2e635b5d7eda28814
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa08bd11c216502fd952ec771c17b90bb86dee458c09727fe50a43e4d15c1a3d
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_divbs_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9db542c5a6813efb07a01d9432b8801770383acf26b603c808864eebeaa90a44
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,2216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 625,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0032,
14
+ "grad_norm": 11.924293518066406,
15
+ "learning_rate": 2.4524967251364995e-06,
16
+ "loss": 0.935,
17
+ "step": 2
18
+ },
19
+ {
20
+ "epoch": 0.0064,
21
+ "grad_norm": 3.9490787982940674,
22
+ "learning_rate": 2.5263093403840022e-06,
23
+ "loss": 0.3235,
24
+ "step": 4
25
+ },
26
+ {
27
+ "epoch": 0.0096,
28
+ "grad_norm": 5.28393030166626,
29
+ "learning_rate": 2.6010561079587694e-06,
30
+ "loss": 0.4202,
31
+ "step": 6
32
+ },
33
+ {
34
+ "epoch": 0.0128,
35
+ "grad_norm": 5.9949727058410645,
36
+ "learning_rate": 2.6767276851049716e-06,
37
+ "loss": 0.5825,
38
+ "step": 8
39
+ },
40
+ {
41
+ "epoch": 0.016,
42
+ "grad_norm": 11.239922523498535,
43
+ "learning_rate": 2.7533146134728993e-06,
44
+ "loss": 0.553,
45
+ "step": 10
46
+ },
47
+ {
48
+ "epoch": 0.0192,
49
+ "grad_norm": 5.190208435058594,
50
+ "learning_rate": 2.8308073203011634e-06,
51
+ "loss": 0.7123,
52
+ "step": 12
53
+ },
54
+ {
55
+ "epoch": 0.0224,
56
+ "grad_norm": 11.484992027282715,
57
+ "learning_rate": 2.909196119613218e-06,
58
+ "loss": 0.9147,
59
+ "step": 14
60
+ },
61
+ {
62
+ "epoch": 0.0256,
63
+ "grad_norm": 5.685008525848389,
64
+ "learning_rate": 2.988471213428035e-06,
65
+ "loss": 0.4397,
66
+ "step": 16
67
+ },
68
+ {
69
+ "epoch": 0.0288,
70
+ "grad_norm": 9.455109596252441,
71
+ "learning_rate": 3.068622692984767e-06,
72
+ "loss": 0.6378,
73
+ "step": 18
74
+ },
75
+ {
76
+ "epoch": 0.032,
77
+ "grad_norm": 4.522568702697754,
78
+ "learning_rate": 3.1496405399812602e-06,
79
+ "loss": 0.7933,
80
+ "step": 20
81
+ },
82
+ {
83
+ "epoch": 0.0352,
84
+ "grad_norm": 10.944385528564453,
85
+ "learning_rate": 3.231514627826302e-06,
86
+ "loss": 1.13,
87
+ "step": 22
88
+ },
89
+ {
90
+ "epoch": 0.0384,
91
+ "grad_norm": 10.723428726196289,
92
+ "learning_rate": 3.314234722905302e-06,
93
+ "loss": 0.3353,
94
+ "step": 24
95
+ },
96
+ {
97
+ "epoch": 0.0416,
98
+ "grad_norm": 3.2999091148376465,
99
+ "learning_rate": 3.3977904858594534e-06,
100
+ "loss": 0.3621,
101
+ "step": 26
102
+ },
103
+ {
104
+ "epoch": 0.0448,
105
+ "grad_norm": 3.1308634281158447,
106
+ "learning_rate": 3.4821714728780654e-06,
107
+ "loss": 0.7325,
108
+ "step": 28
109
+ },
110
+ {
111
+ "epoch": 0.048,
112
+ "grad_norm": 8.053144454956055,
113
+ "learning_rate": 3.567367137003953e-06,
114
+ "loss": 0.4884,
115
+ "step": 30
116
+ },
117
+ {
118
+ "epoch": 0.0512,
119
+ "grad_norm": 25.066747665405273,
120
+ "learning_rate": 3.653366829451711e-06,
121
+ "loss": 0.8357,
122
+ "step": 32
123
+ },
124
+ {
125
+ "epoch": 0.0544,
126
+ "grad_norm": 8.294330596923828,
127
+ "learning_rate": 3.740159800938784e-06,
128
+ "loss": 0.557,
129
+ "step": 34
130
+ },
131
+ {
132
+ "epoch": 0.0576,
133
+ "grad_norm": 10.253148078918457,
134
+ "learning_rate": 3.827735203028956e-06,
135
+ "loss": 0.53,
136
+ "step": 36
137
+ },
138
+ {
139
+ "epoch": 0.0608,
140
+ "grad_norm": 3.1425323486328125,
141
+ "learning_rate": 3.916082089488379e-06,
142
+ "loss": 0.6137,
143
+ "step": 38
144
+ },
145
+ {
146
+ "epoch": 0.064,
147
+ "grad_norm": 7.860047340393066,
148
+ "learning_rate": 4.005189417653737e-06,
149
+ "loss": 0.5169,
150
+ "step": 40
151
+ },
152
+ {
153
+ "epoch": 0.0672,
154
+ "grad_norm": 10.487458229064941,
155
+ "learning_rate": 4.095046049812541e-06,
156
+ "loss": 0.5802,
157
+ "step": 42
158
+ },
159
+ {
160
+ "epoch": 0.0704,
161
+ "grad_norm": 9.957070350646973,
162
+ "learning_rate": 4.1856407545951825e-06,
163
+ "loss": 0.9286,
164
+ "step": 44
165
+ },
166
+ {
167
+ "epoch": 0.0736,
168
+ "grad_norm": 4.670755863189697,
169
+ "learning_rate": 4.276962208378814e-06,
170
+ "loss": 0.4913,
171
+ "step": 46
172
+ },
173
+ {
174
+ "epoch": 0.0768,
175
+ "grad_norm": 10.305075645446777,
176
+ "learning_rate": 4.368998996702686e-06,
177
+ "loss": 0.765,
178
+ "step": 48
179
+ },
180
+ {
181
+ "epoch": 0.08,
182
+ "grad_norm": 9.407020568847656,
183
+ "learning_rate": 4.461739615694921e-06,
184
+ "loss": 0.4923,
185
+ "step": 50
186
+ },
187
+ {
188
+ "epoch": 0.0832,
189
+ "grad_norm": 23.41385841369629,
190
+ "learning_rate": 4.555172473510324e-06,
191
+ "loss": 0.6566,
192
+ "step": 52
193
+ },
194
+ {
195
+ "epoch": 0.0864,
196
+ "grad_norm": 2.507441997528076,
197
+ "learning_rate": 4.649285891779326e-06,
198
+ "loss": 0.8811,
199
+ "step": 54
200
+ },
201
+ {
202
+ "epoch": 0.0896,
203
+ "grad_norm": 15.887990951538086,
204
+ "learning_rate": 4.744068107067673e-06,
205
+ "loss": 0.7528,
206
+ "step": 56
207
+ },
208
+ {
209
+ "epoch": 0.0928,
210
+ "grad_norm": 7.44704532623291,
211
+ "learning_rate": 4.839507272346751e-06,
212
+ "loss": 0.4551,
213
+ "step": 58
214
+ },
215
+ {
216
+ "epoch": 0.096,
217
+ "grad_norm": 4.294933795928955,
218
+ "learning_rate": 4.935591458474425e-06,
219
+ "loss": 0.4674,
220
+ "step": 60
221
+ },
222
+ {
223
+ "epoch": 0.0992,
224
+ "grad_norm": 7.989336013793945,
225
+ "learning_rate": 5.032308655686007e-06,
226
+ "loss": 0.4042,
227
+ "step": 62
228
+ },
229
+ {
230
+ "epoch": 0.1024,
231
+ "grad_norm": 10.031631469726562,
232
+ "learning_rate": 5.129646775095432e-06,
233
+ "loss": 0.7977,
234
+ "step": 64
235
+ },
236
+ {
237
+ "epoch": 0.1056,
238
+ "grad_norm": 3.135248899459839,
239
+ "learning_rate": 5.227593650206246e-06,
240
+ "loss": 0.2882,
241
+ "step": 66
242
+ },
243
+ {
244
+ "epoch": 0.1088,
245
+ "grad_norm": 8.17870807647705,
246
+ "learning_rate": 5.3261370384323904e-06,
247
+ "loss": 0.4969,
248
+ "step": 68
249
+ },
250
+ {
251
+ "epoch": 0.112,
252
+ "grad_norm": 7.233709812164307,
253
+ "learning_rate": 5.425264622628326e-06,
254
+ "loss": 0.3908,
255
+ "step": 70
256
+ },
257
+ {
258
+ "epoch": 0.1152,
259
+ "grad_norm": 6.015388488769531,
260
+ "learning_rate": 5.524964012628644e-06,
261
+ "loss": 0.6212,
262
+ "step": 72
263
+ },
264
+ {
265
+ "epoch": 0.1184,
266
+ "grad_norm": 19.091726303100586,
267
+ "learning_rate": 5.62522274679673e-06,
268
+ "loss": 0.7046,
269
+ "step": 74
270
+ },
271
+ {
272
+ "epoch": 0.1216,
273
+ "grad_norm": 3.5434837341308594,
274
+ "learning_rate": 5.726028293582342e-06,
275
+ "loss": 0.6002,
276
+ "step": 76
277
+ },
278
+ {
279
+ "epoch": 0.1248,
280
+ "grad_norm": 8.320732116699219,
281
+ "learning_rate": 5.827368053088032e-06,
282
+ "loss": 0.5269,
283
+ "step": 78
284
+ },
285
+ {
286
+ "epoch": 0.128,
287
+ "grad_norm": 10.583352088928223,
288
+ "learning_rate": 5.929229358643925e-06,
289
+ "loss": 0.9489,
290
+ "step": 80
291
+ },
292
+ {
293
+ "epoch": 0.1312,
294
+ "grad_norm": 4.805012226104736,
295
+ "learning_rate": 6.03159947839103e-06,
296
+ "loss": 0.4776,
297
+ "step": 82
298
+ },
299
+ {
300
+ "epoch": 0.1344,
301
+ "grad_norm": 4.485952854156494,
302
+ "learning_rate": 6.13446561687258e-06,
303
+ "loss": 0.4304,
304
+ "step": 84
305
+ },
306
+ {
307
+ "epoch": 0.1376,
308
+ "grad_norm": 5.58962345123291,
309
+ "learning_rate": 6.237814916633431e-06,
310
+ "loss": 0.3162,
311
+ "step": 86
312
+ },
313
+ {
314
+ "epoch": 0.1408,
315
+ "grad_norm": 9.707934379577637,
316
+ "learning_rate": 6.341634459827044e-06,
317
+ "loss": 0.5723,
318
+ "step": 88
319
+ },
320
+ {
321
+ "epoch": 0.144,
322
+ "grad_norm": 6.585778713226318,
323
+ "learning_rate": 6.445911269830183e-06,
324
+ "loss": 0.467,
325
+ "step": 90
326
+ },
327
+ {
328
+ "epoch": 0.1472,
329
+ "grad_norm": 3.7008655071258545,
330
+ "learning_rate": 6.5506323128648654e-06,
331
+ "loss": 0.4938,
332
+ "step": 92
333
+ },
334
+ {
335
+ "epoch": 0.1504,
336
+ "grad_norm": 12.401470184326172,
337
+ "learning_rate": 6.655784499627476e-06,
338
+ "loss": 0.4442,
339
+ "step": 94
340
+ },
341
+ {
342
+ "epoch": 0.1536,
343
+ "grad_norm": 8.586926460266113,
344
+ "learning_rate": 6.761354686924883e-06,
345
+ "loss": 0.3653,
346
+ "step": 96
347
+ },
348
+ {
349
+ "epoch": 0.1568,
350
+ "grad_norm": 18.978261947631836,
351
+ "learning_rate": 6.867329679317144e-06,
352
+ "loss": 0.4901,
353
+ "step": 98
354
+ },
355
+ {
356
+ "epoch": 0.16,
357
+ "grad_norm": 8.923133850097656,
358
+ "learning_rate": 6.973696230766884e-06,
359
+ "loss": 0.5532,
360
+ "step": 100
361
+ },
362
+ {
363
+ "epoch": 0.1632,
364
+ "grad_norm": 4.800722599029541,
365
+ "learning_rate": 7.080441046294945e-06,
366
+ "loss": 0.7183,
367
+ "step": 102
368
+ },
369
+ {
370
+ "epoch": 0.1664,
371
+ "grad_norm": 9.728776931762695,
372
+ "learning_rate": 7.18755078364214e-06,
373
+ "loss": 0.5334,
374
+ "step": 104
375
+ },
376
+ {
377
+ "epoch": 0.1696,
378
+ "grad_norm": 8.5445556640625,
379
+ "learning_rate": 7.2950120549369204e-06,
380
+ "loss": 0.3432,
381
+ "step": 106
382
+ },
383
+ {
384
+ "epoch": 0.1728,
385
+ "grad_norm": 4.521656036376953,
386
+ "learning_rate": 7.402811428368824e-06,
387
+ "loss": 0.7348,
388
+ "step": 108
389
+ },
390
+ {
391
+ "epoch": 0.176,
392
+ "grad_norm": 6.669830322265625,
393
+ "learning_rate": 7.510935429867233e-06,
394
+ "loss": 0.762,
395
+ "step": 110
396
+ },
397
+ {
398
+ "epoch": 0.1792,
399
+ "grad_norm": 4.748926162719727,
400
+ "learning_rate": 7.619370544785608e-06,
401
+ "loss": 0.4783,
402
+ "step": 112
403
+ },
404
+ {
405
+ "epoch": 0.1824,
406
+ "grad_norm": 7.183364391326904,
407
+ "learning_rate": 7.728103219590684e-06,
408
+ "loss": 0.4328,
409
+ "step": 114
410
+ },
411
+ {
412
+ "epoch": 0.1856,
413
+ "grad_norm": 14.943339347839355,
414
+ "learning_rate": 7.83711986355656e-06,
415
+ "loss": 0.8939,
416
+ "step": 116
417
+ },
418
+ {
419
+ "epoch": 0.1888,
420
+ "grad_norm": 4.003686904907227,
421
+ "learning_rate": 7.946406850463435e-06,
422
+ "loss": 0.705,
423
+ "step": 118
424
+ },
425
+ {
426
+ "epoch": 0.192,
427
+ "grad_norm": 8.128644943237305,
428
+ "learning_rate": 8.055950520300756e-06,
429
+ "loss": 0.4062,
430
+ "step": 120
431
+ },
432
+ {
433
+ "epoch": 0.1952,
434
+ "grad_norm": 2.2518608570098877,
435
+ "learning_rate": 8.165737180974676e-06,
436
+ "loss": 0.6835,
437
+ "step": 122
438
+ },
439
+ {
440
+ "epoch": 0.1984,
441
+ "grad_norm": 11.007171630859375,
442
+ "learning_rate": 8.275753110019367e-06,
443
+ "loss": 0.7353,
444
+ "step": 124
445
+ },
446
+ {
447
+ "epoch": 0.2016,
448
+ "grad_norm": 4.774467468261719,
449
+ "learning_rate": 8.385984556312285e-06,
450
+ "loss": 0.5589,
451
+ "step": 126
452
+ },
453
+ {
454
+ "epoch": 0.2048,
455
+ "grad_norm": 7.269313335418701,
456
+ "learning_rate": 8.496417741792922e-06,
457
+ "loss": 0.3103,
458
+ "step": 128
459
+ },
460
+ {
461
+ "epoch": 0.208,
462
+ "grad_norm": 7.521888732910156,
463
+ "learning_rate": 8.607038863184952e-06,
464
+ "loss": 0.4026,
465
+ "step": 130
466
+ },
467
+ {
468
+ "epoch": 0.2112,
469
+ "grad_norm": 10.690435409545898,
470
+ "learning_rate": 8.717834093721598e-06,
471
+ "loss": 0.5056,
472
+ "step": 132
473
+ },
474
+ {
475
+ "epoch": 0.2144,
476
+ "grad_norm": 6.21688175201416,
477
+ "learning_rate": 8.828789584873757e-06,
478
+ "loss": 0.4383,
479
+ "step": 134
480
+ },
481
+ {
482
+ "epoch": 0.2176,
483
+ "grad_norm": 15.672515869140625,
484
+ "learning_rate": 8.939891468081036e-06,
485
+ "loss": 0.4669,
486
+ "step": 136
487
+ },
488
+ {
489
+ "epoch": 0.2208,
490
+ "grad_norm": 10.475600242614746,
491
+ "learning_rate": 9.051125856485175e-06,
492
+ "loss": 1.1409,
493
+ "step": 138
494
+ },
495
+ {
496
+ "epoch": 0.224,
497
+ "grad_norm": 11.176009178161621,
498
+ "learning_rate": 9.162478846665854e-06,
499
+ "loss": 0.7849,
500
+ "step": 140
501
+ },
502
+ {
503
+ "epoch": 0.2272,
504
+ "grad_norm": 20.8123722076416,
505
+ "learning_rate": 9.273936520378426e-06,
506
+ "loss": 0.5393,
507
+ "step": 142
508
+ },
509
+ {
510
+ "epoch": 0.2304,
511
+ "grad_norm": 5.819454669952393,
512
+ "learning_rate": 9.38548494629364e-06,
513
+ "loss": 0.5705,
514
+ "step": 144
515
+ },
516
+ {
517
+ "epoch": 0.2336,
518
+ "grad_norm": 3.6171233654022217,
519
+ "learning_rate": 9.497110181738935e-06,
520
+ "loss": 0.3567,
521
+ "step": 146
522
+ },
523
+ {
524
+ "epoch": 0.2368,
525
+ "grad_norm": 7.882349014282227,
526
+ "learning_rate": 9.608798274441153e-06,
527
+ "loss": 0.6364,
528
+ "step": 148
529
+ },
530
+ {
531
+ "epoch": 0.24,
532
+ "grad_norm": 2.2136592864990234,
533
+ "learning_rate": 9.720535264270526e-06,
534
+ "loss": 0.2534,
535
+ "step": 150
536
+ },
537
+ {
538
+ "epoch": 0.2432,
539
+ "grad_norm": 7.360774993896484,
540
+ "learning_rate": 9.832307184985473e-06,
541
+ "loss": 0.4552,
542
+ "step": 152
543
+ },
544
+ {
545
+ "epoch": 0.2464,
546
+ "grad_norm": 6.251705646514893,
547
+ "learning_rate": 9.944100065978354e-06,
548
+ "loss": 0.8051,
549
+ "step": 154
550
+ },
551
+ {
552
+ "epoch": 0.2496,
553
+ "grad_norm": 4.254730224609375,
554
+ "learning_rate": 1.0055899934021637e-05,
555
+ "loss": 0.3781,
556
+ "step": 156
557
+ },
558
+ {
559
+ "epoch": 0.2528,
560
+ "grad_norm": 10.385128021240234,
561
+ "learning_rate": 1.016769281501452e-05,
562
+ "loss": 0.6839,
563
+ "step": 158
564
+ },
565
+ {
566
+ "epoch": 0.256,
567
+ "grad_norm": 8.325345993041992,
568
+ "learning_rate": 1.0279464735729467e-05,
569
+ "loss": 0.6583,
570
+ "step": 160
571
+ },
572
+ {
573
+ "epoch": 0.2592,
574
+ "grad_norm": 6.293595790863037,
575
+ "learning_rate": 1.039120172555884e-05,
576
+ "loss": 0.4719,
577
+ "step": 162
578
+ },
579
+ {
580
+ "epoch": 0.2624,
581
+ "grad_norm": 4.342235565185547,
582
+ "learning_rate": 1.0502889818261058e-05,
583
+ "loss": 0.4733,
584
+ "step": 164
585
+ },
586
+ {
587
+ "epoch": 0.2656,
588
+ "grad_norm": 7.431089878082275,
589
+ "learning_rate": 1.0614515053706354e-05,
590
+ "loss": 0.4616,
591
+ "step": 166
592
+ },
593
+ {
594
+ "epoch": 0.2688,
595
+ "grad_norm": 11.834856033325195,
596
+ "learning_rate": 1.0726063479621567e-05,
597
+ "loss": 2.1594,
598
+ "step": 168
599
+ },
600
+ {
601
+ "epoch": 0.272,
602
+ "grad_norm": 7.422325611114502,
603
+ "learning_rate": 1.083752115333414e-05,
604
+ "loss": 0.4545,
605
+ "step": 170
606
+ },
607
+ {
608
+ "epoch": 0.2752,
609
+ "grad_norm": 5.249634742736816,
610
+ "learning_rate": 1.0948874143514818e-05,
611
+ "loss": 0.5212,
612
+ "step": 172
613
+ },
614
+ {
615
+ "epoch": 0.2784,
616
+ "grad_norm": 7.874746799468994,
617
+ "learning_rate": 1.1060108531918955e-05,
618
+ "loss": 0.29,
619
+ "step": 174
620
+ },
621
+ {
622
+ "epoch": 0.2816,
623
+ "grad_norm": 4.046351432800293,
624
+ "learning_rate": 1.1171210415126238e-05,
625
+ "loss": 0.5505,
626
+ "step": 176
627
+ },
628
+ {
629
+ "epoch": 0.2848,
630
+ "grad_norm": 4.0093464851379395,
631
+ "learning_rate": 1.1282165906278395e-05,
632
+ "loss": 0.7834,
633
+ "step": 178
634
+ },
635
+ {
636
+ "epoch": 0.288,
637
+ "grad_norm": 80.90779113769531,
638
+ "learning_rate": 1.1392961136815041e-05,
639
+ "loss": 2.8767,
640
+ "step": 180
641
+ },
642
+ {
643
+ "epoch": 0.2912,
644
+ "grad_norm": 7.990501880645752,
645
+ "learning_rate": 1.150358225820707e-05,
646
+ "loss": 0.3704,
647
+ "step": 182
648
+ },
649
+ {
650
+ "epoch": 0.2944,
651
+ "grad_norm": 6.763923645019531,
652
+ "learning_rate": 1.1614015443687708e-05,
653
+ "loss": 0.3771,
654
+ "step": 184
655
+ },
656
+ {
657
+ "epoch": 0.2976,
658
+ "grad_norm": 7.046037673950195,
659
+ "learning_rate": 1.1724246889980626e-05,
660
+ "loss": 0.4318,
661
+ "step": 186
662
+ },
663
+ {
664
+ "epoch": 0.3008,
665
+ "grad_norm": 5.150176048278809,
666
+ "learning_rate": 1.1834262819025317e-05,
667
+ "loss": 0.647,
668
+ "step": 188
669
+ },
670
+ {
671
+ "epoch": 0.304,
672
+ "grad_norm": 5.73397970199585,
673
+ "learning_rate": 1.1944049479699241e-05,
674
+ "loss": 0.6609,
675
+ "step": 190
676
+ },
677
+ {
678
+ "epoch": 0.3072,
679
+ "grad_norm": 7.286134719848633,
680
+ "learning_rate": 1.2053593149536557e-05,
681
+ "loss": 0.52,
682
+ "step": 192
683
+ },
684
+ {
685
+ "epoch": 0.3104,
686
+ "grad_norm": 4.6973981857299805,
687
+ "learning_rate": 1.2162880136443434e-05,
688
+ "loss": 0.5437,
689
+ "step": 194
690
+ },
691
+ {
692
+ "epoch": 0.3136,
693
+ "grad_norm": 10.70791244506836,
694
+ "learning_rate": 1.2271896780409309e-05,
695
+ "loss": 0.8167,
696
+ "step": 196
697
+ },
698
+ {
699
+ "epoch": 0.3168,
700
+ "grad_norm": 19.98440933227539,
701
+ "learning_rate": 1.2380629455214385e-05,
702
+ "loss": 1.2987,
703
+ "step": 198
704
+ },
705
+ {
706
+ "epoch": 0.32,
707
+ "grad_norm": 9.09523868560791,
708
+ "learning_rate": 1.2489064570132761e-05,
709
+ "loss": 0.653,
710
+ "step": 200
711
+ },
712
+ {
713
+ "epoch": 0.3232,
714
+ "grad_norm": 4.038908004760742,
715
+ "learning_rate": 1.259718857163117e-05,
716
+ "loss": 0.3572,
717
+ "step": 202
718
+ },
719
+ {
720
+ "epoch": 0.3264,
721
+ "grad_norm": 7.391031265258789,
722
+ "learning_rate": 1.2704987945063073e-05,
723
+ "loss": 0.5846,
724
+ "step": 204
725
+ },
726
+ {
727
+ "epoch": 0.3296,
728
+ "grad_norm": 12.767005920410156,
729
+ "learning_rate": 1.2812449216357855e-05,
730
+ "loss": 0.5314,
731
+ "step": 206
732
+ },
733
+ {
734
+ "epoch": 0.3328,
735
+ "grad_norm": 7.9224395751953125,
736
+ "learning_rate": 1.2919558953705047e-05,
737
+ "loss": 0.4724,
738
+ "step": 208
739
+ },
740
+ {
741
+ "epoch": 0.336,
742
+ "grad_norm": 8.15713119506836,
743
+ "learning_rate": 1.3026303769233109e-05,
744
+ "loss": 0.6462,
745
+ "step": 210
746
+ },
747
+ {
748
+ "epoch": 0.3392,
749
+ "grad_norm": 10.181936264038086,
750
+ "learning_rate": 1.313267032068285e-05,
751
+ "loss": 0.6951,
752
+ "step": 212
753
+ },
754
+ {
755
+ "epoch": 0.3424,
756
+ "grad_norm": 14.465421676635742,
757
+ "learning_rate": 1.3238645313075109e-05,
758
+ "loss": 0.8577,
759
+ "step": 214
760
+ },
761
+ {
762
+ "epoch": 0.3456,
763
+ "grad_norm": 4.582299709320068,
764
+ "learning_rate": 1.3344215500372517e-05,
765
+ "loss": 0.3597,
766
+ "step": 216
767
+ },
768
+ {
769
+ "epoch": 0.3488,
770
+ "grad_norm": 5.042653560638428,
771
+ "learning_rate": 1.344936768713513e-05,
772
+ "loss": 0.5193,
773
+ "step": 218
774
+ },
775
+ {
776
+ "epoch": 0.352,
777
+ "grad_norm": 9.032974243164062,
778
+ "learning_rate": 1.3554088730169812e-05,
779
+ "loss": 0.627,
780
+ "step": 220
781
+ },
782
+ {
783
+ "epoch": 0.3552,
784
+ "grad_norm": 9.70946979522705,
785
+ "learning_rate": 1.3658365540172948e-05,
786
+ "loss": 0.4374,
787
+ "step": 222
788
+ },
789
+ {
790
+ "epoch": 0.3584,
791
+ "grad_norm": 6.085891246795654,
792
+ "learning_rate": 1.3762185083366562e-05,
793
+ "loss": 0.3727,
794
+ "step": 224
795
+ },
796
+ {
797
+ "epoch": 0.3616,
798
+ "grad_norm": 6.504804611206055,
799
+ "learning_rate": 1.3865534383127413e-05,
800
+ "loss": 0.6068,
801
+ "step": 226
802
+ },
803
+ {
804
+ "epoch": 0.3648,
805
+ "grad_norm": 3.5541486740112305,
806
+ "learning_rate": 1.3968400521608962e-05,
807
+ "loss": 0.7332,
808
+ "step": 228
809
+ },
810
+ {
811
+ "epoch": 0.368,
812
+ "grad_norm": 11.440251350402832,
813
+ "learning_rate": 1.4070770641356069e-05,
814
+ "loss": 0.7768,
815
+ "step": 230
816
+ },
817
+ {
818
+ "epoch": 0.3712,
819
+ "grad_norm": 11.891138076782227,
820
+ "learning_rate": 1.4172631946911964e-05,
821
+ "loss": 0.8255,
822
+ "step": 232
823
+ },
824
+ {
825
+ "epoch": 0.3744,
826
+ "grad_norm": 7.3828840255737305,
827
+ "learning_rate": 1.4273971706417653e-05,
828
+ "loss": 0.3546,
829
+ "step": 234
830
+ },
831
+ {
832
+ "epoch": 0.3776,
833
+ "grad_norm": 4.123514652252197,
834
+ "learning_rate": 1.4374777253203265e-05,
835
+ "loss": 0.4818,
836
+ "step": 236
837
+ },
838
+ {
839
+ "epoch": 0.3808,
840
+ "grad_norm": 4.9134907722473145,
841
+ "learning_rate": 1.4475035987371348e-05,
842
+ "loss": 0.3604,
843
+ "step": 238
844
+ },
845
+ {
846
+ "epoch": 0.384,
847
+ "grad_norm": 5.931992053985596,
848
+ "learning_rate": 1.4574735377371669e-05,
849
+ "loss": 0.6014,
850
+ "step": 240
851
+ },
852
+ {
853
+ "epoch": 0.3872,
854
+ "grad_norm": 8.341025352478027,
855
+ "learning_rate": 1.4673862961567604e-05,
856
+ "loss": 0.4926,
857
+ "step": 242
858
+ },
859
+ {
860
+ "epoch": 0.3904,
861
+ "grad_norm": 6.980299949645996,
862
+ "learning_rate": 1.4772406349793749e-05,
863
+ "loss": 0.7693,
864
+ "step": 244
865
+ },
866
+ {
867
+ "epoch": 0.3936,
868
+ "grad_norm": 7.777401447296143,
869
+ "learning_rate": 1.4870353224904563e-05,
870
+ "loss": 0.4093,
871
+ "step": 246
872
+ },
873
+ {
874
+ "epoch": 0.3968,
875
+ "grad_norm": 10.939884185791016,
876
+ "learning_rate": 1.4967691344313988e-05,
877
+ "loss": 0.7075,
878
+ "step": 248
879
+ },
880
+ {
881
+ "epoch": 0.4,
882
+ "grad_norm": 10.896389961242676,
883
+ "learning_rate": 1.5064408541525568e-05,
884
+ "loss": 0.6076,
885
+ "step": 250
886
+ },
887
+ {
888
+ "epoch": 0.4032,
889
+ "grad_norm": 12.854388236999512,
890
+ "learning_rate": 1.5160492727653245e-05,
891
+ "loss": 0.9032,
892
+ "step": 252
893
+ },
894
+ {
895
+ "epoch": 0.4064,
896
+ "grad_norm": 4.0022382736206055,
897
+ "learning_rate": 1.5255931892932322e-05,
898
+ "loss": 0.3992,
899
+ "step": 254
900
+ },
901
+ {
902
+ "epoch": 0.4096,
903
+ "grad_norm": 12.87756061553955,
904
+ "learning_rate": 1.5350714108220667e-05,
905
+ "loss": 0.5944,
906
+ "step": 256
907
+ },
908
+ {
909
+ "epoch": 0.4128,
910
+ "grad_norm": 12.931772232055664,
911
+ "learning_rate": 1.5444827526489668e-05,
912
+ "loss": 0.8289,
913
+ "step": 258
914
+ },
915
+ {
916
+ "epoch": 0.416,
917
+ "grad_norm": 5.9502153396606445,
918
+ "learning_rate": 1.5538260384305073e-05,
919
+ "loss": 0.8897,
920
+ "step": 260
921
+ },
922
+ {
923
+ "epoch": 0.4192,
924
+ "grad_norm": 6.152744293212891,
925
+ "learning_rate": 1.563100100329731e-05,
926
+ "loss": 0.6036,
927
+ "step": 262
928
+ },
929
+ {
930
+ "epoch": 0.4224,
931
+ "grad_norm": 7.169362545013428,
932
+ "learning_rate": 1.572303779162118e-05,
933
+ "loss": 0.4665,
934
+ "step": 264
935
+ },
936
+ {
937
+ "epoch": 0.4256,
938
+ "grad_norm": 25.105201721191406,
939
+ "learning_rate": 1.581435924540481e-05,
940
+ "loss": 1.1965,
941
+ "step": 266
942
+ },
943
+ {
944
+ "epoch": 0.4288,
945
+ "grad_norm": 5.533500671386719,
946
+ "learning_rate": 1.5904953950187455e-05,
947
+ "loss": 0.7597,
948
+ "step": 268
949
+ },
950
+ {
951
+ "epoch": 0.432,
952
+ "grad_norm": 8.768479347229004,
953
+ "learning_rate": 1.599481058234626e-05,
954
+ "loss": 0.6425,
955
+ "step": 270
956
+ },
957
+ {
958
+ "epoch": 0.4352,
959
+ "grad_norm": 10.155613899230957,
960
+ "learning_rate": 1.6083917910511616e-05,
961
+ "loss": 0.5022,
962
+ "step": 272
963
+ },
964
+ {
965
+ "epoch": 0.4384,
966
+ "grad_norm": 6.755585193634033,
967
+ "learning_rate": 1.617226479697104e-05,
968
+ "loss": 0.4028,
969
+ "step": 274
970
+ },
971
+ {
972
+ "epoch": 0.4416,
973
+ "grad_norm": 10.291326522827148,
974
+ "learning_rate": 1.6259840199061212e-05,
975
+ "loss": 0.6177,
976
+ "step": 276
977
+ },
978
+ {
979
+ "epoch": 0.4448,
980
+ "grad_norm": 9.281944274902344,
981
+ "learning_rate": 1.6346633170548285e-05,
982
+ "loss": 0.6795,
983
+ "step": 278
984
+ },
985
+ {
986
+ "epoch": 0.448,
987
+ "grad_norm": 8.107259750366211,
988
+ "learning_rate": 1.6432632862996042e-05,
989
+ "loss": 0.5509,
990
+ "step": 280
991
+ },
992
+ {
993
+ "epoch": 0.4512,
994
+ "grad_norm": 15.570850372314453,
995
+ "learning_rate": 1.6517828527121928e-05,
996
+ "loss": 0.7286,
997
+ "step": 282
998
+ },
999
+ {
1000
+ "epoch": 0.4544,
1001
+ "grad_norm": 8.614456176757812,
1002
+ "learning_rate": 1.6602209514140542e-05,
1003
+ "loss": 0.8644,
1004
+ "step": 284
1005
+ },
1006
+ {
1007
+ "epoch": 0.4576,
1008
+ "grad_norm": 2.1356348991394043,
1009
+ "learning_rate": 1.6685765277094695e-05,
1010
+ "loss": 0.3399,
1011
+ "step": 286
1012
+ },
1013
+ {
1014
+ "epoch": 0.4608,
1015
+ "grad_norm": 4.229339599609375,
1016
+ "learning_rate": 1.6768485372173696e-05,
1017
+ "loss": 0.4909,
1018
+ "step": 288
1019
+ },
1020
+ {
1021
+ "epoch": 0.464,
1022
+ "grad_norm": 4.009982585906982,
1023
+ "learning_rate": 1.6850359460018733e-05,
1024
+ "loss": 0.8696,
1025
+ "step": 290
1026
+ },
1027
+ {
1028
+ "epoch": 0.4672,
1029
+ "grad_norm": 4.7873101234436035,
1030
+ "learning_rate": 1.6931377307015226e-05,
1031
+ "loss": 0.4585,
1032
+ "step": 292
1033
+ },
1034
+ {
1035
+ "epoch": 0.4704,
1036
+ "grad_norm": 7.444589138031006,
1037
+ "learning_rate": 1.701152878657196e-05,
1038
+ "loss": 0.858,
1039
+ "step": 294
1040
+ },
1041
+ {
1042
+ "epoch": 0.4736,
1043
+ "grad_norm": 9.475300788879395,
1044
+ "learning_rate": 1.7090803880386778e-05,
1045
+ "loss": 0.591,
1046
+ "step": 296
1047
+ },
1048
+ {
1049
+ "epoch": 0.4768,
1050
+ "grad_norm": 7.943042278289795,
1051
+ "learning_rate": 1.716919267969883e-05,
1052
+ "loss": 0.7363,
1053
+ "step": 298
1054
+ },
1055
+ {
1056
+ "epoch": 0.48,
1057
+ "grad_norm": 7.825207233428955,
1058
+ "learning_rate": 1.7246685386527095e-05,
1059
+ "loss": 0.6927,
1060
+ "step": 300
1061
+ },
1062
+ {
1063
+ "epoch": 0.4832,
1064
+ "grad_norm": 3.181072473526001,
1065
+ "learning_rate": 1.7323272314895022e-05,
1066
+ "loss": 0.5384,
1067
+ "step": 302
1068
+ },
1069
+ {
1070
+ "epoch": 0.4864,
1071
+ "grad_norm": 8.772839546203613,
1072
+ "learning_rate": 1.7398943892041227e-05,
1073
+ "loss": 0.6545,
1074
+ "step": 304
1075
+ },
1076
+ {
1077
+ "epoch": 0.4896,
1078
+ "grad_norm": 9.947606086730957,
1079
+ "learning_rate": 1.7473690659615992e-05,
1080
+ "loss": 0.5997,
1081
+ "step": 306
1082
+ },
1083
+ {
1084
+ "epoch": 0.4928,
1085
+ "grad_norm": 7.244208335876465,
1086
+ "learning_rate": 1.7547503274863495e-05,
1087
+ "loss": 0.4672,
1088
+ "step": 308
1089
+ },
1090
+ {
1091
+ "epoch": 0.496,
1092
+ "grad_norm": 5.831227779388428,
1093
+ "learning_rate": 1.7620372511789604e-05,
1094
+ "loss": 0.6727,
1095
+ "step": 310
1096
+ },
1097
+ {
1098
+ "epoch": 0.4992,
1099
+ "grad_norm": 11.163945198059082,
1100
+ "learning_rate": 1.7692289262315e-05,
1101
+ "loss": 0.8782,
1102
+ "step": 312
1103
+ },
1104
+ {
1105
+ "epoch": 0.5024,
1106
+ "grad_norm": 5.730398654937744,
1107
+ "learning_rate": 1.7763244537413657e-05,
1108
+ "loss": 0.4557,
1109
+ "step": 314
1110
+ },
1111
+ {
1112
+ "epoch": 0.5056,
1113
+ "grad_norm": 8.259847640991211,
1114
+ "learning_rate": 1.7833229468236364e-05,
1115
+ "loss": 0.463,
1116
+ "step": 316
1117
+ },
1118
+ {
1119
+ "epoch": 0.5088,
1120
+ "grad_norm": 13.202224731445312,
1121
+ "learning_rate": 1.790223530721933e-05,
1122
+ "loss": 0.5084,
1123
+ "step": 318
1124
+ },
1125
+ {
1126
+ "epoch": 0.512,
1127
+ "grad_norm": 5.565632343292236,
1128
+ "learning_rate": 1.7970253429177477e-05,
1129
+ "loss": 0.4924,
1130
+ "step": 320
1131
+ },
1132
+ {
1133
+ "epoch": 0.5152,
1134
+ "grad_norm": 9.58140754699707,
1135
+ "learning_rate": 1.803727533238257e-05,
1136
+ "loss": 0.387,
1137
+ "step": 322
1138
+ },
1139
+ {
1140
+ "epoch": 0.5184,
1141
+ "grad_norm": 6.084507465362549,
1142
+ "learning_rate": 1.8103292639625842e-05,
1143
+ "loss": 0.4273,
1144
+ "step": 324
1145
+ },
1146
+ {
1147
+ "epoch": 0.5216,
1148
+ "grad_norm": 35.10951232910156,
1149
+ "learning_rate": 1.816829709926509e-05,
1150
+ "loss": 2.1971,
1151
+ "step": 326
1152
+ },
1153
+ {
1154
+ "epoch": 0.5248,
1155
+ "grad_norm": 2.700080394744873,
1156
+ "learning_rate": 1.8232280586256097e-05,
1157
+ "loss": 0.3914,
1158
+ "step": 328
1159
+ },
1160
+ {
1161
+ "epoch": 0.528,
1162
+ "grad_norm": 6.533969879150391,
1163
+ "learning_rate": 1.829523510316813e-05,
1164
+ "loss": 0.4316,
1165
+ "step": 330
1166
+ },
1167
+ {
1168
+ "epoch": 0.5312,
1169
+ "grad_norm": 6.787297248840332,
1170
+ "learning_rate": 1.8357152781183606e-05,
1171
+ "loss": 0.6538,
1172
+ "step": 332
1173
+ },
1174
+ {
1175
+ "epoch": 0.5344,
1176
+ "grad_norm": 6.87823486328125,
1177
+ "learning_rate": 1.8418025881081606e-05,
1178
+ "loss": 0.6642,
1179
+ "step": 334
1180
+ },
1181
+ {
1182
+ "epoch": 0.5376,
1183
+ "grad_norm": 4.521157741546631,
1184
+ "learning_rate": 1.8477846794205258e-05,
1185
+ "loss": 0.5047,
1186
+ "step": 336
1187
+ },
1188
+ {
1189
+ "epoch": 0.5408,
1190
+ "grad_norm": 2.835745334625244,
1191
+ "learning_rate": 1.8536608043412695e-05,
1192
+ "loss": 0.5598,
1193
+ "step": 338
1194
+ },
1195
+ {
1196
+ "epoch": 0.544,
1197
+ "grad_norm": 7.604702472686768,
1198
+ "learning_rate": 1.85943022840117e-05,
1199
+ "loss": 0.9549,
1200
+ "step": 340
1201
+ },
1202
+ {
1203
+ "epoch": 0.5472,
1204
+ "grad_norm": 5.502391815185547,
1205
+ "learning_rate": 1.865092230467769e-05,
1206
+ "loss": 0.5589,
1207
+ "step": 342
1208
+ },
1209
+ {
1210
+ "epoch": 0.5504,
1211
+ "grad_norm": 5.125470161437988,
1212
+ "learning_rate": 1.87064610283551e-05,
1213
+ "loss": 0.3865,
1214
+ "step": 344
1215
+ },
1216
+ {
1217
+ "epoch": 0.5536,
1218
+ "grad_norm": 15.728462219238281,
1219
+ "learning_rate": 1.876091151314196e-05,
1220
+ "loss": 0.6609,
1221
+ "step": 346
1222
+ },
1223
+ {
1224
+ "epoch": 0.5568,
1225
+ "grad_norm": 5.035003185272217,
1226
+ "learning_rate": 1.8814266953157557e-05,
1227
+ "loss": 0.3558,
1228
+ "step": 348
1229
+ },
1230
+ {
1231
+ "epoch": 0.56,
1232
+ "grad_norm": 2.466336965560913,
1233
+ "learning_rate": 1.8866520679393127e-05,
1234
+ "loss": 0.3271,
1235
+ "step": 350
1236
+ },
1237
+ {
1238
+ "epoch": 0.5632,
1239
+ "grad_norm": 4.039994239807129,
1240
+ "learning_rate": 1.8917666160545436e-05,
1241
+ "loss": 0.3596,
1242
+ "step": 352
1243
+ },
1244
+ {
1245
+ "epoch": 0.5664,
1246
+ "grad_norm": 5.38749361038208,
1247
+ "learning_rate": 1.896769700383315e-05,
1248
+ "loss": 0.6196,
1249
+ "step": 354
1250
+ },
1251
+ {
1252
+ "epoch": 0.5696,
1253
+ "grad_norm": 4.640627384185791,
1254
+ "learning_rate": 1.901660695579585e-05,
1255
+ "loss": 0.4562,
1256
+ "step": 356
1257
+ },
1258
+ {
1259
+ "epoch": 0.5728,
1260
+ "grad_norm": 6.774022102355957,
1261
+ "learning_rate": 1.9064389903075676e-05,
1262
+ "loss": 0.6438,
1263
+ "step": 358
1264
+ },
1265
+ {
1266
+ "epoch": 0.576,
1267
+ "grad_norm": 2.7814035415649414,
1268
+ "learning_rate": 1.911103987318148e-05,
1269
+ "loss": 0.3665,
1270
+ "step": 360
1271
+ },
1272
+ {
1273
+ "epoch": 0.5792,
1274
+ "grad_norm": 2.988558053970337,
1275
+ "learning_rate": 1.9156551035235288e-05,
1276
+ "loss": 0.3287,
1277
+ "step": 362
1278
+ },
1279
+ {
1280
+ "epoch": 0.5824,
1281
+ "grad_norm": 11.730210304260254,
1282
+ "learning_rate": 1.9200917700701173e-05,
1283
+ "loss": 0.8063,
1284
+ "step": 364
1285
+ },
1286
+ {
1287
+ "epoch": 0.5856,
1288
+ "grad_norm": 3.510537624359131,
1289
+ "learning_rate": 1.924413432409622e-05,
1290
+ "loss": 1.8139,
1291
+ "step": 366
1292
+ },
1293
+ {
1294
+ "epoch": 0.5888,
1295
+ "grad_norm": 3.427419662475586,
1296
+ "learning_rate": 1.9286195503683705e-05,
1297
+ "loss": 0.6188,
1298
+ "step": 368
1299
+ },
1300
+ {
1301
+ "epoch": 0.592,
1302
+ "grad_norm": 2.8159828186035156,
1303
+ "learning_rate": 1.932709598214825e-05,
1304
+ "loss": 0.3593,
1305
+ "step": 370
1306
+ },
1307
+ {
1308
+ "epoch": 0.5952,
1309
+ "grad_norm": 12.421769142150879,
1310
+ "learning_rate": 1.9366830647252967e-05,
1311
+ "loss": 0.6811,
1312
+ "step": 372
1313
+ },
1314
+ {
1315
+ "epoch": 0.5984,
1316
+ "grad_norm": 7.060184001922607,
1317
+ "learning_rate": 1.940539453247842e-05,
1318
+ "loss": 0.6729,
1319
+ "step": 374
1320
+ },
1321
+ {
1322
+ "epoch": 0.6016,
1323
+ "grad_norm": 3.2482755184173584,
1324
+ "learning_rate": 1.944278281764342e-05,
1325
+ "loss": 0.598,
1326
+ "step": 376
1327
+ },
1328
+ {
1329
+ "epoch": 0.6048,
1330
+ "grad_norm": 12.445656776428223,
1331
+ "learning_rate": 1.9478990829507504e-05,
1332
+ "loss": 0.9253,
1333
+ "step": 378
1334
+ },
1335
+ {
1336
+ "epoch": 0.608,
1337
+ "grad_norm": 7.116509437561035,
1338
+ "learning_rate": 1.951401404235505e-05,
1339
+ "loss": 1.1703,
1340
+ "step": 380
1341
+ },
1342
+ {
1343
+ "epoch": 0.6112,
1344
+ "grad_norm": 4.388617992401123,
1345
+ "learning_rate": 1.9547848078560975e-05,
1346
+ "loss": 0.3192,
1347
+ "step": 382
1348
+ },
1349
+ {
1350
+ "epoch": 0.6144,
1351
+ "grad_norm": 8.191654205322266,
1352
+ "learning_rate": 1.9580488709137858e-05,
1353
+ "loss": 0.7508,
1354
+ "step": 384
1355
+ },
1356
+ {
1357
+ "epoch": 0.6176,
1358
+ "grad_norm": 6.881166934967041,
1359
+ "learning_rate": 1.961193185426459e-05,
1360
+ "loss": 0.8673,
1361
+ "step": 386
1362
+ },
1363
+ {
1364
+ "epoch": 0.6208,
1365
+ "grad_norm": 16.268980026245117,
1366
+ "learning_rate": 1.9642173583796265e-05,
1367
+ "loss": 0.7658,
1368
+ "step": 388
1369
+ },
1370
+ {
1371
+ "epoch": 0.624,
1372
+ "grad_norm": 5.663888931274414,
1373
+ "learning_rate": 1.967121011775546e-05,
1374
+ "loss": 0.471,
1375
+ "step": 390
1376
+ },
1377
+ {
1378
+ "epoch": 0.6272,
1379
+ "grad_norm": 7.135434627532959,
1380
+ "learning_rate": 1.969903782680467e-05,
1381
+ "loss": 0.7198,
1382
+ "step": 392
1383
+ },
1384
+ {
1385
+ "epoch": 0.6304,
1386
+ "grad_norm": 6.507286548614502,
1387
+ "learning_rate": 1.9725653232699962e-05,
1388
+ "loss": 0.4459,
1389
+ "step": 394
1390
+ },
1391
+ {
1392
+ "epoch": 0.6336,
1393
+ "grad_norm": 4.37092924118042,
1394
+ "learning_rate": 1.9751053008725736e-05,
1395
+ "loss": 0.4201,
1396
+ "step": 396
1397
+ },
1398
+ {
1399
+ "epoch": 0.6368,
1400
+ "grad_norm": 10.315455436706543,
1401
+ "learning_rate": 1.9775233980110524e-05,
1402
+ "loss": 0.8372,
1403
+ "step": 398
1404
+ },
1405
+ {
1406
+ "epoch": 0.64,
1407
+ "grad_norm": 5.6190385818481445,
1408
+ "learning_rate": 1.9798193124423804e-05,
1409
+ "loss": 0.4728,
1410
+ "step": 400
1411
+ },
1412
+ {
1413
+ "epoch": 0.6432,
1414
+ "grad_norm": 12.238266944885254,
1415
+ "learning_rate": 1.9819927571953807e-05,
1416
+ "loss": 0.9893,
1417
+ "step": 402
1418
+ },
1419
+ {
1420
+ "epoch": 0.6464,
1421
+ "grad_norm": 6.412506103515625,
1422
+ "learning_rate": 1.9840434606066182e-05,
1423
+ "loss": 0.5909,
1424
+ "step": 404
1425
+ },
1426
+ {
1427
+ "epoch": 0.6496,
1428
+ "grad_norm": 6.781770706176758,
1429
+ "learning_rate": 1.985971166354357e-05,
1430
+ "loss": 0.5031,
1431
+ "step": 406
1432
+ },
1433
+ {
1434
+ "epoch": 0.6528,
1435
+ "grad_norm": 11.029583930969238,
1436
+ "learning_rate": 1.9877756334905983e-05,
1437
+ "loss": 0.8005,
1438
+ "step": 408
1439
+ },
1440
+ {
1441
+ "epoch": 0.656,
1442
+ "grad_norm": 5.955713272094727,
1443
+ "learning_rate": 1.9894566364711965e-05,
1444
+ "loss": 2.9778,
1445
+ "step": 410
1446
+ },
1447
+ {
1448
+ "epoch": 0.6592,
1449
+ "grad_norm": 5.804037094116211,
1450
+ "learning_rate": 1.99101396518405e-05,
1451
+ "loss": 0.4442,
1452
+ "step": 412
1453
+ },
1454
+ {
1455
+ "epoch": 0.6624,
1456
+ "grad_norm": 5.956245422363281,
1457
+ "learning_rate": 1.9924474249753652e-05,
1458
+ "loss": 0.4676,
1459
+ "step": 414
1460
+ },
1461
+ {
1462
+ "epoch": 0.6656,
1463
+ "grad_norm": 4.380008697509766,
1464
+ "learning_rate": 1.9937568366739858e-05,
1465
+ "loss": 0.5896,
1466
+ "step": 416
1467
+ },
1468
+ {
1469
+ "epoch": 0.6688,
1470
+ "grad_norm": 3.2874741554260254,
1471
+ "learning_rate": 1.994942036613787e-05,
1472
+ "loss": 0.3899,
1473
+ "step": 418
1474
+ },
1475
+ {
1476
+ "epoch": 0.672,
1477
+ "grad_norm": 4.871149063110352,
1478
+ "learning_rate": 1.9960028766541336e-05,
1479
+ "loss": 0.3007,
1480
+ "step": 420
1481
+ },
1482
+ {
1483
+ "epoch": 0.6752,
1484
+ "grad_norm": 5.472265720367432,
1485
+ "learning_rate": 1.9969392241983957e-05,
1486
+ "loss": 0.5545,
1487
+ "step": 422
1488
+ },
1489
+ {
1490
+ "epoch": 0.6784,
1491
+ "grad_norm": 4.547043800354004,
1492
+ "learning_rate": 1.9977509622105233e-05,
1493
+ "loss": 0.5253,
1494
+ "step": 424
1495
+ },
1496
+ {
1497
+ "epoch": 0.6816,
1498
+ "grad_norm": 2.6497678756713867,
1499
+ "learning_rate": 1.998437989229673e-05,
1500
+ "loss": 0.5096,
1501
+ "step": 426
1502
+ },
1503
+ {
1504
+ "epoch": 0.6848,
1505
+ "grad_norm": 8.168057441711426,
1506
+ "learning_rate": 1.9990002193828923e-05,
1507
+ "loss": 0.8695,
1508
+ "step": 428
1509
+ },
1510
+ {
1511
+ "epoch": 0.688,
1512
+ "grad_norm": 6.835053443908691,
1513
+ "learning_rate": 1.9994375823958504e-05,
1514
+ "loss": 0.6131,
1515
+ "step": 430
1516
+ },
1517
+ {
1518
+ "epoch": 0.6912,
1519
+ "grad_norm": 4.14987325668335,
1520
+ "learning_rate": 1.9997500236016233e-05,
1521
+ "loss": 0.7508,
1522
+ "step": 432
1523
+ },
1524
+ {
1525
+ "epoch": 0.6944,
1526
+ "grad_norm": 2.14447021484375,
1527
+ "learning_rate": 1.9999375039475275e-05,
1528
+ "loss": 0.3071,
1529
+ "step": 434
1530
+ },
1531
+ {
1532
+ "epoch": 0.6976,
1533
+ "grad_norm": 7.206055641174316,
1534
+ "learning_rate": 2e-05,
1535
+ "loss": 0.3932,
1536
+ "step": 436
1537
+ },
1538
+ {
1539
+ "epoch": 0.7008,
1540
+ "grad_norm": 5.099971771240234,
1541
+ "learning_rate": 1.9999375039475278e-05,
1542
+ "loss": 0.338,
1543
+ "step": 438
1544
+ },
1545
+ {
1546
+ "epoch": 0.704,
1547
+ "grad_norm": 5.983880996704102,
1548
+ "learning_rate": 1.9997500236016233e-05,
1549
+ "loss": 0.55,
1550
+ "step": 440
1551
+ },
1552
+ {
1553
+ "epoch": 0.7072,
1554
+ "grad_norm": 1.6342906951904297,
1555
+ "learning_rate": 1.9994375823958504e-05,
1556
+ "loss": 0.3481,
1557
+ "step": 442
1558
+ },
1559
+ {
1560
+ "epoch": 0.7104,
1561
+ "grad_norm": 3.592808485031128,
1562
+ "learning_rate": 1.9990002193828923e-05,
1563
+ "loss": 0.4214,
1564
+ "step": 444
1565
+ },
1566
+ {
1567
+ "epoch": 0.7136,
1568
+ "grad_norm": 5.754815578460693,
1569
+ "learning_rate": 1.9984379892296735e-05,
1570
+ "loss": 0.4716,
1571
+ "step": 446
1572
+ },
1573
+ {
1574
+ "epoch": 0.7168,
1575
+ "grad_norm": 3.611063003540039,
1576
+ "learning_rate": 1.9977509622105236e-05,
1577
+ "loss": 0.3382,
1578
+ "step": 448
1579
+ },
1580
+ {
1581
+ "epoch": 0.72,
1582
+ "grad_norm": 6.442031383514404,
1583
+ "learning_rate": 1.9969392241983957e-05,
1584
+ "loss": 0.4637,
1585
+ "step": 450
1586
+ },
1587
+ {
1588
+ "epoch": 0.7232,
1589
+ "grad_norm": 4.785464763641357,
1590
+ "learning_rate": 1.9960028766541336e-05,
1591
+ "loss": 0.4822,
1592
+ "step": 452
1593
+ },
1594
+ {
1595
+ "epoch": 0.7264,
1596
+ "grad_norm": 4.596600532531738,
1597
+ "learning_rate": 1.9949420366137873e-05,
1598
+ "loss": 0.4387,
1599
+ "step": 454
1600
+ },
1601
+ {
1602
+ "epoch": 0.7296,
1603
+ "grad_norm": 3.8151254653930664,
1604
+ "learning_rate": 1.993756836673986e-05,
1605
+ "loss": 0.3749,
1606
+ "step": 456
1607
+ },
1608
+ {
1609
+ "epoch": 0.7328,
1610
+ "grad_norm": 4.351461887359619,
1611
+ "learning_rate": 1.9924474249753656e-05,
1612
+ "loss": 0.4763,
1613
+ "step": 458
1614
+ },
1615
+ {
1616
+ "epoch": 0.736,
1617
+ "grad_norm": 7.7334418296813965,
1618
+ "learning_rate": 1.9910139651840497e-05,
1619
+ "loss": 0.4282,
1620
+ "step": 460
1621
+ },
1622
+ {
1623
+ "epoch": 0.7392,
1624
+ "grad_norm": 5.742446422576904,
1625
+ "learning_rate": 1.9894566364711965e-05,
1626
+ "loss": 0.417,
1627
+ "step": 462
1628
+ },
1629
+ {
1630
+ "epoch": 0.7424,
1631
+ "grad_norm": 19.029937744140625,
1632
+ "learning_rate": 1.987775633490599e-05,
1633
+ "loss": 1.3878,
1634
+ "step": 464
1635
+ },
1636
+ {
1637
+ "epoch": 0.7456,
1638
+ "grad_norm": 9.348970413208008,
1639
+ "learning_rate": 1.9859711663543573e-05,
1640
+ "loss": 0.736,
1641
+ "step": 466
1642
+ },
1643
+ {
1644
+ "epoch": 0.7488,
1645
+ "grad_norm": 5.024008750915527,
1646
+ "learning_rate": 1.9840434606066186e-05,
1647
+ "loss": 0.328,
1648
+ "step": 468
1649
+ },
1650
+ {
1651
+ "epoch": 0.752,
1652
+ "grad_norm": 9.727663040161133,
1653
+ "learning_rate": 1.9819927571953804e-05,
1654
+ "loss": 1.1213,
1655
+ "step": 470
1656
+ },
1657
+ {
1658
+ "epoch": 0.7552,
1659
+ "grad_norm": 9.793577194213867,
1660
+ "learning_rate": 1.9798193124423804e-05,
1661
+ "loss": 0.4617,
1662
+ "step": 472
1663
+ },
1664
+ {
1665
+ "epoch": 0.7584,
1666
+ "grad_norm": 3.6658005714416504,
1667
+ "learning_rate": 1.9775233980110524e-05,
1668
+ "loss": 0.3419,
1669
+ "step": 474
1670
+ },
1671
+ {
1672
+ "epoch": 0.7616,
1673
+ "grad_norm": 7.4308085441589355,
1674
+ "learning_rate": 1.9751053008725736e-05,
1675
+ "loss": 0.545,
1676
+ "step": 476
1677
+ },
1678
+ {
1679
+ "epoch": 0.7648,
1680
+ "grad_norm": 2.164323329925537,
1681
+ "learning_rate": 1.9725653232699962e-05,
1682
+ "loss": 0.47,
1683
+ "step": 478
1684
+ },
1685
+ {
1686
+ "epoch": 0.768,
1687
+ "grad_norm": 6.013500690460205,
1688
+ "learning_rate": 1.969903782680467e-05,
1689
+ "loss": 0.4348,
1690
+ "step": 480
1691
+ },
1692
+ {
1693
+ "epoch": 0.7712,
1694
+ "grad_norm": 6.188719749450684,
1695
+ "learning_rate": 1.9671210117755462e-05,
1696
+ "loss": 0.7735,
1697
+ "step": 482
1698
+ },
1699
+ {
1700
+ "epoch": 0.7744,
1701
+ "grad_norm": 7.753485679626465,
1702
+ "learning_rate": 1.9642173583796265e-05,
1703
+ "loss": 0.5738,
1704
+ "step": 484
1705
+ },
1706
+ {
1707
+ "epoch": 0.7776,
1708
+ "grad_norm": 6.78783655166626,
1709
+ "learning_rate": 1.961193185426459e-05,
1710
+ "loss": 0.7013,
1711
+ "step": 486
1712
+ },
1713
+ {
1714
+ "epoch": 0.7808,
1715
+ "grad_norm": 2.9399425983428955,
1716
+ "learning_rate": 1.958048870913786e-05,
1717
+ "loss": 0.5571,
1718
+ "step": 488
1719
+ },
1720
+ {
1721
+ "epoch": 0.784,
1722
+ "grad_norm": 9.357385635375977,
1723
+ "learning_rate": 1.9547848078560982e-05,
1724
+ "loss": 0.7407,
1725
+ "step": 490
1726
+ },
1727
+ {
1728
+ "epoch": 0.7872,
1729
+ "grad_norm": 7.049064636230469,
1730
+ "learning_rate": 1.9514014042355054e-05,
1731
+ "loss": 0.5239,
1732
+ "step": 492
1733
+ },
1734
+ {
1735
+ "epoch": 0.7904,
1736
+ "grad_norm": 6.75682258605957,
1737
+ "learning_rate": 1.947899082950751e-05,
1738
+ "loss": 0.4966,
1739
+ "step": 494
1740
+ },
1741
+ {
1742
+ "epoch": 0.7936,
1743
+ "grad_norm": 5.613454818725586,
1744
+ "learning_rate": 1.9442782817643425e-05,
1745
+ "loss": 0.445,
1746
+ "step": 496
1747
+ },
1748
+ {
1749
+ "epoch": 0.7968,
1750
+ "grad_norm": 3.580491065979004,
1751
+ "learning_rate": 1.9405394532478422e-05,
1752
+ "loss": 0.2899,
1753
+ "step": 498
1754
+ },
1755
+ {
1756
+ "epoch": 0.8,
1757
+ "grad_norm": 4.453496932983398,
1758
+ "learning_rate": 1.9366830647252977e-05,
1759
+ "loss": 0.7,
1760
+ "step": 500
1761
+ },
1762
+ {
1763
+ "epoch": 0.8032,
1764
+ "grad_norm": 5.264734268188477,
1765
+ "learning_rate": 1.9327095982148255e-05,
1766
+ "loss": 0.3101,
1767
+ "step": 502
1768
+ },
1769
+ {
1770
+ "epoch": 0.8064,
1771
+ "grad_norm": 10.876425743103027,
1772
+ "learning_rate": 1.928619550368371e-05,
1773
+ "loss": 0.7457,
1774
+ "step": 504
1775
+ },
1776
+ {
1777
+ "epoch": 0.8096,
1778
+ "grad_norm": 7.00797700881958,
1779
+ "learning_rate": 1.9244134324096216e-05,
1780
+ "loss": 0.6821,
1781
+ "step": 506
1782
+ },
1783
+ {
1784
+ "epoch": 0.8128,
1785
+ "grad_norm": 7.558223247528076,
1786
+ "learning_rate": 1.9200917700701176e-05,
1787
+ "loss": 0.5951,
1788
+ "step": 508
1789
+ },
1790
+ {
1791
+ "epoch": 0.816,
1792
+ "grad_norm": 6.367091178894043,
1793
+ "learning_rate": 1.9156551035235298e-05,
1794
+ "loss": 0.5086,
1795
+ "step": 510
1796
+ },
1797
+ {
1798
+ "epoch": 0.8192,
1799
+ "grad_norm": 7.159903526306152,
1800
+ "learning_rate": 1.9111039873181475e-05,
1801
+ "loss": 0.3697,
1802
+ "step": 512
1803
+ },
1804
+ {
1805
+ "epoch": 0.8224,
1806
+ "grad_norm": 8.145002365112305,
1807
+ "learning_rate": 1.9064389903075683e-05,
1808
+ "loss": 0.496,
1809
+ "step": 514
1810
+ },
1811
+ {
1812
+ "epoch": 0.8256,
1813
+ "grad_norm": 3.874540090560913,
1814
+ "learning_rate": 1.9016606955795843e-05,
1815
+ "loss": 0.1227,
1816
+ "step": 516
1817
+ },
1818
+ {
1819
+ "epoch": 0.8288,
1820
+ "grad_norm": 4.757205963134766,
1821
+ "learning_rate": 1.8967697003833156e-05,
1822
+ "loss": 0.6644,
1823
+ "step": 518
1824
+ },
1825
+ {
1826
+ "epoch": 0.832,
1827
+ "grad_norm": 6.409359455108643,
1828
+ "learning_rate": 1.891766616054545e-05,
1829
+ "loss": 0.257,
1830
+ "step": 520
1831
+ },
1832
+ {
1833
+ "epoch": 0.8352,
1834
+ "grad_norm": 4.814531326293945,
1835
+ "learning_rate": 1.8866520679393124e-05,
1836
+ "loss": 0.3543,
1837
+ "step": 522
1838
+ },
1839
+ {
1840
+ "epoch": 0.8384,
1841
+ "grad_norm": 15.44034481048584,
1842
+ "learning_rate": 1.881426695315756e-05,
1843
+ "loss": 1.3733,
1844
+ "step": 524
1845
+ },
1846
+ {
1847
+ "epoch": 0.8416,
1848
+ "grad_norm": 2.6535580158233643,
1849
+ "learning_rate": 1.8760911513141974e-05,
1850
+ "loss": 0.4062,
1851
+ "step": 526
1852
+ },
1853
+ {
1854
+ "epoch": 0.8448,
1855
+ "grad_norm": 3.2737133502960205,
1856
+ "learning_rate": 1.8706461028355107e-05,
1857
+ "loss": 0.45,
1858
+ "step": 528
1859
+ },
1860
+ {
1861
+ "epoch": 0.848,
1862
+ "grad_norm": 6.56961727142334,
1863
+ "learning_rate": 1.86509223046777e-05,
1864
+ "loss": 0.5543,
1865
+ "step": 530
1866
+ },
1867
+ {
1868
+ "epoch": 0.8512,
1869
+ "grad_norm": 2.931555986404419,
1870
+ "learning_rate": 1.8594302284011697e-05,
1871
+ "loss": 0.3695,
1872
+ "step": 532
1873
+ },
1874
+ {
1875
+ "epoch": 0.8544,
1876
+ "grad_norm": 2.997446060180664,
1877
+ "learning_rate": 1.8536608043412702e-05,
1878
+ "loss": 0.6689,
1879
+ "step": 534
1880
+ },
1881
+ {
1882
+ "epoch": 0.8576,
1883
+ "grad_norm": 5.753312587738037,
1884
+ "learning_rate": 1.847784679420527e-05,
1885
+ "loss": 0.4381,
1886
+ "step": 536
1887
+ },
1888
+ {
1889
+ "epoch": 0.8608,
1890
+ "grad_norm": 2.823542594909668,
1891
+ "learning_rate": 1.841802588108161e-05,
1892
+ "loss": 0.4003,
1893
+ "step": 538
1894
+ },
1895
+ {
1896
+ "epoch": 0.864,
1897
+ "grad_norm": 14.417923927307129,
1898
+ "learning_rate": 1.8357152781183613e-05,
1899
+ "loss": 0.934,
1900
+ "step": 540
1901
+ },
1902
+ {
1903
+ "epoch": 0.8672,
1904
+ "grad_norm": 10.664857864379883,
1905
+ "learning_rate": 1.8295235103168128e-05,
1906
+ "loss": 0.7329,
1907
+ "step": 542
1908
+ },
1909
+ {
1910
+ "epoch": 0.8704,
1911
+ "grad_norm": 5.107484817504883,
1912
+ "learning_rate": 1.8232280586256104e-05,
1913
+ "loss": 0.5008,
1914
+ "step": 544
1915
+ },
1916
+ {
1917
+ "epoch": 0.8736,
1918
+ "grad_norm": 3.2516307830810547,
1919
+ "learning_rate": 1.8168297099265108e-05,
1920
+ "loss": 0.3427,
1921
+ "step": 546
1922
+ },
1923
+ {
1924
+ "epoch": 0.8768,
1925
+ "grad_norm": 2.803086042404175,
1926
+ "learning_rate": 1.8103292639625835e-05,
1927
+ "loss": 0.3771,
1928
+ "step": 548
1929
+ },
1930
+ {
1931
+ "epoch": 0.88,
1932
+ "grad_norm": 4.849076271057129,
1933
+ "learning_rate": 1.8037275332382575e-05,
1934
+ "loss": 0.6314,
1935
+ "step": 550
1936
+ },
1937
+ {
1938
+ "epoch": 0.8832,
1939
+ "grad_norm": 4.658120155334473,
1940
+ "learning_rate": 1.7970253429177494e-05,
1941
+ "loss": 0.5067,
1942
+ "step": 552
1943
+ },
1944
+ {
1945
+ "epoch": 0.8864,
1946
+ "grad_norm": 2.672494888305664,
1947
+ "learning_rate": 1.7902235307219336e-05,
1948
+ "loss": 0.394,
1949
+ "step": 554
1950
+ },
1951
+ {
1952
+ "epoch": 0.8896,
1953
+ "grad_norm": 7.755502223968506,
1954
+ "learning_rate": 1.783322946823638e-05,
1955
+ "loss": 0.6454,
1956
+ "step": 556
1957
+ },
1958
+ {
1959
+ "epoch": 0.8928,
1960
+ "grad_norm": 2.106848955154419,
1961
+ "learning_rate": 1.776324453741365e-05,
1962
+ "loss": 0.2759,
1963
+ "step": 558
1964
+ },
1965
+ {
1966
+ "epoch": 0.896,
1967
+ "grad_norm": 3.1528866291046143,
1968
+ "learning_rate": 1.7692289262315008e-05,
1969
+ "loss": 0.4362,
1970
+ "step": 560
1971
+ },
1972
+ {
1973
+ "epoch": 0.8992,
1974
+ "grad_norm": 15.362929344177246,
1975
+ "learning_rate": 1.762037251178961e-05,
1976
+ "loss": 0.8955,
1977
+ "step": 562
1978
+ },
1979
+ {
1980
+ "epoch": 0.9024,
1981
+ "grad_norm": 1.7654240131378174,
1982
+ "learning_rate": 1.7547503274863502e-05,
1983
+ "loss": 0.2897,
1984
+ "step": 564
1985
+ },
1986
+ {
1987
+ "epoch": 0.9056,
1988
+ "grad_norm": 3.3194448947906494,
1989
+ "learning_rate": 1.7473690659616e-05,
1990
+ "loss": 0.2994,
1991
+ "step": 566
1992
+ },
1993
+ {
1994
+ "epoch": 0.9088,
1995
+ "grad_norm": 10.149787902832031,
1996
+ "learning_rate": 1.739894389204122e-05,
1997
+ "loss": 0.6527,
1998
+ "step": 568
1999
+ },
2000
+ {
2001
+ "epoch": 0.912,
2002
+ "grad_norm": 10.171009063720703,
2003
+ "learning_rate": 1.732327231489503e-05,
2004
+ "loss": 0.6712,
2005
+ "step": 570
2006
+ },
2007
+ {
2008
+ "epoch": 0.9152,
2009
+ "grad_norm": 2.9059488773345947,
2010
+ "learning_rate": 1.7246685386527105e-05,
2011
+ "loss": 0.4296,
2012
+ "step": 572
2013
+ },
2014
+ {
2015
+ "epoch": 0.9184,
2016
+ "grad_norm": 8.409585952758789,
2017
+ "learning_rate": 1.716919267969884e-05,
2018
+ "loss": 0.784,
2019
+ "step": 574
2020
+ },
2021
+ {
2022
+ "epoch": 0.9216,
2023
+ "grad_norm": 12.59908390045166,
2024
+ "learning_rate": 1.7090803880386784e-05,
2025
+ "loss": 0.5448,
2026
+ "step": 576
2027
+ },
2028
+ {
2029
+ "epoch": 0.9248,
2030
+ "grad_norm": 6.19038724899292,
2031
+ "learning_rate": 1.701152878657197e-05,
2032
+ "loss": 0.5914,
2033
+ "step": 578
2034
+ },
2035
+ {
2036
+ "epoch": 0.928,
2037
+ "grad_norm": 3.0019984245300293,
2038
+ "learning_rate": 1.6931377307015236e-05,
2039
+ "loss": 0.4509,
2040
+ "step": 580
2041
+ },
2042
+ {
2043
+ "epoch": 0.9312,
2044
+ "grad_norm": 3.466404914855957,
2045
+ "learning_rate": 1.6850359460018744e-05,
2046
+ "loss": 0.4273,
2047
+ "step": 582
2048
+ },
2049
+ {
2050
+ "epoch": 0.9344,
2051
+ "grad_norm": 2.9927778244018555,
2052
+ "learning_rate": 1.67684853721737e-05,
2053
+ "loss": 0.5488,
2054
+ "step": 584
2055
+ },
2056
+ {
2057
+ "epoch": 0.9376,
2058
+ "grad_norm": 2.9774136543273926,
2059
+ "learning_rate": 1.6685765277094702e-05,
2060
+ "loss": 0.4706,
2061
+ "step": 586
2062
+ },
2063
+ {
2064
+ "epoch": 0.9408,
2065
+ "grad_norm": 13.504951477050781,
2066
+ "learning_rate": 1.6602209514140562e-05,
2067
+ "loss": 0.7795,
2068
+ "step": 588
2069
+ },
2070
+ {
2071
+ "epoch": 0.944,
2072
+ "grad_norm": 2.0812973976135254,
2073
+ "learning_rate": 1.651782852712194e-05,
2074
+ "loss": 0.6015,
2075
+ "step": 590
2076
+ },
2077
+ {
2078
+ "epoch": 0.9472,
2079
+ "grad_norm": 8.495684623718262,
2080
+ "learning_rate": 1.6432632862996062e-05,
2081
+ "loss": 0.5996,
2082
+ "step": 592
2083
+ },
2084
+ {
2085
+ "epoch": 0.9504,
2086
+ "grad_norm": 9.915968894958496,
2087
+ "learning_rate": 1.6346633170548275e-05,
2088
+ "loss": 0.4866,
2089
+ "step": 594
2090
+ },
2091
+ {
2092
+ "epoch": 0.9536,
2093
+ "grad_norm": 2.894171953201294,
2094
+ "learning_rate": 1.625984019906122e-05,
2095
+ "loss": 0.7214,
2096
+ "step": 596
2097
+ },
2098
+ {
2099
+ "epoch": 0.9568,
2100
+ "grad_norm": 6.13911771774292,
2101
+ "learning_rate": 1.6172264796971063e-05,
2102
+ "loss": 0.3976,
2103
+ "step": 598
2104
+ },
2105
+ {
2106
+ "epoch": 0.96,
2107
+ "grad_norm": 2.5138468742370605,
2108
+ "learning_rate": 1.6083917910511623e-05,
2109
+ "loss": 0.4245,
2110
+ "step": 600
2111
+ },
2112
+ {
2113
+ "epoch": 0.9632,
2114
+ "grad_norm": 5.338921070098877,
2115
+ "learning_rate": 1.5994810582346266e-05,
2116
+ "loss": 0.4325,
2117
+ "step": 602
2118
+ },
2119
+ {
2120
+ "epoch": 0.9664,
2121
+ "grad_norm": 10.50223445892334,
2122
+ "learning_rate": 1.5904953950187448e-05,
2123
+ "loss": 0.8188,
2124
+ "step": 604
2125
+ },
2126
+ {
2127
+ "epoch": 0.9696,
2128
+ "grad_norm": 7.8410325050354,
2129
+ "learning_rate": 1.581435924540482e-05,
2130
+ "loss": 0.8112,
2131
+ "step": 606
2132
+ },
2133
+ {
2134
+ "epoch": 0.9728,
2135
+ "grad_norm": 7.011837005615234,
2136
+ "learning_rate": 1.5723037791621203e-05,
2137
+ "loss": 0.5045,
2138
+ "step": 608
2139
+ },
2140
+ {
2141
+ "epoch": 0.976,
2142
+ "grad_norm": 4.2811431884765625,
2143
+ "learning_rate": 1.5631001003297302e-05,
2144
+ "loss": 0.5087,
2145
+ "step": 610
2146
+ },
2147
+ {
2148
+ "epoch": 0.9792,
2149
+ "grad_norm": 8.028616905212402,
2150
+ "learning_rate": 1.5538260384305083e-05,
2151
+ "loss": 0.9064,
2152
+ "step": 612
2153
+ },
2154
+ {
2155
+ "epoch": 0.9824,
2156
+ "grad_norm": 3.3687844276428223,
2157
+ "learning_rate": 1.544482752648966e-05,
2158
+ "loss": 0.4295,
2159
+ "step": 614
2160
+ },
2161
+ {
2162
+ "epoch": 0.9856,
2163
+ "grad_norm": 5.316037654876709,
2164
+ "learning_rate": 1.5350714108220677e-05,
2165
+ "loss": 0.5034,
2166
+ "step": 616
2167
+ },
2168
+ {
2169
+ "epoch": 0.9888,
2170
+ "grad_norm": 2.103130340576172,
2171
+ "learning_rate": 1.5255931892932344e-05,
2172
+ "loss": 0.4537,
2173
+ "step": 618
2174
+ },
2175
+ {
2176
+ "epoch": 0.992,
2177
+ "grad_norm": 2.0409934520721436,
2178
+ "learning_rate": 1.5160492727653238e-05,
2179
+ "loss": 0.3681,
2180
+ "step": 620
2181
+ },
2182
+ {
2183
+ "epoch": 0.9952,
2184
+ "grad_norm": 5.530198097229004,
2185
+ "learning_rate": 1.5064408541525578e-05,
2186
+ "loss": 0.4799,
2187
+ "step": 622
2188
+ },
2189
+ {
2190
+ "epoch": 0.9984,
2191
+ "grad_norm": 6.037550926208496,
2192
+ "learning_rate": 1.4967691344314012e-05,
2193
+ "loss": 0.644,
2194
+ "step": 624
2195
+ },
2196
+ {
2197
+ "epoch": 1.0,
2198
+ "step": 625,
2199
+ "total_flos": 2443576741724160.0,
2200
+ "train_loss": 0.6015953216791153,
2201
+ "train_runtime": 4485.8512,
2202
+ "train_samples_per_second": 2.229,
2203
+ "train_steps_per_second": 0.139
2204
+ }
2205
+ ],
2206
+ "logging_steps": 2,
2207
+ "max_steps": 625,
2208
+ "num_input_tokens_seen": 0,
2209
+ "num_train_epochs": 1,
2210
+ "save_steps": 500,
2211
+ "stateful_callbacks": {},
2212
+ "total_flos": 2443576741724160.0,
2213
+ "train_batch_size": 1,
2214
+ "trial_name": null,
2215
+ "trial_params": null
2216
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f54bcb68d58bda2894c4538a3272cfd1b6327a6a7cd53546159b4758de7258bb
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc4f69df59205eb6ff4a861186508183f12b211c704f7cdf2e2481e8d91f47c
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12f2995c92e69406d3532e061b731c618d3120117dd7b86080ce7474e8b3de4f
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_gradnorm_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79eea20bd503d8d82054dd83cfea59d210dee3e00a15b50bb3ebfad51f1d820a
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,1904 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 625,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0032,
14
+ "learning_rate": 2.4524967251364995e-06,
15
+ "loss": 0.7036,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.0064,
20
+ "learning_rate": 2.5263093403840022e-06,
21
+ "loss": 0.327,
22
+ "step": 4
23
+ },
24
+ {
25
+ "epoch": 0.0096,
26
+ "learning_rate": 2.6010561079587694e-06,
27
+ "loss": 0.5459,
28
+ "step": 6
29
+ },
30
+ {
31
+ "epoch": 0.0128,
32
+ "learning_rate": 2.6767276851049716e-06,
33
+ "loss": 0.3284,
34
+ "step": 8
35
+ },
36
+ {
37
+ "epoch": 0.016,
38
+ "learning_rate": 2.7533146134728993e-06,
39
+ "loss": 0.4194,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.0192,
44
+ "learning_rate": 2.8308073203011634e-06,
45
+ "loss": 0.6064,
46
+ "step": 12
47
+ },
48
+ {
49
+ "epoch": 0.0224,
50
+ "learning_rate": 2.909196119613218e-06,
51
+ "loss": 0.9268,
52
+ "step": 14
53
+ },
54
+ {
55
+ "epoch": 0.0256,
56
+ "learning_rate": 2.988471213428035e-06,
57
+ "loss": 0.3195,
58
+ "step": 16
59
+ },
60
+ {
61
+ "epoch": 0.0288,
62
+ "learning_rate": 3.068622692984767e-06,
63
+ "loss": 0.5438,
64
+ "step": 18
65
+ },
66
+ {
67
+ "epoch": 0.032,
68
+ "learning_rate": 3.1496405399812602e-06,
69
+ "loss": 0.5279,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.0352,
74
+ "learning_rate": 3.231514627826302e-06,
75
+ "loss": 0.4847,
76
+ "step": 22
77
+ },
78
+ {
79
+ "epoch": 0.0384,
80
+ "learning_rate": 3.314234722905302e-06,
81
+ "loss": 0.4063,
82
+ "step": 24
83
+ },
84
+ {
85
+ "epoch": 0.0416,
86
+ "learning_rate": 3.3977904858594534e-06,
87
+ "loss": 0.612,
88
+ "step": 26
89
+ },
90
+ {
91
+ "epoch": 0.0448,
92
+ "learning_rate": 3.4821714728780654e-06,
93
+ "loss": 0.331,
94
+ "step": 28
95
+ },
96
+ {
97
+ "epoch": 0.048,
98
+ "learning_rate": 3.567367137003953e-06,
99
+ "loss": 0.3023,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.0512,
104
+ "learning_rate": 3.653366829451711e-06,
105
+ "loss": 0.5405,
106
+ "step": 32
107
+ },
108
+ {
109
+ "epoch": 0.0544,
110
+ "learning_rate": 3.740159800938784e-06,
111
+ "loss": 0.4103,
112
+ "step": 34
113
+ },
114
+ {
115
+ "epoch": 0.0576,
116
+ "learning_rate": 3.827735203028956e-06,
117
+ "loss": 0.466,
118
+ "step": 36
119
+ },
120
+ {
121
+ "epoch": 0.0608,
122
+ "learning_rate": 3.916082089488379e-06,
123
+ "loss": 0.5685,
124
+ "step": 38
125
+ },
126
+ {
127
+ "epoch": 0.064,
128
+ "learning_rate": 4.005189417653737e-06,
129
+ "loss": 0.8703,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.0672,
134
+ "learning_rate": 4.095046049812541e-06,
135
+ "loss": 0.7011,
136
+ "step": 42
137
+ },
138
+ {
139
+ "epoch": 0.0704,
140
+ "learning_rate": 4.1856407545951825e-06,
141
+ "loss": 0.3975,
142
+ "step": 44
143
+ },
144
+ {
145
+ "epoch": 0.0736,
146
+ "learning_rate": 4.276962208378814e-06,
147
+ "loss": 0.8369,
148
+ "step": 46
149
+ },
150
+ {
151
+ "epoch": 0.0768,
152
+ "learning_rate": 4.368998996702686e-06,
153
+ "loss": 0.5687,
154
+ "step": 48
155
+ },
156
+ {
157
+ "epoch": 0.08,
158
+ "learning_rate": 4.461739615694921e-06,
159
+ "loss": 0.6999,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.0832,
164
+ "learning_rate": 4.555172473510324e-06,
165
+ "loss": 0.5198,
166
+ "step": 52
167
+ },
168
+ {
169
+ "epoch": 0.0864,
170
+ "learning_rate": 4.649285891779326e-06,
171
+ "loss": 0.3555,
172
+ "step": 54
173
+ },
174
+ {
175
+ "epoch": 0.0896,
176
+ "learning_rate": 4.744068107067673e-06,
177
+ "loss": 0.5996,
178
+ "step": 56
179
+ },
180
+ {
181
+ "epoch": 0.0928,
182
+ "learning_rate": 4.839507272346751e-06,
183
+ "loss": 0.4225,
184
+ "step": 58
185
+ },
186
+ {
187
+ "epoch": 0.096,
188
+ "learning_rate": 4.935591458474425e-06,
189
+ "loss": 0.4745,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.0992,
194
+ "learning_rate": 5.032308655686007e-06,
195
+ "loss": 0.4378,
196
+ "step": 62
197
+ },
198
+ {
199
+ "epoch": 0.1024,
200
+ "learning_rate": 5.129646775095432e-06,
201
+ "loss": 0.3912,
202
+ "step": 64
203
+ },
204
+ {
205
+ "epoch": 0.1056,
206
+ "learning_rate": 5.227593650206246e-06,
207
+ "loss": 0.3867,
208
+ "step": 66
209
+ },
210
+ {
211
+ "epoch": 0.1088,
212
+ "learning_rate": 5.3261370384323904e-06,
213
+ "loss": 0.3087,
214
+ "step": 68
215
+ },
216
+ {
217
+ "epoch": 0.112,
218
+ "learning_rate": 5.425264622628326e-06,
219
+ "loss": 0.4686,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.1152,
224
+ "learning_rate": 5.524964012628644e-06,
225
+ "loss": 0.3876,
226
+ "step": 72
227
+ },
228
+ {
229
+ "epoch": 0.1184,
230
+ "learning_rate": 5.62522274679673e-06,
231
+ "loss": 0.3213,
232
+ "step": 74
233
+ },
234
+ {
235
+ "epoch": 0.1216,
236
+ "learning_rate": 5.726028293582342e-06,
237
+ "loss": 0.5433,
238
+ "step": 76
239
+ },
240
+ {
241
+ "epoch": 0.1248,
242
+ "learning_rate": 5.827368053088032e-06,
243
+ "loss": 0.3393,
244
+ "step": 78
245
+ },
246
+ {
247
+ "epoch": 0.128,
248
+ "learning_rate": 5.929229358643925e-06,
249
+ "loss": 0.9177,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.1312,
254
+ "learning_rate": 6.03159947839103e-06,
255
+ "loss": 0.4592,
256
+ "step": 82
257
+ },
258
+ {
259
+ "epoch": 0.1344,
260
+ "learning_rate": 6.13446561687258e-06,
261
+ "loss": 0.3325,
262
+ "step": 84
263
+ },
264
+ {
265
+ "epoch": 0.1376,
266
+ "learning_rate": 6.237814916633431e-06,
267
+ "loss": 0.4437,
268
+ "step": 86
269
+ },
270
+ {
271
+ "epoch": 0.1408,
272
+ "learning_rate": 6.341634459827044e-06,
273
+ "loss": 0.4454,
274
+ "step": 88
275
+ },
276
+ {
277
+ "epoch": 0.144,
278
+ "learning_rate": 6.445911269830183e-06,
279
+ "loss": 0.4098,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.1472,
284
+ "learning_rate": 6.5506323128648654e-06,
285
+ "loss": 0.5805,
286
+ "step": 92
287
+ },
288
+ {
289
+ "epoch": 0.1504,
290
+ "learning_rate": 6.655784499627476e-06,
291
+ "loss": 0.3599,
292
+ "step": 94
293
+ },
294
+ {
295
+ "epoch": 0.1536,
296
+ "learning_rate": 6.761354686924883e-06,
297
+ "loss": 0.4033,
298
+ "step": 96
299
+ },
300
+ {
301
+ "epoch": 0.1568,
302
+ "learning_rate": 6.867329679317144e-06,
303
+ "loss": 0.4196,
304
+ "step": 98
305
+ },
306
+ {
307
+ "epoch": 0.16,
308
+ "learning_rate": 6.973696230766884e-06,
309
+ "loss": 0.3178,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.1632,
314
+ "learning_rate": 7.080441046294945e-06,
315
+ "loss": 0.4176,
316
+ "step": 102
317
+ },
318
+ {
319
+ "epoch": 0.1664,
320
+ "learning_rate": 7.18755078364214e-06,
321
+ "loss": 0.4496,
322
+ "step": 104
323
+ },
324
+ {
325
+ "epoch": 0.1696,
326
+ "learning_rate": 7.2950120549369204e-06,
327
+ "loss": 0.3414,
328
+ "step": 106
329
+ },
330
+ {
331
+ "epoch": 0.1728,
332
+ "learning_rate": 7.402811428368824e-06,
333
+ "loss": 0.692,
334
+ "step": 108
335
+ },
336
+ {
337
+ "epoch": 0.176,
338
+ "learning_rate": 7.510935429867233e-06,
339
+ "loss": 0.5126,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.1792,
344
+ "learning_rate": 7.619370544785608e-06,
345
+ "loss": 0.3592,
346
+ "step": 112
347
+ },
348
+ {
349
+ "epoch": 0.1824,
350
+ "learning_rate": 7.728103219590684e-06,
351
+ "loss": 0.515,
352
+ "step": 114
353
+ },
354
+ {
355
+ "epoch": 0.1856,
356
+ "learning_rate": 7.83711986355656e-06,
357
+ "loss": 0.5299,
358
+ "step": 116
359
+ },
360
+ {
361
+ "epoch": 0.1888,
362
+ "learning_rate": 7.946406850463435e-06,
363
+ "loss": 0.7093,
364
+ "step": 118
365
+ },
366
+ {
367
+ "epoch": 0.192,
368
+ "learning_rate": 8.055950520300756e-06,
369
+ "loss": 0.3534,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.1952,
374
+ "learning_rate": 8.165737180974676e-06,
375
+ "loss": 0.5856,
376
+ "step": 122
377
+ },
378
+ {
379
+ "epoch": 0.1984,
380
+ "learning_rate": 8.275753110019367e-06,
381
+ "loss": 0.4656,
382
+ "step": 124
383
+ },
384
+ {
385
+ "epoch": 0.2016,
386
+ "learning_rate": 8.385984556312285e-06,
387
+ "loss": 0.849,
388
+ "step": 126
389
+ },
390
+ {
391
+ "epoch": 0.2048,
392
+ "learning_rate": 8.496417741792922e-06,
393
+ "loss": 0.6804,
394
+ "step": 128
395
+ },
396
+ {
397
+ "epoch": 0.208,
398
+ "learning_rate": 8.607038863184952e-06,
399
+ "loss": 0.7425,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.2112,
404
+ "learning_rate": 8.717834093721598e-06,
405
+ "loss": 0.5759,
406
+ "step": 132
407
+ },
408
+ {
409
+ "epoch": 0.2144,
410
+ "learning_rate": 8.828789584873757e-06,
411
+ "loss": 0.2995,
412
+ "step": 134
413
+ },
414
+ {
415
+ "epoch": 0.2176,
416
+ "learning_rate": 8.939891468081036e-06,
417
+ "loss": 0.4656,
418
+ "step": 136
419
+ },
420
+ {
421
+ "epoch": 0.2208,
422
+ "learning_rate": 9.051125856485175e-06,
423
+ "loss": 0.912,
424
+ "step": 138
425
+ },
426
+ {
427
+ "epoch": 0.224,
428
+ "learning_rate": 9.162478846665854e-06,
429
+ "loss": 0.3673,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.2272,
434
+ "learning_rate": 9.273936520378426e-06,
435
+ "loss": 0.5677,
436
+ "step": 142
437
+ },
438
+ {
439
+ "epoch": 0.2304,
440
+ "learning_rate": 9.38548494629364e-06,
441
+ "loss": 0.4492,
442
+ "step": 144
443
+ },
444
+ {
445
+ "epoch": 0.2336,
446
+ "learning_rate": 9.497110181738935e-06,
447
+ "loss": 0.8513,
448
+ "step": 146
449
+ },
450
+ {
451
+ "epoch": 0.2368,
452
+ "learning_rate": 9.608798274441153e-06,
453
+ "loss": 0.6077,
454
+ "step": 148
455
+ },
456
+ {
457
+ "epoch": 0.24,
458
+ "learning_rate": 9.720535264270526e-06,
459
+ "loss": 0.5087,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.2432,
464
+ "learning_rate": 9.832307184985473e-06,
465
+ "loss": 0.3399,
466
+ "step": 152
467
+ },
468
+ {
469
+ "epoch": 0.2464,
470
+ "learning_rate": 9.944100065978354e-06,
471
+ "loss": 0.325,
472
+ "step": 154
473
+ },
474
+ {
475
+ "epoch": 0.2496,
476
+ "learning_rate": 1.0055899934021637e-05,
477
+ "loss": 0.6666,
478
+ "step": 156
479
+ },
480
+ {
481
+ "epoch": 0.2528,
482
+ "learning_rate": 1.016769281501452e-05,
483
+ "loss": 0.4477,
484
+ "step": 158
485
+ },
486
+ {
487
+ "epoch": 0.256,
488
+ "learning_rate": 1.0279464735729467e-05,
489
+ "loss": 0.3785,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.2592,
494
+ "learning_rate": 1.039120172555884e-05,
495
+ "loss": 0.4558,
496
+ "step": 162
497
+ },
498
+ {
499
+ "epoch": 0.2624,
500
+ "learning_rate": 1.0502889818261058e-05,
501
+ "loss": 0.3491,
502
+ "step": 164
503
+ },
504
+ {
505
+ "epoch": 0.2656,
506
+ "learning_rate": 1.0614515053706354e-05,
507
+ "loss": 0.4749,
508
+ "step": 166
509
+ },
510
+ {
511
+ "epoch": 0.2688,
512
+ "learning_rate": 1.0726063479621567e-05,
513
+ "loss": 0.3133,
514
+ "step": 168
515
+ },
516
+ {
517
+ "epoch": 0.272,
518
+ "learning_rate": 1.083752115333414e-05,
519
+ "loss": 0.5629,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.2752,
524
+ "learning_rate": 1.0948874143514818e-05,
525
+ "loss": 0.4181,
526
+ "step": 172
527
+ },
528
+ {
529
+ "epoch": 0.2784,
530
+ "learning_rate": 1.1060108531918955e-05,
531
+ "loss": 0.4558,
532
+ "step": 174
533
+ },
534
+ {
535
+ "epoch": 0.2816,
536
+ "learning_rate": 1.1171210415126238e-05,
537
+ "loss": 0.4645,
538
+ "step": 176
539
+ },
540
+ {
541
+ "epoch": 0.2848,
542
+ "learning_rate": 1.1282165906278395e-05,
543
+ "loss": 0.3057,
544
+ "step": 178
545
+ },
546
+ {
547
+ "epoch": 0.288,
548
+ "learning_rate": 1.1392961136815041e-05,
549
+ "loss": 0.34,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 0.2912,
554
+ "learning_rate": 1.150358225820707e-05,
555
+ "loss": 0.7983,
556
+ "step": 182
557
+ },
558
+ {
559
+ "epoch": 0.2944,
560
+ "learning_rate": 1.1614015443687708e-05,
561
+ "loss": 0.8228,
562
+ "step": 184
563
+ },
564
+ {
565
+ "epoch": 0.2976,
566
+ "learning_rate": 1.1724246889980626e-05,
567
+ "loss": 0.5895,
568
+ "step": 186
569
+ },
570
+ {
571
+ "epoch": 0.3008,
572
+ "learning_rate": 1.1834262819025317e-05,
573
+ "loss": 0.3662,
574
+ "step": 188
575
+ },
576
+ {
577
+ "epoch": 0.304,
578
+ "learning_rate": 1.1944049479699241e-05,
579
+ "loss": 0.849,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 0.3072,
584
+ "learning_rate": 1.2053593149536557e-05,
585
+ "loss": 0.4468,
586
+ "step": 192
587
+ },
588
+ {
589
+ "epoch": 0.3104,
590
+ "learning_rate": 1.2162880136443434e-05,
591
+ "loss": 0.5257,
592
+ "step": 194
593
+ },
594
+ {
595
+ "epoch": 0.3136,
596
+ "learning_rate": 1.2271896780409309e-05,
597
+ "loss": 0.3903,
598
+ "step": 196
599
+ },
600
+ {
601
+ "epoch": 0.3168,
602
+ "learning_rate": 1.2380629455214385e-05,
603
+ "loss": 0.3894,
604
+ "step": 198
605
+ },
606
+ {
607
+ "epoch": 0.32,
608
+ "learning_rate": 1.2489064570132761e-05,
609
+ "loss": 0.4684,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 0.3232,
614
+ "learning_rate": 1.259718857163117e-05,
615
+ "loss": 0.3398,
616
+ "step": 202
617
+ },
618
+ {
619
+ "epoch": 0.3264,
620
+ "learning_rate": 1.2704987945063073e-05,
621
+ "loss": 0.429,
622
+ "step": 204
623
+ },
624
+ {
625
+ "epoch": 0.3296,
626
+ "learning_rate": 1.2812449216357855e-05,
627
+ "loss": 0.3714,
628
+ "step": 206
629
+ },
630
+ {
631
+ "epoch": 0.3328,
632
+ "learning_rate": 1.2919558953705047e-05,
633
+ "loss": 0.47,
634
+ "step": 208
635
+ },
636
+ {
637
+ "epoch": 0.336,
638
+ "learning_rate": 1.3026303769233109e-05,
639
+ "loss": 0.7464,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 0.3392,
644
+ "learning_rate": 1.313267032068285e-05,
645
+ "loss": 0.3296,
646
+ "step": 212
647
+ },
648
+ {
649
+ "epoch": 0.3424,
650
+ "learning_rate": 1.3238645313075109e-05,
651
+ "loss": 0.3549,
652
+ "step": 214
653
+ },
654
+ {
655
+ "epoch": 0.3456,
656
+ "learning_rate": 1.3344215500372517e-05,
657
+ "loss": 0.5048,
658
+ "step": 216
659
+ },
660
+ {
661
+ "epoch": 0.3488,
662
+ "learning_rate": 1.344936768713513e-05,
663
+ "loss": 0.4158,
664
+ "step": 218
665
+ },
666
+ {
667
+ "epoch": 0.352,
668
+ "learning_rate": 1.3554088730169812e-05,
669
+ "loss": 0.4307,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 0.3552,
674
+ "learning_rate": 1.3658365540172948e-05,
675
+ "loss": 0.3973,
676
+ "step": 222
677
+ },
678
+ {
679
+ "epoch": 0.3584,
680
+ "learning_rate": 1.3762185083366562e-05,
681
+ "loss": 0.4211,
682
+ "step": 224
683
+ },
684
+ {
685
+ "epoch": 0.3616,
686
+ "learning_rate": 1.3865534383127413e-05,
687
+ "loss": 0.6055,
688
+ "step": 226
689
+ },
690
+ {
691
+ "epoch": 0.3648,
692
+ "learning_rate": 1.3968400521608962e-05,
693
+ "loss": 0.682,
694
+ "step": 228
695
+ },
696
+ {
697
+ "epoch": 0.368,
698
+ "learning_rate": 1.4070770641356069e-05,
699
+ "loss": 0.3319,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 0.3712,
704
+ "learning_rate": 1.4172631946911964e-05,
705
+ "loss": 0.3834,
706
+ "step": 232
707
+ },
708
+ {
709
+ "epoch": 0.3744,
710
+ "learning_rate": 1.4273971706417653e-05,
711
+ "loss": 0.5636,
712
+ "step": 234
713
+ },
714
+ {
715
+ "epoch": 0.3776,
716
+ "learning_rate": 1.4374777253203265e-05,
717
+ "loss": 0.4051,
718
+ "step": 236
719
+ },
720
+ {
721
+ "epoch": 0.3808,
722
+ "learning_rate": 1.4475035987371348e-05,
723
+ "loss": 0.268,
724
+ "step": 238
725
+ },
726
+ {
727
+ "epoch": 0.384,
728
+ "learning_rate": 1.4574735377371669e-05,
729
+ "loss": 0.3986,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 0.3872,
734
+ "learning_rate": 1.4673862961567604e-05,
735
+ "loss": 0.5529,
736
+ "step": 242
737
+ },
738
+ {
739
+ "epoch": 0.3904,
740
+ "learning_rate": 1.4772406349793749e-05,
741
+ "loss": 1.1209,
742
+ "step": 244
743
+ },
744
+ {
745
+ "epoch": 0.3936,
746
+ "learning_rate": 1.4870353224904563e-05,
747
+ "loss": 0.5836,
748
+ "step": 246
749
+ },
750
+ {
751
+ "epoch": 0.3968,
752
+ "learning_rate": 1.4967691344313988e-05,
753
+ "loss": 1.0781,
754
+ "step": 248
755
+ },
756
+ {
757
+ "epoch": 0.4,
758
+ "learning_rate": 1.5064408541525568e-05,
759
+ "loss": 0.4529,
760
+ "step": 250
761
+ },
762
+ {
763
+ "epoch": 0.4032,
764
+ "learning_rate": 1.5160492727653245e-05,
765
+ "loss": 0.5813,
766
+ "step": 252
767
+ },
768
+ {
769
+ "epoch": 0.4064,
770
+ "learning_rate": 1.5255931892932322e-05,
771
+ "loss": 0.4578,
772
+ "step": 254
773
+ },
774
+ {
775
+ "epoch": 0.4096,
776
+ "learning_rate": 1.5350714108220667e-05,
777
+ "loss": 0.3759,
778
+ "step": 256
779
+ },
780
+ {
781
+ "epoch": 0.4128,
782
+ "learning_rate": 1.5444827526489668e-05,
783
+ "loss": 0.6413,
784
+ "step": 258
785
+ },
786
+ {
787
+ "epoch": 0.416,
788
+ "learning_rate": 1.5538260384305073e-05,
789
+ "loss": 0.3349,
790
+ "step": 260
791
+ },
792
+ {
793
+ "epoch": 0.4192,
794
+ "learning_rate": 1.563100100329731e-05,
795
+ "loss": 0.5868,
796
+ "step": 262
797
+ },
798
+ {
799
+ "epoch": 0.4224,
800
+ "learning_rate": 1.572303779162118e-05,
801
+ "loss": 0.35,
802
+ "step": 264
803
+ },
804
+ {
805
+ "epoch": 0.4256,
806
+ "learning_rate": 1.581435924540481e-05,
807
+ "loss": 0.8259,
808
+ "step": 266
809
+ },
810
+ {
811
+ "epoch": 0.4288,
812
+ "learning_rate": 1.5904953950187455e-05,
813
+ "loss": 0.4011,
814
+ "step": 268
815
+ },
816
+ {
817
+ "epoch": 0.432,
818
+ "learning_rate": 1.599481058234626e-05,
819
+ "loss": 0.8308,
820
+ "step": 270
821
+ },
822
+ {
823
+ "epoch": 0.4352,
824
+ "learning_rate": 1.6083917910511616e-05,
825
+ "loss": 0.6538,
826
+ "step": 272
827
+ },
828
+ {
829
+ "epoch": 0.4384,
830
+ "learning_rate": 1.617226479697104e-05,
831
+ "loss": 0.5024,
832
+ "step": 274
833
+ },
834
+ {
835
+ "epoch": 0.4416,
836
+ "learning_rate": 1.6259840199061212e-05,
837
+ "loss": 0.3737,
838
+ "step": 276
839
+ },
840
+ {
841
+ "epoch": 0.4448,
842
+ "learning_rate": 1.6346633170548285e-05,
843
+ "loss": 0.371,
844
+ "step": 278
845
+ },
846
+ {
847
+ "epoch": 0.448,
848
+ "learning_rate": 1.6432632862996042e-05,
849
+ "loss": 0.3252,
850
+ "step": 280
851
+ },
852
+ {
853
+ "epoch": 0.4512,
854
+ "learning_rate": 1.6517828527121928e-05,
855
+ "loss": 0.2844,
856
+ "step": 282
857
+ },
858
+ {
859
+ "epoch": 0.4544,
860
+ "learning_rate": 1.6602209514140542e-05,
861
+ "loss": 0.4009,
862
+ "step": 284
863
+ },
864
+ {
865
+ "epoch": 0.4576,
866
+ "learning_rate": 1.6685765277094695e-05,
867
+ "loss": 0.4064,
868
+ "step": 286
869
+ },
870
+ {
871
+ "epoch": 0.4608,
872
+ "learning_rate": 1.6768485372173696e-05,
873
+ "loss": 0.2609,
874
+ "step": 288
875
+ },
876
+ {
877
+ "epoch": 0.464,
878
+ "learning_rate": 1.6850359460018733e-05,
879
+ "loss": 0.3502,
880
+ "step": 290
881
+ },
882
+ {
883
+ "epoch": 0.4672,
884
+ "learning_rate": 1.6931377307015226e-05,
885
+ "loss": 0.348,
886
+ "step": 292
887
+ },
888
+ {
889
+ "epoch": 0.4704,
890
+ "learning_rate": 1.701152878657196e-05,
891
+ "loss": 0.644,
892
+ "step": 294
893
+ },
894
+ {
895
+ "epoch": 0.4736,
896
+ "learning_rate": 1.7090803880386778e-05,
897
+ "loss": 0.4308,
898
+ "step": 296
899
+ },
900
+ {
901
+ "epoch": 0.4768,
902
+ "learning_rate": 1.716919267969883e-05,
903
+ "loss": 0.568,
904
+ "step": 298
905
+ },
906
+ {
907
+ "epoch": 0.48,
908
+ "learning_rate": 1.7246685386527095e-05,
909
+ "loss": 0.6655,
910
+ "step": 300
911
+ },
912
+ {
913
+ "epoch": 0.4832,
914
+ "learning_rate": 1.7323272314895022e-05,
915
+ "loss": 0.4522,
916
+ "step": 302
917
+ },
918
+ {
919
+ "epoch": 0.4864,
920
+ "learning_rate": 1.7398943892041227e-05,
921
+ "loss": 0.3227,
922
+ "step": 304
923
+ },
924
+ {
925
+ "epoch": 0.4896,
926
+ "learning_rate": 1.7473690659615992e-05,
927
+ "loss": 0.6808,
928
+ "step": 306
929
+ },
930
+ {
931
+ "epoch": 0.4928,
932
+ "learning_rate": 1.7547503274863495e-05,
933
+ "loss": 0.7103,
934
+ "step": 308
935
+ },
936
+ {
937
+ "epoch": 0.496,
938
+ "learning_rate": 1.7620372511789604e-05,
939
+ "loss": 0.4166,
940
+ "step": 310
941
+ },
942
+ {
943
+ "epoch": 0.4992,
944
+ "learning_rate": 1.7692289262315e-05,
945
+ "loss": 0.3018,
946
+ "step": 312
947
+ },
948
+ {
949
+ "epoch": 0.5024,
950
+ "learning_rate": 1.7763244537413657e-05,
951
+ "loss": 0.5556,
952
+ "step": 314
953
+ },
954
+ {
955
+ "epoch": 0.5056,
956
+ "learning_rate": 1.7833229468236364e-05,
957
+ "loss": 0.8933,
958
+ "step": 316
959
+ },
960
+ {
961
+ "epoch": 0.5088,
962
+ "learning_rate": 1.790223530721933e-05,
963
+ "loss": 0.6276,
964
+ "step": 318
965
+ },
966
+ {
967
+ "epoch": 0.512,
968
+ "learning_rate": 1.7970253429177477e-05,
969
+ "loss": 0.7065,
970
+ "step": 320
971
+ },
972
+ {
973
+ "epoch": 0.5152,
974
+ "learning_rate": 1.803727533238257e-05,
975
+ "loss": 0.3694,
976
+ "step": 322
977
+ },
978
+ {
979
+ "epoch": 0.5184,
980
+ "learning_rate": 1.8103292639625842e-05,
981
+ "loss": 0.4256,
982
+ "step": 324
983
+ },
984
+ {
985
+ "epoch": 0.5216,
986
+ "learning_rate": 1.816829709926509e-05,
987
+ "loss": 0.3572,
988
+ "step": 326
989
+ },
990
+ {
991
+ "epoch": 0.5248,
992
+ "learning_rate": 1.8232280586256097e-05,
993
+ "loss": 0.4809,
994
+ "step": 328
995
+ },
996
+ {
997
+ "epoch": 0.528,
998
+ "learning_rate": 1.829523510316813e-05,
999
+ "loss": 0.3423,
1000
+ "step": 330
1001
+ },
1002
+ {
1003
+ "epoch": 0.5312,
1004
+ "learning_rate": 1.8357152781183606e-05,
1005
+ "loss": 0.3685,
1006
+ "step": 332
1007
+ },
1008
+ {
1009
+ "epoch": 0.5344,
1010
+ "learning_rate": 1.8418025881081606e-05,
1011
+ "loss": 0.3765,
1012
+ "step": 334
1013
+ },
1014
+ {
1015
+ "epoch": 0.5376,
1016
+ "learning_rate": 1.8477846794205258e-05,
1017
+ "loss": 0.9131,
1018
+ "step": 336
1019
+ },
1020
+ {
1021
+ "epoch": 0.5408,
1022
+ "learning_rate": 1.8536608043412695e-05,
1023
+ "loss": 0.2763,
1024
+ "step": 338
1025
+ },
1026
+ {
1027
+ "epoch": 0.544,
1028
+ "learning_rate": 1.85943022840117e-05,
1029
+ "loss": 0.4316,
1030
+ "step": 340
1031
+ },
1032
+ {
1033
+ "epoch": 0.5472,
1034
+ "learning_rate": 1.865092230467769e-05,
1035
+ "loss": 0.501,
1036
+ "step": 342
1037
+ },
1038
+ {
1039
+ "epoch": 0.5504,
1040
+ "learning_rate": 1.87064610283551e-05,
1041
+ "loss": 0.5479,
1042
+ "step": 344
1043
+ },
1044
+ {
1045
+ "epoch": 0.5536,
1046
+ "learning_rate": 1.876091151314196e-05,
1047
+ "loss": 0.4017,
1048
+ "step": 346
1049
+ },
1050
+ {
1051
+ "epoch": 0.5568,
1052
+ "learning_rate": 1.8814266953157557e-05,
1053
+ "loss": 0.3401,
1054
+ "step": 348
1055
+ },
1056
+ {
1057
+ "epoch": 0.56,
1058
+ "learning_rate": 1.8866520679393127e-05,
1059
+ "loss": 0.5058,
1060
+ "step": 350
1061
+ },
1062
+ {
1063
+ "epoch": 0.5632,
1064
+ "learning_rate": 1.8917666160545436e-05,
1065
+ "loss": 0.3441,
1066
+ "step": 352
1067
+ },
1068
+ {
1069
+ "epoch": 0.5664,
1070
+ "learning_rate": 1.896769700383315e-05,
1071
+ "loss": 0.2944,
1072
+ "step": 354
1073
+ },
1074
+ {
1075
+ "epoch": 0.5696,
1076
+ "learning_rate": 1.901660695579585e-05,
1077
+ "loss": 0.4928,
1078
+ "step": 356
1079
+ },
1080
+ {
1081
+ "epoch": 0.5728,
1082
+ "learning_rate": 1.9064389903075676e-05,
1083
+ "loss": 0.4822,
1084
+ "step": 358
1085
+ },
1086
+ {
1087
+ "epoch": 0.576,
1088
+ "learning_rate": 1.911103987318148e-05,
1089
+ "loss": 0.4891,
1090
+ "step": 360
1091
+ },
1092
+ {
1093
+ "epoch": 0.5792,
1094
+ "learning_rate": 1.9156551035235288e-05,
1095
+ "loss": 0.6295,
1096
+ "step": 362
1097
+ },
1098
+ {
1099
+ "epoch": 0.5824,
1100
+ "learning_rate": 1.9200917700701173e-05,
1101
+ "loss": 0.3944,
1102
+ "step": 364
1103
+ },
1104
+ {
1105
+ "epoch": 0.5856,
1106
+ "learning_rate": 1.924413432409622e-05,
1107
+ "loss": 0.4413,
1108
+ "step": 366
1109
+ },
1110
+ {
1111
+ "epoch": 0.5888,
1112
+ "learning_rate": 1.9286195503683705e-05,
1113
+ "loss": 0.3578,
1114
+ "step": 368
1115
+ },
1116
+ {
1117
+ "epoch": 0.592,
1118
+ "learning_rate": 1.932709598214825e-05,
1119
+ "loss": 0.3674,
1120
+ "step": 370
1121
+ },
1122
+ {
1123
+ "epoch": 0.5952,
1124
+ "learning_rate": 1.9366830647252967e-05,
1125
+ "loss": 0.3141,
1126
+ "step": 372
1127
+ },
1128
+ {
1129
+ "epoch": 0.5984,
1130
+ "learning_rate": 1.940539453247842e-05,
1131
+ "loss": 0.3549,
1132
+ "step": 374
1133
+ },
1134
+ {
1135
+ "epoch": 0.6016,
1136
+ "learning_rate": 1.944278281764342e-05,
1137
+ "loss": 0.2786,
1138
+ "step": 376
1139
+ },
1140
+ {
1141
+ "epoch": 0.6048,
1142
+ "learning_rate": 1.9478990829507504e-05,
1143
+ "loss": 0.5024,
1144
+ "step": 378
1145
+ },
1146
+ {
1147
+ "epoch": 0.608,
1148
+ "learning_rate": 1.951401404235505e-05,
1149
+ "loss": 0.317,
1150
+ "step": 380
1151
+ },
1152
+ {
1153
+ "epoch": 0.6112,
1154
+ "learning_rate": 1.9547848078560975e-05,
1155
+ "loss": 0.549,
1156
+ "step": 382
1157
+ },
1158
+ {
1159
+ "epoch": 0.6144,
1160
+ "learning_rate": 1.9580488709137858e-05,
1161
+ "loss": 0.5291,
1162
+ "step": 384
1163
+ },
1164
+ {
1165
+ "epoch": 0.6176,
1166
+ "learning_rate": 1.961193185426459e-05,
1167
+ "loss": 0.5376,
1168
+ "step": 386
1169
+ },
1170
+ {
1171
+ "epoch": 0.6208,
1172
+ "learning_rate": 1.9642173583796265e-05,
1173
+ "loss": 0.4267,
1174
+ "step": 388
1175
+ },
1176
+ {
1177
+ "epoch": 0.624,
1178
+ "learning_rate": 1.967121011775546e-05,
1179
+ "loss": 0.2718,
1180
+ "step": 390
1181
+ },
1182
+ {
1183
+ "epoch": 0.6272,
1184
+ "learning_rate": 1.969903782680467e-05,
1185
+ "loss": 0.3591,
1186
+ "step": 392
1187
+ },
1188
+ {
1189
+ "epoch": 0.6304,
1190
+ "learning_rate": 1.9725653232699962e-05,
1191
+ "loss": 0.8761,
1192
+ "step": 394
1193
+ },
1194
+ {
1195
+ "epoch": 0.6336,
1196
+ "learning_rate": 1.9751053008725736e-05,
1197
+ "loss": 0.3512,
1198
+ "step": 396
1199
+ },
1200
+ {
1201
+ "epoch": 0.6368,
1202
+ "learning_rate": 1.9775233980110524e-05,
1203
+ "loss": 0.3622,
1204
+ "step": 398
1205
+ },
1206
+ {
1207
+ "epoch": 0.64,
1208
+ "learning_rate": 1.9798193124423804e-05,
1209
+ "loss": 0.526,
1210
+ "step": 400
1211
+ },
1212
+ {
1213
+ "epoch": 0.6432,
1214
+ "learning_rate": 1.9819927571953807e-05,
1215
+ "loss": 0.5224,
1216
+ "step": 402
1217
+ },
1218
+ {
1219
+ "epoch": 0.6464,
1220
+ "learning_rate": 1.9840434606066182e-05,
1221
+ "loss": 0.4473,
1222
+ "step": 404
1223
+ },
1224
+ {
1225
+ "epoch": 0.6496,
1226
+ "learning_rate": 1.985971166354357e-05,
1227
+ "loss": 0.4711,
1228
+ "step": 406
1229
+ },
1230
+ {
1231
+ "epoch": 0.6528,
1232
+ "learning_rate": 1.9877756334905983e-05,
1233
+ "loss": 0.4104,
1234
+ "step": 408
1235
+ },
1236
+ {
1237
+ "epoch": 0.656,
1238
+ "learning_rate": 1.9894566364711965e-05,
1239
+ "loss": 0.3354,
1240
+ "step": 410
1241
+ },
1242
+ {
1243
+ "epoch": 0.6592,
1244
+ "learning_rate": 1.99101396518405e-05,
1245
+ "loss": 0.5118,
1246
+ "step": 412
1247
+ },
1248
+ {
1249
+ "epoch": 0.6624,
1250
+ "learning_rate": 1.9924474249753652e-05,
1251
+ "loss": 0.4001,
1252
+ "step": 414
1253
+ },
1254
+ {
1255
+ "epoch": 0.6656,
1256
+ "learning_rate": 1.9937568366739858e-05,
1257
+ "loss": 0.432,
1258
+ "step": 416
1259
+ },
1260
+ {
1261
+ "epoch": 0.6688,
1262
+ "learning_rate": 1.994942036613787e-05,
1263
+ "loss": 0.4111,
1264
+ "step": 418
1265
+ },
1266
+ {
1267
+ "epoch": 0.672,
1268
+ "learning_rate": 1.9960028766541336e-05,
1269
+ "loss": 0.4681,
1270
+ "step": 420
1271
+ },
1272
+ {
1273
+ "epoch": 0.6752,
1274
+ "learning_rate": 1.9969392241983957e-05,
1275
+ "loss": 0.3202,
1276
+ "step": 422
1277
+ },
1278
+ {
1279
+ "epoch": 0.6784,
1280
+ "learning_rate": 1.9977509622105233e-05,
1281
+ "loss": 0.3838,
1282
+ "step": 424
1283
+ },
1284
+ {
1285
+ "epoch": 0.6816,
1286
+ "learning_rate": 1.998437989229673e-05,
1287
+ "loss": 0.3591,
1288
+ "step": 426
1289
+ },
1290
+ {
1291
+ "epoch": 0.6848,
1292
+ "learning_rate": 1.9990002193828923e-05,
1293
+ "loss": 0.3838,
1294
+ "step": 428
1295
+ },
1296
+ {
1297
+ "epoch": 0.688,
1298
+ "learning_rate": 1.9994375823958504e-05,
1299
+ "loss": 0.3466,
1300
+ "step": 430
1301
+ },
1302
+ {
1303
+ "epoch": 0.6912,
1304
+ "learning_rate": 1.9997500236016233e-05,
1305
+ "loss": 0.7046,
1306
+ "step": 432
1307
+ },
1308
+ {
1309
+ "epoch": 0.6944,
1310
+ "learning_rate": 1.9999375039475275e-05,
1311
+ "loss": 0.3992,
1312
+ "step": 434
1313
+ },
1314
+ {
1315
+ "epoch": 0.6976,
1316
+ "learning_rate": 2e-05,
1317
+ "loss": 0.3986,
1318
+ "step": 436
1319
+ },
1320
+ {
1321
+ "epoch": 0.7008,
1322
+ "learning_rate": 1.9999375039475278e-05,
1323
+ "loss": 0.5007,
1324
+ "step": 438
1325
+ },
1326
+ {
1327
+ "epoch": 0.704,
1328
+ "learning_rate": 1.9997500236016233e-05,
1329
+ "loss": 0.8362,
1330
+ "step": 440
1331
+ },
1332
+ {
1333
+ "epoch": 0.7072,
1334
+ "learning_rate": 1.9994375823958504e-05,
1335
+ "loss": 0.3802,
1336
+ "step": 442
1337
+ },
1338
+ {
1339
+ "epoch": 0.7104,
1340
+ "learning_rate": 1.9990002193828923e-05,
1341
+ "loss": 0.7143,
1342
+ "step": 444
1343
+ },
1344
+ {
1345
+ "epoch": 0.7136,
1346
+ "learning_rate": 1.9984379892296735e-05,
1347
+ "loss": 0.7413,
1348
+ "step": 446
1349
+ },
1350
+ {
1351
+ "epoch": 0.7168,
1352
+ "learning_rate": 1.9977509622105236e-05,
1353
+ "loss": 0.8962,
1354
+ "step": 448
1355
+ },
1356
+ {
1357
+ "epoch": 0.72,
1358
+ "learning_rate": 1.9969392241983957e-05,
1359
+ "loss": 0.3923,
1360
+ "step": 450
1361
+ },
1362
+ {
1363
+ "epoch": 0.7232,
1364
+ "learning_rate": 1.9960028766541336e-05,
1365
+ "loss": 0.5831,
1366
+ "step": 452
1367
+ },
1368
+ {
1369
+ "epoch": 0.7264,
1370
+ "learning_rate": 1.9949420366137873e-05,
1371
+ "loss": 0.4438,
1372
+ "step": 454
1373
+ },
1374
+ {
1375
+ "epoch": 0.7296,
1376
+ "learning_rate": 1.993756836673986e-05,
1377
+ "loss": 0.3784,
1378
+ "step": 456
1379
+ },
1380
+ {
1381
+ "epoch": 0.7328,
1382
+ "learning_rate": 1.9924474249753656e-05,
1383
+ "loss": 0.3393,
1384
+ "step": 458
1385
+ },
1386
+ {
1387
+ "epoch": 0.736,
1388
+ "learning_rate": 1.9910139651840497e-05,
1389
+ "loss": 0.2945,
1390
+ "step": 460
1391
+ },
1392
+ {
1393
+ "epoch": 0.7392,
1394
+ "learning_rate": 1.9894566364711965e-05,
1395
+ "loss": 0.3334,
1396
+ "step": 462
1397
+ },
1398
+ {
1399
+ "epoch": 0.7424,
1400
+ "learning_rate": 1.987775633490599e-05,
1401
+ "loss": 0.5353,
1402
+ "step": 464
1403
+ },
1404
+ {
1405
+ "epoch": 0.7456,
1406
+ "learning_rate": 1.9859711663543573e-05,
1407
+ "loss": 0.4004,
1408
+ "step": 466
1409
+ },
1410
+ {
1411
+ "epoch": 0.7488,
1412
+ "learning_rate": 1.9840434606066186e-05,
1413
+ "loss": 0.4969,
1414
+ "step": 468
1415
+ },
1416
+ {
1417
+ "epoch": 0.752,
1418
+ "learning_rate": 1.9819927571953804e-05,
1419
+ "loss": 0.2853,
1420
+ "step": 470
1421
+ },
1422
+ {
1423
+ "epoch": 0.7552,
1424
+ "learning_rate": 1.9798193124423804e-05,
1425
+ "loss": 0.5008,
1426
+ "step": 472
1427
+ },
1428
+ {
1429
+ "epoch": 0.7584,
1430
+ "learning_rate": 1.9775233980110524e-05,
1431
+ "loss": 0.2996,
1432
+ "step": 474
1433
+ },
1434
+ {
1435
+ "epoch": 0.7616,
1436
+ "learning_rate": 1.9751053008725736e-05,
1437
+ "loss": 1.05,
1438
+ "step": 476
1439
+ },
1440
+ {
1441
+ "epoch": 0.7648,
1442
+ "learning_rate": 1.9725653232699962e-05,
1443
+ "loss": 0.3922,
1444
+ "step": 478
1445
+ },
1446
+ {
1447
+ "epoch": 0.768,
1448
+ "learning_rate": 1.969903782680467e-05,
1449
+ "loss": 0.4267,
1450
+ "step": 480
1451
+ },
1452
+ {
1453
+ "epoch": 0.7712,
1454
+ "learning_rate": 1.9671210117755462e-05,
1455
+ "loss": 0.4111,
1456
+ "step": 482
1457
+ },
1458
+ {
1459
+ "epoch": 0.7744,
1460
+ "learning_rate": 1.9642173583796265e-05,
1461
+ "loss": 0.4971,
1462
+ "step": 484
1463
+ },
1464
+ {
1465
+ "epoch": 0.7776,
1466
+ "learning_rate": 1.961193185426459e-05,
1467
+ "loss": 0.776,
1468
+ "step": 486
1469
+ },
1470
+ {
1471
+ "epoch": 0.7808,
1472
+ "learning_rate": 1.958048870913786e-05,
1473
+ "loss": 0.4126,
1474
+ "step": 488
1475
+ },
1476
+ {
1477
+ "epoch": 0.784,
1478
+ "learning_rate": 1.9547848078560982e-05,
1479
+ "loss": 1.2728,
1480
+ "step": 490
1481
+ },
1482
+ {
1483
+ "epoch": 0.7872,
1484
+ "learning_rate": 1.9514014042355054e-05,
1485
+ "loss": 0.4098,
1486
+ "step": 492
1487
+ },
1488
+ {
1489
+ "epoch": 0.7904,
1490
+ "learning_rate": 1.947899082950751e-05,
1491
+ "loss": 0.3246,
1492
+ "step": 494
1493
+ },
1494
+ {
1495
+ "epoch": 0.7936,
1496
+ "learning_rate": 1.9442782817643425e-05,
1497
+ "loss": 0.4038,
1498
+ "step": 496
1499
+ },
1500
+ {
1501
+ "epoch": 0.7968,
1502
+ "learning_rate": 1.9405394532478422e-05,
1503
+ "loss": 0.5928,
1504
+ "step": 498
1505
+ },
1506
+ {
1507
+ "epoch": 0.8,
1508
+ "learning_rate": 1.9366830647252977e-05,
1509
+ "loss": 0.3213,
1510
+ "step": 500
1511
+ },
1512
+ {
1513
+ "epoch": 0.8032,
1514
+ "learning_rate": 1.9327095982148255e-05,
1515
+ "loss": 0.4125,
1516
+ "step": 502
1517
+ },
1518
+ {
1519
+ "epoch": 0.8064,
1520
+ "learning_rate": 1.928619550368371e-05,
1521
+ "loss": 0.3865,
1522
+ "step": 504
1523
+ },
1524
+ {
1525
+ "epoch": 0.8096,
1526
+ "learning_rate": 1.9244134324096216e-05,
1527
+ "loss": 0.8616,
1528
+ "step": 506
1529
+ },
1530
+ {
1531
+ "epoch": 0.8128,
1532
+ "learning_rate": 1.9200917700701176e-05,
1533
+ "loss": 0.4038,
1534
+ "step": 508
1535
+ },
1536
+ {
1537
+ "epoch": 0.816,
1538
+ "learning_rate": 1.9156551035235298e-05,
1539
+ "loss": 0.4095,
1540
+ "step": 510
1541
+ },
1542
+ {
1543
+ "epoch": 0.8192,
1544
+ "learning_rate": 1.9111039873181475e-05,
1545
+ "loss": 0.2725,
1546
+ "step": 512
1547
+ },
1548
+ {
1549
+ "epoch": 0.8224,
1550
+ "learning_rate": 1.9064389903075683e-05,
1551
+ "loss": 0.3414,
1552
+ "step": 514
1553
+ },
1554
+ {
1555
+ "epoch": 0.8256,
1556
+ "learning_rate": 1.9016606955795843e-05,
1557
+ "loss": 0.369,
1558
+ "step": 516
1559
+ },
1560
+ {
1561
+ "epoch": 0.8288,
1562
+ "learning_rate": 1.8967697003833156e-05,
1563
+ "loss": 0.3659,
1564
+ "step": 518
1565
+ },
1566
+ {
1567
+ "epoch": 0.832,
1568
+ "learning_rate": 1.891766616054545e-05,
1569
+ "loss": 0.3186,
1570
+ "step": 520
1571
+ },
1572
+ {
1573
+ "epoch": 0.8352,
1574
+ "learning_rate": 1.8866520679393124e-05,
1575
+ "loss": 0.2871,
1576
+ "step": 522
1577
+ },
1578
+ {
1579
+ "epoch": 0.8384,
1580
+ "learning_rate": 1.881426695315756e-05,
1581
+ "loss": 0.398,
1582
+ "step": 524
1583
+ },
1584
+ {
1585
+ "epoch": 0.8416,
1586
+ "learning_rate": 1.8760911513141974e-05,
1587
+ "loss": 0.3132,
1588
+ "step": 526
1589
+ },
1590
+ {
1591
+ "epoch": 0.8448,
1592
+ "learning_rate": 1.8706461028355107e-05,
1593
+ "loss": 0.5052,
1594
+ "step": 528
1595
+ },
1596
+ {
1597
+ "epoch": 0.848,
1598
+ "learning_rate": 1.86509223046777e-05,
1599
+ "loss": 0.4231,
1600
+ "step": 530
1601
+ },
1602
+ {
1603
+ "epoch": 0.8512,
1604
+ "learning_rate": 1.8594302284011697e-05,
1605
+ "loss": 0.3517,
1606
+ "step": 532
1607
+ },
1608
+ {
1609
+ "epoch": 0.8544,
1610
+ "learning_rate": 1.8536608043412702e-05,
1611
+ "loss": 0.3629,
1612
+ "step": 534
1613
+ },
1614
+ {
1615
+ "epoch": 0.8576,
1616
+ "learning_rate": 1.847784679420527e-05,
1617
+ "loss": 0.3954,
1618
+ "step": 536
1619
+ },
1620
+ {
1621
+ "epoch": 0.8608,
1622
+ "learning_rate": 1.841802588108161e-05,
1623
+ "loss": 0.5935,
1624
+ "step": 538
1625
+ },
1626
+ {
1627
+ "epoch": 0.864,
1628
+ "learning_rate": 1.8357152781183613e-05,
1629
+ "loss": 0.4527,
1630
+ "step": 540
1631
+ },
1632
+ {
1633
+ "epoch": 0.8672,
1634
+ "learning_rate": 1.8295235103168128e-05,
1635
+ "loss": 0.4106,
1636
+ "step": 542
1637
+ },
1638
+ {
1639
+ "epoch": 0.8704,
1640
+ "learning_rate": 1.8232280586256104e-05,
1641
+ "loss": 0.6128,
1642
+ "step": 544
1643
+ },
1644
+ {
1645
+ "epoch": 0.8736,
1646
+ "learning_rate": 1.8168297099265108e-05,
1647
+ "loss": 0.557,
1648
+ "step": 546
1649
+ },
1650
+ {
1651
+ "epoch": 0.8768,
1652
+ "learning_rate": 1.8103292639625835e-05,
1653
+ "loss": 0.3223,
1654
+ "step": 548
1655
+ },
1656
+ {
1657
+ "epoch": 0.88,
1658
+ "learning_rate": 1.8037275332382575e-05,
1659
+ "loss": 0.7693,
1660
+ "step": 550
1661
+ },
1662
+ {
1663
+ "epoch": 0.8832,
1664
+ "learning_rate": 1.7970253429177494e-05,
1665
+ "loss": 0.6488,
1666
+ "step": 552
1667
+ },
1668
+ {
1669
+ "epoch": 0.8864,
1670
+ "learning_rate": 1.7902235307219336e-05,
1671
+ "loss": 0.4684,
1672
+ "step": 554
1673
+ },
1674
+ {
1675
+ "epoch": 0.8896,
1676
+ "learning_rate": 1.783322946823638e-05,
1677
+ "loss": 0.7015,
1678
+ "step": 556
1679
+ },
1680
+ {
1681
+ "epoch": 0.8928,
1682
+ "learning_rate": 1.776324453741365e-05,
1683
+ "loss": 0.3606,
1684
+ "step": 558
1685
+ },
1686
+ {
1687
+ "epoch": 0.896,
1688
+ "learning_rate": 1.7692289262315008e-05,
1689
+ "loss": 0.5728,
1690
+ "step": 560
1691
+ },
1692
+ {
1693
+ "epoch": 0.8992,
1694
+ "learning_rate": 1.762037251178961e-05,
1695
+ "loss": 0.6078,
1696
+ "step": 562
1697
+ },
1698
+ {
1699
+ "epoch": 0.9024,
1700
+ "learning_rate": 1.7547503274863502e-05,
1701
+ "loss": 0.9526,
1702
+ "step": 564
1703
+ },
1704
+ {
1705
+ "epoch": 0.9056,
1706
+ "learning_rate": 1.7473690659616e-05,
1707
+ "loss": 0.3625,
1708
+ "step": 566
1709
+ },
1710
+ {
1711
+ "epoch": 0.9088,
1712
+ "learning_rate": 1.739894389204122e-05,
1713
+ "loss": 0.3565,
1714
+ "step": 568
1715
+ },
1716
+ {
1717
+ "epoch": 0.912,
1718
+ "learning_rate": 1.732327231489503e-05,
1719
+ "loss": 0.6755,
1720
+ "step": 570
1721
+ },
1722
+ {
1723
+ "epoch": 0.9152,
1724
+ "learning_rate": 1.7246685386527105e-05,
1725
+ "loss": 0.4678,
1726
+ "step": 572
1727
+ },
1728
+ {
1729
+ "epoch": 0.9184,
1730
+ "learning_rate": 1.716919267969884e-05,
1731
+ "loss": 0.5578,
1732
+ "step": 574
1733
+ },
1734
+ {
1735
+ "epoch": 0.9216,
1736
+ "learning_rate": 1.7090803880386784e-05,
1737
+ "loss": 0.3742,
1738
+ "step": 576
1739
+ },
1740
+ {
1741
+ "epoch": 0.9248,
1742
+ "learning_rate": 1.701152878657197e-05,
1743
+ "loss": 0.4484,
1744
+ "step": 578
1745
+ },
1746
+ {
1747
+ "epoch": 0.928,
1748
+ "learning_rate": 1.6931377307015236e-05,
1749
+ "loss": 0.4051,
1750
+ "step": 580
1751
+ },
1752
+ {
1753
+ "epoch": 0.9312,
1754
+ "learning_rate": 1.6850359460018744e-05,
1755
+ "loss": 0.6345,
1756
+ "step": 582
1757
+ },
1758
+ {
1759
+ "epoch": 0.9344,
1760
+ "learning_rate": 1.67684853721737e-05,
1761
+ "loss": 0.595,
1762
+ "step": 584
1763
+ },
1764
+ {
1765
+ "epoch": 0.9376,
1766
+ "learning_rate": 1.6685765277094702e-05,
1767
+ "loss": 0.2562,
1768
+ "step": 586
1769
+ },
1770
+ {
1771
+ "epoch": 0.9408,
1772
+ "learning_rate": 1.6602209514140562e-05,
1773
+ "loss": 0.6204,
1774
+ "step": 588
1775
+ },
1776
+ {
1777
+ "epoch": 0.944,
1778
+ "learning_rate": 1.651782852712194e-05,
1779
+ "loss": 0.2957,
1780
+ "step": 590
1781
+ },
1782
+ {
1783
+ "epoch": 0.9472,
1784
+ "learning_rate": 1.6432632862996062e-05,
1785
+ "loss": 0.4435,
1786
+ "step": 592
1787
+ },
1788
+ {
1789
+ "epoch": 0.9504,
1790
+ "learning_rate": 1.6346633170548275e-05,
1791
+ "loss": 0.3927,
1792
+ "step": 594
1793
+ },
1794
+ {
1795
+ "epoch": 0.9536,
1796
+ "learning_rate": 1.625984019906122e-05,
1797
+ "loss": 0.9383,
1798
+ "step": 596
1799
+ },
1800
+ {
1801
+ "epoch": 0.9568,
1802
+ "learning_rate": 1.6172264796971063e-05,
1803
+ "loss": 0.3296,
1804
+ "step": 598
1805
+ },
1806
+ {
1807
+ "epoch": 0.96,
1808
+ "learning_rate": 1.6083917910511623e-05,
1809
+ "loss": 0.2457,
1810
+ "step": 600
1811
+ },
1812
+ {
1813
+ "epoch": 0.9632,
1814
+ "learning_rate": 1.5994810582346266e-05,
1815
+ "loss": 0.2783,
1816
+ "step": 602
1817
+ },
1818
+ {
1819
+ "epoch": 0.9664,
1820
+ "learning_rate": 1.5904953950187448e-05,
1821
+ "loss": 0.5341,
1822
+ "step": 604
1823
+ },
1824
+ {
1825
+ "epoch": 0.9696,
1826
+ "learning_rate": 1.581435924540482e-05,
1827
+ "loss": 0.5131,
1828
+ "step": 606
1829
+ },
1830
+ {
1831
+ "epoch": 0.9728,
1832
+ "learning_rate": 1.5723037791621203e-05,
1833
+ "loss": 0.6233,
1834
+ "step": 608
1835
+ },
1836
+ {
1837
+ "epoch": 0.976,
1838
+ "learning_rate": 1.5631001003297302e-05,
1839
+ "loss": 0.4588,
1840
+ "step": 610
1841
+ },
1842
+ {
1843
+ "epoch": 0.9792,
1844
+ "learning_rate": 1.5538260384305083e-05,
1845
+ "loss": 0.2961,
1846
+ "step": 612
1847
+ },
1848
+ {
1849
+ "epoch": 0.9824,
1850
+ "learning_rate": 1.544482752648966e-05,
1851
+ "loss": 0.3157,
1852
+ "step": 614
1853
+ },
1854
+ {
1855
+ "epoch": 0.9856,
1856
+ "learning_rate": 1.5350714108220677e-05,
1857
+ "loss": 0.3386,
1858
+ "step": 616
1859
+ },
1860
+ {
1861
+ "epoch": 0.9888,
1862
+ "learning_rate": 1.5255931892932344e-05,
1863
+ "loss": 0.4643,
1864
+ "step": 618
1865
+ },
1866
+ {
1867
+ "epoch": 0.992,
1868
+ "learning_rate": 1.5160492727653238e-05,
1869
+ "loss": 0.3212,
1870
+ "step": 620
1871
+ },
1872
+ {
1873
+ "epoch": 0.9952,
1874
+ "learning_rate": 1.5064408541525578e-05,
1875
+ "loss": 0.3708,
1876
+ "step": 622
1877
+ },
1878
+ {
1879
+ "epoch": 0.9984,
1880
+ "learning_rate": 1.4967691344314012e-05,
1881
+ "loss": 0.2716,
1882
+ "step": 624
1883
+ },
1884
+ {
1885
+ "epoch": 1.0,
1886
+ "step": 625,
1887
+ "total_flos": 2661804507398144.0,
1888
+ "train_loss": 0.48295580658912657,
1889
+ "train_runtime": 2484.1441,
1890
+ "train_samples_per_second": 4.026,
1891
+ "train_steps_per_second": 0.252
1892
+ }
1893
+ ],
1894
+ "logging_steps": 2,
1895
+ "max_steps": 625,
1896
+ "num_input_tokens_seen": 0,
1897
+ "num_train_epochs": 1,
1898
+ "save_steps": 500,
1899
+ "stateful_callbacks": {},
1900
+ "total_flos": 2661804507398144.0,
1901
+ "train_batch_size": 1,
1902
+ "trial_name": null,
1903
+ "trial_params": null
1904
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06800ee3fb800f76dbce23952c4f3dd12debeccaafda8c7788c6b1b4408bc674
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64bd280b15d88baf053fcba5a17e25b5b1d088f6f50c9d0ef7c225acd0af35b2
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8b6efaaa6c0edfe31c7c349a95fe5e62461a4b89aa48eee3b4baeaaf05849b
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_infoBatch_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369d3c8936e1a2c22d007bd6f3766bf82f85e5ddba7be9eb91a60579a81687e3
3
+ size 639793378
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/0_trainer_state.json ADDED
@@ -0,0 +1,1904 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 625,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0032,
14
+ "learning_rate": 2.4524967251364995e-06,
15
+ "loss": 0.0124,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.0064,
20
+ "learning_rate": 2.5263093403840022e-06,
21
+ "loss": 0.1119,
22
+ "step": 4
23
+ },
24
+ {
25
+ "epoch": 0.0096,
26
+ "learning_rate": 2.6010561079587694e-06,
27
+ "loss": 0.3245,
28
+ "step": 6
29
+ },
30
+ {
31
+ "epoch": 0.0128,
32
+ "learning_rate": 2.6767276851049716e-06,
33
+ "loss": 0.7283,
34
+ "step": 8
35
+ },
36
+ {
37
+ "epoch": 0.016,
38
+ "learning_rate": 2.7533146134728993e-06,
39
+ "loss": 0.0138,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.0192,
44
+ "learning_rate": 2.8308073203011634e-06,
45
+ "loss": 0.3943,
46
+ "step": 12
47
+ },
48
+ {
49
+ "epoch": 0.0224,
50
+ "learning_rate": 2.909196119613218e-06,
51
+ "loss": 0.0171,
52
+ "step": 14
53
+ },
54
+ {
55
+ "epoch": 0.0256,
56
+ "learning_rate": 2.988471213428035e-06,
57
+ "loss": 0.5327,
58
+ "step": 16
59
+ },
60
+ {
61
+ "epoch": 0.0288,
62
+ "learning_rate": 3.068622692984767e-06,
63
+ "loss": 0.0363,
64
+ "step": 18
65
+ },
66
+ {
67
+ "epoch": 0.032,
68
+ "learning_rate": 3.1496405399812602e-06,
69
+ "loss": 0.1419,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.0352,
74
+ "learning_rate": 3.231514627826302e-06,
75
+ "loss": 0.0843,
76
+ "step": 22
77
+ },
78
+ {
79
+ "epoch": 0.0384,
80
+ "learning_rate": 3.314234722905302e-06,
81
+ "loss": 0.2904,
82
+ "step": 24
83
+ },
84
+ {
85
+ "epoch": 0.0416,
86
+ "learning_rate": 3.3977904858594534e-06,
87
+ "loss": 0.0209,
88
+ "step": 26
89
+ },
90
+ {
91
+ "epoch": 0.0448,
92
+ "learning_rate": 3.4821714728780654e-06,
93
+ "loss": 0.9589,
94
+ "step": 28
95
+ },
96
+ {
97
+ "epoch": 0.048,
98
+ "learning_rate": 3.567367137003953e-06,
99
+ "loss": 0.02,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.0512,
104
+ "learning_rate": 3.653366829451711e-06,
105
+ "loss": 0.0317,
106
+ "step": 32
107
+ },
108
+ {
109
+ "epoch": 0.0544,
110
+ "learning_rate": 3.740159800938784e-06,
111
+ "loss": 0.044,
112
+ "step": 34
113
+ },
114
+ {
115
+ "epoch": 0.0576,
116
+ "learning_rate": 3.827735203028956e-06,
117
+ "loss": 0.5648,
118
+ "step": 36
119
+ },
120
+ {
121
+ "epoch": 0.0608,
122
+ "learning_rate": 3.916082089488379e-06,
123
+ "loss": 0.1185,
124
+ "step": 38
125
+ },
126
+ {
127
+ "epoch": 0.064,
128
+ "learning_rate": 4.005189417653737e-06,
129
+ "loss": 0.2974,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.0672,
134
+ "learning_rate": 4.095046049812541e-06,
135
+ "loss": 0.0897,
136
+ "step": 42
137
+ },
138
+ {
139
+ "epoch": 0.0704,
140
+ "learning_rate": 4.1856407545951825e-06,
141
+ "loss": 0.0195,
142
+ "step": 44
143
+ },
144
+ {
145
+ "epoch": 0.0736,
146
+ "learning_rate": 4.276962208378814e-06,
147
+ "loss": 0.4563,
148
+ "step": 46
149
+ },
150
+ {
151
+ "epoch": 0.0768,
152
+ "learning_rate": 4.368998996702686e-06,
153
+ "loss": 0.0352,
154
+ "step": 48
155
+ },
156
+ {
157
+ "epoch": 0.08,
158
+ "learning_rate": 4.461739615694921e-06,
159
+ "loss": 0.0451,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.0832,
164
+ "learning_rate": 4.555172473510324e-06,
165
+ "loss": 0.1638,
166
+ "step": 52
167
+ },
168
+ {
169
+ "epoch": 0.0864,
170
+ "learning_rate": 4.649285891779326e-06,
171
+ "loss": 0.0027,
172
+ "step": 54
173
+ },
174
+ {
175
+ "epoch": 0.0896,
176
+ "learning_rate": 4.744068107067673e-06,
177
+ "loss": 0.0295,
178
+ "step": 56
179
+ },
180
+ {
181
+ "epoch": 0.0928,
182
+ "learning_rate": 4.839507272346751e-06,
183
+ "loss": 0.0075,
184
+ "step": 58
185
+ },
186
+ {
187
+ "epoch": 0.096,
188
+ "learning_rate": 4.935591458474425e-06,
189
+ "loss": 0.0477,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.0992,
194
+ "learning_rate": 5.032308655686007e-06,
195
+ "loss": 0.2527,
196
+ "step": 62
197
+ },
198
+ {
199
+ "epoch": 0.1024,
200
+ "learning_rate": 5.129646775095432e-06,
201
+ "loss": 0.2065,
202
+ "step": 64
203
+ },
204
+ {
205
+ "epoch": 0.1056,
206
+ "learning_rate": 5.227593650206246e-06,
207
+ "loss": 0.008,
208
+ "step": 66
209
+ },
210
+ {
211
+ "epoch": 0.1088,
212
+ "learning_rate": 5.3261370384323904e-06,
213
+ "loss": 0.024,
214
+ "step": 68
215
+ },
216
+ {
217
+ "epoch": 0.112,
218
+ "learning_rate": 5.425264622628326e-06,
219
+ "loss": 0.1038,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.1152,
224
+ "learning_rate": 5.524964012628644e-06,
225
+ "loss": 0.0882,
226
+ "step": 72
227
+ },
228
+ {
229
+ "epoch": 0.1184,
230
+ "learning_rate": 5.62522274679673e-06,
231
+ "loss": 0.0133,
232
+ "step": 74
233
+ },
234
+ {
235
+ "epoch": 0.1216,
236
+ "learning_rate": 5.726028293582342e-06,
237
+ "loss": 0.0983,
238
+ "step": 76
239
+ },
240
+ {
241
+ "epoch": 0.1248,
242
+ "learning_rate": 5.827368053088032e-06,
243
+ "loss": 0.0247,
244
+ "step": 78
245
+ },
246
+ {
247
+ "epoch": 0.128,
248
+ "learning_rate": 5.929229358643925e-06,
249
+ "loss": 0.0476,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.1312,
254
+ "learning_rate": 6.03159947839103e-06,
255
+ "loss": 0.0985,
256
+ "step": 82
257
+ },
258
+ {
259
+ "epoch": 0.1344,
260
+ "learning_rate": 6.13446561687258e-06,
261
+ "loss": 0.1289,
262
+ "step": 84
263
+ },
264
+ {
265
+ "epoch": 0.1376,
266
+ "learning_rate": 6.237814916633431e-06,
267
+ "loss": 0.0906,
268
+ "step": 86
269
+ },
270
+ {
271
+ "epoch": 0.1408,
272
+ "learning_rate": 6.341634459827044e-06,
273
+ "loss": 0.2643,
274
+ "step": 88
275
+ },
276
+ {
277
+ "epoch": 0.144,
278
+ "learning_rate": 6.445911269830183e-06,
279
+ "loss": 0.0301,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.1472,
284
+ "learning_rate": 6.5506323128648654e-06,
285
+ "loss": 0.0209,
286
+ "step": 92
287
+ },
288
+ {
289
+ "epoch": 0.1504,
290
+ "learning_rate": 6.655784499627476e-06,
291
+ "loss": 0.0335,
292
+ "step": 94
293
+ },
294
+ {
295
+ "epoch": 0.1536,
296
+ "learning_rate": 6.761354686924883e-06,
297
+ "loss": 0.0118,
298
+ "step": 96
299
+ },
300
+ {
301
+ "epoch": 0.1568,
302
+ "learning_rate": 6.867329679317144e-06,
303
+ "loss": 0.0038,
304
+ "step": 98
305
+ },
306
+ {
307
+ "epoch": 0.16,
308
+ "learning_rate": 6.973696230766884e-06,
309
+ "loss": 0.2888,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.1632,
314
+ "learning_rate": 7.080441046294945e-06,
315
+ "loss": 0.5279,
316
+ "step": 102
317
+ },
318
+ {
319
+ "epoch": 0.1664,
320
+ "learning_rate": 7.18755078364214e-06,
321
+ "loss": 0.0023,
322
+ "step": 104
323
+ },
324
+ {
325
+ "epoch": 0.1696,
326
+ "learning_rate": 7.2950120549369204e-06,
327
+ "loss": 0.7006,
328
+ "step": 106
329
+ },
330
+ {
331
+ "epoch": 0.1728,
332
+ "learning_rate": 7.402811428368824e-06,
333
+ "loss": 0.0933,
334
+ "step": 108
335
+ },
336
+ {
337
+ "epoch": 0.176,
338
+ "learning_rate": 7.510935429867233e-06,
339
+ "loss": 0.6504,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.1792,
344
+ "learning_rate": 7.619370544785608e-06,
345
+ "loss": 0.0698,
346
+ "step": 112
347
+ },
348
+ {
349
+ "epoch": 0.1824,
350
+ "learning_rate": 7.728103219590684e-06,
351
+ "loss": 0.1815,
352
+ "step": 114
353
+ },
354
+ {
355
+ "epoch": 0.1856,
356
+ "learning_rate": 7.83711986355656e-06,
357
+ "loss": 0.0406,
358
+ "step": 116
359
+ },
360
+ {
361
+ "epoch": 0.1888,
362
+ "learning_rate": 7.946406850463435e-06,
363
+ "loss": 0.0528,
364
+ "step": 118
365
+ },
366
+ {
367
+ "epoch": 0.192,
368
+ "learning_rate": 8.055950520300756e-06,
369
+ "loss": 0.0072,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.1952,
374
+ "learning_rate": 8.165737180974676e-06,
375
+ "loss": 0.01,
376
+ "step": 122
377
+ },
378
+ {
379
+ "epoch": 0.1984,
380
+ "learning_rate": 8.275753110019367e-06,
381
+ "loss": 0.0128,
382
+ "step": 124
383
+ },
384
+ {
385
+ "epoch": 0.2016,
386
+ "learning_rate": 8.385984556312285e-06,
387
+ "loss": 0.0469,
388
+ "step": 126
389
+ },
390
+ {
391
+ "epoch": 0.2048,
392
+ "learning_rate": 8.496417741792922e-06,
393
+ "loss": 0.5632,
394
+ "step": 128
395
+ },
396
+ {
397
+ "epoch": 0.208,
398
+ "learning_rate": 8.607038863184952e-06,
399
+ "loss": 0.1772,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.2112,
404
+ "learning_rate": 8.717834093721598e-06,
405
+ "loss": 0.0307,
406
+ "step": 132
407
+ },
408
+ {
409
+ "epoch": 0.2144,
410
+ "learning_rate": 8.828789584873757e-06,
411
+ "loss": 0.9967,
412
+ "step": 134
413
+ },
414
+ {
415
+ "epoch": 0.2176,
416
+ "learning_rate": 8.939891468081036e-06,
417
+ "loss": 0.5046,
418
+ "step": 136
419
+ },
420
+ {
421
+ "epoch": 0.2208,
422
+ "learning_rate": 9.051125856485175e-06,
423
+ "loss": 0.2298,
424
+ "step": 138
425
+ },
426
+ {
427
+ "epoch": 0.224,
428
+ "learning_rate": 9.162478846665854e-06,
429
+ "loss": 0.0459,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.2272,
434
+ "learning_rate": 9.273936520378426e-06,
435
+ "loss": 0.1685,
436
+ "step": 142
437
+ },
438
+ {
439
+ "epoch": 0.2304,
440
+ "learning_rate": 9.38548494629364e-06,
441
+ "loss": 0.0063,
442
+ "step": 144
443
+ },
444
+ {
445
+ "epoch": 0.2336,
446
+ "learning_rate": 9.497110181738935e-06,
447
+ "loss": 0.3046,
448
+ "step": 146
449
+ },
450
+ {
451
+ "epoch": 0.2368,
452
+ "learning_rate": 9.608798274441153e-06,
453
+ "loss": 0.5494,
454
+ "step": 148
455
+ },
456
+ {
457
+ "epoch": 0.24,
458
+ "learning_rate": 9.720535264270526e-06,
459
+ "loss": 0.141,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.2432,
464
+ "learning_rate": 9.832307184985473e-06,
465
+ "loss": 0.0304,
466
+ "step": 152
467
+ },
468
+ {
469
+ "epoch": 0.2464,
470
+ "learning_rate": 9.944100065978354e-06,
471
+ "loss": 0.0139,
472
+ "step": 154
473
+ },
474
+ {
475
+ "epoch": 0.2496,
476
+ "learning_rate": 1.0055899934021637e-05,
477
+ "loss": 0.0148,
478
+ "step": 156
479
+ },
480
+ {
481
+ "epoch": 0.2528,
482
+ "learning_rate": 1.016769281501452e-05,
483
+ "loss": 0.2346,
484
+ "step": 158
485
+ },
486
+ {
487
+ "epoch": 0.256,
488
+ "learning_rate": 1.0279464735729467e-05,
489
+ "loss": 0.0764,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.2592,
494
+ "learning_rate": 1.039120172555884e-05,
495
+ "loss": 0.0553,
496
+ "step": 162
497
+ },
498
+ {
499
+ "epoch": 0.2624,
500
+ "learning_rate": 1.0502889818261058e-05,
501
+ "loss": 0.0073,
502
+ "step": 164
503
+ },
504
+ {
505
+ "epoch": 0.2656,
506
+ "learning_rate": 1.0614515053706354e-05,
507
+ "loss": 0.5272,
508
+ "step": 166
509
+ },
510
+ {
511
+ "epoch": 0.2688,
512
+ "learning_rate": 1.0726063479621567e-05,
513
+ "loss": 0.0259,
514
+ "step": 168
515
+ },
516
+ {
517
+ "epoch": 0.272,
518
+ "learning_rate": 1.083752115333414e-05,
519
+ "loss": 0.1772,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.2752,
524
+ "learning_rate": 1.0948874143514818e-05,
525
+ "loss": 0.0383,
526
+ "step": 172
527
+ },
528
+ {
529
+ "epoch": 0.2784,
530
+ "learning_rate": 1.1060108531918955e-05,
531
+ "loss": 0.685,
532
+ "step": 174
533
+ },
534
+ {
535
+ "epoch": 0.2816,
536
+ "learning_rate": 1.1171210415126238e-05,
537
+ "loss": 0.4268,
538
+ "step": 176
539
+ },
540
+ {
541
+ "epoch": 0.2848,
542
+ "learning_rate": 1.1282165906278395e-05,
543
+ "loss": 0.1043,
544
+ "step": 178
545
+ },
546
+ {
547
+ "epoch": 0.288,
548
+ "learning_rate": 1.1392961136815041e-05,
549
+ "loss": 0.7231,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 0.2912,
554
+ "learning_rate": 1.150358225820707e-05,
555
+ "loss": 0.0691,
556
+ "step": 182
557
+ },
558
+ {
559
+ "epoch": 0.2944,
560
+ "learning_rate": 1.1614015443687708e-05,
561
+ "loss": 0.9235,
562
+ "step": 184
563
+ },
564
+ {
565
+ "epoch": 0.2976,
566
+ "learning_rate": 1.1724246889980626e-05,
567
+ "loss": 0.1661,
568
+ "step": 186
569
+ },
570
+ {
571
+ "epoch": 0.3008,
572
+ "learning_rate": 1.1834262819025317e-05,
573
+ "loss": 0.0621,
574
+ "step": 188
575
+ },
576
+ {
577
+ "epoch": 0.304,
578
+ "learning_rate": 1.1944049479699241e-05,
579
+ "loss": 0.0336,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 0.3072,
584
+ "learning_rate": 1.2053593149536557e-05,
585
+ "loss": 0.1993,
586
+ "step": 192
587
+ },
588
+ {
589
+ "epoch": 0.3104,
590
+ "learning_rate": 1.2162880136443434e-05,
591
+ "loss": 0.0189,
592
+ "step": 194
593
+ },
594
+ {
595
+ "epoch": 0.3136,
596
+ "learning_rate": 1.2271896780409309e-05,
597
+ "loss": 0.0297,
598
+ "step": 196
599
+ },
600
+ {
601
+ "epoch": 0.3168,
602
+ "learning_rate": 1.2380629455214385e-05,
603
+ "loss": 0.8886,
604
+ "step": 198
605
+ },
606
+ {
607
+ "epoch": 0.32,
608
+ "learning_rate": 1.2489064570132761e-05,
609
+ "loss": 0.1489,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 0.3232,
614
+ "learning_rate": 1.259718857163117e-05,
615
+ "loss": 0.4239,
616
+ "step": 202
617
+ },
618
+ {
619
+ "epoch": 0.3264,
620
+ "learning_rate": 1.2704987945063073e-05,
621
+ "loss": 0.0831,
622
+ "step": 204
623
+ },
624
+ {
625
+ "epoch": 0.3296,
626
+ "learning_rate": 1.2812449216357855e-05,
627
+ "loss": 0.0315,
628
+ "step": 206
629
+ },
630
+ {
631
+ "epoch": 0.3328,
632
+ "learning_rate": 1.2919558953705047e-05,
633
+ "loss": 0.0255,
634
+ "step": 208
635
+ },
636
+ {
637
+ "epoch": 0.336,
638
+ "learning_rate": 1.3026303769233109e-05,
639
+ "loss": 0.0152,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 0.3392,
644
+ "learning_rate": 1.313267032068285e-05,
645
+ "loss": 0.4755,
646
+ "step": 212
647
+ },
648
+ {
649
+ "epoch": 0.3424,
650
+ "learning_rate": 1.3238645313075109e-05,
651
+ "loss": 0.2811,
652
+ "step": 214
653
+ },
654
+ {
655
+ "epoch": 0.3456,
656
+ "learning_rate": 1.3344215500372517e-05,
657
+ "loss": 0.3031,
658
+ "step": 216
659
+ },
660
+ {
661
+ "epoch": 0.3488,
662
+ "learning_rate": 1.344936768713513e-05,
663
+ "loss": 0.179,
664
+ "step": 218
665
+ },
666
+ {
667
+ "epoch": 0.352,
668
+ "learning_rate": 1.3554088730169812e-05,
669
+ "loss": 0.0079,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 0.3552,
674
+ "learning_rate": 1.3658365540172948e-05,
675
+ "loss": 0.032,
676
+ "step": 222
677
+ },
678
+ {
679
+ "epoch": 0.3584,
680
+ "learning_rate": 1.3762185083366562e-05,
681
+ "loss": 0.0984,
682
+ "step": 224
683
+ },
684
+ {
685
+ "epoch": 0.3616,
686
+ "learning_rate": 1.3865534383127413e-05,
687
+ "loss": 0.0538,
688
+ "step": 226
689
+ },
690
+ {
691
+ "epoch": 0.3648,
692
+ "learning_rate": 1.3968400521608962e-05,
693
+ "loss": 0.2697,
694
+ "step": 228
695
+ },
696
+ {
697
+ "epoch": 0.368,
698
+ "learning_rate": 1.4070770641356069e-05,
699
+ "loss": 0.5622,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 0.3712,
704
+ "learning_rate": 1.4172631946911964e-05,
705
+ "loss": 0.007,
706
+ "step": 232
707
+ },
708
+ {
709
+ "epoch": 0.3744,
710
+ "learning_rate": 1.4273971706417653e-05,
711
+ "loss": 0.1558,
712
+ "step": 234
713
+ },
714
+ {
715
+ "epoch": 0.3776,
716
+ "learning_rate": 1.4374777253203265e-05,
717
+ "loss": 0.0037,
718
+ "step": 236
719
+ },
720
+ {
721
+ "epoch": 0.3808,
722
+ "learning_rate": 1.4475035987371348e-05,
723
+ "loss": 0.3933,
724
+ "step": 238
725
+ },
726
+ {
727
+ "epoch": 0.384,
728
+ "learning_rate": 1.4574735377371669e-05,
729
+ "loss": 0.0029,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 0.3872,
734
+ "learning_rate": 1.4673862961567604e-05,
735
+ "loss": 0.0716,
736
+ "step": 242
737
+ },
738
+ {
739
+ "epoch": 0.3904,
740
+ "learning_rate": 1.4772406349793749e-05,
741
+ "loss": 0.3482,
742
+ "step": 244
743
+ },
744
+ {
745
+ "epoch": 0.3936,
746
+ "learning_rate": 1.4870353224904563e-05,
747
+ "loss": 1.1097,
748
+ "step": 246
749
+ },
750
+ {
751
+ "epoch": 0.3968,
752
+ "learning_rate": 1.4967691344313988e-05,
753
+ "loss": 0.0125,
754
+ "step": 248
755
+ },
756
+ {
757
+ "epoch": 0.4,
758
+ "learning_rate": 1.5064408541525568e-05,
759
+ "loss": 0.1143,
760
+ "step": 250
761
+ },
762
+ {
763
+ "epoch": 0.4032,
764
+ "learning_rate": 1.5160492727653245e-05,
765
+ "loss": 0.0952,
766
+ "step": 252
767
+ },
768
+ {
769
+ "epoch": 0.4064,
770
+ "learning_rate": 1.5255931892932322e-05,
771
+ "loss": 0.2256,
772
+ "step": 254
773
+ },
774
+ {
775
+ "epoch": 0.4096,
776
+ "learning_rate": 1.5350714108220667e-05,
777
+ "loss": 0.1509,
778
+ "step": 256
779
+ },
780
+ {
781
+ "epoch": 0.4128,
782
+ "learning_rate": 1.5444827526489668e-05,
783
+ "loss": 0.5427,
784
+ "step": 258
785
+ },
786
+ {
787
+ "epoch": 0.416,
788
+ "learning_rate": 1.5538260384305073e-05,
789
+ "loss": 0.2522,
790
+ "step": 260
791
+ },
792
+ {
793
+ "epoch": 0.4192,
794
+ "learning_rate": 1.563100100329731e-05,
795
+ "loss": 0.3037,
796
+ "step": 262
797
+ },
798
+ {
799
+ "epoch": 0.4224,
800
+ "learning_rate": 1.572303779162118e-05,
801
+ "loss": 0.2966,
802
+ "step": 264
803
+ },
804
+ {
805
+ "epoch": 0.4256,
806
+ "learning_rate": 1.581435924540481e-05,
807
+ "loss": 0.2745,
808
+ "step": 266
809
+ },
810
+ {
811
+ "epoch": 0.4288,
812
+ "learning_rate": 1.5904953950187455e-05,
813
+ "loss": 0.0866,
814
+ "step": 268
815
+ },
816
+ {
817
+ "epoch": 0.432,
818
+ "learning_rate": 1.599481058234626e-05,
819
+ "loss": 0.5184,
820
+ "step": 270
821
+ },
822
+ {
823
+ "epoch": 0.4352,
824
+ "learning_rate": 1.6083917910511616e-05,
825
+ "loss": 0.0244,
826
+ "step": 272
827
+ },
828
+ {
829
+ "epoch": 0.4384,
830
+ "learning_rate": 1.617226479697104e-05,
831
+ "loss": 0.0457,
832
+ "step": 274
833
+ },
834
+ {
835
+ "epoch": 0.4416,
836
+ "learning_rate": 1.6259840199061212e-05,
837
+ "loss": 0.3009,
838
+ "step": 276
839
+ },
840
+ {
841
+ "epoch": 0.4448,
842
+ "learning_rate": 1.6346633170548285e-05,
843
+ "loss": 0.0203,
844
+ "step": 278
845
+ },
846
+ {
847
+ "epoch": 0.448,
848
+ "learning_rate": 1.6432632862996042e-05,
849
+ "loss": 0.4014,
850
+ "step": 280
851
+ },
852
+ {
853
+ "epoch": 0.4512,
854
+ "learning_rate": 1.6517828527121928e-05,
855
+ "loss": 0.0453,
856
+ "step": 282
857
+ },
858
+ {
859
+ "epoch": 0.4544,
860
+ "learning_rate": 1.6602209514140542e-05,
861
+ "loss": 0.9482,
862
+ "step": 284
863
+ },
864
+ {
865
+ "epoch": 0.4576,
866
+ "learning_rate": 1.6685765277094695e-05,
867
+ "loss": 0.7503,
868
+ "step": 286
869
+ },
870
+ {
871
+ "epoch": 0.4608,
872
+ "learning_rate": 1.6768485372173696e-05,
873
+ "loss": 0.5349,
874
+ "step": 288
875
+ },
876
+ {
877
+ "epoch": 0.464,
878
+ "learning_rate": 1.6850359460018733e-05,
879
+ "loss": 0.0047,
880
+ "step": 290
881
+ },
882
+ {
883
+ "epoch": 0.4672,
884
+ "learning_rate": 1.6931377307015226e-05,
885
+ "loss": 0.3031,
886
+ "step": 292
887
+ },
888
+ {
889
+ "epoch": 0.4704,
890
+ "learning_rate": 1.701152878657196e-05,
891
+ "loss": 0.2769,
892
+ "step": 294
893
+ },
894
+ {
895
+ "epoch": 0.4736,
896
+ "learning_rate": 1.7090803880386778e-05,
897
+ "loss": 0.0565,
898
+ "step": 296
899
+ },
900
+ {
901
+ "epoch": 0.4768,
902
+ "learning_rate": 1.716919267969883e-05,
903
+ "loss": 0.5363,
904
+ "step": 298
905
+ },
906
+ {
907
+ "epoch": 0.48,
908
+ "learning_rate": 1.7246685386527095e-05,
909
+ "loss": 0.0496,
910
+ "step": 300
911
+ },
912
+ {
913
+ "epoch": 0.4832,
914
+ "learning_rate": 1.7323272314895022e-05,
915
+ "loss": 0.1653,
916
+ "step": 302
917
+ },
918
+ {
919
+ "epoch": 0.4864,
920
+ "learning_rate": 1.7398943892041227e-05,
921
+ "loss": 0.0299,
922
+ "step": 304
923
+ },
924
+ {
925
+ "epoch": 0.4896,
926
+ "learning_rate": 1.7473690659615992e-05,
927
+ "loss": 1.0825,
928
+ "step": 306
929
+ },
930
+ {
931
+ "epoch": 0.4928,
932
+ "learning_rate": 1.7547503274863495e-05,
933
+ "loss": 0.5881,
934
+ "step": 308
935
+ },
936
+ {
937
+ "epoch": 0.496,
938
+ "learning_rate": 1.7620372511789604e-05,
939
+ "loss": 0.0622,
940
+ "step": 310
941
+ },
942
+ {
943
+ "epoch": 0.4992,
944
+ "learning_rate": 1.7692289262315e-05,
945
+ "loss": 0.363,
946
+ "step": 312
947
+ },
948
+ {
949
+ "epoch": 0.5024,
950
+ "learning_rate": 1.7763244537413657e-05,
951
+ "loss": 0.3924,
952
+ "step": 314
953
+ },
954
+ {
955
+ "epoch": 0.5056,
956
+ "learning_rate": 1.7833229468236364e-05,
957
+ "loss": 0.0844,
958
+ "step": 316
959
+ },
960
+ {
961
+ "epoch": 0.5088,
962
+ "learning_rate": 1.790223530721933e-05,
963
+ "loss": 0.0594,
964
+ "step": 318
965
+ },
966
+ {
967
+ "epoch": 0.512,
968
+ "learning_rate": 1.7970253429177477e-05,
969
+ "loss": 0.0755,
970
+ "step": 320
971
+ },
972
+ {
973
+ "epoch": 0.5152,
974
+ "learning_rate": 1.803727533238257e-05,
975
+ "loss": 0.0391,
976
+ "step": 322
977
+ },
978
+ {
979
+ "epoch": 0.5184,
980
+ "learning_rate": 1.8103292639625842e-05,
981
+ "loss": 0.0105,
982
+ "step": 324
983
+ },
984
+ {
985
+ "epoch": 0.5216,
986
+ "learning_rate": 1.816829709926509e-05,
987
+ "loss": 0.3878,
988
+ "step": 326
989
+ },
990
+ {
991
+ "epoch": 0.5248,
992
+ "learning_rate": 1.8232280586256097e-05,
993
+ "loss": 0.7927,
994
+ "step": 328
995
+ },
996
+ {
997
+ "epoch": 0.528,
998
+ "learning_rate": 1.829523510316813e-05,
999
+ "loss": 0.3557,
1000
+ "step": 330
1001
+ },
1002
+ {
1003
+ "epoch": 0.5312,
1004
+ "learning_rate": 1.8357152781183606e-05,
1005
+ "loss": 0.1976,
1006
+ "step": 332
1007
+ },
1008
+ {
1009
+ "epoch": 0.5344,
1010
+ "learning_rate": 1.8418025881081606e-05,
1011
+ "loss": 0.0702,
1012
+ "step": 334
1013
+ },
1014
+ {
1015
+ "epoch": 0.5376,
1016
+ "learning_rate": 1.8477846794205258e-05,
1017
+ "loss": 0.326,
1018
+ "step": 336
1019
+ },
1020
+ {
1021
+ "epoch": 0.5408,
1022
+ "learning_rate": 1.8536608043412695e-05,
1023
+ "loss": 0.2874,
1024
+ "step": 338
1025
+ },
1026
+ {
1027
+ "epoch": 0.544,
1028
+ "learning_rate": 1.85943022840117e-05,
1029
+ "loss": 0.3472,
1030
+ "step": 340
1031
+ },
1032
+ {
1033
+ "epoch": 0.5472,
1034
+ "learning_rate": 1.865092230467769e-05,
1035
+ "loss": 0.0466,
1036
+ "step": 342
1037
+ },
1038
+ {
1039
+ "epoch": 0.5504,
1040
+ "learning_rate": 1.87064610283551e-05,
1041
+ "loss": 0.1335,
1042
+ "step": 344
1043
+ },
1044
+ {
1045
+ "epoch": 0.5536,
1046
+ "learning_rate": 1.876091151314196e-05,
1047
+ "loss": 0.0237,
1048
+ "step": 346
1049
+ },
1050
+ {
1051
+ "epoch": 0.5568,
1052
+ "learning_rate": 1.8814266953157557e-05,
1053
+ "loss": 0.0153,
1054
+ "step": 348
1055
+ },
1056
+ {
1057
+ "epoch": 0.56,
1058
+ "learning_rate": 1.8866520679393127e-05,
1059
+ "loss": 1.0523,
1060
+ "step": 350
1061
+ },
1062
+ {
1063
+ "epoch": 0.5632,
1064
+ "learning_rate": 1.8917666160545436e-05,
1065
+ "loss": 0.4291,
1066
+ "step": 352
1067
+ },
1068
+ {
1069
+ "epoch": 0.5664,
1070
+ "learning_rate": 1.896769700383315e-05,
1071
+ "loss": 0.2238,
1072
+ "step": 354
1073
+ },
1074
+ {
1075
+ "epoch": 0.5696,
1076
+ "learning_rate": 1.901660695579585e-05,
1077
+ "loss": 0.2438,
1078
+ "step": 356
1079
+ },
1080
+ {
1081
+ "epoch": 0.5728,
1082
+ "learning_rate": 1.9064389903075676e-05,
1083
+ "loss": 0.772,
1084
+ "step": 358
1085
+ },
1086
+ {
1087
+ "epoch": 0.576,
1088
+ "learning_rate": 1.911103987318148e-05,
1089
+ "loss": 0.4986,
1090
+ "step": 360
1091
+ },
1092
+ {
1093
+ "epoch": 0.5792,
1094
+ "learning_rate": 1.9156551035235288e-05,
1095
+ "loss": 0.9014,
1096
+ "step": 362
1097
+ },
1098
+ {
1099
+ "epoch": 0.5824,
1100
+ "learning_rate": 1.9200917700701173e-05,
1101
+ "loss": 0.0132,
1102
+ "step": 364
1103
+ },
1104
+ {
1105
+ "epoch": 0.5856,
1106
+ "learning_rate": 1.924413432409622e-05,
1107
+ "loss": 0.0687,
1108
+ "step": 366
1109
+ },
1110
+ {
1111
+ "epoch": 0.5888,
1112
+ "learning_rate": 1.9286195503683705e-05,
1113
+ "loss": 0.0116,
1114
+ "step": 368
1115
+ },
1116
+ {
1117
+ "epoch": 0.592,
1118
+ "learning_rate": 1.932709598214825e-05,
1119
+ "loss": 0.2541,
1120
+ "step": 370
1121
+ },
1122
+ {
1123
+ "epoch": 0.5952,
1124
+ "learning_rate": 1.9366830647252967e-05,
1125
+ "loss": 0.3812,
1126
+ "step": 372
1127
+ },
1128
+ {
1129
+ "epoch": 0.5984,
1130
+ "learning_rate": 1.940539453247842e-05,
1131
+ "loss": 0.5181,
1132
+ "step": 374
1133
+ },
1134
+ {
1135
+ "epoch": 0.6016,
1136
+ "learning_rate": 1.944278281764342e-05,
1137
+ "loss": 0.19,
1138
+ "step": 376
1139
+ },
1140
+ {
1141
+ "epoch": 0.6048,
1142
+ "learning_rate": 1.9478990829507504e-05,
1143
+ "loss": 0.1293,
1144
+ "step": 378
1145
+ },
1146
+ {
1147
+ "epoch": 0.608,
1148
+ "learning_rate": 1.951401404235505e-05,
1149
+ "loss": 0.0496,
1150
+ "step": 380
1151
+ },
1152
+ {
1153
+ "epoch": 0.6112,
1154
+ "learning_rate": 1.9547848078560975e-05,
1155
+ "loss": 0.2873,
1156
+ "step": 382
1157
+ },
1158
+ {
1159
+ "epoch": 0.6144,
1160
+ "learning_rate": 1.9580488709137858e-05,
1161
+ "loss": 0.5678,
1162
+ "step": 384
1163
+ },
1164
+ {
1165
+ "epoch": 0.6176,
1166
+ "learning_rate": 1.961193185426459e-05,
1167
+ "loss": 0.0317,
1168
+ "step": 386
1169
+ },
1170
+ {
1171
+ "epoch": 0.6208,
1172
+ "learning_rate": 1.9642173583796265e-05,
1173
+ "loss": 0.1365,
1174
+ "step": 388
1175
+ },
1176
+ {
1177
+ "epoch": 0.624,
1178
+ "learning_rate": 1.967121011775546e-05,
1179
+ "loss": 0.1463,
1180
+ "step": 390
1181
+ },
1182
+ {
1183
+ "epoch": 0.6272,
1184
+ "learning_rate": 1.969903782680467e-05,
1185
+ "loss": 0.05,
1186
+ "step": 392
1187
+ },
1188
+ {
1189
+ "epoch": 0.6304,
1190
+ "learning_rate": 1.9725653232699962e-05,
1191
+ "loss": 0.019,
1192
+ "step": 394
1193
+ },
1194
+ {
1195
+ "epoch": 0.6336,
1196
+ "learning_rate": 1.9751053008725736e-05,
1197
+ "loss": 0.5648,
1198
+ "step": 396
1199
+ },
1200
+ {
1201
+ "epoch": 0.6368,
1202
+ "learning_rate": 1.9775233980110524e-05,
1203
+ "loss": 0.0464,
1204
+ "step": 398
1205
+ },
1206
+ {
1207
+ "epoch": 0.64,
1208
+ "learning_rate": 1.9798193124423804e-05,
1209
+ "loss": 1.4197,
1210
+ "step": 400
1211
+ },
1212
+ {
1213
+ "epoch": 0.6432,
1214
+ "learning_rate": 1.9819927571953807e-05,
1215
+ "loss": 0.0523,
1216
+ "step": 402
1217
+ },
1218
+ {
1219
+ "epoch": 0.6464,
1220
+ "learning_rate": 1.9840434606066182e-05,
1221
+ "loss": 0.3419,
1222
+ "step": 404
1223
+ },
1224
+ {
1225
+ "epoch": 0.6496,
1226
+ "learning_rate": 1.985971166354357e-05,
1227
+ "loss": 0.7454,
1228
+ "step": 406
1229
+ },
1230
+ {
1231
+ "epoch": 0.6528,
1232
+ "learning_rate": 1.9877756334905983e-05,
1233
+ "loss": 0.1362,
1234
+ "step": 408
1235
+ },
1236
+ {
1237
+ "epoch": 0.656,
1238
+ "learning_rate": 1.9894566364711965e-05,
1239
+ "loss": 0.0834,
1240
+ "step": 410
1241
+ },
1242
+ {
1243
+ "epoch": 0.6592,
1244
+ "learning_rate": 1.99101396518405e-05,
1245
+ "loss": 0.2647,
1246
+ "step": 412
1247
+ },
1248
+ {
1249
+ "epoch": 0.6624,
1250
+ "learning_rate": 1.9924474249753652e-05,
1251
+ "loss": 0.2932,
1252
+ "step": 414
1253
+ },
1254
+ {
1255
+ "epoch": 0.6656,
1256
+ "learning_rate": 1.9937568366739858e-05,
1257
+ "loss": 0.091,
1258
+ "step": 416
1259
+ },
1260
+ {
1261
+ "epoch": 0.6688,
1262
+ "learning_rate": 1.994942036613787e-05,
1263
+ "loss": 0.0372,
1264
+ "step": 418
1265
+ },
1266
+ {
1267
+ "epoch": 0.672,
1268
+ "learning_rate": 1.9960028766541336e-05,
1269
+ "loss": 0.4564,
1270
+ "step": 420
1271
+ },
1272
+ {
1273
+ "epoch": 0.6752,
1274
+ "learning_rate": 1.9969392241983957e-05,
1275
+ "loss": 0.2194,
1276
+ "step": 422
1277
+ },
1278
+ {
1279
+ "epoch": 0.6784,
1280
+ "learning_rate": 1.9977509622105233e-05,
1281
+ "loss": 0.0529,
1282
+ "step": 424
1283
+ },
1284
+ {
1285
+ "epoch": 0.6816,
1286
+ "learning_rate": 1.998437989229673e-05,
1287
+ "loss": 0.4882,
1288
+ "step": 426
1289
+ },
1290
+ {
1291
+ "epoch": 0.6848,
1292
+ "learning_rate": 1.9990002193828923e-05,
1293
+ "loss": 0.3815,
1294
+ "step": 428
1295
+ },
1296
+ {
1297
+ "epoch": 0.688,
1298
+ "learning_rate": 1.9994375823958504e-05,
1299
+ "loss": 0.1259,
1300
+ "step": 430
1301
+ },
1302
+ {
1303
+ "epoch": 0.6912,
1304
+ "learning_rate": 1.9997500236016233e-05,
1305
+ "loss": 0.5569,
1306
+ "step": 432
1307
+ },
1308
+ {
1309
+ "epoch": 0.6944,
1310
+ "learning_rate": 1.9999375039475275e-05,
1311
+ "loss": 0.0036,
1312
+ "step": 434
1313
+ },
1314
+ {
1315
+ "epoch": 0.6976,
1316
+ "learning_rate": 2e-05,
1317
+ "loss": 0.4201,
1318
+ "step": 436
1319
+ },
1320
+ {
1321
+ "epoch": 0.7008,
1322
+ "learning_rate": 1.9999375039475278e-05,
1323
+ "loss": 0.2861,
1324
+ "step": 438
1325
+ },
1326
+ {
1327
+ "epoch": 0.704,
1328
+ "learning_rate": 1.9997500236016233e-05,
1329
+ "loss": 0.2318,
1330
+ "step": 440
1331
+ },
1332
+ {
1333
+ "epoch": 0.7072,
1334
+ "learning_rate": 1.9994375823958504e-05,
1335
+ "loss": 0.4632,
1336
+ "step": 442
1337
+ },
1338
+ {
1339
+ "epoch": 0.7104,
1340
+ "learning_rate": 1.9990002193828923e-05,
1341
+ "loss": 0.2099,
1342
+ "step": 444
1343
+ },
1344
+ {
1345
+ "epoch": 0.7136,
1346
+ "learning_rate": 1.9984379892296735e-05,
1347
+ "loss": 0.0819,
1348
+ "step": 446
1349
+ },
1350
+ {
1351
+ "epoch": 0.7168,
1352
+ "learning_rate": 1.9977509622105236e-05,
1353
+ "loss": 0.538,
1354
+ "step": 448
1355
+ },
1356
+ {
1357
+ "epoch": 0.72,
1358
+ "learning_rate": 1.9969392241983957e-05,
1359
+ "loss": 0.5216,
1360
+ "step": 450
1361
+ },
1362
+ {
1363
+ "epoch": 0.7232,
1364
+ "learning_rate": 1.9960028766541336e-05,
1365
+ "loss": 0.0739,
1366
+ "step": 452
1367
+ },
1368
+ {
1369
+ "epoch": 0.7264,
1370
+ "learning_rate": 1.9949420366137873e-05,
1371
+ "loss": 0.0176,
1372
+ "step": 454
1373
+ },
1374
+ {
1375
+ "epoch": 0.7296,
1376
+ "learning_rate": 1.993756836673986e-05,
1377
+ "loss": 0.141,
1378
+ "step": 456
1379
+ },
1380
+ {
1381
+ "epoch": 0.7328,
1382
+ "learning_rate": 1.9924474249753656e-05,
1383
+ "loss": 0.0097,
1384
+ "step": 458
1385
+ },
1386
+ {
1387
+ "epoch": 0.736,
1388
+ "learning_rate": 1.9910139651840497e-05,
1389
+ "loss": 0.0071,
1390
+ "step": 460
1391
+ },
1392
+ {
1393
+ "epoch": 0.7392,
1394
+ "learning_rate": 1.9894566364711965e-05,
1395
+ "loss": 0.0406,
1396
+ "step": 462
1397
+ },
1398
+ {
1399
+ "epoch": 0.7424,
1400
+ "learning_rate": 1.987775633490599e-05,
1401
+ "loss": 0.0902,
1402
+ "step": 464
1403
+ },
1404
+ {
1405
+ "epoch": 0.7456,
1406
+ "learning_rate": 1.9859711663543573e-05,
1407
+ "loss": 0.1248,
1408
+ "step": 466
1409
+ },
1410
+ {
1411
+ "epoch": 0.7488,
1412
+ "learning_rate": 1.9840434606066186e-05,
1413
+ "loss": 0.2472,
1414
+ "step": 468
1415
+ },
1416
+ {
1417
+ "epoch": 0.752,
1418
+ "learning_rate": 1.9819927571953804e-05,
1419
+ "loss": 0.1011,
1420
+ "step": 470
1421
+ },
1422
+ {
1423
+ "epoch": 0.7552,
1424
+ "learning_rate": 1.9798193124423804e-05,
1425
+ "loss": 0.0349,
1426
+ "step": 472
1427
+ },
1428
+ {
1429
+ "epoch": 0.7584,
1430
+ "learning_rate": 1.9775233980110524e-05,
1431
+ "loss": 1.139,
1432
+ "step": 474
1433
+ },
1434
+ {
1435
+ "epoch": 0.7616,
1436
+ "learning_rate": 1.9751053008725736e-05,
1437
+ "loss": 0.0181,
1438
+ "step": 476
1439
+ },
1440
+ {
1441
+ "epoch": 0.7648,
1442
+ "learning_rate": 1.9725653232699962e-05,
1443
+ "loss": 0.5289,
1444
+ "step": 478
1445
+ },
1446
+ {
1447
+ "epoch": 0.768,
1448
+ "learning_rate": 1.969903782680467e-05,
1449
+ "loss": 0.4342,
1450
+ "step": 480
1451
+ },
1452
+ {
1453
+ "epoch": 0.7712,
1454
+ "learning_rate": 1.9671210117755462e-05,
1455
+ "loss": 1.4602,
1456
+ "step": 482
1457
+ },
1458
+ {
1459
+ "epoch": 0.7744,
1460
+ "learning_rate": 1.9642173583796265e-05,
1461
+ "loss": 0.0076,
1462
+ "step": 484
1463
+ },
1464
+ {
1465
+ "epoch": 0.7776,
1466
+ "learning_rate": 1.961193185426459e-05,
1467
+ "loss": 0.2398,
1468
+ "step": 486
1469
+ },
1470
+ {
1471
+ "epoch": 0.7808,
1472
+ "learning_rate": 1.958048870913786e-05,
1473
+ "loss": 0.0098,
1474
+ "step": 488
1475
+ },
1476
+ {
1477
+ "epoch": 0.784,
1478
+ "learning_rate": 1.9547848078560982e-05,
1479
+ "loss": 0.63,
1480
+ "step": 490
1481
+ },
1482
+ {
1483
+ "epoch": 0.7872,
1484
+ "learning_rate": 1.9514014042355054e-05,
1485
+ "loss": 0.0606,
1486
+ "step": 492
1487
+ },
1488
+ {
1489
+ "epoch": 0.7904,
1490
+ "learning_rate": 1.947899082950751e-05,
1491
+ "loss": 0.0218,
1492
+ "step": 494
1493
+ },
1494
+ {
1495
+ "epoch": 0.7936,
1496
+ "learning_rate": 1.9442782817643425e-05,
1497
+ "loss": 0.5006,
1498
+ "step": 496
1499
+ },
1500
+ {
1501
+ "epoch": 0.7968,
1502
+ "learning_rate": 1.9405394532478422e-05,
1503
+ "loss": 0.0382,
1504
+ "step": 498
1505
+ },
1506
+ {
1507
+ "epoch": 0.8,
1508
+ "learning_rate": 1.9366830647252977e-05,
1509
+ "loss": 0.102,
1510
+ "step": 500
1511
+ },
1512
+ {
1513
+ "epoch": 0.8032,
1514
+ "learning_rate": 1.9327095982148255e-05,
1515
+ "loss": 0.1899,
1516
+ "step": 502
1517
+ },
1518
+ {
1519
+ "epoch": 0.8064,
1520
+ "learning_rate": 1.928619550368371e-05,
1521
+ "loss": 0.505,
1522
+ "step": 504
1523
+ },
1524
+ {
1525
+ "epoch": 0.8096,
1526
+ "learning_rate": 1.9244134324096216e-05,
1527
+ "loss": 0.3562,
1528
+ "step": 506
1529
+ },
1530
+ {
1531
+ "epoch": 0.8128,
1532
+ "learning_rate": 1.9200917700701176e-05,
1533
+ "loss": 0.5208,
1534
+ "step": 508
1535
+ },
1536
+ {
1537
+ "epoch": 0.816,
1538
+ "learning_rate": 1.9156551035235298e-05,
1539
+ "loss": 0.3292,
1540
+ "step": 510
1541
+ },
1542
+ {
1543
+ "epoch": 0.8192,
1544
+ "learning_rate": 1.9111039873181475e-05,
1545
+ "loss": 0.1156,
1546
+ "step": 512
1547
+ },
1548
+ {
1549
+ "epoch": 0.8224,
1550
+ "learning_rate": 1.9064389903075683e-05,
1551
+ "loss": 0.2045,
1552
+ "step": 514
1553
+ },
1554
+ {
1555
+ "epoch": 0.8256,
1556
+ "learning_rate": 1.9016606955795843e-05,
1557
+ "loss": 0.0543,
1558
+ "step": 516
1559
+ },
1560
+ {
1561
+ "epoch": 0.8288,
1562
+ "learning_rate": 1.8967697003833156e-05,
1563
+ "loss": 0.0878,
1564
+ "step": 518
1565
+ },
1566
+ {
1567
+ "epoch": 0.832,
1568
+ "learning_rate": 1.891766616054545e-05,
1569
+ "loss": 0.1163,
1570
+ "step": 520
1571
+ },
1572
+ {
1573
+ "epoch": 0.8352,
1574
+ "learning_rate": 1.8866520679393124e-05,
1575
+ "loss": 0.0034,
1576
+ "step": 522
1577
+ },
1578
+ {
1579
+ "epoch": 0.8384,
1580
+ "learning_rate": 1.881426695315756e-05,
1581
+ "loss": 0.4083,
1582
+ "step": 524
1583
+ },
1584
+ {
1585
+ "epoch": 0.8416,
1586
+ "learning_rate": 1.8760911513141974e-05,
1587
+ "loss": 0.1199,
1588
+ "step": 526
1589
+ },
1590
+ {
1591
+ "epoch": 0.8448,
1592
+ "learning_rate": 1.8706461028355107e-05,
1593
+ "loss": 0.2598,
1594
+ "step": 528
1595
+ },
1596
+ {
1597
+ "epoch": 0.848,
1598
+ "learning_rate": 1.86509223046777e-05,
1599
+ "loss": 0.4885,
1600
+ "step": 530
1601
+ },
1602
+ {
1603
+ "epoch": 0.8512,
1604
+ "learning_rate": 1.8594302284011697e-05,
1605
+ "loss": 0.1455,
1606
+ "step": 532
1607
+ },
1608
+ {
1609
+ "epoch": 0.8544,
1610
+ "learning_rate": 1.8536608043412702e-05,
1611
+ "loss": 0.0441,
1612
+ "step": 534
1613
+ },
1614
+ {
1615
+ "epoch": 0.8576,
1616
+ "learning_rate": 1.847784679420527e-05,
1617
+ "loss": 0.2049,
1618
+ "step": 536
1619
+ },
1620
+ {
1621
+ "epoch": 0.8608,
1622
+ "learning_rate": 1.841802588108161e-05,
1623
+ "loss": 0.0302,
1624
+ "step": 538
1625
+ },
1626
+ {
1627
+ "epoch": 0.864,
1628
+ "learning_rate": 1.8357152781183613e-05,
1629
+ "loss": 0.0036,
1630
+ "step": 540
1631
+ },
1632
+ {
1633
+ "epoch": 0.8672,
1634
+ "learning_rate": 1.8295235103168128e-05,
1635
+ "loss": 0.0575,
1636
+ "step": 542
1637
+ },
1638
+ {
1639
+ "epoch": 0.8704,
1640
+ "learning_rate": 1.8232280586256104e-05,
1641
+ "loss": 0.0007,
1642
+ "step": 544
1643
+ },
1644
+ {
1645
+ "epoch": 0.8736,
1646
+ "learning_rate": 1.8168297099265108e-05,
1647
+ "loss": 0.02,
1648
+ "step": 546
1649
+ },
1650
+ {
1651
+ "epoch": 0.8768,
1652
+ "learning_rate": 1.8103292639625835e-05,
1653
+ "loss": 0.0007,
1654
+ "step": 548
1655
+ },
1656
+ {
1657
+ "epoch": 0.88,
1658
+ "learning_rate": 1.8037275332382575e-05,
1659
+ "loss": 0.1148,
1660
+ "step": 550
1661
+ },
1662
+ {
1663
+ "epoch": 0.8832,
1664
+ "learning_rate": 1.7970253429177494e-05,
1665
+ "loss": 0.0016,
1666
+ "step": 552
1667
+ },
1668
+ {
1669
+ "epoch": 0.8864,
1670
+ "learning_rate": 1.7902235307219336e-05,
1671
+ "loss": 0.0004,
1672
+ "step": 554
1673
+ },
1674
+ {
1675
+ "epoch": 0.8896,
1676
+ "learning_rate": 1.783322946823638e-05,
1677
+ "loss": 0.004,
1678
+ "step": 556
1679
+ },
1680
+ {
1681
+ "epoch": 0.8928,
1682
+ "learning_rate": 1.776324453741365e-05,
1683
+ "loss": 0.1155,
1684
+ "step": 558
1685
+ },
1686
+ {
1687
+ "epoch": 0.896,
1688
+ "learning_rate": 1.7692289262315008e-05,
1689
+ "loss": 0.0402,
1690
+ "step": 560
1691
+ },
1692
+ {
1693
+ "epoch": 0.8992,
1694
+ "learning_rate": 1.762037251178961e-05,
1695
+ "loss": 0.0801,
1696
+ "step": 562
1697
+ },
1698
+ {
1699
+ "epoch": 0.9024,
1700
+ "learning_rate": 1.7547503274863502e-05,
1701
+ "loss": 0.0028,
1702
+ "step": 564
1703
+ },
1704
+ {
1705
+ "epoch": 0.9056,
1706
+ "learning_rate": 1.7473690659616e-05,
1707
+ "loss": 0.0006,
1708
+ "step": 566
1709
+ },
1710
+ {
1711
+ "epoch": 0.9088,
1712
+ "learning_rate": 1.739894389204122e-05,
1713
+ "loss": 0.8144,
1714
+ "step": 568
1715
+ },
1716
+ {
1717
+ "epoch": 0.912,
1718
+ "learning_rate": 1.732327231489503e-05,
1719
+ "loss": 0.0961,
1720
+ "step": 570
1721
+ },
1722
+ {
1723
+ "epoch": 0.9152,
1724
+ "learning_rate": 1.7246685386527105e-05,
1725
+ "loss": 0.1557,
1726
+ "step": 572
1727
+ },
1728
+ {
1729
+ "epoch": 0.9184,
1730
+ "learning_rate": 1.716919267969884e-05,
1731
+ "loss": 0.723,
1732
+ "step": 574
1733
+ },
1734
+ {
1735
+ "epoch": 0.9216,
1736
+ "learning_rate": 1.7090803880386784e-05,
1737
+ "loss": 0.19,
1738
+ "step": 576
1739
+ },
1740
+ {
1741
+ "epoch": 0.9248,
1742
+ "learning_rate": 1.701152878657197e-05,
1743
+ "loss": 0.0013,
1744
+ "step": 578
1745
+ },
1746
+ {
1747
+ "epoch": 0.928,
1748
+ "learning_rate": 1.6931377307015236e-05,
1749
+ "loss": 0.7352,
1750
+ "step": 580
1751
+ },
1752
+ {
1753
+ "epoch": 0.9312,
1754
+ "learning_rate": 1.6850359460018744e-05,
1755
+ "loss": 0.0095,
1756
+ "step": 582
1757
+ },
1758
+ {
1759
+ "epoch": 0.9344,
1760
+ "learning_rate": 1.67684853721737e-05,
1761
+ "loss": 0.0136,
1762
+ "step": 584
1763
+ },
1764
+ {
1765
+ "epoch": 0.9376,
1766
+ "learning_rate": 1.6685765277094702e-05,
1767
+ "loss": 0.0344,
1768
+ "step": 586
1769
+ },
1770
+ {
1771
+ "epoch": 0.9408,
1772
+ "learning_rate": 1.6602209514140562e-05,
1773
+ "loss": 0.0038,
1774
+ "step": 588
1775
+ },
1776
+ {
1777
+ "epoch": 0.944,
1778
+ "learning_rate": 1.651782852712194e-05,
1779
+ "loss": 0.1917,
1780
+ "step": 590
1781
+ },
1782
+ {
1783
+ "epoch": 0.9472,
1784
+ "learning_rate": 1.6432632862996062e-05,
1785
+ "loss": 0.0179,
1786
+ "step": 592
1787
+ },
1788
+ {
1789
+ "epoch": 0.9504,
1790
+ "learning_rate": 1.6346633170548275e-05,
1791
+ "loss": 0.0439,
1792
+ "step": 594
1793
+ },
1794
+ {
1795
+ "epoch": 0.9536,
1796
+ "learning_rate": 1.625984019906122e-05,
1797
+ "loss": 0.0745,
1798
+ "step": 596
1799
+ },
1800
+ {
1801
+ "epoch": 0.9568,
1802
+ "learning_rate": 1.6172264796971063e-05,
1803
+ "loss": 0.2384,
1804
+ "step": 598
1805
+ },
1806
+ {
1807
+ "epoch": 0.96,
1808
+ "learning_rate": 1.6083917910511623e-05,
1809
+ "loss": 0.0346,
1810
+ "step": 600
1811
+ },
1812
+ {
1813
+ "epoch": 0.9632,
1814
+ "learning_rate": 1.5994810582346266e-05,
1815
+ "loss": 1.3399,
1816
+ "step": 602
1817
+ },
1818
+ {
1819
+ "epoch": 0.9664,
1820
+ "learning_rate": 1.5904953950187448e-05,
1821
+ "loss": 0.0596,
1822
+ "step": 604
1823
+ },
1824
+ {
1825
+ "epoch": 0.9696,
1826
+ "learning_rate": 1.581435924540482e-05,
1827
+ "loss": 0.034,
1828
+ "step": 606
1829
+ },
1830
+ {
1831
+ "epoch": 0.9728,
1832
+ "learning_rate": 1.5723037791621203e-05,
1833
+ "loss": 0.1371,
1834
+ "step": 608
1835
+ },
1836
+ {
1837
+ "epoch": 0.976,
1838
+ "learning_rate": 1.5631001003297302e-05,
1839
+ "loss": 0.5681,
1840
+ "step": 610
1841
+ },
1842
+ {
1843
+ "epoch": 0.9792,
1844
+ "learning_rate": 1.5538260384305083e-05,
1845
+ "loss": 0.2622,
1846
+ "step": 612
1847
+ },
1848
+ {
1849
+ "epoch": 0.9824,
1850
+ "learning_rate": 1.544482752648966e-05,
1851
+ "loss": 0.0031,
1852
+ "step": 614
1853
+ },
1854
+ {
1855
+ "epoch": 0.9856,
1856
+ "learning_rate": 1.5350714108220677e-05,
1857
+ "loss": 0.0025,
1858
+ "step": 616
1859
+ },
1860
+ {
1861
+ "epoch": 0.9888,
1862
+ "learning_rate": 1.5255931892932344e-05,
1863
+ "loss": 0.1535,
1864
+ "step": 618
1865
+ },
1866
+ {
1867
+ "epoch": 0.992,
1868
+ "learning_rate": 1.5160492727653238e-05,
1869
+ "loss": 0.0149,
1870
+ "step": 620
1871
+ },
1872
+ {
1873
+ "epoch": 0.9952,
1874
+ "learning_rate": 1.5064408541525578e-05,
1875
+ "loss": 0.0155,
1876
+ "step": 622
1877
+ },
1878
+ {
1879
+ "epoch": 0.9984,
1880
+ "learning_rate": 1.4967691344314012e-05,
1881
+ "loss": 0.0022,
1882
+ "step": 624
1883
+ },
1884
+ {
1885
+ "epoch": 1.0,
1886
+ "step": 625,
1887
+ "total_flos": 2760627376095232.0,
1888
+ "train_loss": 0.22509195377584545,
1889
+ "train_runtime": 3314.8358,
1890
+ "train_samples_per_second": 3.017,
1891
+ "train_steps_per_second": 0.189
1892
+ }
1893
+ ],
1894
+ "logging_steps": 2,
1895
+ "max_steps": 625,
1896
+ "num_input_tokens_seen": 0,
1897
+ "num_train_epochs": 1,
1898
+ "save_steps": 500,
1899
+ "stateful_callbacks": {},
1900
+ "total_flos": 2760627376095232.0,
1901
+ "train_batch_size": 1,
1902
+ "trial_name": null,
1903
+ "trial_params": null
1904
+ }
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1966630b157259af240e2c25b18fd6a885ee24fb9e84084af2f01e0c2fd77076
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:691c7e216c5ddbc2e717eb31ab2fdcb1295705461b48f57f703090492822d81d
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcd8e4eb86c55039a290bf62b18afe66752a2da44f2e8b6a626172b84871616e
3
+ size 1279587682
new_checkpoints/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_0625_selfsup_scenario18_new_10000_random0_0625_seed1/server_model_round3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5bb37bd3858aa50ed7d03838d14928a9cce1bdec346699a8ee288e8950e6f38
3
+ size 1279587682