{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "X4cRE8IbIrIV"
      },
      "source": [
        "Downloading PyTorch Vision Reference Scripts for Image Classification. These scripts are official reference implementations from PyTorch Vision that provide training and quantization utilities for image classification models."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "46CgrVgjg3E-",
        "outputId": "1fafe6a6-33bb-4339-ac70-ef2d25206c57"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "--2025-08-04 11:06:42--  https://raw.githubusercontent.com/pytorch/vision/main/references/classification/presets.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 3885 (3.8K) [text/plain]\n",
            "Saving to: ‘presets.py.3’\n",
            "\n",
            "presets.py.3        100%[===================>]   3.79K  --.-KB/s    in 0s      \n",
            "\n",
            "2025-08-04 11:06:42 (17.6 MB/s) - ‘presets.py.3’ saved [3885/3885]\n",
            "\n",
            "--2025-08-04 11:06:42--  https://raw.githubusercontent.com/pytorch/vision/main/references/classification/sampler.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 2395 (2.3K) [text/plain]\n",
            "Saving to: ‘sampler.py.3’\n",
            "\n",
            "sampler.py.3        100%[===================>]   2.34K  --.-KB/s    in 0s      \n",
            "\n",
            "2025-08-04 11:06:42 (23.5 MB/s) - ‘sampler.py.3’ saved [2395/2395]\n",
            "\n",
            "--2025-08-04 11:06:43--  https://raw.githubusercontent.com/pytorch/vision/main/references/classification/train.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 23324 (23K) [text/plain]\n",
            "Saving to: ‘train.py.3’\n",
            "\n",
            "train.py.3          100%[===================>]  22.78K  --.-KB/s    in 0s      \n",
            "\n",
            "2025-08-04 11:06:43 (49.3 MB/s) - ‘train.py.3’ saved [23324/23324]\n",
            "\n",
            "--2025-08-04 11:06:43--  https://raw.githubusercontent.com/pytorch/vision/main/references/classification/train_quantization.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 11647 (11K) [text/plain]\n",
            "Saving to: ‘train_quantization.py.3’\n",
            "\n",
            "train_quantization. 100%[===================>]  11.37K  --.-KB/s    in 0.002s  \n",
            "\n",
            "2025-08-04 11:06:43 (6.37 MB/s) - ‘train_quantization.py.3’ saved [11647/11647]\n",
            "\n",
            "--2025-08-04 11:06:43--  https://raw.githubusercontent.com/pytorch/vision/main/references/classification/transformers.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 404 Not Found\n",
            "2025-08-04 11:06:43 ERROR 404: Not Found.\n",
            "\n",
            "--2025-08-04 11:06:43--  https://raw.githubusercontent.com/pytorch/vision/main/references/classification/utils.py\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 15791 (15K) [text/plain]\n",
            "Saving to: ‘utils.py.3’\n",
            "\n",
            "utils.py.3          100%[===================>]  15.42K  --.-KB/s    in 0.002s  \n",
            "\n",
            "2025-08-04 11:06:43 (7.63 MB/s) - ‘utils.py.3’ saved [15791/15791]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "! wget https://raw.githubusercontent.com/pytorch/vision/main/references/classification/presets.py\n",
        "! wget https://raw.githubusercontent.com/pytorch/vision/main/references/classification/sampler.py\n",
        "! wget https://raw.githubusercontent.com/pytorch/vision/main/references/classification/train.py\n",
        "! wget https://raw.githubusercontent.com/pytorch/vision/main/references/classification/train_quantization.py\n",
        "! wget https://raw.githubusercontent.com/pytorch/vision/main/references/classification/transformers.py\n",
        "! wget https://raw.githubusercontent.com/pytorch/vision/main/references/classification/utils.py"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "HFASsisvIrIb"
      },
      "source": [
        "In this block, we build a “loss” function for our sequential policy gradient algorithm. When the right data is plugged in, the gradient of this loss is equal to the policy gradient."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "EaBokYCpg3FA"
      },
      "outputs": [],
      "source": [
        "import types\n",
        "from typing import Optional, List, Union, Callable\n",
        "\n",
        "import torch\n",
        "from torch import nn, Tensor\n",
        "from torch.nn import functional as F\n",
        "\n",
        "from torchvision.models.resnet import ResNet\n",
        "\n",
        "\n",
        "def compute_policy_loss(loss_sequence, mask_sequence, rewards):\n",
        "    losses = sum(mask * padded_loss for mask, padded_loss in zip(mask_sequence, loss_sequence))\n",
        "    returns = sum(padded_reward * mask for padded_reward, mask in zip(rewards, mask_sequence))\n",
        "    loss = torch.mean(losses * returns)\n",
        "\n",
        "    return loss\n"
      ]
    },
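    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "As a quick sanity check, here is a toy call with made-up numbers (a minimal sketch, not part of the training pipeline). With two samples and two steps, the second sample is masked out at step 1, so its step-1 loss and reward are dropped:\n",
        "\n",
        "```python\n",
        "loss_sequence = [torch.tensor([0.5, 1.0]), torch.tensor([0.2, 0.8])]\n",
        "mask_sequence = [torch.ones(2), torch.tensor([1.0, 0.0])]\n",
        "rewards = [1.0, 0.4]\n",
        "\n",
        "# losses = [0.7, 1.0], returns = [1.4, 1.0]\n",
        "compute_policy_loss(loss_sequence, mask_sequence, rewards)  # -> mean([0.98, 1.0]) = 0.99\n",
        "```"
      ]
    },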
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "_Ig0Jm2w8DPH"
      },
      "source": [
        "In this block, we build a TPBlock for the Task Replica Prediction (TRP) module; This implementation provides the backbone without the shared prediction head."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "wkBlmJT96jZj"
      },
      "outputs": [],
      "source": [
        "class TPBlock(nn.Module):\n",
        "    def __init__(self, depths: int, in_planes: int, out_planes: int = None, rank=1, shape_dims=3, channel_first=True, dtype=torch.float32) -> None:\n",
        "        super().__init__()\n",
        "        out_planes = in_planes if out_planes is None else out_planes\n",
        "        self.layers = torch.nn.ModuleList([self._make_layer(in_planes, out_planes, rank, shape_dims, channel_first, dtype) for _ in range(depths)])\n",
        "\n",
        "    def forward(self, x: Tensor) -> Tensor:\n",
        "        for layer in self.layers:\n",
        "            x = x + layer(x)\n",
        "        return x\n",
        "\n",
        "    def _make_layer(self, in_planes: int, out_planes: int = None, rank=1, shape_dims=3, channel_first=True, dtype=torch.float32) -> nn.Sequential:\n",
        "\n",
        "        class Permute(nn.Module):\n",
        "            def __init__(self, *dims):\n",
        "                super().__init__()\n",
        "                self.dims = dims\n",
        "            def forward(self, x):\n",
        "                return x.permute(*self.dims)\n",
        "\n",
        "        class RMSNorm(nn.Module):\n",
        "            __constants__ = [\"eps\"]\n",
        "            eps: float\n",
        "\n",
        "            def __init__(self, hidden_size, eps: float = 1e-6, device=None, dtype=None):\n",
        "                \"\"\"\n",
        "                LlamaRMSNorm is equivalent to T5LayerNorm.\n",
        "                \"\"\"\n",
        "                factory_kwargs = {\"device\": device, \"dtype\": dtype}\n",
        "                super().__init__()\n",
        "                self.eps = eps\n",
        "                self.weight = nn.Parameter(torch.ones(hidden_size, **factory_kwargs))\n",
        "\n",
        "            def forward(self, hidden_states):\n",
        "                input_dtype = hidden_states.dtype\n",
        "                hidden_states = hidden_states.to(torch.float32)\n",
        "                variance = hidden_states.pow(2).mean(dim=1, keepdim=True)\n",
        "                hidden_states = hidden_states * torch.rsqrt(variance + self.eps)\n",
        "                weight = self.weight.view(1, -1, *[1] * (hidden_states.ndim - 2))\n",
        "                return weight * hidden_states.to(input_dtype)\n",
        "\n",
        "            def extra_repr(self):\n",
        "                return f\"{self.weight.shape[0]}, eps={self.eps}\"\n",
        "\n",
        "        conv_map = {\n",
        "            2: (nn.Conv1d, (0, 2, 1), (0, 2, 1)),\n",
        "            3: (nn.Conv2d, (0, 3, 1, 2), (0, 2, 3, 1)),\n",
        "            4: (nn.Conv3d, (0, 4, 1, 2, 3), (0, 2, 3, 4, 1)),\n",
        "        }\n",
        "        Conv, pre_dims, post_dims = conv_map[shape_dims]\n",
        "        kernel_size, dilation, padding = self.generate_hyperparameters(rank)\n",
        "\n",
        "        pre_permute = nn.Identity() if channel_first else Permute(*pre_dims)\n",
        "        post_permute = nn.Identity() if channel_first else Permute(*post_dims)\n",
        "        conv1 = Conv(in_planes, out_planes, kernel_size, padding=padding, dilation=dilation, bias=False, dtype=dtype, device='cuda')\n",
        "        nn.init.zeros_(conv1.weight)\n",
        "        bn1 = RMSNorm(out_planes, dtype=dtype, device=\"cuda\")\n",
        "        relu = nn.ReLU(inplace=True)\n",
        "        conv2 = Conv(out_planes, in_planes, kernel_size, padding=padding, dilation=dilation, bias=False, dtype=dtype, device='cuda')\n",
        "        nn.init.zeros_(conv2.weight)\n",
        "        bn2 = RMSNorm(in_planes, dtype=dtype, device=\"cuda\")\n",
        "\n",
        "        return torch.nn.Sequential(pre_permute, conv1, bn1, relu, conv2, bn2, relu, post_permute)\n",
        "\n",
        "    @staticmethod\n",
        "    def generate_hyperparameters(rank: int):\n",
        "        \"\"\"\n",
        "        Generates kernel size and dilation rate pairs sorted by increasing padded kernel size.\n",
        "\n",
        "        Args:\n",
        "            rank: Number of (kernel_size, dilation) pairs to generate. Must be positive.\n",
        "\n",
        "        Returns:\n",
        "            Tuple[int, int]: A (kernel_size, dilation) tuple where:\n",
        "                - kernel_size: Always odd and >= 1\n",
        "                - dilation: Computed to maintain consistent padded kernel size growth\n",
        "\n",
        "        Note:\n",
        "            Padded kernel size is calculated as:\n",
        "                (kernel_size - 1) * dilation + 1\n",
        "            Pairs are generated first in order of increasing padded kernel size,\n",
        "            then by increasing kernel size for equal padded kernel sizes.\n",
        "        \"\"\"\n",
        "        pairs = [(1, 1, 0)]  # Start with smallest possible\n",
        "        padded_kernel_size = 3\n",
        "\n",
        "        while len(pairs) < rank:\n",
        "            for kernel_size in range(3, padded_kernel_size + 1, 2):\n",
        "                if (padded_kernel_size - 1) % (kernel_size - 1) == 0:\n",
        "                    dilation = (padded_kernel_size - 1) // (kernel_size - 1)\n",
        "                    padding = dilation * (kernel_size - 1) // 2\n",
        "                    pairs.append((kernel_size, dilation, padding))\n",
        "                    if len(pairs) >= rank:\n",
        "                        break\n",
        "\n",
        "            # Move to next odd padded kernel size\n",
        "            padded_kernel_size += 2\n",
        "\n",
        "        return pairs[-1]"
      ]
    },
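    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "A minimal smoke test (assuming a CUDA device, since `TPBlock` pins its convolutions to `cuda`). Because both convolutions are zero-initialized, every residual layer starts as the identity, so a fresh block leaves its input unchanged:\n",
        "\n",
        "```python\n",
        "block = TPBlock(depths=2, in_planes=64, rank=3)\n",
        "x = torch.randn(1, 64, 8, 8, device=\"cuda\")\n",
        "assert torch.equal(block(x), x)  # zero-init convs => identity at initialization\n",
        "\n",
        "# rank picks a (kernel_size, dilation, padding) triple with a growing receptive field\n",
        "print(TPBlock.generate_hyperparameters(3))  # -> (3, 2, 2)\n",
        "```"
      ]
    },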
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "UGxQdKZaF2NT"
      },
      "source": [
        "This implementation enables ResNet retraining in SPG mode.\n",
        "\n",
        "Components:\n",
        "-------------------------------------------------------------------------------\n",
        "1. gen_criterion()\n",
        "    - Purpose: compute per-sample losses and positional masks\n",
        "\n",
        "2. gen_shared_head()\n",
        "    - Purpose: Implements a shared prediction head that processes convolutional feature maps for prediction.\n",
        "\n",
        "3. gen_forward()\n",
        "    - Purpose: Extended forward pass supporting both traditional inference and SPG retraining."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "id": "kTZWkoLr8cfE"
      },
      "outputs": [],
      "source": [
        "class ResNetConfig:\n",
        "    @staticmethod\n",
        "    def gen_shared_head(self):\n",
        "        def func(hidden_states):\n",
        "            \"\"\"\n",
        "            Args:\n",
        "                hidden_states (Tensor): Hidden States tensor of shape [B, C, H, W].\n",
        "\n",
        "            Returns:\n",
        "                logits (Tensor): Logits tensor of shape [B, C].\n",
        "            \"\"\"\n",
        "            x = self.avgpool(hidden_states)\n",
        "            x = torch.flatten(x, 1)\n",
        "            logits = self.fc(x)\n",
        "            return logits\n",
        "        return func\n",
        "\n",
        "    @staticmethod\n",
        "    def gen_logits(self, shared_head):\n",
        "        def func(hidden_states):\n",
        "            \"\"\"\n",
        "            Args:\n",
        "                hidden_states (Tensor): Hidden States tensor of shape [B, L, hidden_units].\n",
        "\n",
        "            Returns:\n",
        "                logits_seqence (List[Tensor]): List of Logits tensors.\n",
        "            \"\"\"\n",
        "            logits_sequence = [shared_head(hidden_states)]\n",
        "            for layer in self.trp_blocks:\n",
        "                logits_sequence.append(shared_head(layer(hidden_states)))\n",
        "            return logits_sequence\n",
        "        return func\n",
        "\n",
        "    @staticmethod\n",
        "    def gen_mask(label_smoothing=0.0, top_k=1):\n",
        "        def func(logits_sequence, labels):\n",
        "            \"\"\"\n",
        "            Args:\n",
        "                logits_sequence (List[Tensor]): List of Logits tensors.\n",
        "                labels (Tensor): Target labels of shape [B] or [B, C].\n",
        "\n",
        "            Returns:\n",
        "                mask_sequence (List[Tensor]): List of Mask tensor.\n",
        "                returns (Tensor): Boolean mask tensor of shape [B*(L-1)].\n",
        "            \"\"\"\n",
        "            labels = torch.argmax(labels, dim=1) if label_smoothing > 0.0 else labels\n",
        "\n",
        "            mask_sequence = [torch.ones_like(labels, dtype=torch.float32, device=labels.device)]\n",
        "            for logits in logits_sequence:\n",
        "                with torch.no_grad():\n",
        "                    topk_values, topk_indices = torch.topk(logits, top_k, dim=-1)\n",
        "                    mask = torch.eq(topk_indices, labels[:, None]).any(dim=-1).to(torch.float32)\n",
        "                    mask_sequence.append(mask_sequence[-1] * mask)\n",
        "            return mask_sequence\n",
        "        return func\n",
        "\n",
        "    @staticmethod\n",
        "    def gen_criterion(label_smoothing=0.0):\n",
        "        def func(logits_sequence, labels):\n",
        "            \"\"\"\n",
        "            Args:\n",
        "                logits_sequence (List[Tensor]): List of Logits tensor.\n",
        "                labels (Tensor): labels labels of shape [B] or [B, C].\n",
        "\n",
        "            Returns:\n",
        "                loss (Tensor): Scalar tensor representing the loss.\n",
        "                mask (Tensor): Boolean mask tensor of shape [B].\n",
        "            \"\"\"\n",
        "            labels = torch.argmax(labels, dim=1) if label_smoothing > 0.0 else labels\n",
        "\n",
        "            loss_sequence = []\n",
        "            for logits in logits_sequence:\n",
        "                loss_sequence.append(F.cross_entropy(logits, labels, reduction=\"none\", label_smoothing=label_smoothing))\n",
        "\n",
        "            return loss_sequence\n",
        "        return func\n",
        "\n",
        "    @staticmethod\n",
        "    def gen_forward(rewards, label_smoothing=0.0, top_k=1):\n",
        "        def func(self, x: Tensor, targets=None) -> Tensor:\n",
        "            x = self.conv1(x)\n",
        "            x = self.bn1(x)\n",
        "            x = self.relu(x)\n",
        "            x = self.maxpool(x)\n",
        "\n",
        "            x = self.layer1(x)\n",
        "            x = self.layer2(x)\n",
        "            x = self.layer3(x)\n",
        "            hidden_states = self.layer4(x)\n",
        "            x = self.avgpool(hidden_states)\n",
        "            x = torch.flatten(x, 1)\n",
        "            logits = self.fc(x)\n",
        "\n",
        "            if self.training:\n",
        "                shared_head = ResNetConfig.gen_shared_head(self)\n",
        "                compute_logits = ResNetConfig.gen_logits(self, shared_head)\n",
        "                compute_mask = ResNetConfig.gen_mask(label_smoothing, top_k)\n",
        "                compute_loss = ResNetConfig.gen_criterion(label_smoothing)\n",
        "\n",
        "                logits_sequence = compute_logits(hidden_states)\n",
        "                mask_sequence = compute_mask(logits_sequence, targets)\n",
        "                loss_sequence = compute_loss(logits_sequence, targets)\n",
        "                loss = compute_policy_loss(loss_sequence, mask_sequence, rewards)\n",
        "\n",
        "                return logits, loss\n",
        "\n",
        "            return logits\n",
        "\n",
        "        return func"
      ]
    },
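    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "To see the masking rule in isolation, here is a toy run on CPU (illustrative values only). A sample keeps contributing only while all of its predictions so far are in the top-k; once it drops out, it stays masked:\n",
        "\n",
        "```python\n",
        "compute_mask = ResNetConfig.gen_mask(label_smoothing=0.0, top_k=1)\n",
        "labels = torch.tensor([0, 1])\n",
        "logits_sequence = [\n",
        "    torch.tensor([[2.0, 0.1], [0.1, 2.0]]),  # both samples correct\n",
        "    torch.tensor([[2.0, 0.1], [2.0, 0.1]]),  # second sample now wrong\n",
        "]\n",
        "compute_mask(logits_sequence, labels)\n",
        "# -> [tensor([1., 1.]), tensor([1., 1.]), tensor([1., 0.])]\n",
        "```"
      ]
    },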
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cCn6vwItH1CW"
      },
      "source": [
        "Applies TRP modules to the base ResNet (main backbone). The k-th TRP module corresponding to a deeper ResNet variant with an additional depth of 3 * sum(depths[:k+1])."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "wXQF0oISH5Yp"
      },
      "outputs": [],
      "source": [
        "def apply_trp(model, depths: List[int], in_planes: int, out_planes: int, rewards, **kwargs):\n",
        "    print(\"✅ Applying TRP to ResNet for Image Classification...\")\n",
        "    model.trp_blocks = torch.nn.ModuleList([TPBlock(depths=d, in_planes=in_planes, out_planes=out_planes, rank=k) for k, d in enumerate(depths)])\n",
        "    model.forward = types.MethodType(ResNetConfig.gen_forward(rewards, label_smoothing=kwargs[\"label_smoothing\"], top_k=1), model)\n",
        "    return model"
      ]
    },
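    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "For example, a sketch mirroring the arguments used in the run below (it needs a CUDA device because `TPBlock` allocates on `cuda`):\n",
        "\n",
        "```python\n",
        "import torchvision\n",
        "\n",
        "model = torchvision.models.resnet18(num_classes=1000).cuda()\n",
        "model = apply_trp(model, depths=[4, 4, 4], in_planes=512, out_planes=8,\n",
        "                  rewards=[1.0, 0.4, 0.2, 0.1], label_smoothing=0.0)\n",
        "\n",
        "model.train()\n",
        "images = torch.randn(2, 3, 224, 224, device=\"cuda\")\n",
        "targets = torch.randint(0, 1000, (2,), device=\"cuda\")\n",
        "logits, loss = model(images, targets)  # SPG loss alongside the usual logits\n",
        "```"
      ]
    },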
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "kDjSAv3PJr7P"
      },
      "source": [
        "The following is a training script for classification models, primarily based on the official TorchVision `train.py` reference implementation. We have made two modifications:\n",
        "\n",
        "Adding TRP Modules: We integrate TRP modules into the base model architecture before training begins:\n",
        "\n",
        "```python\n",
        "if args.apply_trp:\n",
        "        model = apply_trp(model, args.trp_depths, args.in_planes, args.out_planes, args.trp_rewards, label_smoothing=args.label_smoothing)\n",
        "```\n",
        "Removing TRP Modules: We remove the TRP components from the base model before saving the base model:\n",
        "```python\n",
        "if args.output_dir:\n",
        "    checkpoint = {\n",
        "        \"model\": model_without_ddp.state_dict() if not args.apply_trp else {k: v for k, v in model_without_ddp.state_dict().items() if not \"trp_blocks\" in k},\n",
        "        \"optimizer\": optimizer.state_dict(),\n",
        "        \"lr_scheduler\": lr_scheduler.state_dict(),\n",
        "        \"epoch\": epoch,\n",
        "        \"args\": args,\n",
        "    }\n",
        "    if model_ema:\n",
        "        checkpoint[\"model_ema\"] = model_ema.state_dict() if not args.apply_trp else {k: v for k, v in model_ema.state_dict().items() if not \"trp_blocks\" in k}\n",
        "    if scaler:\n",
        "        checkpoint[\"scaler\"] = scaler.state_dict()\n",
        "    utils.save_on_master(checkpoint, os.path.join(args.output_dir, f\"model_{epoch}.pth\"))\n",
        "    utils.save_on_master(checkpoint, os.path.join(args.output_dir, \"checkpoint.pth\"))\n",
        "```"
      ]
    },
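    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "The stripping itself is just a dict comprehension over the state dict; with hypothetical keys:\n",
        "\n",
        "```python\n",
        "state = {\"fc.weight\": 0, \"trp_blocks.0.layers.0.1.weight\": 0}\n",
        "{k: v for k, v in state.items() if \"trp_blocks\" not in k}  # -> {'fc.weight': 0}\n",
        "```\n",
        "\n",
        "Because `apply_trp` only adds `trp_blocks` and swaps `forward`, the filtered checkpoint loads into a vanilla ResNet."
      ]
    },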
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "id": "hK4Y7Sqv4xUa"
      },
      "outputs": [],
      "source": [
        "import datetime\n",
        "import os\n",
        "import time\n",
        "import warnings\n",
        "\n",
        "import presets\n",
        "import torch\n",
        "import torch.utils.data\n",
        "import torchvision\n",
        "import utils\n",
        "from torch import nn\n",
        "from torchvision.transforms.functional import InterpolationMode\n",
        "\n",
        "\n",
        "def load_data(traindir, valdir):\n",
        "    # Data loading code\n",
        "    print(\"Loading data\")\n",
        "    interpolation = InterpolationMode(\"bilinear\")\n",
        "\n",
        "    print(\"Loading training data\")\n",
        "    st = time.time()\n",
        "    dataset = torchvision.datasets.ImageFolder(\n",
        "        traindir,\n",
        "        presets.ClassificationPresetTrain(crop_size=224, interpolation=interpolation, auto_augment_policy=None, random_erase_prob=0.0, ra_magnitude=9, augmix_severity=3),\n",
        "    )\n",
        "    print(\"Took\", time.time() - st)\n",
        "\n",
        "    print(\"Loading validation data\")\n",
        "    dataset_test = torchvision.datasets.ImageFolder(\n",
        "        valdir,\n",
        "        presets.ClassificationPresetEval(crop_size=224, resize_size=256, interpolation=interpolation)\n",
        "    )\n",
        "\n",
        "    print(\"Creating data loaders\")\n",
        "    train_sampler = torch.utils.data.RandomSampler(dataset)\n",
        "    test_sampler = torch.utils.data.SequentialSampler(dataset_test)\n",
        "\n",
        "    return dataset, dataset_test, train_sampler, test_sampler\n",
        "\n",
        "\n",
        "\n",
        "def train_one_epoch(model, optimizer, data_loader, device, epoch, args):\n",
        "    model.train()\n",
        "    metric_logger = utils.MetricLogger(delimiter=\"  \")\n",
        "    metric_logger.add_meter(\"lr\", utils.SmoothedValue(window_size=1, fmt=\"{value}\"))\n",
        "    metric_logger.add_meter(\"img/s\", utils.SmoothedValue(window_size=10, fmt=\"{value}\"))\n",
        "\n",
        "    header = f\"Epoch: [{epoch}]\"\n",
        "    for i, (image, target) in enumerate(metric_logger.log_every(data_loader, args.print_freq, header)):\n",
        "        start_time = time.time()\n",
        "        image, target = image.to(device), target.to(device)\n",
        "        with torch.amp.autocast(\"cuda\", enabled=False):\n",
        "            output, loss = model(image, target)\n",
        "\n",
        "        optimizer.zero_grad()\n",
        "        loss.backward()\n",
        "        optimizer.step()\n",
        "\n",
        "        acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))\n",
        "        batch_size = image.shape[0]\n",
        "        metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0][\"lr\"])\n",
        "        metric_logger.meters[\"acc1\"].update(acc1.item(), n=batch_size)\n",
        "        metric_logger.meters[\"acc5\"].update(acc5.item(), n=batch_size)\n",
        "        metric_logger.meters[\"img/s\"].update(batch_size / (time.time() - start_time))\n",
        "\n",
        "\n",
        "def evaluate(model, criterion, data_loader, device, print_freq=100, log_suffix=\"\"):\n",
        "    model.eval()\n",
        "    metric_logger = utils.MetricLogger(delimiter=\"  \")\n",
        "    header = f\"Test: {log_suffix}\"\n",
        "\n",
        "    num_processed_samples = 0\n",
        "    with torch.inference_mode():\n",
        "        for image, target in metric_logger.log_every(data_loader, print_freq, header):\n",
        "            image = image.to(device, non_blocking=True)\n",
        "            target = target.to(device, non_blocking=True)\n",
        "            output = model(image)\n",
        "            loss = criterion(output, target)\n",
        "\n",
        "            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))\n",
        "            # FIXME need to take into account that the datasets\n",
        "            # could have been padded in distributed setup\n",
        "            batch_size = image.shape[0]\n",
        "            metric_logger.update(loss=loss.item())\n",
        "            metric_logger.meters[\"acc1\"].update(acc1.item(), n=batch_size)\n",
        "            metric_logger.meters[\"acc5\"].update(acc5.item(), n=batch_size)\n",
        "            num_processed_samples += batch_size\n",
        "    # gather the stats from all processes\n",
        "\n",
        "    num_processed_samples = utils.reduce_across_processes(num_processed_samples)\n",
        "    if (\n",
        "        hasattr(data_loader.dataset, \"__len__\")\n",
        "        and len(data_loader.dataset) != num_processed_samples\n",
        "        and torch.distributed.get_rank() == 0\n",
        "    ):\n",
        "        # See FIXME above\n",
        "        warnings.warn(\n",
        "            f\"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} \"\n",
        "            \"samples were used for the validation, which might bias the results. \"\n",
        "            \"Try adjusting the batch size and / or the world size. \"\n",
        "            \"Setting the world size to 1 is always a safe bet.\"\n",
        "        )\n",
        "\n",
        "    metric_logger.synchronize_between_processes()\n",
        "\n",
        "    print(f\"{header} Acc@1 {metric_logger.acc1.global_avg:.3f} Acc@5 {metric_logger.acc5.global_avg:.3f}\")\n",
        "    return metric_logger.acc1.global_avg\n",
        "\n",
        "\n",
        "def main(args):\n",
        "    if args.output_dir:\n",
        "        utils.mkdir(args.output_dir)\n",
        "    print(args)\n",
        "\n",
        "    device = torch.device(args.device)\n",
        "\n",
        "    if args.use_deterministic_algorithms:\n",
        "        torch.backends.cudnn.benchmark = False\n",
        "        torch.use_deterministic_algorithms(True)\n",
        "    else:\n",
        "        torch.backends.cudnn.benchmark = True\n",
        "\n",
        "    train_dir = os.path.join(args.data_path, \"train\")\n",
        "    val_dir = os.path.join(args.data_path, \"val\")\n",
        "    dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir)\n",
        "\n",
        "    num_classes = len(dataset.classes)\n",
        "    data_loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=16, pin_memory=True, collate_fn=None)\n",
        "    data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=64, sampler=test_sampler, num_workers=16, pin_memory=True)\n",
        "\n",
        "    print(\"Creating model\")\n",
        "    model = torchvision.models.get_model(args.model, weights=args.weights, num_classes=num_classes)\n",
        "    if args.apply_trp:\n",
        "        model = apply_trp(model, args.trp_depths, args.in_planes, args.out_planes, args.trp_rewards, label_smoothing=args.label_smoothing)\n",
        "    model.to(device)\n",
        "\n",
        "    parameters = utils.set_weight_decay(model, args.weight_decay, norm_weight_decay=None, custom_keys_weight_decay=None)\n",
        "    optimizer = torch.optim.SGD(parameters, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, nesterov=False)\n",
        "\n",
        "    main_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)\n",
        "    warmup_lr_scheduler = torch.optim.lr_scheduler.ConstantLR(optimizer, factor=args.lr_warmup_decay, total_iters=args.lr_warmup_epochs)\n",
        "    lr_scheduler = torch.optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup_lr_scheduler, main_lr_scheduler], milestones=[args.lr_warmup_epochs])\n",
        "\n",
        "\n",
        "    print(\"Start training\")\n",
        "    start_time = time.time()\n",
        "    for epoch in range(args.epochs):\n",
        "        train_one_epoch(model, optimizer, data_loader, device, epoch, args)\n",
        "        lr_scheduler.step()\n",
        "        evaluate(model, nn.CrossEntropyLoss(), data_loader_test, device=device)\n",
        "        if args.output_dir:\n",
        "            checkpoint = {\n",
        "                \"model\": model.state_dict() if not args.apply_trp else {k: v for k, v in model.state_dict().items() if not \"trp_blocks\" in k},\n",
        "                \"optimizer\": optimizer.state_dict(),\n",
        "                \"lr_scheduler\": lr_scheduler.state_dict(),\n",
        "                \"epoch\": epoch,\n",
        "                \"args\": args,\n",
        "            }\n",
        "            utils.save_on_master(checkpoint, os.path.join(args.output_dir, f\"model_{epoch}.pth\"))\n",
        "            utils.save_on_master(checkpoint, os.path.join(args.output_dir, \"checkpoint.pth\"))\n",
        "\n",
        "    total_time = time.time() - start_time\n",
        "    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n",
        "    print(f\"Training time {total_time_str}\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "SV8s5k49KwgS"
      },
      "source": [
        "Prepare the [ImageNet](http://image-net.org/) dataset manually and place it in `/path/to/imagenet`. For image classification examples, pass the argument `--data-path=/path/to/imagenet` to the training script. The extracted dataset directory should follow this structure:\n",
        "```setup\n",
        "/path/to/imagenet/:\n",
        "    train/:\n",
        "        n01440764:\n",
        "            n01440764_18.JPEG ...\n",
        "        n01443537:\n",
        "            n01443537_2.JPEG ...\n",
        "    val/:\n",
        "        n01440764:\n",
        "            ILSVRC2012_val_00000293.JPEG ...\n",
        "        n01443537:\n",
        "            ILSVRC2012_val_00000236.JPEG ...\n",
        "```\n",
        "\n",
        "Now you can apply the SPG algorithm in model retraining.\n",
        "\n",
        "**Implementation Note:**\n",
        "\n",
        "- This demonstration runs on Google Colab using a single GPU configuration\n",
        "- Performance Improvement: Enhances ResNet18 validation accuracy (ACC@1) from 69.758% to 70.554%\n",
        "- For optimal results:\n",
        "  - Refer to our README.md for complete setup instructions\n",
        "  - Recommended hardware: 4× RTX A6000 GPUs"
      ]
    },
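    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "The run below was launched with settings along these lines (a sketch reconstructed from the namespace printed in the output; the actual launch cell may differ):\n",
        "\n",
        "```python\n",
        "from types import SimpleNamespace\n",
        "\n",
        "args = SimpleNamespace(\n",
        "    data_path=\"/path/to/imagenet\", model=\"resnet18\", device=\"cuda\",\n",
        "    batch_size=512, epochs=16, lr=0.002, momentum=0.9, weight_decay=1e-4,\n",
        "    label_smoothing=0.0, lr_warmup_epochs=1, lr_warmup_decay=0.0,\n",
        "    lr_step_size=2, lr_gamma=0.5, print_freq=100, output_dir=\"resnet18\",\n",
        "    use_deterministic_algorithms=False, weights=\"ResNet18_Weights.IMAGENET1K_V1\",\n",
        "    apply_trp=True, trp_depths=[4, 4, 4], in_planes=512, out_planes=8,\n",
        "    trp_rewards=[1.0, 0.4, 0.2, 0.1],\n",
        ")\n",
        "main(args)\n",
        "```"
      ]
    },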
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 393
        },
        "id": "UDZxDNfT4xUb",
        "outputId": "9c266547-5118-49a0-aed8-1eb5aa7ef12f"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "namespace(data_path='/home/cs/Documents/datasets/imagenet', model='resnet18', device='cuda', batch_size=512, epochs=16, lr=0.002, momentum=0.9, weight_decay=0.0001, label_smoothing=0.0, lr_warmup_epochs=1, lr_warmup_decay=0.0, lr_step_size=2, lr_gamma=0.5, print_freq=100, output_dir='resnet18', use_deterministic_algorithms=False, weights='ResNet18_Weights.IMAGENET1K_V1', apply_trp=True, trp_depths=[4, 4, 4], in_planes=512, out_planes=8, trp_rewards=[1.0, 0.4, 0.2, 0.1])\n",
            "Loading data\n",
            "Loading training data\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Took 1.663217306137085\n",
            "Loading validation data\n",
            "Creating data loaders\n",
            "Creating model\n",
            "✅ Applying TRP to ResNet for Image Classification...\n",
            "Start training\n",
            "Epoch: [0]  [   0/2503]  eta: 10:08:27  lr: 0.0  img/s: 50.42287085194474  loss: 2.4802 (2.4802)  acc1: 68.1641 (68.1641)  acc5: 88.6719 (88.6719)  time: 14.5854  data: 4.4313  max mem: 14260\n",
            "Epoch: [0]  [ 100/2503]  eta: 0:20:23  lr: 0.0  img/s: 1383.432089555604  loss: 2.5032 (2.5529)  acc1: 68.9453 (69.0652)  acc5: 87.6953 (87.2351)  time: 0.3696  data: 0.0003  max mem: 14260\n",
            "Epoch: [0]  [ 200/2503]  eta: 0:16:54  lr: 0.0  img/s: 1378.2442081212737  loss: 2.5111 (2.5476)  acc1: 69.7266 (69.0396)  acc5: 88.0859 (87.3883)  time: 0.3723  data: 0.0003  max mem: 14260\n",
            "Epoch: [0]  [ 300/2503]  eta: 0:15:21  lr: 0.0  img/s: 1374.120426975581  loss: 2.5257 (2.5469)  acc1: 69.7266 (69.1783)  acc5: 87.6953 (87.3566)  time: 0.3735  data: 0.0003  max mem: 14260\n",
            "Epoch: [0]  [ 400/2503]  eta: 0:14:16  lr: 0.0  img/s: 1371.8057112267902  loss: 2.5656 (2.5481)  acc1: 68.7500 (69.1119)  acc5: 87.1094 (87.2935)  time: 0.3737  data: 0.0003  max mem: 14260\n",
            "Epoch: [0]  [ 500/2503]  eta: 0:13:22  lr: 0.0  img/s: 1371.3563139705075  loss: 2.5417 (2.5455)  acc1: 69.3359 (69.2038)  acc5: 87.3047 (87.3834)  time: 0.3740  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [ 600/2503]  eta: 0:12:33  lr: 0.0  img/s: 1372.5711524572246  loss: 2.5310 (2.5460)  acc1: 68.7500 (69.1926)  acc5: 86.9141 (87.3664)  time: 0.3737  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [ 700/2503]  eta: 0:11:48  lr: 0.0  img/s: 1372.686086194476  loss: 2.6031 (2.5471)  acc1: 68.3594 (69.1802)  acc5: 86.5234 (87.3573)  time: 0.3738  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [ 800/2503]  eta: 0:11:04  lr: 0.0  img/s: 1372.150185617073  loss: 2.5816 (2.5469)  acc1: 68.1641 (69.1716)  acc5: 87.3047 (87.3754)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [ 900/2503]  eta: 0:10:22  lr: 0.0  img/s: 1374.4176827941437  loss: 2.5572 (2.5474)  acc1: 68.5547 (69.1322)  acc5: 86.5234 (87.3606)  time: 0.3734  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [1000/2503]  eta: 0:09:41  lr: 0.0  img/s: 1371.263492760823  loss: 2.6090 (2.5481)  acc1: 69.3359 (69.1545)  acc5: 86.5234 (87.3609)  time: 0.3736  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [1100/2503]  eta: 0:09:01  lr: 0.0  img/s: 1372.8361431672834  loss: 2.5455 (2.5480)  acc1: 68.3594 (69.1791)  acc5: 86.3281 (87.3599)  time: 0.3736  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [1200/2503]  eta: 0:08:21  lr: 0.0  img/s: 1370.863454576676  loss: 2.5028 (2.5465)  acc1: 68.9453 (69.1852)  acc5: 86.7188 (87.3738)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [1300/2503]  eta: 0:07:41  lr: 0.0  img/s: 1372.2045459773494  loss: 2.5447 (2.5479)  acc1: 68.9453 (69.1936)  acc5: 87.5000 (87.3631)  time: 0.3740  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [1400/2503]  eta: 0:07:02  lr: 0.0  img/s: 1371.6970579987953  loss: 2.5078 (2.5488)  acc1: 69.1406 (69.2066)  acc5: 87.3047 (87.3691)  time: 0.3740  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [1500/2503]  eta: 0:06:23  lr: 0.0  img/s: 1372.0747896668404  loss: 2.5085 (2.5485)  acc1: 68.3594 (69.1850)  acc5: 86.5234 (87.3589)  time: 0.3737  data: 0.0005  max mem: 14260\n",
            "Epoch: [0]  [1600/2503]  eta: 0:05:44  lr: 0.0  img/s: 1370.7497007944983  loss: 2.5625 (2.5480)  acc1: 68.7500 (69.1884)  acc5: 86.9141 (87.3685)  time: 0.3738  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [1700/2503]  eta: 0:05:06  lr: 0.0  img/s: 1371.5875459541212  loss: 2.5364 (2.5472)  acc1: 68.7500 (69.2003)  acc5: 87.1094 (87.3729)  time: 0.3738  data: 0.0005  max mem: 14260\n",
            "Epoch: [0]  [1800/2503]  eta: 0:04:27  lr: 0.0  img/s: 1373.9032828701265  loss: 2.5430 (2.5472)  acc1: 68.3594 (69.1841)  acc5: 86.7188 (87.3690)  time: 0.3736  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [1900/2503]  eta: 0:03:49  lr: 0.0  img/s: 1369.5695265111776  loss: 2.4750 (2.5462)  acc1: 69.3359 (69.2043)  acc5: 87.3047 (87.3722)  time: 0.3738  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [2000/2503]  eta: 0:03:11  lr: 0.0  img/s: 1374.2883870947098  loss: 2.4859 (2.5453)  acc1: 69.7266 (69.2057)  acc5: 87.1094 (87.3765)  time: 0.3738  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [2100/2503]  eta: 0:02:33  lr: 0.0  img/s: 1371.118156488072  loss: 2.5923 (2.5462)  acc1: 70.1172 (69.2161)  acc5: 87.6953 (87.3683)  time: 0.3739  data: 0.0005  max mem: 14260\n",
            "Epoch: [0]  [2200/2503]  eta: 0:01:55  lr: 0.0  img/s: 1374.420321735207  loss: 2.5274 (2.5462)  acc1: 69.5312 (69.2215)  acc5: 87.3047 (87.3624)  time: 0.3738  data: 0.0005  max mem: 14260\n",
            "Epoch: [0]  [2300/2503]  eta: 0:01:17  lr: 0.0  img/s: 1375.3401698453972  loss: 2.5620 (2.5461)  acc1: 68.3594 (69.2079)  acc5: 87.8906 (87.3658)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [2400/2503]  eta: 0:00:39  lr: 0.0  img/s: 1372.6720474542253  loss: 2.4811 (2.5456)  acc1: 69.1406 (69.2059)  acc5: 87.1094 (87.3672)  time: 0.3738  data: 0.0004  max mem: 14260\n",
            "Epoch: [0]  [2500/2503]  eta: 0:00:01  lr: 0.0  img/s: 1374.5883551796933  loss: 2.5695 (2.5453)  acc1: 69.3359 (69.1951)  acc5: 87.5000 (87.3620)  time: 0.3730  data: 0.0002  max mem: 14260\n",
            "Epoch: [0] Total time: 0:15:50\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/home/cs/anaconda3/envs/csenv/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:243: UserWarning: The epoch parameter in `scheduler.step()` was not necessary and is being deprecated where possible. Please use `scheduler.step()` to step the scheduler. During the deprecation, if epoch is different from None, the closed form is used instead of the new chainable form, where available. Please open an issue if you are unable to replicate your use case: https://github.com/pytorch/pytorch/issues/new/choose.\n",
            "  warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Test:   [  0/782]  eta: 0:47:20  loss: 0.6410 (0.6410)  acc1: 85.9375 (85.9375)  acc5: 95.3125 (95.3125)  time: 3.6326  data: 3.1976  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:43  loss: 1.0596 (0.9365)  acc1: 76.5625 (76.3150)  acc5: 89.0625 (92.2030)  time: 0.0358  data: 0.0219  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:28  loss: 0.9542 (0.9132)  acc1: 73.4375 (75.7851)  acc5: 96.8750 (93.2214)  time: 0.0266  data: 0.0126  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:21  loss: 0.8381 (0.9042)  acc1: 76.5625 (76.1991)  acc5: 92.1875 (93.5112)  time: 0.0387  data: 0.0248  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:15  loss: 1.6487 (1.0423)  acc1: 59.3750 (73.5817)  acc5: 82.8125 (91.7160)  time: 0.0286  data: 0.0148  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:11  loss: 1.5886 (1.1231)  acc1: 56.2500 (71.9935)  acc5: 84.3750 (90.5845)  time: 0.0256  data: 0.0116  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.3772 (1.1848)  acc1: 64.0625 (70.8403)  acc5: 84.3750 (89.7853)  time: 0.0265  data: 0.0126  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:03  loss: 1.2929 (1.2347)  acc1: 68.7500 (69.8890)  acc5: 87.5000 (89.0826)  time: 0.0402  data: 0.0263  max mem: 14260\n",
            "Test:  Total time: 0:00:28\n",
            "Test:  Acc@1 69.826 Acc@5 89.124\n",
            "Epoch: [1]  [   0/2503]  eta: 3:02:56  lr: 0.002  img/s: 1377.423834135741  loss: 2.5885 (2.5885)  acc1: 65.4297 (65.4297)  acc5: 85.5469 (85.5469)  time: 4.3852  data: 4.0135  max mem: 14260\n",
            "Epoch: [1]  [ 100/2503]  eta: 0:17:11  lr: 0.002  img/s: 1377.6650711932691  loss: 2.4756 (2.4861)  acc1: 66.7969 (67.5859)  acc5: 85.7422 (86.4693)  time: 0.3722  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [ 200/2503]  eta: 0:15:24  lr: 0.002  img/s: 1372.1571996150901  loss: 2.4426 (2.4819)  acc1: 66.7969 (67.1953)  acc5: 86.3281 (86.2222)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [ 300/2503]  eta: 0:14:23  lr: 0.002  img/s: 1373.1486477500014  loss: 2.4722 (2.4755)  acc1: 66.6016 (67.0428)  acc5: 85.9375 (86.1477)  time: 0.3737  data: 0.0005  max mem: 14260\n",
            "Epoch: [1]  [ 400/2503]  eta: 0:13:34  lr: 0.002  img/s: 1373.565834760211  loss: 2.4423 (2.4638)  acc1: 66.0156 (66.9537)  acc5: 85.9375 (86.1133)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [ 500/2503]  eta: 0:12:50  lr: 0.002  img/s: 1374.8294801536492  loss: 2.4545 (2.4537)  acc1: 66.6016 (66.9532)  acc5: 85.9375 (86.0821)  time: 0.3734  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [ 600/2503]  eta: 0:12:08  lr: 0.002  img/s: 1373.2759726992576  loss: 2.3748 (2.4462)  acc1: 66.4062 (66.8648)  acc5: 85.9375 (86.0428)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [ 700/2503]  eta: 0:11:27  lr: 0.002  img/s: 1373.3655533634123  loss: 2.3805 (2.4383)  acc1: 66.0156 (66.8428)  acc5: 86.1328 (86.0194)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [ 800/2503]  eta: 0:10:47  lr: 0.002  img/s: 1376.2903568576633  loss: 2.3818 (2.4345)  acc1: 66.7969 (66.7913)  acc5: 85.5469 (85.9653)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [ 900/2503]  eta: 0:10:08  lr: 0.002  img/s: 1372.8756372972396  loss: 2.4255 (2.4302)  acc1: 65.8203 (66.7045)  acc5: 85.5469 (85.9284)  time: 0.3732  data: 0.0005  max mem: 14260\n",
            "Epoch: [1]  [1000/2503]  eta: 0:09:29  lr: 0.002  img/s: 1374.3983308693175  loss: 2.4043 (2.4290)  acc1: 65.6250 (66.6322)  acc5: 85.7422 (85.8753)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [1100/2503]  eta: 0:08:50  lr: 0.002  img/s: 1374.2945434811545  loss: 2.3880 (2.4263)  acc1: 65.6250 (66.5863)  acc5: 85.5469 (85.8531)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [1200/2503]  eta: 0:08:12  lr: 0.002  img/s: 1373.028369315795  loss: 2.3041 (2.4223)  acc1: 66.6016 (66.5471)  acc5: 86.1328 (85.8172)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [1300/2503]  eta: 0:07:34  lr: 0.002  img/s: 1373.0871889829934  loss: 2.3545 (2.4189)  acc1: 65.6250 (66.5259)  acc5: 85.7422 (85.8100)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [1400/2503]  eta: 0:06:56  lr: 0.002  img/s: 1374.8021953469554  loss: 2.4115 (2.4170)  acc1: 65.4297 (66.4909)  acc5: 85.1562 (85.7717)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [1500/2503]  eta: 0:06:18  lr: 0.002  img/s: 1375.0671678203018  loss: 2.4421 (2.4148)  acc1: 66.7969 (66.4550)  acc5: 85.7422 (85.7547)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [1]  [1600/2503]  eta: 0:05:40  lr: 0.002  img/s: 1372.8387760385867  loss: 2.3420 (2.4105)  acc1: 66.0156 (66.4580)  acc5: 85.3516 (85.7397)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [1700/2503]  eta: 0:05:02  lr: 0.002  img/s: 1374.548762448642  loss: 2.3955 (2.4074)  acc1: 65.6250 (66.4491)  acc5: 84.9609 (85.7395)  time: 0.3728  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [1800/2503]  eta: 0:04:24  lr: 0.002  img/s: 1375.0935826343089  loss: 2.3322 (2.4056)  acc1: 65.2344 (66.4221)  acc5: 85.5469 (85.7143)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [1900/2503]  eta: 0:03:46  lr: 0.002  img/s: 1374.3622673726154  loss: 2.3569 (2.4030)  acc1: 67.1875 (66.4210)  acc5: 85.9375 (85.6980)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [2000/2503]  eta: 0:03:09  lr: 0.002  img/s: 1375.7957079972298  loss: 2.3746 (2.4015)  acc1: 65.6250 (66.3911)  acc5: 85.1562 (85.6760)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [2100/2503]  eta: 0:02:31  lr: 0.002  img/s: 1373.0705082669385  loss: 2.3709 (2.4006)  acc1: 66.7969 (66.3541)  acc5: 84.5703 (85.6470)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [2200/2503]  eta: 0:01:53  lr: 0.002  img/s: 1374.6710673545942  loss: 2.3266 (2.3979)  acc1: 66.6016 (66.3470)  acc5: 85.7422 (85.6361)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [2300/2503]  eta: 0:01:16  lr: 0.002  img/s: 1373.223283649662  loss: 2.3981 (2.3968)  acc1: 66.0156 (66.3150)  acc5: 84.9609 (85.6175)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [2400/2503]  eta: 0:00:38  lr: 0.002  img/s: 1371.8293718730356  loss: 2.2603 (2.3937)  acc1: 66.2109 (66.3234)  acc5: 85.5469 (85.6142)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [1]  [2500/2503]  eta: 0:00:01  lr: 0.002  img/s: 1375.273230286853  loss: 2.2845 (2.3911)  acc1: 66.2109 (66.3097)  acc5: 85.9375 (85.6073)  time: 0.3726  data: 0.0002  max mem: 14260\n",
            "Epoch: [1] Total time: 0:15:39\n",
            "Test:   [  0/782]  eta: 0:19:08  loss: 0.6668 (0.6668)  acc1: 82.8125 (82.8125)  acc5: 93.7500 (93.7500)  time: 1.4691  data: 1.4550  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:31  loss: 1.1441 (1.0760)  acc1: 73.4375 (74.5514)  acc5: 89.0625 (91.1819)  time: 0.0365  data: 0.0225  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:22  loss: 0.9512 (1.0610)  acc1: 68.7500 (73.6785)  acc5: 95.3125 (92.0942)  time: 0.0277  data: 0.0138  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:17  loss: 0.9079 (1.0453)  acc1: 76.5625 (74.2162)  acc5: 92.1875 (92.3692)  time: 0.0325  data: 0.0184  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.9251 (1.2012)  acc1: 56.2500 (71.4970)  acc5: 81.2500 (90.4613)  time: 0.0257  data: 0.0117  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.9088 (1.2877)  acc1: 56.2500 (70.0131)  acc5: 81.2500 (89.2060)  time: 0.0333  data: 0.0194  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.5349 (1.3531)  acc1: 60.9375 (68.7188)  acc5: 84.3750 (88.4437)  time: 0.0350  data: 0.0212  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.4343 (1.4105)  acc1: 62.5000 (67.6912)  acc5: 84.3750 (87.7051)  time: 0.0244  data: 0.0106  max mem: 14260\n",
            "Test:  Total time: 0:00:26\n",
            "Test:  Acc@1 67.732 Acc@5 87.802\n",
            "Epoch: [2]  [   0/2503]  eta: 4:48:07  lr: 0.002  img/s: 1383.4953692011936  loss: 2.1862 (2.1862)  acc1: 66.7969 (66.7969)  acc5: 86.9141 (86.9141)  time: 6.9066  data: 6.5365  max mem: 14260\n",
            "Epoch: [2]  [ 100/2503]  eta: 0:17:27  lr: 0.002  img/s: 1377.0608123662605  loss: 2.3162 (2.3027)  acc1: 66.6016 (66.8858)  acc5: 85.3516 (85.7905)  time: 0.3719  data: 0.0003  max mem: 14260\n",
            "Epoch: [2]  [ 200/2503]  eta: 0:15:31  lr: 0.002  img/s: 1374.1582364056483  loss: 2.3079 (2.3064)  acc1: 66.7969 (66.8406)  acc5: 85.5469 (85.8073)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [2]  [ 300/2503]  eta: 0:14:28  lr: 0.002  img/s: 1374.2268262637704  loss: 2.3224 (2.3053)  acc1: 66.9922 (66.7521)  acc5: 85.3516 (85.7338)  time: 0.3739  data: 0.0005  max mem: 14260\n",
            "Epoch: [2]  [ 400/2503]  eta: 0:13:38  lr: 0.002  img/s: 1375.5568873168527  loss: 2.2985 (2.3070)  acc1: 66.2109 (66.7662)  acc5: 84.9609 (85.6433)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [ 500/2503]  eta: 0:12:53  lr: 0.002  img/s: 1374.5065327160648  loss: 2.3106 (2.3077)  acc1: 66.6016 (66.7092)  acc5: 85.9375 (85.6284)  time: 0.3735  data: 0.0005  max mem: 14260\n",
            "Epoch: [2]  [ 600/2503]  eta: 0:12:10  lr: 0.002  img/s: 1373.834725304533  loss: 2.3626 (2.3068)  acc1: 66.4062 (66.7270)  acc5: 84.9609 (85.6476)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [ 700/2503]  eta: 0:11:29  lr: 0.002  img/s: 1374.9791257691297  loss: 2.3126 (2.3082)  acc1: 66.0156 (66.6991)  acc5: 84.9609 (85.6310)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [ 800/2503]  eta: 0:10:49  lr: 0.002  img/s: 1372.3746691257763  loss: 2.3272 (2.3085)  acc1: 66.2109 (66.6167)  acc5: 85.5469 (85.5835)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [ 900/2503]  eta: 0:10:09  lr: 0.002  img/s: 1373.2408462169565  loss: 2.3275 (2.3098)  acc1: 66.2109 (66.5671)  acc5: 84.3750 (85.5490)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [1000/2503]  eta: 0:09:30  lr: 0.002  img/s: 1373.5711061121342  loss: 2.2981 (2.3111)  acc1: 65.6250 (66.5204)  acc5: 85.5469 (85.5369)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [1100/2503]  eta: 0:08:51  lr: 0.002  img/s: 1373.5183944135272  loss: 2.3150 (2.3116)  acc1: 66.0156 (66.4719)  acc5: 85.3516 (85.4993)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [1200/2503]  eta: 0:08:13  lr: 0.002  img/s: 1376.8771662337779  loss: 2.2561 (2.3113)  acc1: 66.9922 (66.4666)  acc5: 85.7422 (85.4833)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [1300/2503]  eta: 0:07:34  lr: 0.002  img/s: 1375.0971046861873  loss: 2.2986 (2.3103)  acc1: 66.0156 (66.4510)  acc5: 86.1328 (85.4886)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [1400/2503]  eta: 0:06:56  lr: 0.002  img/s: 1374.8990172998506  loss: 2.2606 (2.3090)  acc1: 66.2109 (66.4484)  acc5: 85.5469 (85.4959)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [1500/2503]  eta: 0:06:18  lr: 0.002  img/s: 1374.708907093296  loss: 2.3684 (2.3107)  acc1: 65.0391 (66.4284)  acc5: 85.3516 (85.4825)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [1600/2503]  eta: 0:05:40  lr: 0.002  img/s: 1376.0213860020876  loss: 2.2740 (2.3100)  acc1: 66.4062 (66.4164)  acc5: 85.9375 (85.4723)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [1700/2503]  eta: 0:05:02  lr: 0.002  img/s: 1376.775652138234  loss: 2.2948 (2.3102)  acc1: 66.2109 (66.3943)  acc5: 84.9609 (85.4654)  time: 0.3732  data: 0.0005  max mem: 14260\n",
            "Epoch: [2]  [1800/2503]  eta: 0:04:24  lr: 0.002  img/s: 1374.726507681563  loss: 2.3000 (2.3090)  acc1: 65.4297 (66.3905)  acc5: 84.9609 (85.4665)  time: 0.3733  data: 0.0005  max mem: 14260\n",
            "Epoch: [2]  [1900/2503]  eta: 0:03:47  lr: 0.002  img/s: 1373.8320886117249  loss: 2.3499 (2.3098)  acc1: 65.2344 (66.3690)  acc5: 84.5703 (85.4492)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [2000/2503]  eta: 0:03:09  lr: 0.002  img/s: 1375.347216493789  loss: 2.3409 (2.3092)  acc1: 65.4297 (66.3607)  acc5: 85.7422 (85.4491)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [2100/2503]  eta: 0:02:31  lr: 0.002  img/s: 1374.2277056653809  loss: 2.3022 (2.3089)  acc1: 65.0391 (66.3468)  acc5: 85.5469 (85.4393)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [2200/2503]  eta: 0:01:53  lr: 0.002  img/s: 1375.106790421884  loss: 2.2930 (2.3095)  acc1: 65.4297 (66.3192)  acc5: 85.3516 (85.4269)  time: 0.3733  data: 0.0005  max mem: 14260\n",
            "Epoch: [2]  [2300/2503]  eta: 0:01:16  lr: 0.002  img/s: 1374.965040087179  loss: 2.3029 (2.3099)  acc1: 65.4297 (66.2895)  acc5: 84.5703 (85.4116)  time: 0.3734  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [2400/2503]  eta: 0:00:38  lr: 0.002  img/s: 1375.4890456435255  loss: 2.2958 (2.3087)  acc1: 65.8203 (66.2865)  acc5: 85.1562 (85.4129)  time: 0.3734  data: 0.0004  max mem: 14260\n",
            "Epoch: [2]  [2500/2503]  eta: 0:00:01  lr: 0.002  img/s: 1376.2780083519401  loss: 2.3475 (2.3089)  acc1: 65.4297 (66.2678)  acc5: 85.3516 (85.3979)  time: 0.3724  data: 0.0002  max mem: 14260\n",
            "Epoch: [2] Total time: 0:15:40\n",
            "Test:   [  0/782]  eta: 0:25:03  loss: 0.6173 (0.6173)  acc1: 82.8125 (82.8125)  acc5: 95.3125 (95.3125)  time: 1.9223  data: 1.9077  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:32  loss: 1.1819 (1.0846)  acc1: 73.4375 (75.0000)  acc5: 90.6250 (91.1046)  time: 0.0417  data: 0.0277  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:23  loss: 0.9468 (1.0605)  acc1: 73.4375 (74.1216)  acc5: 95.3125 (92.1175)  time: 0.0265  data: 0.0124  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:18  loss: 0.9407 (1.0479)  acc1: 78.1250 (74.6937)  acc5: 92.1875 (92.4886)  time: 0.0428  data: 0.0288  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 2.1335 (1.2101)  acc1: 54.6875 (71.6568)  acc5: 81.2500 (90.6016)  time: 0.0252  data: 0.0112  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:10  loss: 1.9338 (1.2989)  acc1: 54.6875 (70.1223)  acc5: 82.8125 (89.4024)  time: 0.0315  data: 0.0176  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.5084 (1.3697)  acc1: 67.1875 (68.8020)  acc5: 84.3750 (88.4853)  time: 0.0277  data: 0.0138  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.5578 (1.4244)  acc1: 67.1875 (67.8272)  acc5: 82.8125 (87.7385)  time: 0.0390  data: 0.0252  max mem: 14260\n",
            "Test:  Total time: 0:00:26\n",
            "Test:  Acc@1 67.768 Acc@5 87.830\n",
            "Epoch: [3]  [   0/2503]  eta: 4:50:51  lr: 0.001  img/s: 1383.2699061945843  loss: 2.1499 (2.1499)  acc1: 67.5781 (67.5781)  acc5: 87.8906 (87.8906)  time: 6.9723  data: 6.6021  max mem: 14260\n",
            "Epoch: [3]  [ 100/2503]  eta: 0:17:29  lr: 0.001  img/s: 1375.8715134445615  loss: 2.2031 (2.2340)  acc1: 67.5781 (67.0831)  acc5: 85.7422 (85.6880)  time: 0.3722  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [ 200/2503]  eta: 0:15:32  lr: 0.001  img/s: 1374.1564777813683  loss: 2.1745 (2.2220)  acc1: 67.5781 (67.2361)  acc5: 86.1328 (85.8044)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [3]  [ 300/2503]  eta: 0:14:28  lr: 0.001  img/s: 1371.6401095532028  loss: 2.1805 (2.2126)  acc1: 67.7734 (67.5003)  acc5: 85.7422 (85.9758)  time: 0.3734  data: 0.0002  max mem: 14260\n",
            "Epoch: [3]  [ 400/2503]  eta: 0:13:38  lr: 0.001  img/s: 1373.5017032190392  loss: 2.1734 (2.2108)  acc1: 67.9688 (67.5884)  acc5: 86.1328 (85.9940)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [3]  [ 500/2503]  eta: 0:12:53  lr: 0.001  img/s: 1372.7466315513593  loss: 2.2091 (2.2140)  acc1: 67.3828 (67.5664)  acc5: 86.1328 (85.9870)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [3]  [ 600/2503]  eta: 0:12:10  lr: 0.001  img/s: 1375.1349678833901  loss: 2.1677 (2.2131)  acc1: 68.3594 (67.6025)  acc5: 86.5234 (86.0051)  time: 0.3728  data: 0.0003  max mem: 14260\n",
            "Epoch: [3]  [ 700/2503]  eta: 0:11:29  lr: 0.001  img/s: 1373.4287938101538  loss: 2.1945 (2.2123)  acc1: 67.7734 (67.6107)  acc5: 86.5234 (86.0339)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [ 800/2503]  eta: 0:10:49  lr: 0.001  img/s: 1376.0152141345873  loss: 2.2110 (2.2132)  acc1: 67.1875 (67.6008)  acc5: 86.3281 (86.0472)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [ 900/2503]  eta: 0:10:09  lr: 0.001  img/s: 1373.67039079701  loss: 2.2309 (2.2128)  acc1: 66.4062 (67.5656)  acc5: 86.3281 (86.0487)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1000/2503]  eta: 0:09:30  lr: 0.001  img/s: 1372.505359362667  loss: 2.2221 (2.2109)  acc1: 67.3828 (67.5727)  acc5: 85.9375 (86.0774)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1100/2503]  eta: 0:08:51  lr: 0.001  img/s: 1373.0713861892584  loss: 2.1326 (2.2098)  acc1: 69.1406 (67.6058)  acc5: 86.3281 (86.0906)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1200/2503]  eta: 0:08:13  lr: 0.001  img/s: 1374.2602443162148  loss: 2.1885 (2.2097)  acc1: 67.3828 (67.5793)  acc5: 86.1328 (86.0725)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1300/2503]  eta: 0:07:34  lr: 0.001  img/s: 1375.7798427979378  loss: 2.1960 (2.2090)  acc1: 67.5781 (67.5810)  acc5: 85.5469 (86.0725)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1400/2503]  eta: 0:06:56  lr: 0.001  img/s: 1372.1598298828335  loss: 2.2235 (2.2082)  acc1: 67.3828 (67.5838)  acc5: 85.9375 (86.0805)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1500/2503]  eta: 0:06:18  lr: 0.001  img/s: 1374.5091719983307  loss: 2.2043 (2.2073)  acc1: 67.5781 (67.5982)  acc5: 85.9375 (86.0873)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1600/2503]  eta: 0:05:40  lr: 0.001  img/s: 1376.4517898496242  loss: 2.2086 (2.2067)  acc1: 67.9688 (67.6107)  acc5: 85.7422 (86.0932)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1700/2503]  eta: 0:05:02  lr: 0.001  img/s: 1372.9950111470782  loss: 2.1546 (2.2062)  acc1: 65.6250 (67.6013)  acc5: 86.1328 (86.0813)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1800/2503]  eta: 0:04:24  lr: 0.001  img/s: 1373.4305505723682  loss: 2.1926 (2.2065)  acc1: 67.3828 (67.5973)  acc5: 85.9375 (86.0772)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [1900/2503]  eta: 0:03:47  lr: 0.001  img/s: 1373.463051483991  loss: 2.1696 (2.2061)  acc1: 67.3828 (67.5950)  acc5: 86.3281 (86.0694)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [2000/2503]  eta: 0:03:09  lr: 0.001  img/s: 1372.961654599217  loss: 2.2190 (2.2065)  acc1: 67.5781 (67.5911)  acc5: 86.1328 (86.0679)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [2100/2503]  eta: 0:02:31  lr: 0.001  img/s: 1373.8813085463426  loss: 2.2275 (2.2058)  acc1: 68.1641 (67.6105)  acc5: 86.1328 (86.0772)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [2200/2503]  eta: 0:01:53  lr: 0.001  img/s: 1373.0116900288224  loss: 2.1538 (2.2060)  acc1: 67.9688 (67.6105)  acc5: 86.5234 (86.0739)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [2300/2503]  eta: 0:01:16  lr: 0.001  img/s: 1374.5918746439477  loss: 2.1686 (2.2049)  acc1: 67.1875 (67.6216)  acc5: 85.7422 (86.0791)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [3]  [2400/2503]  eta: 0:00:38  lr: 0.001  img/s: 1375.5507196154706  loss: 2.1970 (2.2041)  acc1: 66.6016 (67.6306)  acc5: 86.1328 (86.0897)  time: 0.3732  data: 0.0005  max mem: 14260\n",
            "Epoch: [3]  [2500/2503]  eta: 0:00:01  lr: 0.001  img/s: 1373.6352441347408  loss: 2.1789 (2.2035)  acc1: 67.3828 (67.6246)  acc5: 85.9375 (86.0923)  time: 0.3725  data: 0.0002  max mem: 14260\n",
            "Epoch: [3] Total time: 0:15:40\n",
            "Test:   [  0/782]  eta: 0:21:13  loss: 0.6460 (0.6460)  acc1: 84.3750 (84.3750)  acc5: 95.3125 (95.3125)  time: 1.6289  data: 1.6146  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:32  loss: 1.1729 (1.0473)  acc1: 75.0000 (75.9746)  acc5: 89.0625 (91.7079)  time: 0.0438  data: 0.0298  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:22  loss: 0.8982 (1.0269)  acc1: 76.5625 (75.1477)  acc5: 95.3125 (92.7394)  time: 0.0272  data: 0.0132  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:17  loss: 0.8512 (1.0179)  acc1: 79.6875 (75.7164)  acc5: 92.1875 (92.9973)  time: 0.0347  data: 0.0208  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.9055 (1.1681)  acc1: 59.3750 (72.9076)  acc5: 84.3750 (91.2290)  time: 0.0265  data: 0.0123  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.9462 (1.2500)  acc1: 56.2500 (71.4353)  acc5: 84.3750 (90.0792)  time: 0.0314  data: 0.0175  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.5161 (1.3183)  acc1: 62.5000 (70.0525)  acc5: 85.9375 (89.2133)  time: 0.0271  data: 0.0132  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.4580 (1.3705)  acc1: 67.1875 (69.1245)  acc5: 84.3750 (88.5231)  time: 0.0279  data: 0.0141  max mem: 14260\n",
            "Test:  Total time: 0:00:25\n",
            "Test:  Acc@1 69.080 Acc@5 88.544\n",
            "Epoch: [4]  [   0/2503]  eta: 4:30:12  lr: 0.001  img/s: 1382.6509293287766  loss: 2.3736 (2.3736)  acc1: 63.6719 (63.6719)  acc5: 86.5234 (86.5234)  time: 6.4770  data: 6.1067  max mem: 14260\n",
            "Epoch: [4]  [ 100/2503]  eta: 0:17:17  lr: 0.001  img/s: 1378.5105091765527  loss: 2.1491 (2.1932)  acc1: 68.1641 (67.9513)  acc5: 87.1094 (86.2740)  time: 0.3720  data: 0.0003  max mem: 14260\n",
            "Epoch: [4]  [ 200/2503]  eta: 0:15:26  lr: 0.001  img/s: 1372.9177668739112  loss: 2.1940 (2.1872)  acc1: 67.1875 (67.9969)  acc5: 85.7422 (86.2212)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [4]  [ 300/2503]  eta: 0:14:25  lr: 0.001  img/s: 1371.3895925584864  loss: 2.1707 (2.1943)  acc1: 67.5781 (67.9720)  acc5: 86.1328 (86.1562)  time: 0.3735  data: 0.0003  max mem: 14260\n",
            "Epoch: [4]  [ 400/2503]  eta: 0:13:35  lr: 0.001  img/s: 1373.3567704286377  loss: 2.1956 (2.1902)  acc1: 66.9922 (67.8923)  acc5: 85.7422 (86.1913)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [4]  [ 500/2503]  eta: 0:12:51  lr: 0.001  img/s: 1376.375920448391  loss: 2.1416 (2.1890)  acc1: 67.7734 (67.9278)  acc5: 86.5234 (86.2201)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [4]  [ 600/2503]  eta: 0:12:08  lr: 0.001  img/s: 1376.8489173300918  loss: 2.1248 (2.1860)  acc1: 67.5781 (67.9216)  acc5: 86.3281 (86.2482)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [4]  [ 700/2503]  eta: 0:11:27  lr: 0.001  img/s: 1373.8479089203872  loss: 2.1350 (2.1847)  acc1: 68.5547 (67.8983)  acc5: 86.5234 (86.2509)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [ 800/2503]  eta: 0:10:48  lr: 0.001  img/s: 1373.659846609481  loss: 2.1745 (2.1827)  acc1: 67.9688 (67.9524)  acc5: 86.3281 (86.2828)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [ 900/2503]  eta: 0:10:08  lr: 0.001  img/s: 1375.2564964153403  loss: 2.1277 (2.1816)  acc1: 68.5547 (67.9666)  acc5: 86.5234 (86.2778)  time: 0.3733  data: 0.0005  max mem: 14260\n",
            "Epoch: [4]  [1000/2503]  eta: 0:09:29  lr: 0.001  img/s: 1374.41944208706  loss: 2.1935 (2.1810)  acc1: 67.5781 (67.9851)  acc5: 86.3281 (86.2672)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [1100/2503]  eta: 0:08:51  lr: 0.001  img/s: 1374.5874753164455  loss: 2.2101 (2.1814)  acc1: 67.1875 (67.9473)  acc5: 85.5469 (86.2471)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [4]  [1200/2503]  eta: 0:08:12  lr: 0.001  img/s: 1375.4238534578003  loss: 2.1642 (2.1809)  acc1: 67.3828 (67.9583)  acc5: 86.5234 (86.2507)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [1300/2503]  eta: 0:07:34  lr: 0.001  img/s: 1374.8013152099318  loss: 2.1941 (2.1805)  acc1: 68.3594 (67.9688)  acc5: 85.5469 (86.2435)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [1400/2503]  eta: 0:06:56  lr: 0.001  img/s: 1374.1810989309142  loss: 2.1316 (2.1809)  acc1: 68.5547 (67.9814)  acc5: 85.9375 (86.2453)  time: 0.3734  data: 0.0005  max mem: 14260\n",
            "Epoch: [4]  [1500/2503]  eta: 0:06:18  lr: 0.001  img/s: 1377.0043006464114  loss: 2.1451 (2.1803)  acc1: 67.3828 (67.9863)  acc5: 86.5234 (86.2486)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [1600/2503]  eta: 0:05:40  lr: 0.001  img/s: 1373.8909771622075  loss: 2.1422 (2.1813)  acc1: 68.1641 (67.9723)  acc5: 86.7188 (86.2364)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [1700/2503]  eta: 0:05:02  lr: 0.001  img/s: 1374.3481943263146  loss: 2.1529 (2.1815)  acc1: 68.3594 (67.9662)  acc5: 86.7188 (86.2380)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [1800/2503]  eta: 0:04:24  lr: 0.001  img/s: 1374.854125426064  loss: 2.1541 (2.1817)  acc1: 68.5547 (67.9540)  acc5: 86.5234 (86.2261)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [1900/2503]  eta: 0:03:46  lr: 0.001  img/s: 1371.723343491722  loss: 2.1893 (2.1806)  acc1: 69.1406 (67.9705)  acc5: 86.3281 (86.2388)  time: 0.3734  data: 0.0005  max mem: 14260\n",
            "Epoch: [4]  [2000/2503]  eta: 0:03:09  lr: 0.001  img/s: 1376.041665385128  loss: 2.1741 (2.1805)  acc1: 67.1875 (67.9476)  acc5: 86.7188 (86.2363)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [2100/2503]  eta: 0:02:31  lr: 0.001  img/s: 1374.115151406567  loss: 2.1454 (2.1806)  acc1: 67.3828 (67.9351)  acc5: 85.9375 (86.2380)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [2200/2503]  eta: 0:01:53  lr: 0.001  img/s: 1371.6120751323401  loss: 2.1076 (2.1804)  acc1: 68.3594 (67.9465)  acc5: 86.5234 (86.2448)  time: 0.3733  data: 0.0005  max mem: 14260\n",
            "Epoch: [4]  [2300/2503]  eta: 0:01:16  lr: 0.001  img/s: 1372.1826259590175  loss: 2.1178 (2.1807)  acc1: 68.1641 (67.9456)  acc5: 86.5234 (86.2320)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [2400/2503]  eta: 0:00:38  lr: 0.001  img/s: 1372.0204395865571  loss: 2.1817 (2.1806)  acc1: 68.5547 (67.9448)  acc5: 86.5234 (86.2347)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [4]  [2500/2503]  eta: 0:00:01  lr: 0.001  img/s: 1377.6827475597668  loss: 2.1835 (2.1796)  acc1: 67.9688 (67.9440)  acc5: 85.9375 (86.2433)  time: 0.3722  data: 0.0001  max mem: 14260\n",
            "Epoch: [4] Total time: 0:15:40\n",
            "Test:   [  0/782]  eta: 0:14:26  loss: 0.7886 (0.7886)  acc1: 81.2500 (81.2500)  acc5: 90.6250 (90.6250)  time: 1.1077  data: 1.0937  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:28  loss: 1.1426 (1.0401)  acc1: 76.5625 (76.3923)  acc5: 90.6250 (91.9400)  time: 0.0392  data: 0.0253  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:20  loss: 1.0887 (1.0258)  acc1: 73.4375 (75.6141)  acc5: 95.3125 (92.8172)  time: 0.0252  data: 0.0113  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:17  loss: 0.8914 (1.0167)  acc1: 76.5625 (76.0330)  acc5: 92.1875 (93.1167)  time: 0.0417  data: 0.0278  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.8164 (1.1685)  acc1: 59.3750 (73.2271)  acc5: 82.8125 (91.2329)  time: 0.0310  data: 0.0171  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.8076 (1.2538)  acc1: 56.2500 (71.8001)  acc5: 84.3750 (90.1073)  time: 0.0291  data: 0.0153  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4276 (1.3227)  acc1: 64.0625 (70.3619)  acc5: 82.8125 (89.2679)  time: 0.0267  data: 0.0127  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3476 (1.3775)  acc1: 68.7500 (69.3808)  acc5: 85.9375 (88.4763)  time: 0.0369  data: 0.0229  max mem: 14260\n",
            "Test:  Total time: 0:00:25\n",
            "Test:  Acc@1 69.350 Acc@5 88.510\n",
            "Epoch: [5]  [   0/2503]  eta: 3:38:30  lr: 0.0005  img/s: 1384.6568204297596  loss: 2.1909 (2.1909)  acc1: 68.1641 (68.1641)  acc5: 88.8672 (88.8672)  time: 5.2378  data: 4.8680  max mem: 14260\n",
            "Epoch: [5]  [ 100/2503]  eta: 0:17:31  lr: 0.0005  img/s: 1376.145716838  loss: 2.1311 (2.1426)  acc1: 68.5547 (68.7597)  acc5: 86.3281 (86.6066)  time: 0.3722  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [ 200/2503]  eta: 0:15:34  lr: 0.0005  img/s: 1370.6832073273868  loss: 2.0784 (2.1287)  acc1: 69.9219 (68.9269)  acc5: 86.9141 (86.6711)  time: 0.3737  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [ 300/2503]  eta: 0:14:30  lr: 0.0005  img/s: 1371.4062324581598  loss: 2.1135 (2.1308)  acc1: 68.3594 (68.9343)  acc5: 86.5234 (86.7252)  time: 0.3739  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [ 400/2503]  eta: 0:13:39  lr: 0.0005  img/s: 1371.5691496461995  loss: 2.1033 (2.1284)  acc1: 67.9688 (68.9088)  acc5: 86.7188 (86.6929)  time: 0.3735  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [ 500/2503]  eta: 0:12:54  lr: 0.0005  img/s: 1375.4335437970838  loss: 2.1092 (2.1302)  acc1: 68.9453 (68.8681)  acc5: 86.5234 (86.5979)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [ 600/2503]  eta: 0:12:11  lr: 0.0005  img/s: 1375.2107005335026  loss: 2.1298 (2.1278)  acc1: 68.5547 (68.8559)  acc5: 86.9141 (86.6174)  time: 0.3734  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [ 700/2503]  eta: 0:11:30  lr: 0.0005  img/s: 1372.7413665256945  loss: 2.1303 (2.1261)  acc1: 68.1641 (68.8720)  acc5: 87.3047 (86.6388)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [ 800/2503]  eta: 0:10:49  lr: 0.0005  img/s: 1373.4033212630675  loss: 2.1036 (2.1246)  acc1: 67.9688 (68.8614)  acc5: 85.9375 (86.6395)  time: 0.3737  data: 0.0005  max mem: 14260\n",
            "Epoch: [5]  [ 900/2503]  eta: 0:10:10  lr: 0.0005  img/s: 1371.7811751223753  loss: 2.1149 (2.1246)  acc1: 67.9688 (68.8159)  acc5: 85.9375 (86.6190)  time: 0.3734  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [1000/2503]  eta: 0:09:31  lr: 0.0005  img/s: 1371.9284049158534  loss: 2.0887 (2.1243)  acc1: 67.9688 (68.7960)  acc5: 86.3281 (86.6101)  time: 0.3737  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [1100/2503]  eta: 0:08:52  lr: 0.0005  img/s: 1368.3695843055893  loss: 2.0901 (2.1223)  acc1: 68.5547 (68.7977)  acc5: 86.7188 (86.6175)  time: 0.3736  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [1200/2503]  eta: 0:08:13  lr: 0.0005  img/s: 1373.9410803513733  loss: 2.1272 (2.1232)  acc1: 68.3594 (68.7907)  acc5: 85.9375 (86.6052)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [1300/2503]  eta: 0:07:35  lr: 0.0005  img/s: 1374.7273877228067  loss: 2.1486 (2.1239)  acc1: 67.7734 (68.7691)  acc5: 86.1328 (86.6065)  time: 0.3737  data: 0.0005  max mem: 14260\n",
            "Epoch: [5]  [1400/2503]  eta: 0:06:57  lr: 0.0005  img/s: 1374.2514499343106  loss: 2.1073 (2.1244)  acc1: 70.1172 (68.7754)  acc5: 86.7188 (86.6071)  time: 0.3737  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [1500/2503]  eta: 0:06:19  lr: 0.0005  img/s: 1374.0210772279306  loss: 2.1250 (2.1237)  acc1: 68.1641 (68.7741)  acc5: 86.9141 (86.6221)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [1600/2503]  eta: 0:05:40  lr: 0.0005  img/s: 1372.7457540442772  loss: 2.1263 (2.1235)  acc1: 68.5547 (68.7815)  acc5: 86.5234 (86.6377)  time: 0.3736  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [1700/2503]  eta: 0:05:03  lr: 0.0005  img/s: 1374.649068464126  loss: 2.1610 (2.1234)  acc1: 68.3594 (68.7772)  acc5: 87.3047 (86.6524)  time: 0.3736  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [1800/2503]  eta: 0:04:25  lr: 0.0005  img/s: 1370.6219690514713  loss: 2.1261 (2.1233)  acc1: 68.9453 (68.7823)  acc5: 86.9141 (86.6581)  time: 0.3737  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [1900/2503]  eta: 0:03:47  lr: 0.0005  img/s: 1373.3084662953322  loss: 2.1031 (2.1242)  acc1: 69.3359 (68.7770)  acc5: 86.5234 (86.6429)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [2000/2503]  eta: 0:03:09  lr: 0.0005  img/s: 1373.4867693362507  loss: 2.1362 (2.1242)  acc1: 68.7500 (68.7747)  acc5: 85.9375 (86.6454)  time: 0.3736  data: 0.0004  max mem: 14260\n",
            "Epoch: [5]  [2100/2503]  eta: 0:02:31  lr: 0.0005  img/s: 1374.148564027812  loss: 2.0572 (2.1238)  acc1: 69.5312 (68.7785)  acc5: 86.7188 (86.6375)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [2200/2503]  eta: 0:01:54  lr: 0.0005  img/s: 1375.4590917097134  loss: 2.0764 (2.1235)  acc1: 68.5547 (68.7793)  acc5: 86.9141 (86.6440)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [2300/2503]  eta: 0:01:16  lr: 0.0005  img/s: 1372.3834395034714  loss: 2.1352 (2.1226)  acc1: 68.3594 (68.7862)  acc5: 87.1094 (86.6495)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [2400/2503]  eta: 0:00:38  lr: 0.0005  img/s: 1372.4115054654028  loss: 2.1190 (2.1228)  acc1: 68.3594 (68.7825)  acc5: 86.3281 (86.6402)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [5]  [2500/2503]  eta: 0:00:01  lr: 0.0005  img/s: 1373.438456057949  loss: 2.1008 (2.1223)  acc1: 67.9688 (68.7743)  acc5: 86.3281 (86.6373)  time: 0.3727  data: 0.0001  max mem: 14260\n",
            "Epoch: [5] Total time: 0:15:41\n",
            "Test:   [  0/782]  eta: 0:14:44  loss: 0.6408 (0.6408)  acc1: 85.9375 (85.9375)  acc5: 95.3125 (95.3125)  time: 1.1316  data: 1.1173  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:29  loss: 1.0767 (1.0193)  acc1: 78.1250 (76.7172)  acc5: 90.6250 (92.3422)  time: 0.0415  data: 0.0275  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:21  loss: 0.8614 (0.9945)  acc1: 75.0000 (76.2127)  acc5: 96.8750 (93.1203)  time: 0.0266  data: 0.0126  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:17  loss: 0.8539 (0.9893)  acc1: 78.1250 (76.5729)  acc5: 92.1875 (93.3711)  time: 0.0287  data: 0.0147  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.8363 (1.1393)  acc1: 60.9375 (73.8661)  acc5: 82.8125 (91.5680)  time: 0.0270  data: 0.0129  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.9171 (1.2267)  acc1: 56.2500 (72.3179)  acc5: 84.3750 (90.4036)  time: 0.0264  data: 0.0125  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4282 (1.2941)  acc1: 62.5000 (70.8611)  acc5: 82.8125 (89.6215)  time: 0.0340  data: 0.0202  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3874 (1.3462)  acc1: 68.7500 (69.8734)  acc5: 85.9375 (88.9087)  time: 0.0280  data: 0.0142  max mem: 14260\n",
            "Test:  Total time: 0:00:26\n",
            "Test:  Acc@1 69.826 Acc@5 88.892\n",
            "Epoch: [6]  [   0/2503]  eta: 3:58:54  lr: 0.0005  img/s: 1382.5289805388652  loss: 2.2078 (2.2078)  acc1: 67.9688 (67.9688)  acc5: 85.5469 (85.5469)  time: 5.7268  data: 5.3564  max mem: 14260\n",
            "Epoch: [6]  [ 100/2503]  eta: 0:16:59  lr: 0.0005  img/s: 1378.7317676984603  loss: 2.0922 (2.1056)  acc1: 69.1406 (69.0555)  acc5: 86.5234 (86.8696)  time: 0.3720  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [ 200/2503]  eta: 0:15:17  lr: 0.0005  img/s: 1373.7485985738494  loss: 2.1611 (2.1031)  acc1: 69.1406 (69.1630)  acc5: 86.5234 (86.7839)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [ 300/2503]  eta: 0:14:19  lr: 0.0005  img/s: 1372.0318352143677  loss: 2.1514 (2.1035)  acc1: 68.7500 (69.0595)  acc5: 87.1094 (86.6883)  time: 0.3735  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [ 400/2503]  eta: 0:13:31  lr: 0.0005  img/s: 1375.015221646458  loss: 2.0837 (2.1048)  acc1: 68.7500 (69.0237)  acc5: 87.1094 (86.6959)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [ 500/2503]  eta: 0:12:47  lr: 0.0005  img/s: 1373.273338150751  loss: 2.0800 (2.1028)  acc1: 68.3594 (68.9991)  acc5: 87.5000 (86.7152)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [ 600/2503]  eta: 0:12:06  lr: 0.0005  img/s: 1376.3044697069715  loss: 2.0787 (2.1029)  acc1: 68.7500 (69.0110)  acc5: 86.3281 (86.7356)  time: 0.3728  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [ 700/2503]  eta: 0:11:25  lr: 0.0005  img/s: 1374.2127559910693  loss: 2.1109 (2.1046)  acc1: 68.9453 (69.0022)  acc5: 86.3281 (86.7051)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [6]  [ 800/2503]  eta: 0:10:46  lr: 0.0005  img/s: 1374.1213062410218  loss: 2.0157 (2.1058)  acc1: 69.7266 (68.9948)  acc5: 86.9141 (86.6980)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [6]  [ 900/2503]  eta: 0:10:07  lr: 0.0005  img/s: 1376.1695274335636  loss: 2.1286 (2.1067)  acc1: 68.3594 (68.9700)  acc5: 86.7188 (86.6797)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [6]  [1000/2503]  eta: 0:09:28  lr: 0.0005  img/s: 1374.0782235576708  loss: 2.1129 (2.1050)  acc1: 69.1406 (68.9937)  acc5: 86.3281 (86.7209)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [6]  [1100/2503]  eta: 0:08:50  lr: 0.0005  img/s: 1372.4018576615117  loss: 2.1073 (2.1074)  acc1: 69.1406 (68.9714)  acc5: 86.5234 (86.7030)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [1200/2503]  eta: 0:08:11  lr: 0.0005  img/s: 1373.360283589067  loss: 2.1258 (2.1072)  acc1: 68.1641 (68.9421)  acc5: 86.1328 (86.7062)  time: 0.3733  data: 0.0005  max mem: 14260\n",
            "Epoch: [6]  [1300/2503]  eta: 0:07:33  lr: 0.0005  img/s: 1375.2661843965818  loss: 2.0873 (2.1069)  acc1: 68.5547 (68.9518)  acc5: 87.1094 (86.7148)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [6]  [1400/2503]  eta: 0:06:55  lr: 0.0005  img/s: 1375.4476390797913  loss: 2.1331 (2.1069)  acc1: 68.3594 (68.9512)  acc5: 86.7188 (86.7203)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [6]  [1500/2503]  eta: 0:06:17  lr: 0.0005  img/s: 1374.7300278532987  loss: 2.0836 (2.1075)  acc1: 68.5547 (68.9473)  acc5: 86.7188 (86.7115)  time: 0.3733  data: 0.0005  max mem: 14260\n",
            "Epoch: [6]  [1600/2503]  eta: 0:05:39  lr: 0.0005  img/s: 1373.1591840910544  loss: 2.0973 (2.1071)  acc1: 68.7500 (68.9613)  acc5: 87.1094 (86.7238)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [6]  [1700/2503]  eta: 0:05:02  lr: 0.0005  img/s: 1374.3235671887749  loss: 2.0603 (2.1067)  acc1: 70.1172 (68.9723)  acc5: 87.5000 (86.7394)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [6]  [1800/2503]  eta: 0:04:24  lr: 0.0005  img/s: 1374.2101178470139  loss: 2.0859 (2.1060)  acc1: 68.7500 (68.9833)  acc5: 86.9141 (86.7514)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [1900/2503]  eta: 0:03:46  lr: 0.0005  img/s: 1372.5983487787776  loss: 2.1176 (2.1065)  acc1: 68.5547 (68.9724)  acc5: 86.1328 (86.7435)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [2000/2503]  eta: 0:03:08  lr: 0.0005  img/s: 1374.1177891860107  loss: 2.0992 (2.1062)  acc1: 68.9453 (68.9624)  acc5: 86.5234 (86.7397)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [2100/2503]  eta: 0:02:31  lr: 0.0005  img/s: 1373.3427179666571  loss: 2.1189 (2.1061)  acc1: 68.7500 (68.9620)  acc5: 86.7188 (86.7408)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [2200/2503]  eta: 0:01:53  lr: 0.0005  img/s: 1376.7085727766707  loss: 2.1298 (2.1069)  acc1: 68.9453 (68.9591)  acc5: 87.1094 (86.7350)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [2300/2503]  eta: 0:01:16  lr: 0.0005  img/s: 1374.38953472  loss: 2.0977 (2.1065)  acc1: 68.7500 (68.9612)  acc5: 86.5234 (86.7406)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [2400/2503]  eta: 0:00:38  lr: 0.0005  img/s: 1374.8963765202404  loss: 2.0938 (2.1068)  acc1: 69.5312 (68.9646)  acc5: 86.3281 (86.7427)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [6]  [2500/2503]  eta: 0:00:01  lr: 0.0005  img/s: 1374.408006763611  loss: 2.0790 (2.1071)  acc1: 68.5547 (68.9612)  acc5: 86.3281 (86.7348)  time: 0.3723  data: 0.0001  max mem: 14260\n",
            "Epoch: [6] Total time: 0:15:39\n",
            "Test:   [  0/782]  eta: 0:14:41  loss: 0.6884 (0.6884)  acc1: 84.3750 (84.3750)  acc5: 95.3125 (95.3125)  time: 1.1274  data: 1.1136  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:28  loss: 1.0928 (1.0222)  acc1: 78.1250 (76.8100)  acc5: 90.6250 (92.2339)  time: 0.0378  data: 0.0239  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:20  loss: 0.9227 (1.0006)  acc1: 73.4375 (76.1039)  acc5: 95.3125 (93.0271)  time: 0.0235  data: 0.0096  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:17  loss: 0.8622 (0.9921)  acc1: 79.6875 (76.6352)  acc5: 92.1875 (93.3140)  time: 0.0376  data: 0.0237  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.8481 (1.1427)  acc1: 60.9375 (73.9129)  acc5: 84.3750 (91.5368)  time: 0.0304  data: 0.0165  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.9001 (1.2259)  acc1: 56.2500 (72.4988)  acc5: 84.3750 (90.3942)  time: 0.0282  data: 0.0143  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4800 (1.2927)  acc1: 62.5000 (71.1366)  acc5: 82.8125 (89.6293)  time: 0.0225  data: 0.0086  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.4293 (1.3471)  acc1: 71.8750 (70.0807)  acc5: 85.9375 (88.9087)  time: 0.0324  data: 0.0184  max mem: 14260\n",
            "Test:  Total time: 0:00:25\n",
            "Test:  Acc@1 69.970 Acc@5 88.936\n",
            "Epoch: [7]  [   0/2503]  eta: 3:39:34  lr: 0.00025  img/s: 1386.0098412288628  loss: 2.2069 (2.2069)  acc1: 69.7266 (69.7266)  acc5: 87.6953 (87.6953)  time: 5.2636  data: 4.8942  max mem: 14260\n",
            "Epoch: [7]  [ 100/2503]  eta: 0:17:35  lr: 0.00025  img/s: 1375.9808290291555  loss: 2.0126 (2.0921)  acc1: 69.9219 (69.4868)  acc5: 87.3047 (86.8038)  time: 0.3723  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [ 200/2503]  eta: 0:15:35  lr: 0.00025  img/s: 1374.2417762442183  loss: 2.1093 (2.0838)  acc1: 68.7500 (69.3855)  acc5: 86.7188 (86.9131)  time: 0.3734  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [ 300/2503]  eta: 0:14:31  lr: 0.00025  img/s: 1373.4164966004885  loss: 2.0579 (2.0859)  acc1: 69.7266 (69.3249)  acc5: 87.1094 (86.8641)  time: 0.3740  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [ 400/2503]  eta: 0:13:40  lr: 0.00025  img/s: 1372.4237846847204  loss: 2.0518 (2.0854)  acc1: 69.9219 (69.3491)  acc5: 87.3047 (86.8585)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [ 500/2503]  eta: 0:12:54  lr: 0.00025  img/s: 1372.9625323823432  loss: 2.0516 (2.0824)  acc1: 68.9453 (69.3800)  acc5: 87.1094 (86.9191)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [ 600/2503]  eta: 0:12:11  lr: 0.00025  img/s: 1374.5953941262244  loss: 2.0705 (2.0839)  acc1: 68.9453 (69.3554)  acc5: 86.1328 (86.8676)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [ 700/2503]  eta: 0:11:30  lr: 0.00025  img/s: 1376.5126678589056  loss: 2.0908 (2.0819)  acc1: 68.9453 (69.3568)  acc5: 85.9375 (86.8781)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [ 800/2503]  eta: 0:10:49  lr: 0.00025  img/s: 1372.6887184902419  loss: 2.0244 (2.0802)  acc1: 69.7266 (69.3450)  acc5: 86.7188 (86.8797)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [ 900/2503]  eta: 0:10:10  lr: 0.00025  img/s: 1370.5467409458663  loss: 2.0942 (2.0803)  acc1: 67.5781 (69.3468)  acc5: 86.9141 (86.8755)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [1000/2503]  eta: 0:09:31  lr: 0.00025  img/s: 1375.6784904384012  loss: 2.0316 (2.0799)  acc1: 69.3359 (69.3418)  acc5: 87.1094 (86.8893)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [1100/2503]  eta: 0:08:52  lr: 0.00025  img/s: 1372.4553608290908  loss: 2.0875 (2.0782)  acc1: 70.1172 (69.3746)  acc5: 87.3047 (86.9123)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [1200/2503]  eta: 0:08:13  lr: 0.00025  img/s: 1372.5079909653455  loss: 2.0568 (2.0775)  acc1: 69.1406 (69.3959)  acc5: 87.3047 (86.9233)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [1300/2503]  eta: 0:07:35  lr: 0.00025  img/s: 1370.9010849831086  loss: 2.0831 (2.0785)  acc1: 69.1406 (69.3987)  acc5: 86.9141 (86.9133)  time: 0.3736  data: 0.0005  max mem: 14260\n",
            "Epoch: [7]  [1400/2503]  eta: 0:06:56  lr: 0.00025  img/s: 1374.925425653903  loss: 2.1004 (2.0775)  acc1: 69.1406 (69.4249)  acc5: 86.9141 (86.9272)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [1500/2503]  eta: 0:06:18  lr: 0.00025  img/s: 1374.322687664472  loss: 2.0689 (2.0784)  acc1: 70.1172 (69.4242)  acc5: 87.5000 (86.9249)  time: 0.3734  data: 0.0005  max mem: 14260\n",
            "Epoch: [7]  [1600/2503]  eta: 0:05:40  lr: 0.00025  img/s: 1375.4106395788233  loss: 2.0802 (2.0790)  acc1: 70.3125 (69.4226)  acc5: 86.3281 (86.9213)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [1700/2503]  eta: 0:05:02  lr: 0.00025  img/s: 1374.4950959433684  loss: 2.0082 (2.0790)  acc1: 69.7266 (69.4192)  acc5: 86.3281 (86.9269)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [1800/2503]  eta: 0:04:25  lr: 0.00025  img/s: 1374.163512305496  loss: 2.0532 (2.0788)  acc1: 69.5312 (69.4340)  acc5: 87.3047 (86.9320)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [1900/2503]  eta: 0:03:47  lr: 0.00025  img/s: 1372.0528737938141  loss: 2.0800 (2.0794)  acc1: 69.1406 (69.4261)  acc5: 87.1094 (86.9262)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [7]  [2000/2503]  eta: 0:03:09  lr: 0.00025  img/s: 1373.3014405226452  loss: 2.1463 (2.0796)  acc1: 68.7500 (69.4094)  acc5: 86.1328 (86.9198)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [2100/2503]  eta: 0:02:31  lr: 0.00025  img/s: 1376.33357837278  loss: 2.1390 (2.0798)  acc1: 68.7500 (69.4111)  acc5: 86.5234 (86.9282)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [2200/2503]  eta: 0:01:54  lr: 0.00025  img/s: 1373.950749808541  loss: 2.0798 (2.0800)  acc1: 68.9453 (69.4145)  acc5: 86.3281 (86.9272)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [2300/2503]  eta: 0:01:16  lr: 0.00025  img/s: 1375.9376296658897  loss: 2.0959 (2.0796)  acc1: 69.1406 (69.4167)  acc5: 86.5234 (86.9196)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [2400/2503]  eta: 0:00:38  lr: 0.00025  img/s: 1375.1649077015584  loss: 2.0908 (2.0796)  acc1: 69.3359 (69.4115)  acc5: 87.3047 (86.9178)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [7]  [2500/2503]  eta: 0:00:01  lr: 0.00025  img/s: 1374.5839558747189  loss: 2.1112 (2.0791)  acc1: 68.5547 (69.4110)  acc5: 85.7422 (86.9191)  time: 0.3727  data: 0.0002  max mem: 14260\n",
            "Epoch: [7] Total time: 0:15:40\n",
            "Test:   [  0/782]  eta: 0:19:46  loss: 0.6660 (0.6660)  acc1: 85.9375 (85.9375)  acc5: 93.7500 (93.7500)  time: 1.5172  data: 1.5031  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:33  loss: 1.0802 (1.0143)  acc1: 76.5625 (76.8255)  acc5: 89.0625 (91.9554)  time: 0.0373  data: 0.0233  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:23  loss: 0.8769 (0.9842)  acc1: 73.4375 (76.4925)  acc5: 95.3125 (93.1126)  time: 0.0272  data: 0.0132  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:18  loss: 0.8049 (0.9795)  acc1: 78.1250 (76.8792)  acc5: 92.1875 (93.4489)  time: 0.0353  data: 0.0213  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.8327 (1.1295)  acc1: 60.9375 (74.1700)  acc5: 84.3750 (91.6576)  time: 0.0281  data: 0.0142  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:10  loss: 1.7833 (1.2144)  acc1: 57.8125 (72.6547)  acc5: 84.3750 (90.4722)  time: 0.0298  data: 0.0159  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4185 (1.2814)  acc1: 65.6250 (71.3290)  acc5: 84.3750 (89.6813)  time: 0.0280  data: 0.0142  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3914 (1.3335)  acc1: 68.7500 (70.2969)  acc5: 87.5000 (88.9934)  time: 0.0310  data: 0.0171  max mem: 14260\n",
            "Test:  Total time: 0:00:26\n",
            "Test:  Acc@1 70.206 Acc@5 89.020\n",
            "Epoch: [8]  [   0/2503]  eta: 4:02:00  lr: 0.00025  img/s: 1381.9729176292822  loss: 2.2176 (2.2176)  acc1: 67.5781 (67.5781)  acc5: 85.7422 (85.7422)  time: 5.8013  data: 5.4308  max mem: 14260\n",
            "Epoch: [8]  [ 100/2503]  eta: 0:17:01  lr: 0.00025  img/s: 1376.8427380369246  loss: 2.0530 (2.0660)  acc1: 69.3359 (69.3940)  acc5: 86.7188 (86.9566)  time: 0.3724  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [ 200/2503]  eta: 0:15:19  lr: 0.00025  img/s: 1373.6756629514773  loss: 2.0625 (2.0794)  acc1: 68.9453 (69.2815)  acc5: 87.1094 (87.0219)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [ 300/2503]  eta: 0:14:21  lr: 0.00025  img/s: 1370.3534600810924  loss: 2.1067 (2.0796)  acc1: 68.5547 (69.3119)  acc5: 86.3281 (87.0081)  time: 0.3739  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [ 400/2503]  eta: 0:13:33  lr: 0.00025  img/s: 1374.1133928925642  loss: 2.0060 (2.0789)  acc1: 69.3359 (69.3622)  acc5: 86.9141 (86.9710)  time: 0.3737  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [ 500/2503]  eta: 0:12:49  lr: 0.00025  img/s: 1374.0421768576903  loss: 2.0901 (2.0752)  acc1: 69.5312 (69.4194)  acc5: 87.5000 (86.9994)  time: 0.3735  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [ 600/2503]  eta: 0:12:07  lr: 0.00025  img/s: 1373.150403795615  loss: 2.1061 (2.0733)  acc1: 69.9219 (69.5267)  acc5: 86.5234 (87.0294)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [ 700/2503]  eta: 0:11:26  lr: 0.00025  img/s: 1374.3851366875626  loss: 2.0608 (2.0724)  acc1: 69.7266 (69.5502)  acc5: 87.5000 (87.0545)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [ 800/2503]  eta: 0:10:47  lr: 0.00025  img/s: 1374.9747239625185  loss: 2.0107 (2.0721)  acc1: 70.8984 (69.5725)  acc5: 87.8906 (87.0782)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [ 900/2503]  eta: 0:10:07  lr: 0.00025  img/s: 1376.1880473553276  loss: 2.0283 (2.0728)  acc1: 70.3125 (69.5599)  acc5: 87.6953 (87.0573)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [1000/2503]  eta: 0:09:29  lr: 0.00025  img/s: 1375.5427897949642  loss: 2.0446 (2.0714)  acc1: 69.7266 (69.5689)  acc5: 87.3047 (87.0789)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [1100/2503]  eta: 0:08:50  lr: 0.00025  img/s: 1374.167029594568  loss: 2.0998 (2.0707)  acc1: 69.7266 (69.5696)  acc5: 85.9375 (87.0767)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [1200/2503]  eta: 0:08:12  lr: 0.00025  img/s: 1372.257156878783  loss: 1.9798 (2.0707)  acc1: 69.3359 (69.5560)  acc5: 87.1094 (87.0492)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [1300/2503]  eta: 0:07:33  lr: 0.00025  img/s: 1373.8786716747245  loss: 2.0388 (2.0701)  acc1: 69.9219 (69.5659)  acc5: 87.1094 (87.0505)  time: 0.3734  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [1400/2503]  eta: 0:06:55  lr: 0.00025  img/s: 1373.1679644987819  loss: 2.1186 (2.0718)  acc1: 68.3594 (69.5570)  acc5: 86.7188 (87.0430)  time: 0.3734  data: 0.0005  max mem: 14260\n",
            "Epoch: [8]  [1500/2503]  eta: 0:06:17  lr: 0.00025  img/s: 1367.52956879347  loss: 2.0425 (2.0716)  acc1: 70.3125 (69.5558)  acc5: 87.1094 (87.0372)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [1600/2503]  eta: 0:05:40  lr: 0.00025  img/s: 1375.0328300792244  loss: 2.0820 (2.0712)  acc1: 69.5312 (69.5382)  acc5: 86.5234 (87.0091)  time: 0.3732  data: 0.0005  max mem: 14260\n",
            "Epoch: [8]  [1700/2503]  eta: 0:05:02  lr: 0.00025  img/s: 1373.782872203635  loss: 2.0926 (2.0712)  acc1: 69.7266 (69.5286)  acc5: 86.5234 (86.9977)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [8]  [1800/2503]  eta: 0:04:24  lr: 0.00025  img/s: 1372.195777885978  loss: 1.9958 (2.0698)  acc1: 70.5078 (69.5476)  acc5: 87.3047 (87.0125)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [1900/2503]  eta: 0:03:46  lr: 0.00025  img/s: 1372.7589167683059  loss: 2.0798 (2.0702)  acc1: 69.5312 (69.5529)  acc5: 86.9141 (87.0155)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [2000/2503]  eta: 0:03:09  lr: 0.00025  img/s: 1372.930055154169  loss: 2.0876 (2.0700)  acc1: 69.3359 (69.5524)  acc5: 87.3047 (87.0223)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [2100/2503]  eta: 0:02:31  lr: 0.00025  img/s: 1374.3033384146147  loss: 2.0913 (2.0698)  acc1: 70.1172 (69.5614)  acc5: 86.9141 (87.0259)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [2200/2503]  eta: 0:01:53  lr: 0.00025  img/s: 1375.885617705268  loss: 2.0623 (2.0696)  acc1: 70.1172 (69.5517)  acc5: 87.1094 (87.0260)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [2300/2503]  eta: 0:01:16  lr: 0.00025  img/s: 1373.4235235504755  loss: 2.0851 (2.0702)  acc1: 68.7500 (69.5565)  acc5: 87.1094 (87.0240)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [2400/2503]  eta: 0:00:38  lr: 0.00025  img/s: 1376.1016253181558  loss: 2.1105 (2.0703)  acc1: 69.5312 (69.5624)  acc5: 87.3047 (87.0282)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [8]  [2500/2503]  eta: 0:00:01  lr: 0.00025  img/s: 1376.230380476926  loss: 2.0784 (2.0705)  acc1: 69.1406 (69.5512)  acc5: 86.9141 (87.0310)  time: 0.3724  data: 0.0001  max mem: 14260\n",
            "Epoch: [8] Total time: 0:15:39\n",
            "Test:   [  0/782]  eta: 0:14:16  loss: 0.6473 (0.6473)  acc1: 84.3750 (84.3750)  acc5: 93.7500 (93.7500)  time: 1.0953  data: 1.0812  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:28  loss: 1.0667 (1.0034)  acc1: 76.5625 (77.0421)  acc5: 90.6250 (92.2184)  time: 0.0370  data: 0.0232  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:20  loss: 0.9032 (0.9782)  acc1: 73.4375 (76.7257)  acc5: 95.3125 (93.2914)  time: 0.0260  data: 0.0121  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:16  loss: 0.8587 (0.9763)  acc1: 79.6875 (76.9778)  acc5: 92.1875 (93.5579)  time: 0.0270  data: 0.0130  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:12  loss: 1.8124 (1.1293)  acc1: 60.9375 (74.2012)  acc5: 82.8125 (91.7706)  time: 0.0265  data: 0.0126  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.8634 (1.2136)  acc1: 56.2500 (72.7389)  acc5: 84.3750 (90.6468)  time: 0.0345  data: 0.0206  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:05  loss: 1.4885 (1.2794)  acc1: 64.0625 (71.4434)  acc5: 82.8125 (89.8814)  time: 0.0287  data: 0.0148  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3933 (1.3320)  acc1: 70.3125 (70.4774)  acc5: 87.5000 (89.1962)  time: 0.0231  data: 0.0091  max mem: 14260\n",
            "Test:  Total time: 0:00:25\n",
            "Test:  Acc@1 70.380 Acc@5 89.206\n",
            "Epoch: [9]  [   0/2503]  eta: 3:25:47  lr: 0.000125  img/s: 1382.3981542993392  loss: 2.0060 (2.0060)  acc1: 70.3125 (70.3125)  acc5: 87.8906 (87.8906)  time: 4.9332  data: 4.5628  max mem: 14260\n",
            "Epoch: [9]  [ 100/2503]  eta: 0:17:10  lr: 0.000125  img/s: 1379.8098694262646  loss: 2.0278 (2.0596)  acc1: 70.5078 (70.0379)  acc5: 86.3281 (87.2215)  time: 0.3722  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [ 200/2503]  eta: 0:15:23  lr: 0.000125  img/s: 1373.381362929101  loss: 2.0648 (2.0507)  acc1: 69.1406 (69.8354)  acc5: 86.7188 (87.1978)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [ 300/2503]  eta: 0:14:23  lr: 0.000125  img/s: 1370.5756065680907  loss: 2.0451 (2.0516)  acc1: 70.5078 (69.8953)  acc5: 86.9141 (87.2320)  time: 0.3739  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [ 400/2503]  eta: 0:13:34  lr: 0.000125  img/s: 1373.5842846689557  loss: 2.0512 (2.0560)  acc1: 68.5547 (69.8021)  acc5: 87.5000 (87.1858)  time: 0.3736  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [ 500/2503]  eta: 0:12:50  lr: 0.000125  img/s: 1375.2600193018172  loss: 2.0238 (2.0557)  acc1: 69.9219 (69.7995)  acc5: 87.1094 (87.2392)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [ 600/2503]  eta: 0:12:08  lr: 0.000125  img/s: 1373.601856471425  loss: 2.0332 (2.0551)  acc1: 69.5312 (69.8397)  acc5: 86.7188 (87.2312)  time: 0.3732  data: 0.0005  max mem: 14260\n",
            "Epoch: [9]  [ 700/2503]  eta: 0:11:27  lr: 0.000125  img/s: 1376.009042322452  loss: 1.9914 (2.0538)  acc1: 70.3125 (69.8528)  acc5: 87.1094 (87.2197)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [ 800/2503]  eta: 0:10:47  lr: 0.000125  img/s: 1373.102992140484  loss: 2.0790 (2.0518)  acc1: 69.5312 (69.8431)  acc5: 87.6953 (87.2196)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [ 900/2503]  eta: 0:10:08  lr: 0.000125  img/s: 1372.0940762142566  loss: 2.0324 (2.0535)  acc1: 70.3125 (69.8267)  acc5: 87.8906 (87.1774)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [1000/2503]  eta: 0:09:29  lr: 0.000125  img/s: 1374.0650355212113  loss: 2.0063 (2.0558)  acc1: 70.3125 (69.8356)  acc5: 87.5000 (87.1644)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [1100/2503]  eta: 0:08:50  lr: 0.000125  img/s: 1374.108117377561  loss: 2.0290 (2.0554)  acc1: 69.3359 (69.8089)  acc5: 86.7188 (87.1386)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [1200/2503]  eta: 0:08:12  lr: 0.000125  img/s: 1370.906335901741  loss: 2.0536 (2.0560)  acc1: 69.7266 (69.8074)  acc5: 86.7188 (87.1367)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [1300/2503]  eta: 0:07:34  lr: 0.000125  img/s: 1372.0020316645787  loss: 2.0173 (2.0568)  acc1: 69.5312 (69.7901)  acc5: 87.1094 (87.1280)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [1400/2503]  eta: 0:06:56  lr: 0.000125  img/s: 1372.0423544234407  loss: 2.1000 (2.0587)  acc1: 69.1406 (69.7776)  acc5: 87.3047 (87.1274)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [1500/2503]  eta: 0:06:18  lr: 0.000125  img/s: 1375.2415243491648  loss: 2.0611 (2.0592)  acc1: 68.7500 (69.7620)  acc5: 86.3281 (87.1099)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [9]  [1600/2503]  eta: 0:05:40  lr: 0.000125  img/s: 1372.8756372972396  loss: 2.0571 (2.0591)  acc1: 69.1406 (69.7624)  acc5: 86.5234 (87.1041)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [1700/2503]  eta: 0:05:02  lr: 0.000125  img/s: 1374.8435630582644  loss: 2.0257 (2.0585)  acc1: 70.5078 (69.7687)  acc5: 88.0859 (87.1151)  time: 0.3730  data: 0.0002  max mem: 14260\n",
            "Epoch: [9]  [1800/2503]  eta: 0:04:24  lr: 0.000125  img/s: 1373.9533869568445  loss: 2.1389 (2.0594)  acc1: 69.5312 (69.7545)  acc5: 86.7188 (87.1051)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [1900/2503]  eta: 0:03:46  lr: 0.000125  img/s: 1372.5588706007475  loss: 2.0832 (2.0594)  acc1: 70.3125 (69.7470)  acc5: 86.9141 (87.1027)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [2000/2503]  eta: 0:03:09  lr: 0.000125  img/s: 1372.5290441500738  loss: 2.0365 (2.0601)  acc1: 70.5078 (69.7273)  acc5: 87.3047 (87.0952)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [2100/2503]  eta: 0:02:31  lr: 0.000125  img/s: 1373.7494773635067  loss: 2.0595 (2.0607)  acc1: 69.3359 (69.7347)  acc5: 86.9141 (87.0952)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [2200/2503]  eta: 0:01:53  lr: 0.000125  img/s: 1375.2741110282134  loss: 2.0011 (2.0604)  acc1: 69.7266 (69.7380)  acc5: 87.3047 (87.0957)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [2300/2503]  eta: 0:01:16  lr: 0.000125  img/s: 1375.9006037991164  loss: 2.0468 (2.0588)  acc1: 68.5547 (69.7525)  acc5: 86.5234 (87.1121)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [2400/2503]  eta: 0:00:38  lr: 0.000125  img/s: 1373.2689472590366  loss: 2.0125 (2.0589)  acc1: 69.1406 (69.7448)  acc5: 87.5000 (87.1090)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [9]  [2500/2503]  eta: 0:00:01  lr: 0.000125  img/s: 1373.3269092906673  loss: 2.0498 (2.0590)  acc1: 68.7500 (69.7391)  acc5: 87.3047 (87.1050)  time: 0.3724  data: 0.0001  max mem: 14260\n",
            "Epoch: [9] Total time: 0:15:39\n",
            "Test:   [  0/782]  eta: 0:14:51  loss: 0.6752 (0.6752)  acc1: 84.3750 (84.3750)  acc5: 95.3125 (95.3125)  time: 1.1400  data: 1.1260  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:28  loss: 1.0416 (1.0025)  acc1: 76.5625 (77.1813)  acc5: 89.0625 (92.1720)  time: 0.0410  data: 0.0270  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:21  loss: 0.9427 (0.9753)  acc1: 73.4375 (76.7879)  acc5: 96.8750 (93.2914)  time: 0.0282  data: 0.0143  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:17  loss: 0.8311 (0.9728)  acc1: 78.1250 (77.1024)  acc5: 92.1875 (93.5787)  time: 0.0382  data: 0.0243  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.8319 (1.1207)  acc1: 60.9375 (74.4233)  acc5: 82.8125 (91.8056)  time: 0.0262  data: 0.0124  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.8749 (1.2052)  acc1: 57.8125 (72.9104)  acc5: 84.3750 (90.6531)  time: 0.0289  data: 0.0151  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4533 (1.2725)  acc1: 65.6250 (71.5266)  acc5: 82.8125 (89.8658)  time: 0.0287  data: 0.0147  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3377 (1.3256)  acc1: 70.3125 (70.5020)  acc5: 87.5000 (89.1784)  time: 0.0303  data: 0.0164  max mem: 14260\n",
            "Test:  Total time: 0:00:26\n",
            "Test:  Acc@1 70.452 Acc@5 89.192\n",
            "Epoch: [10]  [   0/2503]  eta: 4:32:54  lr: 0.000125  img/s: 1382.579715743309  loss: 1.9930 (1.9930)  acc1: 71.2891 (71.2891)  acc5: 88.2812 (88.2812)  time: 6.5421  data: 6.1717  max mem: 14260\n",
            "Epoch: [10]  [ 100/2503]  eta: 0:17:19  lr: 0.000125  img/s: 1378.5184732510816  loss: 1.9991 (2.0550)  acc1: 69.3359 (69.9606)  acc5: 87.6953 (87.4188)  time: 0.3721  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [ 200/2503]  eta: 0:15:27  lr: 0.000125  img/s: 1371.4903139156074  loss: 2.0567 (2.0572)  acc1: 69.3359 (69.8140)  acc5: 86.5234 (87.2464)  time: 0.3736  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [ 300/2503]  eta: 0:14:26  lr: 0.000125  img/s: 1370.4811417806243  loss: 2.0618 (2.0589)  acc1: 70.1172 (69.7843)  acc5: 87.5000 (87.2275)  time: 0.3737  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [ 400/2503]  eta: 0:13:36  lr: 0.000125  img/s: 1370.7978250906106  loss: 2.0730 (2.0551)  acc1: 69.5312 (69.7602)  acc5: 86.5234 (87.2009)  time: 0.3735  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [ 500/2503]  eta: 0:12:51  lr: 0.000125  img/s: 1373.3629184711845  loss: 2.0050 (2.0545)  acc1: 70.8984 (69.7757)  acc5: 87.3047 (87.2033)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [ 600/2503]  eta: 0:12:09  lr: 0.000125  img/s: 1375.585964368369  loss: 2.0323 (2.0552)  acc1: 69.9219 (69.8081)  acc5: 86.9141 (87.2082)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [ 700/2503]  eta: 0:11:28  lr: 0.000125  img/s: 1373.3725797921156  loss: 2.0423 (2.0540)  acc1: 70.7031 (69.8168)  acc5: 87.5000 (87.1893)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [ 800/2503]  eta: 0:10:48  lr: 0.000125  img/s: 1376.9972370075554  loss: 2.0162 (2.0539)  acc1: 70.5078 (69.8663)  acc5: 87.8906 (87.2198)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [ 900/2503]  eta: 0:10:09  lr: 0.000125  img/s: 1375.6441221594093  loss: 2.0468 (2.0543)  acc1: 69.5312 (69.8345)  acc5: 85.7422 (87.2141)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [1000/2503]  eta: 0:09:30  lr: 0.000125  img/s: 1370.3455900698802  loss: 2.0709 (2.0541)  acc1: 69.3359 (69.8107)  acc5: 87.1094 (87.2145)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [1100/2503]  eta: 0:08:51  lr: 0.000125  img/s: 1375.358667451433  loss: 2.0207 (2.0531)  acc1: 70.1172 (69.8209)  acc5: 87.5000 (87.2023)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [1200/2503]  eta: 0:08:12  lr: 0.000125  img/s: 1371.2451051444912  loss: 2.1047 (2.0534)  acc1: 69.1406 (69.8272)  acc5: 86.5234 (87.1899)  time: 0.3736  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [1300/2503]  eta: 0:07:34  lr: 0.000125  img/s: 1372.708022301002  loss: 2.0050 (2.0531)  acc1: 69.7266 (69.8270)  acc5: 87.5000 (87.1895)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [1400/2503]  eta: 0:06:56  lr: 0.000125  img/s: 1374.0430560229931  loss: 2.0484 (2.0529)  acc1: 70.1172 (69.8191)  acc5: 87.5000 (87.1819)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [1500/2503]  eta: 0:06:18  lr: 0.000125  img/s: 1373.6159142371016  loss: 2.0442 (2.0530)  acc1: 69.1406 (69.8084)  acc5: 87.3047 (87.1868)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [1600/2503]  eta: 0:05:40  lr: 0.000125  img/s: 1375.1508182129628  loss: 2.0196 (2.0531)  acc1: 70.1172 (69.8065)  acc5: 86.9141 (87.1785)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [1700/2503]  eta: 0:05:02  lr: 0.000125  img/s: 1375.0856580835482  loss: 2.1207 (2.0539)  acc1: 69.1406 (69.8000)  acc5: 87.1094 (87.1728)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [1800/2503]  eta: 0:04:24  lr: 0.000125  img/s: 1376.025794512771  loss: 2.0315 (2.0537)  acc1: 69.5312 (69.8069)  acc5: 87.3047 (87.1767)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [10]  [1900/2503]  eta: 0:03:46  lr: 0.000125  img/s: 1371.8214849002668  loss: 2.0731 (2.0541)  acc1: 69.1406 (69.8190)  acc5: 86.1328 (87.1675)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [2000/2503]  eta: 0:03:09  lr: 0.000125  img/s: 1375.8388984491824  loss: 2.0230 (2.0535)  acc1: 69.3359 (69.8203)  acc5: 87.5000 (87.1698)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [2100/2503]  eta: 0:02:31  lr: 0.000125  img/s: 1373.8479089203872  loss: 2.0099 (2.0527)  acc1: 69.9219 (69.8253)  acc5: 87.5000 (87.1809)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [2200/2503]  eta: 0:01:53  lr: 0.000125  img/s: 1373.565834760211  loss: 1.9812 (2.0526)  acc1: 69.9219 (69.8284)  acc5: 87.3047 (87.1823)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [2300/2503]  eta: 0:01:16  lr: 0.000125  img/s: 1372.2185751566162  loss: 2.0163 (2.0535)  acc1: 69.1406 (69.8205)  acc5: 87.3047 (87.1743)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [2400/2503]  eta: 0:00:38  lr: 0.000125  img/s: 1374.3147719964034  loss: 2.0282 (2.0529)  acc1: 70.8984 (69.8330)  acc5: 87.1094 (87.1781)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [10]  [2500/2503]  eta: 0:00:01  lr: 0.000125  img/s: 1374.5285270446548  loss: 1.9206 (2.0523)  acc1: 70.3125 (69.8248)  acc5: 88.2812 (87.1807)  time: 0.3722  data: 0.0001  max mem: 14260\n",
            "Epoch: [10] Total time: 0:15:40\n",
            "Test:   [  0/782]  eta: 0:14:52  loss: 0.6813 (0.6813)  acc1: 84.3750 (84.3750)  acc5: 92.1875 (92.1875)  time: 1.1410  data: 1.1270  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:28  loss: 1.0771 (0.9967)  acc1: 76.5625 (77.0885)  acc5: 90.6250 (92.3267)  time: 0.0389  data: 0.0251  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:20  loss: 0.9244 (0.9740)  acc1: 75.0000 (76.5858)  acc5: 96.8750 (93.4624)  time: 0.0259  data: 0.0120  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:16  loss: 0.8188 (0.9703)  acc1: 78.1250 (77.0089)  acc5: 92.1875 (93.6618)  time: 0.0312  data: 0.0173  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:12  loss: 1.8375 (1.1196)  acc1: 60.9375 (74.2519)  acc5: 84.3750 (91.9070)  time: 0.0265  data: 0.0126  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.8717 (1.2043)  acc1: 57.8125 (72.7233)  acc5: 84.3750 (90.7622)  time: 0.0313  data: 0.0174  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:05  loss: 1.4483 (1.2716)  acc1: 65.6250 (71.4018)  acc5: 82.8125 (89.9334)  time: 0.0292  data: 0.0153  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3360 (1.3255)  acc1: 70.3125 (70.3883)  acc5: 85.9375 (89.2052)  time: 0.0277  data: 0.0137  max mem: 14260\n",
            "Test:  Total time: 0:00:25\n",
            "Test:  Acc@1 70.322 Acc@5 89.218\n",
            "Epoch: [11]  [   0/2503]  eta: 4:20:37  lr: 6.25e-05  img/s: 1381.262697904774  loss: 2.1742 (2.1742)  acc1: 66.9922 (66.9922)  acc5: 86.5234 (86.5234)  time: 6.2474  data: 5.8767  max mem: 14260\n",
            "Epoch: [11]  [ 100/2503]  eta: 0:17:12  lr: 6.25e-05  img/s: 1376.4323806019943  loss: 2.0443 (2.0504)  acc1: 70.1172 (69.6724)  acc5: 87.3047 (87.2931)  time: 0.3722  data: 0.0004  max mem: 14260\n",
            "Epoch: [11]  [ 200/2503]  eta: 0:15:24  lr: 6.25e-05  img/s: 1374.9993744425528  loss: 2.0107 (2.0489)  acc1: 70.5078 (69.8296)  acc5: 87.1094 (87.2882)  time: 0.3734  data: 0.0004  max mem: 14260\n",
            "Epoch: [11]  [ 300/2503]  eta: 0:14:24  lr: 6.25e-05  img/s: 1371.8118453900383  loss: 1.9970 (2.0489)  acc1: 70.1172 (69.9193)  acc5: 87.1094 (87.2443)  time: 0.3738  data: 0.0004  max mem: 14260\n",
            "Epoch: [11]  [ 400/2503]  eta: 0:13:35  lr: 6.25e-05  img/s: 1374.7150672479243  loss: 2.0609 (2.0489)  acc1: 70.1172 (69.9686)  acc5: 87.3047 (87.2414)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [11]  [ 500/2503]  eta: 0:12:50  lr: 6.25e-05  img/s: 1373.91910481851  loss: 1.9974 (2.0468)  acc1: 68.9453 (69.9367)  acc5: 87.8906 (87.2415)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [11]  [ 600/2503]  eta: 0:12:08  lr: 6.25e-05  img/s: 1373.854940285636  loss: 2.0522 (2.0475)  acc1: 68.9453 (69.8640)  acc5: 87.3047 (87.2348)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [11]  [ 700/2503]  eta: 0:11:27  lr: 6.25e-05  img/s: 1373.041537460183  loss: 2.0108 (2.0488)  acc1: 69.9219 (69.8528)  acc5: 87.3047 (87.1835)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [ 800/2503]  eta: 0:10:47  lr: 6.25e-05  img/s: 1375.1340873202387  loss: 2.0206 (2.0482)  acc1: 69.3359 (69.8631)  acc5: 87.6953 (87.1889)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [11]  [ 900/2503]  eta: 0:10:08  lr: 6.25e-05  img/s: 1376.1695274335636  loss: 2.0308 (2.0494)  acc1: 69.3359 (69.8616)  acc5: 87.1094 (87.1690)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [1000/2503]  eta: 0:09:29  lr: 6.25e-05  img/s: 1372.219451989845  loss: 2.0692 (2.0504)  acc1: 69.5312 (69.8571)  acc5: 86.5234 (87.1572)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [11]  [1100/2503]  eta: 0:08:50  lr: 6.25e-05  img/s: 1376.1139706589122  loss: 2.0327 (2.0503)  acc1: 70.1172 (69.8695)  acc5: 86.9141 (87.1390)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [11]  [1200/2503]  eta: 0:08:12  lr: 6.25e-05  img/s: 1374.2620032061025  loss: 2.0255 (2.0494)  acc1: 69.5312 (69.8687)  acc5: 87.6953 (87.1538)  time: 0.3732  data: 0.0005  max mem: 14260\n",
            "Epoch: [11]  [1300/2503]  eta: 0:07:34  lr: 6.25e-05  img/s: 1373.102992140484  loss: 2.0055 (2.0487)  acc1: 70.1172 (69.8582)  acc5: 87.6953 (87.1675)  time: 0.3734  data: 0.0005  max mem: 14260\n",
            "Epoch: [11]  [1400/2503]  eta: 0:06:56  lr: 6.25e-05  img/s: 1372.7246942108611  loss: 2.0225 (2.0474)  acc1: 70.3125 (69.8951)  acc5: 87.8906 (87.1913)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [1500/2503]  eta: 0:06:18  lr: 6.25e-05  img/s: 1373.4586593790193  loss: 1.9940 (2.0476)  acc1: 70.3125 (69.9000)  acc5: 87.5000 (87.1904)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [1600/2503]  eta: 0:05:40  lr: 6.25e-05  img/s: 1375.3930214684633  loss: 2.0315 (2.0465)  acc1: 68.9453 (69.9097)  acc5: 86.3281 (87.1944)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [1700/2503]  eta: 0:05:02  lr: 6.25e-05  img/s: 1376.0108056916981  loss: 2.0554 (2.0463)  acc1: 69.9219 (69.9079)  acc5: 86.9141 (87.1987)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [1800/2503]  eta: 0:04:24  lr: 6.25e-05  img/s: 1373.0424153454562  loss: 2.0084 (2.0453)  acc1: 70.3125 (69.9111)  acc5: 87.3047 (87.2084)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [1900/2503]  eta: 0:03:46  lr: 6.25e-05  img/s: 1374.3868958971598  loss: 2.0449 (2.0455)  acc1: 69.9219 (69.9041)  acc5: 87.3047 (87.2079)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [2000/2503]  eta: 0:03:09  lr: 6.25e-05  img/s: 1375.017862882623  loss: 2.0545 (2.0451)  acc1: 70.7031 (69.9195)  acc5: 86.9141 (87.2159)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [2100/2503]  eta: 0:02:31  lr: 6.25e-05  img/s: 1375.8988407123077  loss: 2.0061 (2.0450)  acc1: 69.7266 (69.9315)  acc5: 87.3047 (87.2093)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [2200/2503]  eta: 0:01:53  lr: 6.25e-05  img/s: 1372.419399224026  loss: 2.1206 (2.0444)  acc1: 70.1172 (69.9415)  acc5: 87.1094 (87.2164)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [2300/2503]  eta: 0:01:16  lr: 6.25e-05  img/s: 1373.0722641127009  loss: 2.0822 (2.0451)  acc1: 69.1406 (69.9367)  acc5: 86.9141 (87.2123)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [2400/2503]  eta: 0:00:38  lr: 6.25e-05  img/s: 1373.2557747523645  loss: 2.1119 (2.0445)  acc1: 68.7500 (69.9370)  acc5: 87.1094 (87.2118)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [11]  [2500/2503]  eta: 0:00:01  lr: 6.25e-05  img/s: 1373.3330570658604  loss: 2.0422 (2.0442)  acc1: 69.5312 (69.9368)  acc5: 86.5234 (87.2109)  time: 0.3725  data: 0.0002  max mem: 14260\n",
            "Epoch: [11] Total time: 0:15:39\n",
            "Test:   [  0/782]  eta: 0:20:32  loss: 0.6754 (0.6754)  acc1: 84.3750 (84.3750)  acc5: 93.7500 (93.7500)  time: 1.5756  data: 1.5614  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:33  loss: 1.0414 (0.9982)  acc1: 76.5625 (77.1349)  acc5: 90.6250 (92.2494)  time: 0.0401  data: 0.0261  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:22  loss: 0.9151 (0.9707)  acc1: 73.4375 (76.7646)  acc5: 95.3125 (93.3302)  time: 0.0253  data: 0.0114  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:18  loss: 0.8344 (0.9680)  acc1: 78.1250 (77.1439)  acc5: 93.7500 (93.5683)  time: 0.0422  data: 0.0281  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.8671 (1.1180)  acc1: 60.9375 (74.3688)  acc5: 84.3750 (91.8524)  time: 0.0247  data: 0.0108  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.8661 (1.2026)  acc1: 56.2500 (72.7982)  acc5: 84.3750 (90.7186)  time: 0.0266  data: 0.0127  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4113 (1.2681)  acc1: 65.6250 (71.4642)  acc5: 84.3750 (89.9542)  time: 0.0232  data: 0.0093  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3475 (1.3217)  acc1: 68.7500 (70.4351)  acc5: 87.5000 (89.2542)  time: 0.0323  data: 0.0185  max mem: 14260\n",
            "Test:  Total time: 0:00:25\n",
            "Test:  Acc@1 70.374 Acc@5 89.256\n",
            "Epoch: [12]  [   0/2503]  eta: 3:09:11  lr: 6.25e-05  img/s: 1383.3251512006839  loss: 2.3231 (2.3231)  acc1: 68.5547 (68.5547)  acc5: 85.5469 (85.5469)  time: 4.5352  data: 4.1650  max mem: 14260\n",
            "Epoch: [12]  [ 100/2503]  eta: 0:16:32  lr: 6.25e-05  img/s: 1375.9808290291555  loss: 1.9817 (2.0261)  acc1: 70.5078 (70.1501)  acc5: 87.3047 (87.3588)  time: 0.3724  data: 0.0004  max mem: 14260\n",
            "Epoch: [12]  [ 200/2503]  eta: 0:15:05  lr: 6.25e-05  img/s: 1371.7110768029913  loss: 2.0268 (2.0401)  acc1: 69.9219 (70.0200)  acc5: 87.5000 (87.3066)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [12]  [ 300/2503]  eta: 0:14:11  lr: 6.25e-05  img/s: 1372.0975829174947  loss: 2.0198 (2.0401)  acc1: 70.1172 (69.9692)  acc5: 87.1094 (87.2599)  time: 0.3737  data: 0.0004  max mem: 14260\n",
            "Epoch: [12]  [ 400/2503]  eta: 0:13:26  lr: 6.25e-05  img/s: 1373.314613905404  loss: 1.9947 (2.0414)  acc1: 69.5312 (69.9116)  acc5: 86.9141 (87.2779)  time: 0.3735  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [ 500/2503]  eta: 0:12:43  lr: 6.25e-05  img/s: 1372.5036049664877  loss: 2.0371 (2.0364)  acc1: 70.5078 (69.9741)  acc5: 87.8906 (87.3409)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [12]  [ 600/2503]  eta: 0:12:03  lr: 6.25e-05  img/s: 1373.3330570658604  loss: 2.0693 (2.0400)  acc1: 69.7266 (69.9661)  acc5: 86.9141 (87.2803)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [12]  [ 700/2503]  eta: 0:11:23  lr: 6.25e-05  img/s: 1373.9911871950085  loss: 2.0024 (2.0375)  acc1: 70.8984 (69.9826)  acc5: 87.3047 (87.2640)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [12]  [ 800/2503]  eta: 0:10:44  lr: 6.25e-05  img/s: 1370.92558961421  loss: 2.0543 (2.0381)  acc1: 69.9219 (69.9672)  acc5: 86.7188 (87.2486)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [12]  [ 900/2503]  eta: 0:10:05  lr: 6.25e-05  img/s: 1373.4639299083562  loss: 2.0341 (2.0390)  acc1: 69.7266 (69.9670)  acc5: 87.3047 (87.2446)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [12]  [1000/2503]  eta: 0:09:27  lr: 6.25e-05  img/s: 1371.6593838823205  loss: 2.0126 (2.0369)  acc1: 70.8984 (70.0270)  acc5: 87.1094 (87.2698)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [12]  [1100/2503]  eta: 0:08:48  lr: 6.25e-05  img/s: 1372.8299998401815  loss: 2.0512 (2.0370)  acc1: 68.5547 (70.0012)  acc5: 86.7188 (87.2460)  time: 0.3733  data: 0.0005  max mem: 14260\n",
            "Epoch: [12]  [1200/2503]  eta: 0:08:10  lr: 6.25e-05  img/s: 1375.3128647648884  loss: 2.0615 (2.0393)  acc1: 69.3359 (69.9552)  acc5: 86.7188 (87.2149)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [1300/2503]  eta: 0:07:32  lr: 6.25e-05  img/s: 1375.0125804204401  loss: 1.9920 (2.0386)  acc1: 70.3125 (69.9842)  acc5: 87.5000 (87.2299)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [1400/2503]  eta: 0:06:54  lr: 6.25e-05  img/s: 1375.3243151505214  loss: 2.0218 (2.0401)  acc1: 70.5078 (69.9591)  acc5: 87.1094 (87.2227)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [1500/2503]  eta: 0:06:17  lr: 6.25e-05  img/s: 1374.8048357647879  loss: 2.0416 (2.0395)  acc1: 69.5312 (69.9544)  acc5: 86.7188 (87.2231)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [1600/2503]  eta: 0:05:39  lr: 6.25e-05  img/s: 1374.7432286578692  loss: 2.0357 (2.0403)  acc1: 69.7266 (69.9436)  acc5: 87.1094 (87.2176)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [1700/2503]  eta: 0:05:01  lr: 6.25e-05  img/s: 1374.8479640251246  loss: 1.9842 (2.0392)  acc1: 69.3359 (69.9500)  acc5: 87.3047 (87.2301)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [1800/2503]  eta: 0:04:23  lr: 6.25e-05  img/s: 1375.505785174785  loss: 2.0072 (2.0403)  acc1: 69.3359 (69.9105)  acc5: 86.9141 (87.2120)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [1900/2503]  eta: 0:03:46  lr: 6.25e-05  img/s: 1372.3781772634027  loss: 2.0518 (2.0407)  acc1: 69.5312 (69.9174)  acc5: 87.1094 (87.2151)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [2000/2503]  eta: 0:03:08  lr: 6.25e-05  img/s: 1375.270588069541  loss: 2.0746 (2.0404)  acc1: 69.5312 (69.9352)  acc5: 87.1094 (87.2322)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [2100/2503]  eta: 0:02:31  lr: 6.25e-05  img/s: 1372.6615185874805  loss: 2.0678 (2.0406)  acc1: 69.9219 (69.9291)  acc5: 87.8906 (87.2411)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [2200/2503]  eta: 0:01:53  lr: 6.25e-05  img/s: 1372.3729150636889  loss: 1.9724 (2.0405)  acc1: 69.7266 (69.9273)  acc5: 87.6953 (87.2538)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [2300/2503]  eta: 0:01:16  lr: 6.25e-05  img/s: 1372.3299419431316  loss: 2.0217 (2.0405)  acc1: 70.8984 (69.9357)  acc5: 87.5000 (87.2557)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [2400/2503]  eta: 0:00:38  lr: 6.25e-05  img/s: 1374.4467117032068  loss: 2.0661 (2.0404)  acc1: 69.5312 (69.9217)  acc5: 86.7188 (87.2521)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [12]  [2500/2503]  eta: 0:00:01  lr: 6.25e-05  img/s: 1374.0791027691023  loss: 2.0501 (2.0406)  acc1: 68.9453 (69.9067)  acc5: 86.3281 (87.2391)  time: 0.3724  data: 0.0001  max mem: 14260\n",
            "Epoch: [12] Total time: 0:15:38\n",
            "Test:   [  0/782]  eta: 0:15:40  loss: 0.6648 (0.6648)  acc1: 84.3750 (84.3750)  acc5: 92.1875 (92.1875)  time: 1.2028  data: 1.1888  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:29  loss: 1.0592 (0.9972)  acc1: 76.5625 (77.2741)  acc5: 89.0625 (92.2339)  time: 0.0427  data: 0.0288  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:21  loss: 0.8749 (0.9692)  acc1: 75.0000 (77.0367)  acc5: 96.8750 (93.3691)  time: 0.0256  data: 0.0117  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:17  loss: 0.8319 (0.9672)  acc1: 78.1250 (77.2996)  acc5: 93.7500 (93.6254)  time: 0.0395  data: 0.0256  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.7955 (1.1164)  acc1: 59.3750 (74.5558)  acc5: 84.3750 (91.8758)  time: 0.0310  data: 0.0170  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.8665 (1.2014)  acc1: 56.2500 (73.0009)  acc5: 84.3750 (90.7498)  time: 0.0372  data: 0.0232  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4344 (1.2676)  acc1: 65.6250 (71.6046)  acc5: 82.8125 (89.9620)  time: 0.0279  data: 0.0139  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3379 (1.3211)  acc1: 70.3125 (70.6023)  acc5: 85.9375 (89.2386)  time: 0.0267  data: 0.0128  max mem: 14260\n",
            "Test:  Total time: 0:00:26\n",
            "Test:  Acc@1 70.510 Acc@5 89.252\n",
            "Epoch: [13]  [   0/2503]  eta: 2:56:58  lr: 3.125e-05  img/s: 1362.685699781715  loss: 2.0875 (2.0875)  acc1: 70.8984 (70.8984)  acc5: 87.8906 (87.8906)  time: 4.2422  data: 3.8664  max mem: 14260\n",
            "Epoch: [13]  [ 100/2503]  eta: 0:16:36  lr: 3.125e-05  img/s: 1375.1085514793976  loss: 2.0670 (2.0296)  acc1: 69.7266 (69.8851)  acc5: 87.1094 (87.3936)  time: 0.3722  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [ 200/2503]  eta: 0:15:07  lr: 3.125e-05  img/s: 1370.70945397796  loss: 2.0368 (2.0414)  acc1: 69.3359 (69.8801)  acc5: 87.5000 (87.2658)  time: 0.3735  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [ 300/2503]  eta: 0:14:13  lr: 3.125e-05  img/s: 1371.2687464560654  loss: 2.0066 (2.0358)  acc1: 69.7266 (69.9193)  acc5: 86.9141 (87.3027)  time: 0.3736  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [ 400/2503]  eta: 0:13:27  lr: 3.125e-05  img/s: 1373.9437174625577  loss: 2.0444 (2.0372)  acc1: 70.1172 (69.9238)  acc5: 86.5234 (87.2779)  time: 0.3733  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [ 500/2503]  eta: 0:12:44  lr: 3.125e-05  img/s: 1374.8092364837137  loss: 2.0196 (2.0386)  acc1: 70.1172 (69.9577)  acc5: 87.5000 (87.2400)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [ 600/2503]  eta: 0:12:03  lr: 3.125e-05  img/s: 1373.2390899400118  loss: 2.0298 (2.0383)  acc1: 69.5312 (69.9352)  acc5: 87.3047 (87.2299)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [ 700/2503]  eta: 0:11:23  lr: 3.125e-05  img/s: 1376.6750355470137  loss: 2.0218 (2.0391)  acc1: 70.3125 (69.9391)  acc5: 87.1094 (87.2077)  time: 0.3732  data: 0.0005  max mem: 14260\n",
            "Epoch: [13]  [ 800/2503]  eta: 0:10:44  lr: 3.125e-05  img/s: 1371.932787239235  loss: 2.0102 (2.0380)  acc1: 70.3125 (69.9453)  acc5: 88.2812 (87.2189)  time: 0.3733  data: 0.0005  max mem: 14260\n",
            "Epoch: [13]  [ 900/2503]  eta: 0:10:05  lr: 3.125e-05  img/s: 1374.2936639939999  loss: 2.0232 (2.0380)  acc1: 70.3125 (69.9494)  acc5: 86.9141 (87.2032)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [13]  [1000/2503]  eta: 0:09:27  lr: 3.125e-05  img/s: 1375.566579530773  loss: 2.0282 (2.0361)  acc1: 70.7031 (69.9773)  acc5: 87.5000 (87.2298)  time: 0.3731  data: 0.0004  max mem: 14260\n",
            "Epoch: [13]  [1100/2503]  eta: 0:08:49  lr: 3.125e-05  img/s: 1374.9175030411677  loss: 2.0242 (2.0356)  acc1: 70.7031 (70.0044)  acc5: 87.3047 (87.2437)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [13]  [1200/2503]  eta: 0:08:10  lr: 3.125e-05  img/s: 1376.128079891011  loss: 1.9957 (2.0354)  acc1: 69.9219 (70.0115)  acc5: 86.9141 (87.2474)  time: 0.3731  data: 0.0002  max mem: 14260\n",
            "Epoch: [13]  [1300/2503]  eta: 0:07:32  lr: 3.125e-05  img/s: 1372.9458546948713  loss: 1.9955 (2.0338)  acc1: 69.7266 (70.0258)  acc5: 86.9141 (87.2739)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [1400/2503]  eta: 0:06:55  lr: 3.125e-05  img/s: 1373.6097639292127  loss: 2.0231 (2.0348)  acc1: 69.9219 (70.0185)  acc5: 87.6953 (87.2631)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [1500/2503]  eta: 0:06:17  lr: 3.125e-05  img/s: 1373.1100158827999  loss: 1.9771 (2.0341)  acc1: 69.7266 (70.0170)  acc5: 87.8906 (87.2686)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [1600/2503]  eta: 0:05:39  lr: 3.125e-05  img/s: 1372.219451989845  loss: 1.9552 (2.0336)  acc1: 70.5078 (70.0344)  acc5: 88.0859 (87.2886)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [1700/2503]  eta: 0:05:01  lr: 3.125e-05  img/s: 1376.569139232295  loss: 2.0286 (2.0346)  acc1: 69.7266 (70.0382)  acc5: 87.3047 (87.2775)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [1800/2503]  eta: 0:04:24  lr: 3.125e-05  img/s: 1376.067235894002  loss: 2.0475 (2.0350)  acc1: 69.9219 (70.0394)  acc5: 86.7188 (87.2817)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [1900/2503]  eta: 0:03:46  lr: 3.125e-05  img/s: 1372.440449690967  loss: 2.0581 (2.0364)  acc1: 70.3125 (70.0344)  acc5: 87.1094 (87.2695)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [2000/2503]  eta: 0:03:08  lr: 3.125e-05  img/s: 1372.9265441945063  loss: 2.0339 (2.0362)  acc1: 70.1172 (70.0219)  acc5: 87.5000 (87.2702)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [2100/2503]  eta: 0:02:31  lr: 3.125e-05  img/s: 1371.8556624372518  loss: 1.9990 (2.0362)  acc1: 70.3125 (70.0184)  acc5: 87.3047 (87.2685)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [2200/2503]  eta: 0:01:53  lr: 3.125e-05  img/s: 1375.8512390763947  loss: 1.9911 (2.0363)  acc1: 70.7031 (70.0283)  acc5: 87.1094 (87.2624)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [2300/2503]  eta: 0:01:16  lr: 3.125e-05  img/s: 1374.9351089712397  loss: 2.0383 (2.0371)  acc1: 69.9219 (70.0119)  acc5: 87.5000 (87.2586)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [13]  [2400/2503]  eta: 0:00:38  lr: 3.125e-05  img/s: 1374.646428644585  loss: 2.0366 (2.0371)  acc1: 68.3594 (70.0009)  acc5: 86.5234 (87.2537)  time: 0.3730  data: 0.0002  max mem: 14260\n",
            "Epoch: [13]  [2500/2503]  eta: 0:00:01  lr: 3.125e-05  img/s: 1375.8406613823731  loss: 2.0402 (2.0375)  acc1: 70.3125 (69.9959)  acc5: 87.6953 (87.2585)  time: 0.3727  data: 0.0002  max mem: 14260\n",
            "Epoch: [13] Total time: 0:15:38\n",
            "Test:   [  0/782]  eta: 0:20:48  loss: 0.6720 (0.6720)  acc1: 84.3750 (84.3750)  acc5: 92.1875 (92.1875)  time: 1.5971  data: 1.5828  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:33  loss: 1.0491 (0.9983)  acc1: 76.5625 (77.3670)  acc5: 90.6250 (92.0947)  time: 0.0389  data: 0.0249  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:23  loss: 0.9042 (0.9695)  acc1: 73.4375 (76.9123)  acc5: 95.3125 (93.2914)  time: 0.0253  data: 0.0114  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:18  loss: 0.8337 (0.9674)  acc1: 78.1250 (77.1958)  acc5: 93.7500 (93.6047)  time: 0.0311  data: 0.0171  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.8155 (1.1160)  acc1: 59.3750 (74.4311)  acc5: 82.8125 (91.8836)  time: 0.0291  data: 0.0152  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.8470 (1.1998)  acc1: 56.2500 (72.9323)  acc5: 85.9375 (90.7404)  time: 0.0379  data: 0.0240  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4066 (1.2660)  acc1: 65.6250 (71.5838)  acc5: 82.8125 (89.9750)  time: 0.0264  data: 0.0125  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3310 (1.3201)  acc1: 70.3125 (70.5599)  acc5: 87.5000 (89.2809)  time: 0.0309  data: 0.0170  max mem: 14260\n",
            "Test:  Total time: 0:00:25\n",
            "Test:  Acc@1 70.490 Acc@5 89.280\n",
            "Epoch: [14]  [   0/2503]  eta: 3:57:23  lr: 3.125e-05  img/s: 1380.4706736941034  loss: 1.9210 (1.9210)  acc1: 69.5312 (69.5312)  acc5: 87.6953 (87.6953)  time: 5.6905  data: 5.3196  max mem: 14260\n",
            "Epoch: [14]  [ 100/2503]  eta: 0:17:12  lr: 3.125e-05  img/s: 1378.0841231307486  loss: 2.0083 (2.0326)  acc1: 70.3125 (70.0611)  acc5: 86.9141 (87.2467)  time: 0.3722  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [ 200/2503]  eta: 0:15:24  lr: 3.125e-05  img/s: 1373.5219084009013  loss: 1.9993 (2.0409)  acc1: 70.5078 (70.0492)  acc5: 86.9141 (87.1900)  time: 0.3735  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [ 300/2503]  eta: 0:14:24  lr: 3.125e-05  img/s: 1374.2022034756221  loss: 2.0283 (2.0404)  acc1: 69.3359 (70.0127)  acc5: 86.7188 (87.1788)  time: 0.3737  data: 0.0004  max mem: 14260\n",
            "Epoch: [14]  [ 400/2503]  eta: 0:13:35  lr: 3.125e-05  img/s: 1375.437067590677  loss: 2.0173 (2.0388)  acc1: 69.5312 (69.9453)  acc5: 87.1094 (87.1878)  time: 0.3736  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [ 500/2503]  eta: 0:12:50  lr: 3.125e-05  img/s: 1372.851940546588  loss: 2.0037 (2.0397)  acc1: 69.7266 (69.9227)  acc5: 86.7188 (87.1550)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [14]  [ 600/2503]  eta: 0:12:08  lr: 3.125e-05  img/s: 1373.6739055621583  loss: 2.0059 (2.0369)  acc1: 70.3125 (69.9001)  acc5: 87.3047 (87.1705)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [14]  [ 700/2503]  eta: 0:11:27  lr: 3.125e-05  img/s: 1374.0966872340166  loss: 2.0571 (2.0377)  acc1: 70.8984 (69.8993)  acc5: 87.1094 (87.1762)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [14]  [ 800/2503]  eta: 0:10:47  lr: 3.125e-05  img/s: 1373.261921890697  loss: 2.0219 (2.0374)  acc1: 70.1172 (69.9389)  acc5: 86.5234 (87.1969)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [14]  [ 900/2503]  eta: 0:10:08  lr: 3.125e-05  img/s: 1373.3277875408965  loss: 2.0973 (2.0395)  acc1: 69.9219 (69.9585)  acc5: 87.1094 (87.2089)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [1000/2503]  eta: 0:09:29  lr: 3.125e-05  img/s: 1373.5737418032681  loss: 2.0244 (2.0396)  acc1: 69.5312 (69.9582)  acc5: 87.6953 (87.2063)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [1100/2503]  eta: 0:08:51  lr: 3.125e-05  img/s: 1374.7775519362585  loss: 2.0600 (2.0392)  acc1: 70.1172 (69.9916)  acc5: 87.5000 (87.2339)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [14]  [1200/2503]  eta: 0:08:12  lr: 3.125e-05  img/s: 1375.140251285982  loss: 2.0365 (2.0393)  acc1: 69.9219 (70.0162)  acc5: 86.9141 (87.2287)  time: 0.3735  data: 0.0004  max mem: 14260\n",
            "Epoch: [14]  [1300/2503]  eta: 0:07:34  lr: 3.125e-05  img/s: 1371.594554201231  loss: 2.0376 (2.0391)  acc1: 68.9453 (69.9830)  acc5: 86.3281 (87.2190)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [1400/2503]  eta: 0:06:56  lr: 3.125e-05  img/s: 1375.0090588015337  loss: 1.9764 (2.0392)  acc1: 70.7031 (69.9927)  acc5: 87.6953 (87.2277)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [1500/2503]  eta: 0:06:18  lr: 3.125e-05  img/s: 1373.7775992262007  loss: 2.0324 (2.0398)  acc1: 69.3359 (69.9670)  acc5: 87.1094 (87.2201)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [1600/2503]  eta: 0:05:40  lr: 3.125e-05  img/s: 1376.0910437738055  loss: 1.9701 (2.0382)  acc1: 70.8984 (70.0030)  acc5: 87.5000 (87.2339)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [1700/2503]  eta: 0:05:02  lr: 3.125e-05  img/s: 1372.8142029658127  loss: 2.0675 (2.0385)  acc1: 69.9219 (70.0081)  acc5: 86.9141 (87.2303)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [1800/2503]  eta: 0:04:24  lr: 3.125e-05  img/s: 1374.377220300106  loss: 2.0315 (2.0378)  acc1: 69.9219 (70.0282)  acc5: 86.9141 (87.2299)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [1900/2503]  eta: 0:03:46  lr: 3.125e-05  img/s: 1373.054705857154  loss: 2.0286 (2.0366)  acc1: 69.7266 (70.0487)  acc5: 87.3047 (87.2401)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [2000/2503]  eta: 0:03:09  lr: 3.125e-05  img/s: 1373.4876477909545  loss: 2.0133 (2.0363)  acc1: 69.9219 (70.0554)  acc5: 87.5000 (87.2516)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [2100/2503]  eta: 0:02:31  lr: 3.125e-05  img/s: 1376.08222594449  loss: 2.0194 (2.0352)  acc1: 69.5312 (70.0728)  acc5: 87.1094 (87.2645)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [2200/2503]  eta: 0:01:53  lr: 3.125e-05  img/s: 1373.9173468062497  loss: 2.0050 (2.0351)  acc1: 70.5078 (70.0787)  acc5: 87.3047 (87.2626)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [2300/2503]  eta: 0:01:16  lr: 3.125e-05  img/s: 1371.4351342962937  loss: 2.0315 (2.0355)  acc1: 69.5312 (70.0646)  acc5: 86.9141 (87.2594)  time: 0.3729  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [2400/2503]  eta: 0:00:38  lr: 3.125e-05  img/s: 1373.9366851885618  loss: 2.0668 (2.0363)  acc1: 69.9219 (70.0614)  acc5: 87.1094 (87.2539)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [14]  [2500/2503]  eta: 0:00:01  lr: 3.125e-05  img/s: 1375.7948265876696  loss: 2.0316 (2.0364)  acc1: 69.7266 (70.0786)  acc5: 86.9141 (87.2595)  time: 0.3723  data: 0.0001  max mem: 14260\n",
            "Epoch: [14] Total time: 0:15:39\n",
            "Test:   [  0/782]  eta: 0:19:11  loss: 0.6806 (0.6806)  acc1: 84.3750 (84.3750)  acc5: 93.7500 (93.7500)  time: 1.4730  data: 1.4588  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:32  loss: 1.0519 (0.9977)  acc1: 78.1250 (77.2896)  acc5: 90.6250 (92.1875)  time: 0.0394  data: 0.0254  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:23  loss: 0.9032 (0.9713)  acc1: 71.8750 (76.8424)  acc5: 95.3125 (93.3691)  time: 0.0254  data: 0.0113  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:18  loss: 0.8211 (0.9690)  acc1: 79.6875 (77.2270)  acc5: 93.7500 (93.6150)  time: 0.0313  data: 0.0172  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.8256 (1.1178)  acc1: 59.3750 (74.4779)  acc5: 84.3750 (91.8992)  time: 0.0286  data: 0.0146  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.8515 (1.2028)  acc1: 56.2500 (72.9510)  acc5: 84.3750 (90.7622)  time: 0.0285  data: 0.0146  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4281 (1.2684)  acc1: 65.6250 (71.5812)  acc5: 84.3750 (90.0166)  time: 0.0263  data: 0.0124  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3673 (1.3217)  acc1: 70.3125 (70.5867)  acc5: 87.5000 (89.3300)  time: 0.0315  data: 0.0177  max mem: 14260\n",
            "Test:  Total time: 0:00:26\n",
            "Test:  Acc@1 70.512 Acc@5 89.322\n",
            "Epoch: [15]  [   0/2503]  eta: 4:19:49  lr: 1.5625e-05  img/s: 1382.5699244489797  loss: 2.0446 (2.0446)  acc1: 69.5312 (69.5312)  acc5: 85.9375 (85.9375)  time: 6.2283  data: 5.8580  max mem: 14260\n",
            "Epoch: [15]  [ 100/2503]  eta: 0:17:11  lr: 1.5625e-05  img/s: 1375.492569721511  loss: 2.0509 (2.0361)  acc1: 69.7266 (70.2003)  acc5: 87.5000 (87.4981)  time: 0.3721  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [ 200/2503]  eta: 0:15:24  lr: 1.5625e-05  img/s: 1372.2334814739081  loss: 2.0622 (2.0405)  acc1: 69.9219 (70.1386)  acc5: 86.5234 (87.3756)  time: 0.3736  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [ 300/2503]  eta: 0:14:23  lr: 1.5625e-05  img/s: 1371.9766120129207  loss: 2.0198 (2.0362)  acc1: 70.3125 (70.1457)  acc5: 87.3047 (87.3754)  time: 0.3736  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [ 400/2503]  eta: 0:13:34  lr: 1.5625e-05  img/s: 1372.0730363712337  loss: 2.0471 (2.0389)  acc1: 69.9219 (70.0329)  acc5: 87.3047 (87.3641)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [ 500/2503]  eta: 0:12:50  lr: 1.5625e-05  img/s: 1372.7185518811661  loss: 2.0607 (2.0376)  acc1: 70.7031 (70.0295)  acc5: 87.1094 (87.3671)  time: 0.3733  data: 0.0004  max mem: 14260\n",
            "Epoch: [15]  [ 600/2503]  eta: 0:12:08  lr: 1.5625e-05  img/s: 1374.9923313580673  loss: 1.9689 (2.0341)  acc1: 71.0938 (70.0493)  acc5: 88.0859 (87.3791)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [15]  [ 700/2503]  eta: 0:11:27  lr: 1.5625e-05  img/s: 1374.1195477112658  loss: 2.0411 (2.0306)  acc1: 69.9219 (70.0534)  acc5: 87.3047 (87.3874)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [15]  [ 800/2503]  eta: 0:10:47  lr: 1.5625e-05  img/s: 1376.2453740820422  loss: 1.9856 (2.0296)  acc1: 70.5078 (70.0550)  acc5: 87.6953 (87.3856)  time: 0.3732  data: 0.0004  max mem: 14260\n",
            "Epoch: [15]  [ 900/2503]  eta: 0:10:08  lr: 1.5625e-05  img/s: 1372.197531495288  loss: 2.0350 (2.0331)  acc1: 71.0938 (70.0600)  acc5: 87.1094 (87.3346)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1000/2503]  eta: 0:09:29  lr: 1.5625e-05  img/s: 1375.1693107259541  loss: 1.9944 (2.0325)  acc1: 70.7031 (70.0959)  acc5: 88.2812 (87.3466)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1100/2503]  eta: 0:08:50  lr: 1.5625e-05  img/s: 1374.9579973544262  loss: 2.0496 (2.0320)  acc1: 70.1172 (70.1296)  acc5: 86.9141 (87.3591)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1200/2503]  eta: 0:08:12  lr: 1.5625e-05  img/s: 1375.4573297542347  loss: 2.0908 (2.0326)  acc1: 70.1172 (70.1387)  acc5: 87.1094 (87.3567)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1300/2503]  eta: 0:07:34  lr: 1.5625e-05  img/s: 1372.6974928823736  loss: 1.9990 (2.0327)  acc1: 70.1172 (70.1239)  acc5: 87.1094 (87.3365)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1400/2503]  eta: 0:06:56  lr: 1.5625e-05  img/s: 1374.5047732001851  loss: 2.0173 (2.0331)  acc1: 70.1172 (70.1233)  acc5: 87.1094 (87.3351)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1500/2503]  eta: 0:06:18  lr: 1.5625e-05  img/s: 1373.2171368572367  loss: 1.9933 (2.0325)  acc1: 70.8984 (70.1289)  acc5: 87.3047 (87.3381)  time: 0.3728  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1600/2503]  eta: 0:05:40  lr: 1.5625e-05  img/s: 1375.046036699762  loss: 2.0170 (2.0308)  acc1: 70.5078 (70.1535)  acc5: 87.6953 (87.3612)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1700/2503]  eta: 0:05:02  lr: 1.5625e-05  img/s: 1374.531166411387  loss: 2.0664 (2.0322)  acc1: 69.5312 (70.1297)  acc5: 86.5234 (87.3411)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1800/2503]  eta: 0:04:24  lr: 1.5625e-05  img/s: 1373.415618236796  loss: 2.0155 (2.0328)  acc1: 69.7266 (70.1297)  acc5: 87.3047 (87.3422)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [1900/2503]  eta: 0:03:46  lr: 1.5625e-05  img/s: 1372.1396647544952  loss: 2.0226 (2.0328)  acc1: 71.0938 (70.1318)  acc5: 87.3047 (87.3468)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [2000/2503]  eta: 0:03:09  lr: 1.5625e-05  img/s: 1374.69922696185  loss: 2.0819 (2.0339)  acc1: 69.5312 (70.1231)  acc5: 86.9141 (87.3370)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [2100/2503]  eta: 0:02:31  lr: 1.5625e-05  img/s: 1375.4467581161584  loss: 2.0481 (2.0333)  acc1: 69.7266 (70.1219)  acc5: 87.3047 (87.3377)  time: 0.3732  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [2200/2503]  eta: 0:01:53  lr: 1.5625e-05  img/s: 1373.7705686525878  loss: 2.0255 (2.0329)  acc1: 70.5078 (70.1487)  acc5: 87.3047 (87.3507)  time: 0.3729  data: 0.0002  max mem: 14260\n",
            "Epoch: [15]  [2300/2503]  eta: 0:01:16  lr: 1.5625e-05  img/s: 1373.8892192219282  loss: 2.0713 (2.0330)  acc1: 69.5312 (70.1352)  acc5: 87.1094 (87.3470)  time: 0.3730  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [2400/2503]  eta: 0:00:38  lr: 1.5625e-05  img/s: 1374.6948269471684  loss: 1.9726 (2.0331)  acc1: 70.7031 (70.1268)  acc5: 87.5000 (87.3482)  time: 0.3731  data: 0.0003  max mem: 14260\n",
            "Epoch: [15]  [2500/2503]  eta: 0:00:01  lr: 1.5625e-05  img/s: 1377.165018010698  loss: 2.0347 (2.0337)  acc1: 70.8984 (70.1293)  acc5: 87.5000 (87.3426)  time: 0.3725  data: 0.0001  max mem: 14260\n",
            "Epoch: [15] Total time: 0:15:39\n",
            "Test:   [  0/782]  eta: 0:20:03  loss: 0.6819 (0.6819)  acc1: 84.3750 (84.3750)  acc5: 93.7500 (93.7500)  time: 1.5387  data: 1.5247  max mem: 14260\n",
            "Test:   [100/782]  eta: 0:00:31  loss: 1.0299 (0.9982)  acc1: 76.5625 (77.3670)  acc5: 90.6250 (92.1875)  time: 0.0363  data: 0.0224  max mem: 14260\n",
            "Test:   [200/782]  eta: 0:00:22  loss: 0.9097 (0.9699)  acc1: 75.0000 (76.9356)  acc5: 95.3125 (93.3613)  time: 0.0257  data: 0.0118  max mem: 14260\n",
            "Test:   [300/782]  eta: 0:00:17  loss: 0.8155 (0.9677)  acc1: 79.6875 (77.2425)  acc5: 93.7500 (93.6358)  time: 0.0269  data: 0.0130  max mem: 14260\n",
            "Test:   [400/782]  eta: 0:00:13  loss: 1.8431 (1.1161)  acc1: 59.3750 (74.5246)  acc5: 84.3750 (91.8797)  time: 0.0277  data: 0.0138  max mem: 14260\n",
            "Test:   [500/782]  eta: 0:00:09  loss: 1.8323 (1.2011)  acc1: 56.2500 (73.0071)  acc5: 84.3750 (90.7404)  time: 0.0390  data: 0.0251  max mem: 14260\n",
            "Test:   [600/782]  eta: 0:00:06  loss: 1.4322 (1.2668)  acc1: 65.6250 (71.6488)  acc5: 85.9375 (89.9620)  time: 0.0238  data: 0.0098  max mem: 14260\n",
            "Test:   [700/782]  eta: 0:00:02  loss: 1.3594 (1.3203)  acc1: 70.3125 (70.6468)  acc5: 87.5000 (89.2676)  time: 0.0243  data: 0.0103  max mem: 14260\n",
            "Test:  Total time: 0:00:26\n",
            "Test:  Acc@1 70.554 Acc@5 89.268\n",
            "Training time 4:17:53\n"
          ]
        }
      ],
      "source": [
        "from types import SimpleNamespace\n",
        "\n",
        "args = SimpleNamespace(\n",
        "    data_path=\"/home/cs/Documents/datasets/imagenet\",  # Replace with your /path/to/imagenet\n",
        "    model=\"resnet18\",\n",
        "    device=\"cuda\",\n",
        "    batch_size=512,  # 512 images/iter -> ~1373 img/s at the ~0.373 s/iter seen in the log\n",
        "    epochs=16,  # epochs 0-15 in the log above\n",
        "    lr=0.002,  # base LR, before warmup and step decay\n",
        "    momentum=0.9,\n",
        "    weight_decay=1e-4,\n",
        "    label_smoothing=0.0,\n",
        "    lr_warmup_epochs=1,  # one warmup epoch before the step schedule\n",
        "    lr_warmup_decay=0.0,\n",
        "    lr_step_size=2,  # then decay the LR every 2 epochs ...\n",
        "    lr_gamma=0.5,  # ... by a factor of 0.5 (the halving visible in the log)\n",
        "    print_freq=100,  # print a log line every 100 iterations\n",
        "    output_dir=\"resnet18\",\n",
        "    use_deterministic_algorithms=False,\n",
        "    weights=\"ResNet18_Weights.IMAGENET1K_V1\",  # start from torchvision's ImageNet-pretrained weights\n",
        "    apply_trp=True,  # enable TRP for this run\n",
        "    trp_depths=[4, 4, 4],\n",
        "    in_planes=512,  # channel width of ResNet-18's final stage\n",
        "    out_planes=8,\n",
        "    trp_rewards=[1.0, 0.4, 0.2, 0.1],\n",
        ")\n",
        "\n",
        "main(args)"
      ]
    }
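    ,
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "**LR-schedule sanity check (added sketch).** The learning rates printed above halve every two epochs: `0.000125` during epoch 9, `6.25e-05` during epoch 11, `3.125e-05` during epoch 13, and `1.5625e-05` during epoch 15; the run ends at Acc@1 70.554 / Acc@5 89.268 with a total training time of 4:17:53. Assuming `main` applies a `StepLR`-style decay driven by `lr_step_size` and `lr_gamma` (as in torchvision's reference recipe), the closed form `lr * gamma ** (epoch // step_size)` reproduces the logged values. The cell below is an illustrative check, not part of the training script."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Illustrative check (assumption: StepLR-style decay, as in torchvision's recipe);\n",
        "# reproduces the learning rates printed in the training log above.\n",
        "base_lr, step_size, gamma = args.lr, args.lr_step_size, args.lr_gamma\n",
        "for epoch in (9, 11, 13, 15):\n",
        "    lr = base_lr * gamma ** (epoch // step_size)\n",
        "    print(f\"epoch {epoch}: lr = {lr:.6g}\")  # 0.000125, 6.25e-05, 3.125e-05, 1.5625e-05"
      ]
    }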
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "L4",
      "machine_shape": "hm",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.18"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}