jpterry commited on
Commit
68bc350
·
1 Parent(s): bc0a3bf
Files changed (1) hide show
  1. app.py +718 -715
app.py CHANGED
@@ -73,843 +73,846 @@ effnet_hparams = {61: {
73
  activation_indices = {'efficientnet': [0, 3]}
74
 
75
 
76
- ########## EfficientNet ############
77
- @dataclass
78
- class _MBConvConfig:
79
- expand_ratio: float
80
- kernel: int
81
- stride: int
82
- input_channels: int
83
- out_channels: int
84
- num_layers: int
85
- block: Callable[..., nn.Module]
86
-
87
- @staticmethod
88
- def adjust_channels(
89
- channels: int, width_mult: float, min_value: Optional[int] = None
90
- ) -> int:
91
- return _make_divisible(channels * width_mult, 8, min_value)
92
 
 
93
 
94
- class MBConvConfig(_MBConvConfig):
95
- # Stores information listed at Table 1 of the EfficientNet paper & Table 4 of the EfficientNetV2 paper
96
- def __init__(
97
- self,
98
- expand_ratio: float,
99
- kernel: int,
100
- stride: int,
101
- input_channels: int,
102
- out_channels: int,
103
- num_layers: int,
104
- width_mult: float = 1.0,
105
- depth_mult: float = 1.0,
106
- block: Optional[Callable[..., nn.Module]] = None,
107
- ) -> None:
108
- input_channels = self.adjust_channels(input_channels, width_mult)
109
- out_channels = self.adjust_channels(out_channels, width_mult)
110
- num_layers = self.adjust_depth(num_layers, depth_mult)
111
- if block is None:
112
- block = MBConv
113
- super().__init__(
114
- expand_ratio,
115
- kernel,
116
- stride,
117
- input_channels,
118
- out_channels,
119
- num_layers,
120
- block,
121
- )
122
 
123
- @staticmethod
124
- def adjust_depth(num_layers: int, depth_mult: float):
125
- return int(math.ceil(num_layers * depth_mult))
126
 
 
 
 
 
127
 
128
- class FusedMBConvConfig(_MBConvConfig):
129
- # Stores information listed at Table 4 of the EfficientNetV2 paper
130
  def __init__(
131
  self,
132
- expand_ratio: float,
133
- kernel: int,
134
- stride: int,
135
- input_channels: int,
136
- out_channels: int,
137
- num_layers: int,
138
- block: Optional[Callable[..., nn.Module]] = None,
139
- ) -> None:
140
- if block is None:
141
- block = FusedMBConv
142
- super().__init__(
143
- expand_ratio,
144
- kernel,
145
- stride,
146
- input_channels,
147
- out_channels,
148
- num_layers,
149
- block,
 
 
 
 
 
 
 
 
 
150
  )
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
- class MBConv(nn.Module):
 
154
  def __init__(
155
  self,
156
- cnf: MBConvConfig,
157
- stochastic_depth_prob: float,
158
- norm_layer: Callable[..., nn.Module],
159
- se_layer: Callable[..., nn.Module] = SqueezeExcitation,
 
 
 
 
 
 
 
 
160
  ) -> None:
161
- super().__init__()
162
-
163
- if not (1 <= cnf.stride <= 2):
164
- raise ValueError("illegal stride value")
165
-
166
- self.use_res_connect = (
167
- cnf.stride == 1 and cnf.input_channels == cnf.out_channels
168
- )
169
 
170
- layers: List[nn.Module] = []
171
- activation_layer = nn.SiLU
 
 
172
 
173
- # expand
174
- expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
175
- if expanded_channels != cnf.input_channels:
176
- layers.append(
177
- Conv2dNormActivation(
178
- cnf.input_channels,
179
- expanded_channels,
180
- kernel_size=1,
181
- norm_layer=norm_layer,
182
- activation_layer=activation_layer,
183
- )
184
  )
 
185
 
186
- # depthwise
187
- layers.append(
188
- Conv2dNormActivation(
189
- expanded_channels,
190
- expanded_channels,
191
- kernel_size=cnf.kernel,
192
- stride=cnf.stride,
193
- groups=expanded_channels,
194
- norm_layer=norm_layer,
195
- activation_layer=activation_layer,
196
- )
197
- )
198
 
199
- # squeeze and excitation
200
- squeeze_channels = max(1, cnf.input_channels // 4)
201
- layers.append(
202
- se_layer(
203
- expanded_channels,
204
- squeeze_channels,
205
- activation=partial(nn.SiLU, inplace=True),
206
- )
207
- )
208
 
209
- # project
210
- layers.append(
211
- Conv2dNormActivation(
212
- expanded_channels,
213
- cnf.out_channels,
214
- kernel_size=1,
215
- norm_layer=norm_layer,
216
- activation_layer=None,
217
  )
218
- )
219
 
220
- self.block = nn.Sequential(*layers)
221
- self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
222
- self.out_channels = cnf.out_channels
223
 
224
- def forward(self, input: Tensor) -> Tensor:
225
- result = self.block(input)
226
- if self.use_res_connect:
227
- result = self.stochastic_depth(result)
228
- result += input
229
- return result
 
 
 
 
 
 
 
 
 
 
230
 
 
231
 
232
- class FusedMBConv(nn.Module):
233
  def __init__(
234
  self,
235
- cnf: FusedMBConvConfig,
236
- stochastic_depth_prob: float,
237
- norm_layer: Callable[..., nn.Module],
 
 
 
 
 
 
 
 
238
  ) -> None:
239
- super().__init__()
240
-
241
- if not (1 <= cnf.stride <= 2):
242
- raise ValueError("illegal stride value")
243
 
244
- self.use_res_connect = (
245
- cnf.stride == 1 and cnf.input_channels == cnf.out_channels
 
 
 
 
 
 
 
 
 
 
 
246
  )
247
 
248
- layers: List[nn.Module] = []
249
- activation_layer = nn.SiLU
250
-
251
- expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
252
- if expanded_channels != cnf.input_channels:
253
- # fused expand
254
- layers.append(
255
- Conv2dNormActivation(
256
- cnf.input_channels,
257
- expanded_channels,
258
- kernel_size=cnf.kernel,
259
- stride=cnf.stride,
260
- norm_layer=norm_layer,
261
- activation_layer=activation_layer,
262
- )
263
- )
264
-
265
- # project
266
- layers.append(
267
- Conv2dNormActivation(
268
- expanded_channels,
269
- cnf.out_channels,
270
- kernel_size=1,
271
- norm_layer=norm_layer,
272
- activation_layer=None,
273
- )
274
- )
275
- else:
276
- layers.append(
277
- Conv2dNormActivation(
278
- cnf.input_channels,
279
- cnf.out_channels,
280
- kernel_size=cnf.kernel,
281
- stride=cnf.stride,
282
- norm_layer=norm_layer,
283
- activation_layer=activation_layer,
284
- )
285
- )
286
-
287
- self.block = nn.Sequential(*layers)
288
- self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
289
- self.out_channels = cnf.out_channels
290
 
291
- def forward(self, input: Tensor) -> Tensor:
292
- result = self.block(input)
293
- if self.use_res_connect:
294
- result = self.stochastic_depth(result)
295
- result += input
296
- return result
297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
- class EfficientNetConfig(PretrainedConfig):
300
-
301
- model_type = "efficientnet"
302
-
303
  def __init__(
304
  self,
305
- # inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
306
- dropout: float=0.25,
307
- num_channels: int = 61,
308
- stochastic_depth_prob: float = 0.2,
309
- num_classes: int = 2,
310
- norm_layer: Optional[Callable[..., nn.Module]] = None,
311
- # last_channel: Optional[int] = None,
312
- size: str='v2_s',
313
- width_mult: float = 1.0,
314
- depth_mult: float = 1.0,
315
- **kwargs: Any,
316
  ) -> None:
317
- """
318
- EfficientNet V1 and V2 main class
319
 
320
- Args:
321
- inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]]): Network structure
322
- dropout (float): The droupout probability
323
- stochastic_depth_prob (float): The stochastic depth probability
324
- num_classes (int): Number of classes
325
- norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use
326
- last_channel (int): The number of channels on the penultimate layer
327
- """
328
-
329
-
330
- # self.model = EfficientNet(
331
- # dropout=dropout,
332
- # num_channels=num_channels,
333
- # num_classes=num_classes,
334
- # size=size,
335
- # stochastic_depth_prob=stochastic_depth_prob,
336
- # width_mult=width_mult,
337
- # depth_mult=depth_mult,
338
- # )
339
-
340
- #
341
- self.dropout=dropout
342
- self.num_channels=num_channels
343
- self.num_classes=num_classes
344
- self.size=size
345
- self.stochastic_depth_prob=stochastic_depth_prob
346
- self.width_mult=width_mult
347
- self.depth_mult=depth_mult
348
-
349
- super().__init__(**kwargs)
350
-
351
-
352
- class EfficientNetPreTrained(PreTrainedModel):
353
-
354
- config_class = EfficientNetConfig
355
-
356
- def __init__(
357
- self,
358
- config
359
- ):
360
- super().__init__(config)
361
- self.model = EfficientNet( dropout=config.dropout,
362
- num_channels=config.num_channels,
363
- num_classes=config.num_classes,
364
- size=config.size,
365
- stochastic_depth_prob=config.stochastic_depth_prob,
366
- width_mult=config.width_mult,
367
- depth_mult=config.depth_mult,)
368
-
369
- def forward(self, tensor):
370
- return self.model.forward(tensor)
371
-
372
-
373
- class EfficientNet(nn.Module):
374
-
375
-
376
  def __init__(
377
  self,
378
- # inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
379
- dropout: float=0.25,
380
- num_channels: int = 61,
381
- stochastic_depth_prob: float = 0.2,
382
- num_classes: int = 2,
383
- norm_layer: Optional[Callable[..., nn.Module]] = None,
384
- # last_channel: Optional[int] = None,
385
- size: str='v2_s',
386
- width_mult: float = 1.0,
387
- depth_mult: float = 1.0,
388
- **kwargs: Any,
389
  ) -> None:
390
- """
391
- EfficientNet V1 and V2 main class
392
-
393
- Args:
394
- inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]]): Network structure
395
- dropout (float): The droupout probability
396
- stochastic_depth_prob (float): The stochastic depth probability
397
- num_classes (int): Number of classes
398
- norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use
399
- last_channel (int): The number of channels on the penultimate layer
400
- """
401
  super().__init__()
402
  # _log_api_usage_once(self)
403
-
404
- inverted_residual_setting, last_channel = _efficientnet_conf(
405
- "efficientnet_%s" % (size), width_mult=width_mult, depth_mult=depth_mult
406
- )
 
407
 
408
- if not inverted_residual_setting:
409
- raise ValueError("The inverted_residual_setting should not be empty")
410
- elif not (
411
- isinstance(inverted_residual_setting, Sequence)
412
- and all([isinstance(s, _MBConvConfig) for s in inverted_residual_setting])
413
- ):
414
- raise TypeError(
415
- "The inverted_residual_setting should be List[MBConvConfig]"
416
- )
417
 
418
- if "block" in kwargs:
419
- warnings.warn(
420
- "The parameter 'block' is deprecated since 0.13 and will be removed 0.15. "
421
- "Please pass this information on 'MBConvConfig.block' instead."
422
- )
423
- if kwargs["block"] is not None:
424
- for s in inverted_residual_setting:
425
- if isinstance(s, MBConvConfig):
426
- s.block = kwargs["block"]
427
 
428
- if norm_layer is None:
429
- norm_layer = nn.BatchNorm2d
430
 
431
- layers: List[nn.Module] = []
432
-
433
- # building first layer
434
- firstconv_output_channels = inverted_residual_setting[0].input_channels
435
- layers.append(
436
- Conv2dNormActivation(
437
- num_channels,
438
- firstconv_output_channels,
439
- kernel_size=3,
440
- stride=2,
441
- norm_layer=norm_layer,
442
- activation_layer=nn.SiLU,
443
- )
444
- )
445
-
446
- # building inverted residual blocks
447
- total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting)
448
- stage_block_id = 0
449
- for cnf in inverted_residual_setting:
450
- stage: List[nn.Module] = []
451
- for _ in range(cnf.num_layers):
452
- # copy to avoid modifications. shallow copy is enough
453
- block_cnf = copy.copy(cnf)
454
-
455
- # overwrite info if not the first conv in the stage
456
- if stage:
457
- block_cnf.input_channels = block_cnf.out_channels
458
- block_cnf.stride = 1
459
-
460
- # adjust stochastic depth probability based on the depth of the stage block
461
- sd_prob = (
462
- stochastic_depth_prob * float(stage_block_id) / total_stage_blocks
463
- )
464
 
465
- stage.append(block_cnf.block(block_cnf, sd_prob, norm_layer))
466
- stage_block_id += 1
 
 
 
 
 
 
 
467
 
468
- layers.append(nn.Sequential(*stage))
 
 
 
 
 
 
 
 
 
 
 
 
469
 
470
- # building last several layers
471
- lastconv_input_channels = inverted_residual_setting[-1].out_channels
472
- lastconv_output_channels = (
473
- last_channel if last_channel is not None else 4 * lastconv_input_channels
474
- )
475
- layers.append(
476
- Conv2dNormActivation(
477
- lastconv_input_channels,
478
- lastconv_output_channels,
479
- kernel_size=1,
480
- norm_layer=norm_layer,
481
- activation_layer=nn.SiLU,
482
- )
483
- )
484
 
485
- self.features = nn.Sequential(*layers)
486
- self.avgpool = nn.AdaptiveAvgPool2d(1)
487
- self.classifier = nn.Sequential(
488
- nn.Dropout(p=dropout, inplace=True),
489
- nn.Linear(lastconv_output_channels, num_classes),
490
- )
491
 
492
- for m in self.modules():
493
- if isinstance(m, nn.Conv2d):
494
- nn.init.kaiming_normal_(m.weight, mode="fan_out")
495
- if m.bias is not None:
496
- nn.init.zeros_(m.bias)
497
- elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
498
- nn.init.ones_(m.weight)
499
- nn.init.zeros_(m.bias)
500
- elif isinstance(m, nn.Linear):
501
- init_range = 1.0 / math.sqrt(m.out_features)
502
- nn.init.uniform_(m.weight, -init_range, init_range)
503
- nn.init.zeros_(m.bias)
504
-
505
- # super().__init__(**kwargs)
506
 
507
- def _forward_impl(self, x: Tensor) -> Tensor:
508
- x = self.features(x)
509
 
510
- x = self.avgpool(x)
511
- x = torch.flatten(x, 1)
512
 
513
- x = self.classifier(x)
 
 
514
 
515
- return x
 
 
516
 
517
  def forward(self, x: Tensor) -> Tensor:
518
- return self._forward_impl(x)
519
-
520
-
521
- # def _efficientnet(
522
- # inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
523
- # dropout: float,
524
- # last_channel: Optional[int],
525
- # weights=None,
526
- # num_channels: int = 61,
527
- # stochastic_depth_prob: float = 0.2,
528
- # progress: bool = True,
529
- # num_classes: int = 2,
530
- # **kwargs: Any,
531
- # ) -> EfficientNetCongig:
532
-
533
- # model = EfficientNetCongif(
534
- # inverted_residual_setting,
535
- # dropout,
536
- # num_classes=num_classes,
537
- # num_channels=num_channels,
538
- # stochastic_depth_prob=stochastic_depth_prob,
539
- # last_channel=last_channel,
540
- # **kwargs,
541
- # )
542
 
543
- # return model
544
 
545
 
546
- def _efficientnet_conf(
547
- arch: str,
548
- **kwargs: Any,
549
- ) -> Tuple[Sequence[Union[MBConvConfig, FusedMBConvConfig]], Optional[int]]:
550
- inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]]
551
- if arch.startswith("efficientnet_b"):
552
- bneck_conf = partial(
553
- MBConvConfig,
554
- width_mult=kwargs.pop("width_mult"),
555
- depth_mult=kwargs.pop("depth_mult"),
556
- )
557
- inverted_residual_setting = [
558
- bneck_conf(1, 3, 1, 32, 16, 1),
559
- bneck_conf(6, 3, 2, 16, 24, 2),
560
- bneck_conf(6, 5, 2, 24, 40, 2),
561
- bneck_conf(6, 3, 2, 40, 80, 3),
562
- bneck_conf(6, 5, 1, 80, 112, 3),
563
- bneck_conf(6, 5, 2, 112, 192, 4),
564
- bneck_conf(6, 3, 1, 192, 320, 1),
565
- ]
566
- last_channel = None
567
- elif arch.startswith("efficientnet_v2_s"):
568
- inverted_residual_setting = [
569
- FusedMBConvConfig(1, 3, 1, 24, 24, 2),
570
- FusedMBConvConfig(4, 3, 2, 24, 48, 4),
571
- FusedMBConvConfig(4, 3, 2, 48, 64, 4),
572
- MBConvConfig(4, 3, 2, 64, 128, 6),
573
- MBConvConfig(6, 3, 1, 128, 160, 9),
574
- MBConvConfig(6, 3, 2, 160, 256, 15),
575
- ]
576
- last_channel = 1280
577
- elif arch.startswith("efficientnet_v2_m"):
578
- inverted_residual_setting = [
579
- FusedMBConvConfig(1, 3, 1, 24, 24, 3),
580
- FusedMBConvConfig(4, 3, 2, 24, 48, 5),
581
- FusedMBConvConfig(4, 3, 2, 48, 80, 5),
582
- MBConvConfig(4, 3, 2, 80, 160, 7),
583
- MBConvConfig(6, 3, 1, 160, 176, 14),
584
- MBConvConfig(6, 3, 2, 176, 304, 18),
585
- MBConvConfig(6, 3, 1, 304, 512, 5),
586
- ]
587
- last_channel = 1280
588
- elif arch.startswith("efficientnet_v2_l"):
589
- inverted_residual_setting = [
590
- FusedMBConvConfig(1, 3, 1, 32, 32, 4),
591
- FusedMBConvConfig(4, 3, 2, 32, 64, 7),
592
- FusedMBConvConfig(4, 3, 2, 64, 96, 7),
593
- MBConvConfig(4, 3, 2, 96, 192, 10),
594
- MBConvConfig(6, 3, 1, 192, 224, 19),
595
- MBConvConfig(6, 3, 2, 224, 384, 25),
596
- MBConvConfig(6, 3, 1, 384, 640, 7),
597
- ]
598
- last_channel = 1280
599
- else:
600
- raise ValueError(f"Unsupported model type {arch}")
601
 
602
- return inverted_residual_setting, last_channel
 
 
 
 
 
 
 
 
 
603
 
 
 
 
 
 
604
 
605
- #### extra torchvision stuff ####
606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
 
608
- class FrozenBatchNorm2d(torch.nn.Module):
609
- """
610
- BatchNorm2d where the batch statistics and the affine parameters are fixed
611
 
612
- Args:
613
- num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
614
- eps (float): a value added to the denominator for numerical stability. Default: 1e-5
615
- """
616
 
 
 
617
  def __init__(
618
  self,
619
- num_features: int,
620
- eps: float = 1e-5,
621
- ):
622
- super().__init__()
623
- # _log_api_usage_once(self)
624
- self.eps = eps
625
- self.register_buffer("weight", torch.ones(num_features))
626
- self.register_buffer("bias", torch.zeros(num_features))
627
- self.register_buffer("running_mean", torch.zeros(num_features))
628
- self.register_buffer("running_var", torch.ones(num_features))
629
-
630
- def _load_from_state_dict(
631
- self,
632
- state_dict: dict,
633
- prefix: str,
634
- local_metadata: dict,
635
- strict: bool,
636
- missing_keys: List[str],
637
- unexpected_keys: List[str],
638
- error_msgs: List[str],
639
- ):
640
- num_batches_tracked_key = prefix + "num_batches_tracked"
641
- if num_batches_tracked_key in state_dict:
642
- del state_dict[num_batches_tracked_key]
643
-
644
- super()._load_from_state_dict(
645
- state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
646
  )
647
 
648
- def forward(self, x: Tensor) -> Tensor:
649
- # move reshapes to the beginning
650
- # to make it fuser-friendly
651
- w = self.weight.reshape(1, -1, 1, 1)
652
- b = self.bias.reshape(1, -1, 1, 1)
653
- rv = self.running_var.reshape(1, -1, 1, 1)
654
- rm = self.running_mean.reshape(1, -1, 1, 1)
655
- scale = w * (rv + self.eps).rsqrt()
656
- bias = b - rm * scale
657
- return x * scale + bias
658
-
659
- def __repr__(self) -> str:
660
- return f"{self.__class__.__name__}({self.weight.shape[0]}, eps={self.eps})"
661
-
662
 
663
- class ConvNormActivation(torch.nn.Sequential):
664
  def __init__(
665
  self,
666
- in_channels: int,
667
- out_channels: int,
668
- kernel_size: int = 3,
669
- stride: int = 1,
670
- padding: Optional[int] = None,
671
- groups: int = 1,
672
- norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
673
- activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
674
- dilation: int = 1,
675
- inplace: Optional[bool] = True,
676
- bias: Optional[bool] = None,
677
- conv_layer: Callable[..., torch.nn.Module] = torch.nn.Conv2d,
678
  ) -> None:
 
679
 
680
- if padding is None:
681
- padding = (kernel_size - 1) // 2 * dilation
682
- if bias is None:
683
- bias = norm_layer is None
684
 
685
- layers = [
686
- conv_layer(
687
- in_channels,
688
- out_channels,
689
- kernel_size,
690
- stride,
691
- padding,
692
- dilation=dilation,
693
- groups=groups,
694
- bias=bias,
695
- )
696
- ]
697
 
698
- if norm_layer is not None:
699
- layers.append(norm_layer(out_channels))
700
 
701
- if activation_layer is not None:
702
- params = {} if inplace is None else {"inplace": inplace}
703
- layers.append(activation_layer(**params))
704
- super().__init__(*layers)
705
- # _log_api_usage_once(self)
706
- self.out_channels = out_channels
 
 
 
 
 
 
707
 
708
- if self.__class__ == ConvNormActivation:
709
- warnings.warn(
710
- "Don't use ConvNormActivation directly, please use Conv2dNormActivation and Conv3dNormActivation instead."
 
 
 
 
 
 
 
711
  )
 
712
 
 
 
 
 
 
 
 
 
 
713
 
714
- class Conv2dNormActivation(ConvNormActivation):
715
- """
716
- Configurable block used for Convolution2d-Normalization-Activation blocks.
 
 
 
 
 
 
 
717
 
718
- Args:
719
- in_channels (int): Number of channels in the input image
720
- out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
721
- kernel_size: (int, optional): Size of the convolving kernel. Default: 3
722
- stride (int, optional): Stride of the convolution. Default: 1
723
- padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will calculated as ``padding = (kernel_size - 1) // 2 * dilation``
724
- groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
725
- norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer wont be used. Default: ``torch.nn.BatchNorm2d``
726
- activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer wont be used. Default: ``torch.nn.ReLU``
727
- dilation (int): Spacing between kernel elements. Default: 1
728
- inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True``
729
- bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``.
730
 
731
- """
 
 
 
 
 
732
 
 
 
733
  def __init__(
734
  self,
735
- in_channels: int,
736
- out_channels: int,
737
- kernel_size: int = 3,
738
- stride: int = 1,
739
- padding: Optional[int] = None,
740
- groups: int = 1,
741
- norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
742
- activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
743
- dilation: int = 1,
744
- inplace: Optional[bool] = True,
745
- bias: Optional[bool] = None,
746
  ) -> None:
 
747
 
748
- super().__init__(
749
- in_channels,
750
- out_channels,
751
- kernel_size,
752
- stride,
753
- padding,
754
- groups,
755
- norm_layer,
756
- activation_layer,
757
- dilation,
758
- inplace,
759
- bias,
760
- torch.nn.Conv2d,
761
  )
762
 
 
 
763
 
764
- class Conv3dNormActivation(ConvNormActivation):
765
- """
766
- Configurable block used for Convolution3d-Normalization-Activation blocks.
 
 
 
 
 
 
 
 
 
 
767
 
768
- Args:
769
- in_channels (int): Number of channels in the input video.
770
- out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
771
- kernel_size: (int, optional): Size of the convolving kernel. Default: 3
772
- stride (int, optional): Stride of the convolution. Default: 1
773
- padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will calculated as ``padding = (kernel_size - 1) // 2 * dilation``
774
- groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
775
- norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer wont be used. Default: ``torch.nn.BatchNorm3d``
776
- activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer wont be used. Default: ``torch.nn.ReLU``
777
- dilation (int): Spacing between kernel elements. Default: 1
778
- inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True``
779
- bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``.
780
- """
 
 
 
 
 
 
 
 
781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  def __init__(
783
  self,
784
- in_channels: int,
785
- out_channels: int,
786
- kernel_size: int = 3,
787
- stride: int = 1,
788
- padding: Optional[int] = None,
789
- groups: int = 1,
790
- norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm3d,
791
- activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
792
- dilation: int = 1,
793
- inplace: Optional[bool] = True,
794
- bias: Optional[bool] = None,
795
  ) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796
 
797
- super().__init__(
798
- in_channels,
799
- out_channels,
800
- kernel_size,
801
- stride,
802
- padding,
803
- groups,
804
- norm_layer,
805
- activation_layer,
806
- dilation,
807
- inplace,
808
- bias,
809
- torch.nn.Conv3d,
 
 
 
810
  )
811
 
 
 
 
 
 
 
 
 
812
 
813
- class SqueezeExcitation(torch.nn.Module):
814
- """
815
- This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
816
- Parameters ``activation``, and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.
817
 
818
- Args:
819
- input_channels (int): Number of channels in the input image
820
- squeeze_channels (int): Number of squeeze channels
821
- activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
822
- scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
823
- """
824
 
825
- def __init__(
826
- self,
827
- input_channels: int,
828
- squeeze_channels: int,
829
- activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
830
- scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
831
- ) -> None:
832
- super().__init__()
833
- # _log_api_usage_once(self)
834
- self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
835
- self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1)
836
- self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1)
837
- self.activation = activation()
838
- self.scale_activation = scale_activation()
839
 
840
- def _scale(self, input: Tensor) -> Tensor:
841
- scale = self.avgpool(input)
842
- scale = self.fc1(scale)
843
- scale = self.activation(scale)
844
- scale = self.fc2(scale)
845
- return self.scale_activation(scale)
846
 
847
- def forward(self, input: Tensor) -> Tensor:
848
- scale = self._scale(input)
849
- return scale * input
 
 
 
 
 
 
 
 
 
 
 
850
 
 
 
 
 
 
 
851
 
852
- class MLP(torch.nn.Sequential):
853
- """This block implements the multi-layer perceptron (MLP) module.
 
 
 
 
 
 
 
 
 
 
 
 
854
 
855
- Args:
856
- in_channels (int): Number of channels of the input
857
- hidden_channels (List[int]): List of the hidden channel dimensions
858
- norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer wont be used. Default: ``None``
859
- activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer wont be used. Default: ``torch.nn.ReLU``
860
- inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True``
861
- bias (bool): Whether to use bias in the linear layer. Default ``True``
862
- dropout (float): The probability for the dropout layer. Default: 0.0
863
- """
864
 
865
- def __init__(
866
- self,
867
- in_channels: int,
868
- hidden_channels: List[int],
869
- norm_layer: Optional[Callable[..., torch.nn.Module]] = None,
870
- activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
871
- inplace: Optional[bool] = True,
872
- bias: bool = True,
873
- dropout: float = 0.0,
874
- ):
875
- # The addition of `norm_layer` is inspired from the implementation of TorchMultimodal:
876
- # https://github.com/facebookresearch/multimodal/blob/5dec8a/torchmultimodal/modules/layers/mlp.py
877
- params = {} if inplace is None else {"inplace": inplace}
878
 
879
- layers = []
880
- in_dim = in_channels
881
- for hidden_dim in hidden_channels[:-1]:
882
- layers.append(torch.nn.Linear(in_dim, hidden_dim, bias=bias))
883
- if norm_layer is not None:
884
- layers.append(norm_layer(hidden_dim))
885
- layers.append(activation_layer(**params))
886
- layers.append(torch.nn.Dropout(dropout, **params))
887
- in_dim = hidden_dim
888
 
889
- layers.append(torch.nn.Linear(in_dim, hidden_channels[-1], bias=bias))
890
- layers.append(torch.nn.Dropout(dropout, **params))
891
 
892
- super().__init__(*layers)
893
- # _log_api_usage_once(self)
894
 
895
 
896
- class Permute(torch.nn.Module):
897
- """This module returns a view of the tensor input with its dimensions permuted.
 
 
 
 
 
 
 
 
 
898
 
899
- Args:
900
- dims (List[int]): The desired ordering of dimensions
901
- """
 
 
 
 
 
 
902
 
903
- def __init__(self, dims: List[int]):
904
- super().__init__()
905
- self.dims = dims
906
 
907
- def forward(self, x: Tensor) -> Tensor:
908
- return torch.permute(x, self.dims)
909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
910
 
911
 
912
 
 
913
 
914
  def normalize_array(x: list):
915
 
 
73
  activation_indices = {'efficientnet': [0, 3]}
74
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
+ #### extra torchvision stuff ####
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
+ class FrozenBatchNorm2d(torch.nn.Module):
81
+ """
82
+ BatchNorm2d where the batch statistics and the affine parameters are fixed
83
 
84
+ Args:
85
+ num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
86
+ eps (float): a value added to the denominator for numerical stability. Default: 1e-5
87
+ """
88
 
 
 
89
  def __init__(
90
  self,
91
+ num_features: int,
92
+ eps: float = 1e-5,
93
+ ):
94
+ super().__init__()
95
+ # _log_api_usage_once(self)
96
+ self.eps = eps
97
+ self.register_buffer("weight", torch.ones(num_features))
98
+ self.register_buffer("bias", torch.zeros(num_features))
99
+ self.register_buffer("running_mean", torch.zeros(num_features))
100
+ self.register_buffer("running_var", torch.ones(num_features))
101
+
102
+ def _load_from_state_dict(
103
+ self,
104
+ state_dict: dict,
105
+ prefix: str,
106
+ local_metadata: dict,
107
+ strict: bool,
108
+ missing_keys: List[str],
109
+ unexpected_keys: List[str],
110
+ error_msgs: List[str],
111
+ ):
112
+ num_batches_tracked_key = prefix + "num_batches_tracked"
113
+ if num_batches_tracked_key in state_dict:
114
+ del state_dict[num_batches_tracked_key]
115
+
116
+ super()._load_from_state_dict(
117
+ state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
118
  )
119
 
120
+ def forward(self, x: Tensor) -> Tensor:
121
+ # move reshapes to the beginning
122
+ # to make it fuser-friendly
123
+ w = self.weight.reshape(1, -1, 1, 1)
124
+ b = self.bias.reshape(1, -1, 1, 1)
125
+ rv = self.running_var.reshape(1, -1, 1, 1)
126
+ rm = self.running_mean.reshape(1, -1, 1, 1)
127
+ scale = w * (rv + self.eps).rsqrt()
128
+ bias = b - rm * scale
129
+ return x * scale + bias
130
+
131
+ def __repr__(self) -> str:
132
+ return f"{self.__class__.__name__}({self.weight.shape[0]}, eps={self.eps})"
133
 
134
+
135
class ConvNormActivation(torch.nn.Sequential):
    """Sequential stack of a convolution, an optional norm and an optional activation.

    This is the shared base for the dimension-specific wrappers; instantiating
    it directly emits a warning.

    Args:
        in_channels (int): Channels of the input.
        out_channels (int): Channels produced by the convolution.
        kernel_size (int): Size of the convolving kernel. Default: 3
        stride (int): Stride of the convolution. Default: 1
        padding (int, optional): Padding passed to the convolution. When ``None`` it is
            computed as ``(kernel_size - 1) // 2 * dilation``.
        groups (int): Number of convolution groups. Default: 1
        norm_layer (callable, optional): Called as ``norm_layer(out_channels)``; skipped when ``None``.
        activation_layer (callable, optional): Called with ``inplace=inplace`` (or with no
            arguments when ``inplace`` is ``None``); skipped when ``None``.
        dilation (int): Dilation of the convolution. Default: 1
        inplace (bool, optional): Forwarded to the activation layer. Default: ``True``
        bias (bool, optional): Convolution bias flag. When ``None`` a bias is used
            only if ``norm_layer`` is ``None``.
        conv_layer (callable): Convolution constructor. Default: ``torch.nn.Conv2d``
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: Optional[int] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: int = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
        conv_layer: Callable[..., torch.nn.Module] = torch.nn.Conv2d,
    ) -> None:
        has_norm = norm_layer is not None

        # Default padding: (kernel_size - 1) // 2, scaled by the dilation.
        if padding is None:
            padding = (kernel_size - 1) // 2 * dilation
        # A conv bias is only enabled by default when no norm layer follows.
        if bias is None:
            bias = not has_norm

        modules = [
            conv_layer(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                dilation=dilation,
                groups=groups,
                bias=bias,
            )
        ]
        if has_norm:
            modules.append(norm_layer(out_channels))
        if activation_layer is not None:
            activation_kwargs = {"inplace": inplace} if inplace is not None else {}
            modules.append(activation_layer(**activation_kwargs))

        super().__init__(*modules)
        self.out_channels = out_channels

        # Only the exact base class warns; subclasses are the supported entry points.
        if self.__class__ == ConvNormActivation:
            warnings.warn(
                "Don't use ConvNormActivation directly, please use Conv2dNormActivation and Conv3dNormActivation instead."
            )
 
184
 
 
 
 
185
 
186
class Conv2dNormActivation(ConvNormActivation):
    """Convolution2d-Normalization-Activation block built on ``torch.nn.Conv2d``.

    Args:
        in_channels (int): Number of channels in the input image.
        out_channels (int): Number of channels produced by the block.
        kernel_size (int, optional): Size of the convolving kernel. Default: 3
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int, optional): Padding passed to the convolution. Default: ``None``,
            in which case it is calculated as ``padding = (kernel_size - 1) // 2 * dilation``.
        groups (int, optional): Number of blocked connections from input channels to
            output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer stacked on top of
            the convolution. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation stacked on top
            of the norm layer (or of the convolution when there is no norm layer). If ``None``
            this layer won't be used. Default: ``torch.nn.ReLU``
        dilation (int): Spacing between kernel elements. Default: 1
        inplace (bool): Parameter for the activation layer. Default: ``True``
        bias (bool, optional): Whether to use a bias in the convolution. By default a bias
            is included only if ``norm_layer is None``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: Optional[int] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: int = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        # Everything is forwarded untouched; only the convolution type is pinned.
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            norm_layer=norm_layer,
            activation_layer=activation_layer,
            dilation=dilation,
            inplace=inplace,
            bias=bias,
            conv_layer=torch.nn.Conv2d,
        )
234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
class Conv3dNormActivation(ConvNormActivation):
    """Convolution3d-Normalization-Activation block built on ``torch.nn.Conv3d``.

    Args:
        in_channels (int): Number of channels in the input video.
        out_channels (int): Number of channels produced by the block.
        kernel_size (int, optional): Size of the convolving kernel. Default: 3
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int, optional): Padding passed to the convolution. Default: ``None``,
            in which case it is calculated as ``padding = (kernel_size - 1) // 2 * dilation``.
        groups (int, optional): Number of blocked connections from input channels to
            output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer stacked on top of
            the convolution. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm3d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation stacked on top
            of the norm layer (or of the convolution when there is no norm layer). If ``None``
            this layer won't be used. Default: ``torch.nn.ReLU``
        dilation (int): Spacing between kernel elements. Default: 1
        inplace (bool): Parameter for the activation layer. Default: ``True``
        bias (bool, optional): Whether to use a bias in the convolution. By default a bias
            is included only if ``norm_layer is None``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: Optional[int] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm3d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: int = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        # Everything is forwarded untouched; only the convolution type is pinned.
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            norm_layer=norm_layer,
            activation_layer=activation_layer,
            dilation=dilation,
            inplace=inplace,
            bias=bias,
            conv_layer=torch.nn.Conv3d,
        )
283
+
284
+
285
class SqueezeExcitation(torch.nn.Module):
    """Squeeze-and-Excitation block (https://arxiv.org/abs/1709.01507, Fig. 1).

    ``activation`` and ``scale_activation`` play the roles of ``delta`` and
    ``sigma`` in eq. 3 of the paper.

    Args:
        input_channels (int): Number of channels in the input image
        squeeze_channels (int): Number of squeeze channels
        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
    """

    def __init__(
        self,
        input_channels: int,
        squeeze_channels: int,
        activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
        scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
    ) -> None:
        super().__init__()
        # Global pooling down to 1x1, then two 1x1 convolutions acting as the
        # squeeze (input -> squeeze) and excite (squeeze -> input) projections.
        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1)
        self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1)
        self.activation = activation()
        self.scale_activation = scale_activation()

    def _scale(self, input: Tensor) -> Tensor:
        """Compute the per-channel gate for ``input``."""
        squeezed = self.fc1(self.avgpool(input))
        excited = self.fc2(self.activation(squeezed))
        return self.scale_activation(excited)

    def forward(self, input: Tensor) -> Tensor:
        """Return ``input`` multiplied by its channel gate."""
        gate = self._scale(input)
        return gate * input
 
 
 
 
 
 
322
 
 
 
323
 
324
class MLP(torch.nn.Sequential):
    """This block implements the multi-layer perceptron (MLP) module.

    For every entry of ``hidden_channels`` except the last, the stack contains
    ``Linear -> [norm] -> [activation] -> Dropout``; the last entry adds a final
    ``Linear -> Dropout`` pair.

    Args:
        in_channels (int): Number of channels of the input
        hidden_channels (List[int]): List of the hidden channel dimensions. Must not be
            empty (the last entry is the output width; an empty list raises ``IndexError``).
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer stacked on top of each
            hidden linear layer. If ``None`` this layer won't be used. Default: ``None``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation stacked on top of
            the norm layer (or of the linear layer when there is no norm). If ``None`` this
            layer won't be used. Default: ``torch.nn.ReLU``
        inplace (bool, optional): Forwarded as ``inplace=`` to the activation and dropout layers.
            When ``None`` the argument is not passed at all. Default: ``True``
        bias (bool): Whether to use bias in the linear layers. Default: ``True``
        dropout (float): The probability for the dropout layers. Default: 0.0
    """

    def __init__(
        self,
        in_channels: int,
        hidden_channels: List[int],
        norm_layer: Optional[Callable[..., torch.nn.Module]] = None,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        inplace: Optional[bool] = True,
        bias: bool = True,
        dropout: float = 0.0,
    ):
        # The addition of `norm_layer` is inspired from the implementation of TorchMultimodal:
        # https://github.com/facebookresearch/multimodal/blob/5dec8a/torchmultimodal/modules/layers/mlp.py
        params = {} if inplace is None else {"inplace": inplace}

        layers = []
        in_dim = in_channels
        for hidden_dim in hidden_channels[:-1]:
            layers.append(torch.nn.Linear(in_dim, hidden_dim, bias=bias))
            if norm_layer is not None:
                layers.append(norm_layer(hidden_dim))
            # ``activation_layer`` is optional: honour ``None`` instead of
            # failing with "'NoneType' object is not callable".
            if activation_layer is not None:
                layers.append(activation_layer(**params))
            layers.append(torch.nn.Dropout(dropout, **params))
            in_dim = hidden_dim

        # Output projection: no norm or activation, only the trailing dropout.
        layers.append(torch.nn.Linear(in_dim, hidden_channels[-1], bias=bias))
        layers.append(torch.nn.Dropout(dropout, **params))

        super().__init__(*layers)
 
 
 
 
 
 
 
 
 
 
 
 
366
 
 
 
367
 
368
class Permute(torch.nn.Module):
    """Module wrapper around ``torch.permute`` with a fixed dimension order.

    Args:
        dims (List[int]): The desired ordering of dimensions
    """

    def __init__(self, dims: List[int]):
        super().__init__()
        # Stored as given and reused on every forward call.
        self.dims = dims

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` with its dimensions reordered according to ``self.dims``."""
        order = self.dims
        return torch.permute(x, order)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
 
382
 
383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
+ ########## EfficientNet ############
386
@dataclass
class _MBConvConfig:
    """Common per-stage settings shared by the MBConv-style configuration classes."""

    # Channel expansion factor applied to ``input_channels`` inside a block.
    expand_ratio: float
    # Convolution kernel size.
    kernel: int
    # Convolution stride.
    stride: int
    # Channels entering the stage.
    input_channels: int
    # Channels leaving the stage.
    out_channels: int
    # Number of blocks in the stage.
    num_layers: int
    # Callable that builds the block for this stage.
    block: Callable[..., nn.Module]

    @staticmethod
    def adjust_channels(
        channels: int, width_mult: float, min_value: Optional[int] = None
    ) -> int:
        """Scale ``channels`` by ``width_mult`` and pass the result to ``_make_divisible`` with divisor 8."""
        scaled = channels * width_mult
        return _make_divisible(scaled, 8, min_value)
401
 
 
402
 
403
class MBConvConfig(_MBConvConfig):
    """Stage settings for MBConv blocks.

    Stores information listed at Table 1 of the EfficientNet paper & Table 4 of
    the EfficientNetV2 paper. ``input_channels`` and ``out_channels`` are scaled
    by ``width_mult`` and ``num_layers`` by ``depth_mult`` before being stored.
    """

    def __init__(
        self,
        expand_ratio: float,
        kernel: int,
        stride: int,
        input_channels: int,
        out_channels: int,
        num_layers: int,
        width_mult: float = 1.0,
        depth_mult: float = 1.0,
        block: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        scaled_in = self.adjust_channels(input_channels, width_mult)
        scaled_out = self.adjust_channels(out_channels, width_mult)
        scaled_depth = self.adjust_depth(num_layers, depth_mult)
        # MBConv is the default block builder when none is supplied.
        block_builder = MBConv if block is None else block
        super().__init__(
            expand_ratio,
            kernel,
            stride,
            scaled_in,
            scaled_out,
            scaled_depth,
            block_builder,
        )

    @staticmethod
    def adjust_depth(num_layers: int, depth_mult: float):
        """Return ``num_layers * depth_mult`` rounded up to an integer."""
        scaled = num_layers * depth_mult
        return int(math.ceil(scaled))
435
 
 
 
 
 
436
 
437
class FusedMBConvConfig(_MBConvConfig):
    """Stage settings for FusedMBConv blocks.

    Stores information listed at Table 4 of the EfficientNetV2 paper. Unlike
    ``MBConvConfig`` no width or depth scaling is applied; values are stored as given.
    """

    def __init__(
        self,
        expand_ratio: float,
        kernel: int,
        stride: int,
        input_channels: int,
        out_channels: int,
        num_layers: int,
        block: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        # FusedMBConv is the default block builder when none is supplied.
        block_builder = FusedMBConv if block is None else block
        super().__init__(
            expand_ratio,
            kernel,
            stride,
            input_channels,
            out_channels,
            num_layers,
            block_builder,
        )
460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
 
462
class MBConv(nn.Module):
    """MBConv block: optional 1x1 expansion, depthwise conv, squeeze-excitation, 1x1 projection.

    A residual connection (with stochastic depth applied to the block output)
    is used when the stride is 1 and the input and output channel counts match.

    Args:
        cnf (MBConvConfig): Settings for this block.
        stochastic_depth_prob (float): Probability passed to ``StochasticDepth`` (mode ``"row"``).
        norm_layer (Callable[..., nn.Module]): Normalization layer constructor used by every conv.
        se_layer (Callable[..., nn.Module]): Squeeze-excitation constructor. Default: ``SqueezeExcitation``

    Raises:
        ValueError: If ``cnf.stride`` is not within ``[1, 2]``.
    """

    def __init__(
        self,
        cnf: MBConvConfig,
        stochastic_depth_prob: float,
        norm_layer: Callable[..., nn.Module],
        se_layer: Callable[..., nn.Module] = SqueezeExcitation,
    ) -> None:
        super().__init__()

        if not (1 <= cnf.stride <= 2):
            raise ValueError("illegal stride value")

        in_ch = cnf.input_channels
        out_ch = cnf.out_channels
        self.use_res_connect = cnf.stride == 1 and in_ch == out_ch

        # Every convolution in the block shares the same norm layer.
        conv_block = partial(Conv2dNormActivation, norm_layer=norm_layer)
        mid_ch = cnf.adjust_channels(in_ch, cnf.expand_ratio)

        stages: List[nn.Module] = []

        # expand (skipped when the expansion leaves the channel count unchanged)
        if mid_ch != in_ch:
            stages.append(
                conv_block(in_ch, mid_ch, kernel_size=1, activation_layer=nn.SiLU)
            )

        # depthwise
        stages.append(
            conv_block(
                mid_ch,
                mid_ch,
                kernel_size=cnf.kernel,
                stride=cnf.stride,
                groups=mid_ch,
                activation_layer=nn.SiLU,
            )
        )

        # squeeze and excitation; squeeze width derives from the block input channels
        stages.append(
            se_layer(
                mid_ch,
                max(1, in_ch // 4),
                activation=partial(nn.SiLU, inplace=True),
            )
        )

        # project (no activation)
        stages.append(
            conv_block(mid_ch, out_ch, kernel_size=1, activation_layer=None)
        )

        self.block = nn.Sequential(*stages)
        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
        self.out_channels = out_ch

    def forward(self, input: Tensor) -> Tensor:
        """Run the block, adding the residual input when ``use_res_connect`` is set."""
        out = self.block(input)
        if not self.use_res_connect:
            return out
        out = self.stochastic_depth(out)
        out += input
        return out
539
 
540
+
541
class FusedMBConv(nn.Module):
    """FusedMBConv block: a regular conv replaces the expansion + depthwise pair.

    When the expansion changes the channel count the block is a fused
    ``kernel x kernel`` expansion conv followed by a 1x1 projection; otherwise
    it is a single ``kernel x kernel`` conv with activation. A residual
    connection (with stochastic depth applied to the block output) is used when
    the stride is 1 and the input and output channel counts match.

    Args:
        cnf (FusedMBConvConfig): Settings for this block.
        stochastic_depth_prob (float): Probability passed to ``StochasticDepth`` (mode ``"row"``).
        norm_layer (Callable[..., nn.Module]): Normalization layer constructor used by every conv.

    Raises:
        ValueError: If ``cnf.stride`` is not within ``[1, 2]``.
    """

    def __init__(
        self,
        cnf: FusedMBConvConfig,
        stochastic_depth_prob: float,
        norm_layer: Callable[..., nn.Module],
    ) -> None:
        super().__init__()

        if not (1 <= cnf.stride <= 2):
            raise ValueError("illegal stride value")

        in_ch = cnf.input_channels
        out_ch = cnf.out_channels
        self.use_res_connect = cnf.stride == 1 and in_ch == out_ch

        # Every convolution in the block shares the same norm layer.
        conv_block = partial(Conv2dNormActivation, norm_layer=norm_layer)
        mid_ch = cnf.adjust_channels(in_ch, cnf.expand_ratio)

        stages: List[nn.Module] = []
        if mid_ch == in_ch:
            # No expansion: one conv maps input straight to output channels.
            stages.append(
                conv_block(
                    in_ch,
                    out_ch,
                    kernel_size=cnf.kernel,
                    stride=cnf.stride,
                    activation_layer=nn.SiLU,
                )
            )
        else:
            # fused expand
            stages.append(
                conv_block(
                    in_ch,
                    mid_ch,
                    kernel_size=cnf.kernel,
                    stride=cnf.stride,
                    activation_layer=nn.SiLU,
                )
            )
            # project (no activation)
            stages.append(
                conv_block(mid_ch, out_ch, kernel_size=1, activation_layer=None)
            )

        self.block = nn.Sequential(*stages)
        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
        self.out_channels = out_ch

    def forward(self, input: Tensor) -> Tensor:
        """Run the block, adding the residual input when ``use_res_connect`` is set."""
        out = self.block(input)
        if not self.use_res_connect:
            return out
        out = self.stochastic_depth(out)
        out += input
        return out
606
+
607
+
608
class EfficientNetConfig(PretrainedConfig):
    """Configuration holding the hyper-parameters used to build ``EfficientNet``.

    Args:
        dropout (float): The dropout probability. Default: 0.25
        num_channels (int): Number of input channels. Default: 61
        stochastic_depth_prob (float): The stochastic depth probability. Default: 0.2
        num_classes (int): Number of classes. Default: 2
        norm_layer (Optional[Callable[..., nn.Module]]): Accepted for signature
            compatibility; it is not stored on the configuration.
        size (str): Architecture variant suffix (e.g. ``'v2_s'``). Default: ``'v2_s'``
        width_mult (float): Width multiplier. Default: 1.0
        depth_mult (float): Depth multiplier. Default: 1.0
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "efficientnet"

    def __init__(
        self,
        dropout: float = 0.25,
        num_channels: int = 61,
        stochastic_depth_prob: float = 0.2,
        num_classes: int = 2,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        size: str = 'v2_s',
        width_mult: float = 1.0,
        depth_mult: float = 1.0,
        **kwargs: Any,
    ) -> None:
        # Model hyper-parameters are set before the parent constructor runs.
        self.dropout = dropout
        self.num_channels = num_channels
        self.num_classes = num_classes
        self.size = size
        self.stochastic_depth_prob = stochastic_depth_prob
        self.width_mult = width_mult
        self.depth_mult = depth_mult

        super().__init__(**kwargs)
659
+
660
+
661
class EfficientNetPreTrained(PreTrainedModel):
    """``PreTrainedModel`` wrapper that builds an ``EfficientNet`` from an ``EfficientNetConfig``.

    The wrapped network is stored as ``self.model`` and is constructed from the
    ``dropout``, ``num_channels``, ``num_classes``, ``size``,
    ``stochastic_depth_prob``, ``width_mult`` and ``depth_mult`` fields of the
    configuration.
    """

    config_class = EfficientNetConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = EfficientNet(
            dropout=config.dropout,
            num_channels=config.num_channels,
            num_classes=config.num_classes,
            size=config.size,
            stochastic_depth_prob=config.stochastic_depth_prob,
            width_mult=config.width_mult,
            depth_mult=config.depth_mult,
        )

    def forward(self, tensor):
        """Run ``tensor`` through the wrapped ``EfficientNet`` and return its output."""
        # Invoke the module itself rather than ``.forward()``: calling the
        # module goes through ``nn.Module.__call__``, which runs any hooks
        # registered on ``self.model``; ``.forward()`` silently skips them.
        return self.model(tensor)
680
+
681
+
682
class EfficientNet(nn.Module):
    """EfficientNet V1 and V2 main class.

    The network structure is looked up via ``_efficientnet_conf`` from ``size``
    and consists of ``features`` (stem conv, the inverted-residual stages and a
    final 1x1 conv), a global average pool, and a ``classifier``
    (dropout + linear).

    Args:
        dropout (float): The dropout probability
        num_channels (int): Number of input channels of the stem convolution
        stochastic_depth_prob (float): The stochastic depth probability; it is scaled
            linearly with the index of each block
        num_classes (int): Number of classes
        norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization
            layer to use. Defaults to ``nn.BatchNorm2d`` when ``None``.
        size (str): Variant suffix; the architecture name is ``"efficientnet_" + size``
        width_mult (float): Width multiplier forwarded to ``_efficientnet_conf``
        depth_mult (float): Depth multiplier forwarded to ``_efficientnet_conf``
        **kwargs: Only the deprecated ``block`` key is inspected.

    Raises:
        ValueError: If the resolved structure is empty.
        TypeError: If the resolved structure is not a sequence of ``_MBConvConfig``.
    """

    def __init__(
        self,
        dropout: float = 0.25,
        num_channels: int = 61,
        stochastic_depth_prob: float = 0.2,
        num_classes: int = 2,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        size: str = 'v2_s',
        width_mult: float = 1.0,
        depth_mult: float = 1.0,
        **kwargs: Any,
    ) -> None:
        super().__init__()

        inverted_residual_setting, last_channel = _efficientnet_conf(
            "efficientnet_%s" % (size), width_mult=width_mult, depth_mult=depth_mult
        )

        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty")
        is_config_sequence = isinstance(inverted_residual_setting, Sequence) and all(
            isinstance(s, _MBConvConfig) for s in inverted_residual_setting
        )
        if not is_config_sequence:
            raise TypeError(
                "The inverted_residual_setting should be List[MBConvConfig]"
            )

        if "block" in kwargs:
            warnings.warn(
                "The parameter 'block' is deprecated since 0.13 and will be removed 0.15. "
                "Please pass this information on 'MBConvConfig.block' instead."
            )
            legacy_block = kwargs["block"]
            if legacy_block is not None:
                # The override only applies to MBConvConfig stages.
                for s in inverted_residual_setting:
                    if isinstance(s, MBConvConfig):
                        s.block = legacy_block

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        # building first layer
        stem_channels = inverted_residual_setting[0].input_channels
        feature_layers: List[nn.Module] = [
            Conv2dNormActivation(
                num_channels,
                stem_channels,
                kernel_size=3,
                stride=2,
                norm_layer=norm_layer,
                activation_layer=nn.SiLU,
            )
        ]

        # building inverted residual blocks
        total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting)
        block_index = 0
        for cnf in inverted_residual_setting:
            stage_blocks: List[nn.Module] = []
            for position in range(cnf.num_layers):
                # copy to avoid modifications. shallow copy is enough
                block_cnf = copy.copy(cnf)

                # only the first block of a stage changes channels / resolution
                if position > 0:
                    block_cnf.input_channels = block_cnf.out_channels
                    block_cnf.stride = 1

                # stochastic depth probability grows with the global block index
                sd_prob = (
                    stochastic_depth_prob * float(block_index) / total_stage_blocks
                )

                stage_blocks.append(block_cnf.block(block_cnf, sd_prob, norm_layer))
                block_index += 1

            feature_layers.append(nn.Sequential(*stage_blocks))

        # building last several layers
        head_in = inverted_residual_setting[-1].out_channels
        head_out = last_channel if last_channel is not None else 4 * head_in
        feature_layers.append(
            Conv2dNormActivation(
                head_in,
                head_out,
                kernel_size=1,
                norm_layer=norm_layer,
                activation_layer=nn.SiLU,
            )
        )

        self.features = nn.Sequential(*feature_layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout, inplace=True),
            nn.Linear(head_out, num_classes),
        )

        self._reset_parameters()

    def _reset_parameters(self) -> None:
        """Initialise conv, norm and linear parameters of every sub-module."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out")
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                bound = 1.0 / math.sqrt(module.out_features)
                nn.init.uniform_(module.weight, -bound, bound)
                nn.init.zeros_(module.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        """Features -> global average pool -> flatten -> classifier."""
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def forward(self, x: Tensor) -> Tensor:
        """Return the classifier output for ``x``."""
        return self._forward_impl(x)
828
 
829
 
830
+ # def _efficientnet(
831
+ # inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
832
+ # dropout: float,
833
+ # last_channel: Optional[int],
834
+ # weights=None,
835
+ # num_channels: int = 61,
836
+ # stochastic_depth_prob: float = 0.2,
837
+ # progress: bool = True,
838
+ # num_classes: int = 2,
839
+ # **kwargs: Any,
840
+ # ) -> EfficientNetCongig:
841
 
842
+ # model = EfficientNetCongif(
843
+ # inverted_residual_setting,
844
+ # dropout,
845
+ # num_classes=num_classes,
846
+ # num_channels=num_channels,
847
+ # stochastic_depth_prob=stochastic_depth_prob,
848
+ # last_channel=last_channel,
849
+ # **kwargs,
850
+ # )
851
 
852
+ # return model
 
 
853
 
 
 
854
 
855
def _efficientnet_conf(
    arch: str,
    **kwargs: Any,
) -> Tuple[Sequence[Union[MBConvConfig, FusedMBConvConfig]], Optional[int]]:
    """Return the stage settings and last-channel count for ``arch``.

    Args:
        arch (str): Architecture name. Matching is by prefix: ``efficientnet_b*``,
            ``efficientnet_v2_s``, ``efficientnet_v2_m`` or ``efficientnet_v2_l``.
        **kwargs: ``width_mult`` and ``depth_mult`` are required (and popped) for
            the ``efficientnet_b*`` family; the V2 variants do not read them.

    Returns:
        A ``(inverted_residual_setting, last_channel)`` tuple. ``last_channel`` is
        ``None`` for the ``efficientnet_b*`` family and 1280 for the V2 variants.

    Raises:
        ValueError: If ``arch`` matches none of the supported prefixes.
        KeyError: If ``width_mult`` or ``depth_mult`` is missing for ``efficientnet_b*``.
    """
    inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]]

    if arch.startswith("efficientnet_b"):
        bneck_conf = partial(
            MBConvConfig,
            width_mult=kwargs.pop("width_mult"),
            depth_mult=kwargs.pop("depth_mult"),
        )
        # (expand_ratio, kernel, stride, input_channels, out_channels, num_layers)
        base_rows = (
            (1, 3, 1, 32, 16, 1),
            (6, 3, 2, 16, 24, 2),
            (6, 5, 2, 24, 40, 2),
            (6, 3, 2, 40, 80, 3),
            (6, 5, 1, 80, 112, 3),
            (6, 5, 2, 112, 192, 4),
            (6, 3, 1, 192, 320, 1),
        )
        inverted_residual_setting = [bneck_conf(*row) for row in base_rows]
        return inverted_residual_setting, None

    # V2 variants: FusedMBConv stages first, then MBConv stages.
    # Each row is (expand_ratio, kernel, stride, input_channels, out_channels, num_layers).
    v2_variants = {
        "efficientnet_v2_s": (
            ((1, 3, 1, 24, 24, 2), (4, 3, 2, 24, 48, 4), (4, 3, 2, 48, 64, 4)),
            ((4, 3, 2, 64, 128, 6), (6, 3, 1, 128, 160, 9), (6, 3, 2, 160, 256, 15)),
        ),
        "efficientnet_v2_m": (
            ((1, 3, 1, 24, 24, 3), (4, 3, 2, 24, 48, 5), (4, 3, 2, 48, 80, 5)),
            (
                (4, 3, 2, 80, 160, 7),
                (6, 3, 1, 160, 176, 14),
                (6, 3, 2, 176, 304, 18),
                (6, 3, 1, 304, 512, 5),
            ),
        ),
        "efficientnet_v2_l": (
            ((1, 3, 1, 32, 32, 4), (4, 3, 2, 32, 64, 7), (4, 3, 2, 64, 96, 7)),
            (
                (4, 3, 2, 96, 192, 10),
                (6, 3, 1, 192, 224, 19),
                (6, 3, 2, 224, 384, 25),
                (6, 3, 1, 384, 640, 7),
            ),
        ),
    }
    for prefix, (fused_rows, mbconv_rows) in v2_variants.items():
        if arch.startswith(prefix):
            inverted_residual_setting = [
                FusedMBConvConfig(*row) for row in fused_rows
            ] + [MBConvConfig(*row) for row in mbconv_rows]
            return inverted_residual_setting, 1280

    raise ValueError(f"Unsupported model type {arch}")
912
 
913
 
914
 
915
+ ##### normal stuff ####
916
 
917
  def normalize_array(x: list):
918