Spaces:

HemaAM
/

YoloV3_PascalVOC_Dataset

Sleeping

App Files Files Community

HemaAM committed on Aug 27, 2023

Commit

9240536

1 Parent(s): 62b93e3

upload modified model

Browse files

Files changed (2) hide show

utils.py +14 -5
yolov3.py +174 -0

utils.py CHANGED Viewed

@@ -3,11 +3,22 @@ import torch
 import numpy as np
 import cv2
 import random
 from pytorch_grad_cam.base_cam import BaseCAM
 from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
 from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
 def cells_to_bboxes(predictions, anchors, S, is_preds=True):
     """
@@ -142,9 +153,7 @@ def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
     return bboxes_after_nms
-def draw_prediction_boxes(image: np.ndarray, boxes: List[List], class_labels: List[str]) -> np.ndarray:
     """Plots predicted bounding boxes on the image"""
     colors = [[random.randint(0, 255) for _ in range(3)] for name in class_labels]
@@ -250,10 +259,10 @@ class YoloGradCAM(BaseCAM):
         # This gives you more flexibility in case you just want to
         # use all conv layers for example, all Batchnorm layers,
         # or something else.
-        cam_per_layer = self.compute_cam_per_layer(input_tensor,
                                                    targets,
                                                    eigen_smooth)
-        return self.aggregate_multi_layers(cam_per_layer)
     def get_cam_image(self,
                       input_tensor,

 import numpy as np
 import cv2
 import random
+import config
 from pytorch_grad_cam.base_cam import BaseCAM
 from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
 from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+def load_checkpoint(checkpoint_file, model, optimizer, lr):
+    print("=> Loading checkpoint")
+    checkpoint = torch.load(checkpoint_file, map_location=config.DEVICE)
+    model.load_state_dict(checkpoint["state_dict"])
+    optimizer.load_state_dict(checkpoint["optimizer"])
+    # If we don't do this then it will just have learning rate of old checkpoint
+    # and it will lead to many hours of debugging \:
+    for param_group in optimizer.param_groups:
+        param_group["lr"] = lr
 def cells_to_bboxes(predictions, anchors, S, is_preds=True):
     """
     return bboxes_after_nms
+def draw_bounding_boxes(image: np.ndarray, boxes: List[List], class_labels: List[str]) -> np.ndarray:
     """Plots predicted bounding boxes on the image"""
     colors = [[random.randint(0, 255) for _ in range(3)] for name in class_labels]
         # This gives you more flexibility in case you just want to
         # use all conv layers for example, all Batchnorm layers,
         # or something else.
+        grad_cam_per_layer = self.compute_cam_per_layer(input_tensor,
                                                    targets,
                                                    eigen_smooth)
+        return self.aggregate_multi_layers(grad_cam_per_layer)
     def get_cam_image(self,
                       input_tensor,

yolov3.py ADDED Viewed

	@@ -0,0 +1,174 @@

+"""
+Implementation of YOLOv3 architecture
+"""
+import torch
+import torch.nn as nn
+import config as modelConfig
+"""
+Information about architecture config:
+Tuple is structured by (filters, kernel_size, stride)
+Every conv is a same convolution.
+List is structured by "B" indicating a residual block followed by the number of repeats
+"S" is for scale prediction block and computing the yolo loss
+"U" is for upsampling the feature map and concatenating with a previous layer
+"""
+config = [  # consumed entry by entry in YOLOv3._create_conv_layers
+    (32, 3, 1),  # tuple = (filters, kernel_size, stride) -> one CNNBlock
+    (64, 3, 2),  # stride-2 conv
+    ["B", 1],  # list = ["B", num_repeats] -> ResidualBlock with that many repeats
+    (128, 3, 2),
+    ["B", 2],
+    (256, 3, 2),
+    ["B", 8],  # blocks with 8 repeats are saved as route connections in YOLOv3.forward
+    (512, 3, 2),
+    ["B", 8],  # second saved route connection
+    (1024, 3, 2),
+    ["B", 4],  # Darknet-53 backbone ends here
+    (512, 1, 1),
+    (1024, 3, 1),
+    "S",  # expands to ResidualBlock(use_residual=False) + 1x1 CNNBlock + ScalePrediction
+    (256, 1, 1),
+    "U",  # nn.Upsample(scale_factor=2); forward then concatenates the last saved route
+    (256, 1, 1),
+    (512, 3, 1),
+    "S",  # second prediction head
+    (128, 1, 1),
+    "U",  # second upsample + concatenation
+    (128, 1, 1),
+    (256, 3, 1),
+    "S",  # third prediction head
+]
+class CNNBlock(nn.Module):
+    @staticmethod
+    def get_activation_function(activation_type, param=0.1):
+        if activation_type == 'lrelu':
+            return nn.LeakyReLU(param)
+        elif activation_type == 'relu':
+            return nn.ReLU()
+    def __init__(self, in_channels, out_channels,
+                 activation=modelConfig.ACTIVATION, bn_act=True,
+                 **kwargs):
+        super(CNNBlock, self).__init__()
+        bias = not bn_act
+        layers = []
+        layers.append(nn.Conv2d(in_channels, out_channels, bias=bias, **kwargs))
+        if bn_act:
+            layers.append(nn.BatchNorm2d(out_channels))
+            layers.append(self.get_activation_function(activation))
+        self.layers = nn.Sequential(*layers)
+    def forward(self, x):
+        return self.layers(x)
+class ResidualBlock(nn.Module):
+    def __init__(self, channels, use_residual=True, num_repeats=1):
+        super(ResidualBlock, self).__init__()
+        self.layers = nn.ModuleList()
+        for repeat in range(num_repeats):
+            self.layers += [
+                nn.Sequential(
+                    CNNBlock(channels, channels // 2, kernel_size=1),
+                    CNNBlock(channels // 2, channels, kernel_size=3, padding=1),
+                )
+            ]
+        self.use_residual = use_residual
+        self.num_repeats = num_repeats
+    def forward(self, x):
+        for layer in self.layers:
+            if self.use_residual:
+                x = x + layer(x)
+            else:
+                x = layer(x)
+        return x
+class ScalePrediction(nn.Module):
+    def __init__(self, in_channels, num_classes):
+        super(ScalePrediction, self).__init__()
+        self.pred = nn.Sequential(
+            CNNBlock(in_channels, 2 * in_channels, kernel_size=3, padding=1),
+            CNNBlock(2 * in_channels, (num_classes + 5) * 3, kernel_size=1, bn_act=False),
+        )
+        self.num_classes = num_classes
+    def forward(self, x):
+        x = self.pred(x)
+        return x.reshape(x.shape[0], 3,
+                         self.num_classes + 5, x.shape[2],
+                         x.shape[3]).permute(0, 1, 3, 4, 2)
+class YOLOv3(nn.Module):
+    def __init__(self, in_channels=3, num_classes=80):
+        super(YOLOv3, self).__init__()
+        self.num_classes = num_classes
+        self.in_channels = in_channels
+        self.layers = self._create_conv_layers()
+    def forward(self, x):
+        outputs = []  # for each scale
+        route_connections = []
+        for layer in self.layers:
+            x_ = layer(x)
+            if isinstance(layer, ScalePrediction):
+                outputs.append(x_)
+                continue
+            x = x_
+            if isinstance(layer, ResidualBlock) and layer.num_repeats == 8:
+                route_connections.append(x)
+            elif isinstance(layer, nn.Upsample):
+                x = torch.cat([x, route_connections[-1]], dim=1)
+                route_connections.pop()
+        return outputs
+    def _create_conv_layers(self):
+        layers = nn.ModuleList()
+        in_channels = self.in_channels
+        for module in config:
+            if isinstance(module, tuple):
+                out_channels, kernel_size, stride = module
+                layers.append(
+                    CNNBlock(
+                        in_channels,
+                        out_channels,
+                        kernel_size=kernel_size,
+                        stride=stride,
+                        padding=1 if kernel_size == 3 else 0,
+                    )
+                )
+                in_channels = out_channels
+            elif isinstance(module, list):
+                num_repeats = module[1]
+                layers.append(ResidualBlock(in_channels, num_repeats=num_repeats,))
+            elif isinstance(module, str):
+                if module == "S":
+                    layers += [
+                        ResidualBlock(in_channels, use_residual=False, num_repeats=1),
+                        CNNBlock(in_channels, in_channels // 2, kernel_size=1),
+                        ScalePrediction(in_channels // 2, num_classes=self.num_classes),
+                    ]
+                    in_channels = in_channels // 2
+                elif module == "U":
+                    layers.append(nn.Upsample(scale_factor=2),)
+                    in_channels = in_channels * 3
+        return layers