Shannon Shen committed on
Commit
24cbdc6
·
2 Parent(s): 4d61931 e2c51cc

Merge pull request #2 from nasheedyasin/master

Browse files

Improve the coco split utility and enhance the training script

Files changed (2) hide show
  1. tools/train_net.py +44 -8
  2. utils/cocosplit.py +40 -20
tools/train_net.py CHANGED
@@ -6,25 +6,49 @@ import logging
6
  import os
7
  import json
8
  from collections import OrderedDict
9
- import torch
10
- import sys
11
  import detectron2.utils.comm as comm
 
12
  from detectron2.checkpoint import DetectionCheckpointer
13
  from detectron2.config import get_cfg
 
14
 
15
- from detectron2.data import MetadataCatalog, DatasetCatalog
16
  from detectron2.data.datasets import register_coco_instances
17
 
18
  from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
19
  from detectron2.evaluation import (
20
  COCOEvaluator,
21
- DatasetEvaluators,
22
- SemSegEvaluator,
23
  verify_results,
24
  )
25
  from detectron2.modeling import GeneralizedRCNNWithTTA
26
  import pandas as pd
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  class Trainer(DefaultTrainer):
29
  """
30
  We use the "DefaultTrainer" which contains pre-defined default logic for
@@ -32,8 +56,16 @@ class Trainer(DefaultTrainer):
32
  are working on a new research project. In that case you can use the cleaner
33
  "SimpleTrainer", or write your own training loop. You can use
34
  "tools/plain_train_net.py" as an example.
 
 
 
35
  """
36
 
 
 
 
 
 
37
  @classmethod
38
  def build_evaluator(cls, cfg, dataset_name, output_folder=None):
39
  """
@@ -78,7 +110,8 @@ def setup(args):
78
  Create configs and perform basic setups.
79
  """
80
  cfg = get_cfg()
81
- cfg.merge_from_file(args.config_file)
 
82
  cfg.merge_from_list(args.opts)
83
 
84
  with open(args.json_annotation_train, 'r') as fp:
@@ -114,6 +147,9 @@ def main(args):
114
  pd.DataFrame(res).to_csv(f'{cfg.OUTPUT_DIR}/eval.csv')
115
  return res
116
 
 
 
 
117
  """
118
  If you'd like to do anything fancier than the standard training logic,
119
  consider writing your own training loop (see plain_train_net.py) or
@@ -143,14 +179,14 @@ if __name__ == "__main__":
143
 
144
  args = parser.parse_args()
145
  print("Command Line Args:", args)
146
-
147
  # Register Datasets
148
  dataset_name = args.dataset_name
149
  register_coco_instances(f"{dataset_name}-train", {},
150
  args.json_annotation_train,
151
  args.image_path_train)
152
 
153
- register_coco_instances(f"{dataset_name}-val", {},
154
  args.json_annotation_val,
155
  args.image_path_val)
156
 
 
6
  import os
7
  import json
8
  from collections import OrderedDict
 
 
9
  import detectron2.utils.comm as comm
10
+ import detectron2.data.transforms as T
11
  from detectron2.checkpoint import DetectionCheckpointer
12
  from detectron2.config import get_cfg
13
+ from detectron2.data import DatasetMapper, build_detection_train_loader
14
 
 
15
  from detectron2.data.datasets import register_coco_instances
16
 
17
  from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
18
  from detectron2.evaluation import (
19
  COCOEvaluator,
 
 
20
  verify_results,
21
  )
22
  from detectron2.modeling import GeneralizedRCNNWithTTA
23
  import pandas as pd
24
 
25
+
26
+ def get_augs(cfg):
27
+ """Add all the desired augmentations here. A list of available augmentations
28
+ can be found here:
29
+ https://detectron2.readthedocs.io/en/latest/modules/data_transforms.html
30
+ """
31
+ augs = [
32
+ T.ResizeShortestEdge(
33
+ cfg.INPUT.MIN_SIZE_TRAIN, cfg.INPUT.MAX_SIZE_TRAIN, cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
34
+ )
35
+ ]
36
+ if cfg.INPUT.CROP.ENABLED:
37
+ augs.append(
38
+ T.RandomCrop_CategoryAreaConstraint(
39
+ cfg.INPUT.CROP.TYPE,
40
+ cfg.INPUT.CROP.SIZE,
41
+ cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA,
42
+ cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
43
+ )
44
+ )
45
+ horizontal_flip: bool = (cfg.INPUT.RANDOM_FLIP == 'horizontal')
46
+ augs.append(T.RandomFlip(horizontal=horizontal_flip,
47
+ vertical=not horizontal_flip))
48
+ # Rotate the image by a random angle of 0 to 90 degrees clockwise around the centre (angle range -90 to 0; positive angles are counter-clockwise)
49
+ augs.append(T.RandomRotation(angle=[-90.0, 0.0]))
50
+ return augs
51
+
52
  class Trainer(DefaultTrainer):
53
  """
54
  We use the "DefaultTrainer" which contains pre-defined default logic for
 
56
  are working on a new research project. In that case you can use the cleaner
57
  "SimpleTrainer", or write your own training loop. You can use
58
  "tools/plain_train_net.py" as an example.
59
+
60
+ Adapted from:
61
+ https://github.com/facebookresearch/detectron2/blob/master/projects/DeepLab/train_net.py
62
  """
63
 
64
+ @classmethod
65
+ def build_train_loader(cls, cfg):
66
+ mapper = DatasetMapper(cfg, is_train=True, augmentations=get_augs(cfg))
67
+ return build_detection_train_loader(cfg, mapper=mapper)
68
+
69
  @classmethod
70
  def build_evaluator(cls, cfg, dataset_name, output_folder=None):
71
  """
 
110
  Create configs and perform basic setups.
111
  """
112
  cfg = get_cfg()
113
+ if args.config_file != "":
114
+ cfg.merge_from_file(args.config_file)
115
  cfg.merge_from_list(args.opts)
116
 
117
  with open(args.json_annotation_train, 'r') as fp:
 
147
  pd.DataFrame(res).to_csv(f'{cfg.OUTPUT_DIR}/eval.csv')
148
  return res
149
 
150
+ # Ensure that the Output directory exists
151
+ os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
152
+
153
  """
154
  If you'd like to do anything fancier than the standard training logic,
155
  consider writing your own training loop (see plain_train_net.py) or
 
179
 
180
  args = parser.parse_args()
181
  print("Command Line Args:", args)
182
+
183
  # Register Datasets
184
  dataset_name = args.dataset_name
185
  register_coco_instances(f"{dataset_name}-train", {},
186
  args.json_annotation_train,
187
  args.image_path_train)
188
 
189
+ register_coco_instances(f"{dataset_name}-val", {},
190
  args.json_annotation_val,
191
  args.image_path_val)
192
 
utils/cocosplit.py CHANGED
@@ -6,19 +6,18 @@ import funcy
6
  from sklearn.model_selection import train_test_split
7
 
8
  parser = argparse.ArgumentParser(description='Splits COCO annotations file into training and test sets.')
9
- parser.add_argument('annotations', metavar='coco_annotations', type=str,
10
  help='Path to COCO annotations file.')
11
- parser.add_argument('train', type=str, help='Where to store COCO training annotations')
12
- parser.add_argument('test', type=str, help='Where to store COCO test annotations')
13
- parser.add_argument('-s', dest='split_ratio', type=float, required=True,
14
  help="A percentage of a split; a number in (0, 1)")
15
  parser.add_argument('--having-annotations', dest='having_annotations', action='store_true',
16
  help='Ignore all images without annotations. Keep only these with at least one annotation')
17
 
18
- def save_coco(file, info, licenses, images, annotations, categories):
19
  with open(file, 'wt', encoding='UTF-8') as coco:
20
- json.dump({ 'info': info, 'licenses': licenses, 'images': images,
21
- 'annotations': annotations, 'categories': categories}, coco, indent=2, sort_keys=True)
22
 
23
  def filter_annotations(annotations, images):
24
  image_ids = funcy.lmap(lambda i: int(i['id']), images)
@@ -33,25 +32,46 @@ def main(annotation_path,
33
 
34
  with open(annotation_path, 'rt', encoding='UTF-8') as annotations:
35
  coco = json.load(annotations)
36
- info = coco['info']
37
- licenses = coco['licenses']
38
- images = coco['images']
39
- annotations = coco['annotations']
40
- categories = coco['categories']
41
 
42
- number_of_images = len(images)
 
43
 
44
- images_with_annotations = funcy.lmap(lambda a: int(a['image_id']), annotations)
45
 
46
- if having_annotations:
47
- images = funcy.lremove(lambda i: i['id'] not in images_with_annotations, images)
 
 
48
 
49
- x, y = train_test_split(images, train_size=split_ratio, random_state=random_state)
 
 
 
50
 
51
- save_coco(train_save_path, info, licenses, x, filter_annotations(annotations, x), categories)
52
- save_coco(test_save_path, info, licenses, y, filter_annotations(annotations, y), categories)
53
 
54
- print("Saved {} entries in {} and {} in {}".format(len(x), train_save_path, len(y), test_save_path))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  if __name__ == "__main__":
 
6
  from sklearn.model_selection import train_test_split
7
 
8
  parser = argparse.ArgumentParser(description='Splits COCO annotations file into training and test sets.')
9
+ parser.add_argument('--annotation_path', metavar='coco_annotations', type=str,
10
  help='Path to COCO annotations file.')
11
+ parser.add_argument('--train', type=str, help='Where to store COCO training annotations')
12
+ parser.add_argument('--test', type=str, help='Where to store COCO test annotations')
13
+ parser.add_argument('--split-ratio', dest='split_ratio', type=float, required=True,
14
  help="A percentage of a split; a number in (0, 1)")
15
  parser.add_argument('--having-annotations', dest='having_annotations', action='store_true',
16
  help='Ignore all images without annotations. Keep only these with at least one annotation')
17
 
18
+ def save_coco(file, tagged_data):
19
  with open(file, 'wt', encoding='UTF-8') as coco:
20
+ json.dump(tagged_data, coco, indent=2, sort_keys=True)
 
21
 
22
  def filter_annotations(annotations, images):
23
  image_ids = funcy.lmap(lambda i: int(i['id']), images)
 
32
 
33
  with open(annotation_path, 'rt', encoding='UTF-8') as annotations:
34
  coco = json.load(annotations)
 
 
 
 
 
35
 
36
+ images = coco['images']
37
+ annotations = coco['annotations']
38
 
39
+ ids_with_annotations = funcy.lmap(lambda a: int(a['image_id']), annotations)
40
 
41
+ # Images with annotations
42
+ img_ann = funcy.lremove(lambda i: i['id'] not in ids_with_annotations, images)
43
+ tr_ann, ts_ann = train_test_split(img_ann, train_size=split_ratio,
44
+ random_state=random_state)
45
 
46
+ # Images without annotations
47
+ img_wo_ann = funcy.lremove(lambda i: i['id'] in ids_with_annotations, images)
48
+ tr_wo_ann, ts_wo_ann = train_test_split(img_wo_ann, train_size=split_ratio,
49
+ random_state=random_state)
50
 
51
+ if having_annotations:
52
+ tr, ts = tr_ann, ts_ann
53
 
54
+ else:
55
+ # Merging the 2 image lists (i.e. with and without annotation)
56
+ tr_ann.extend(tr_wo_ann)
57
+ ts_ann.extend(ts_wo_ann)
58
+
59
+ tr, ts = tr_ann, ts_ann
60
+
61
+ # Train Data
62
+ coco.update({'images': tr,
63
+ 'annotations': filter_annotations(annotations, tr)})
64
+ save_coco(train_save_path, coco)
65
+
66
+ # Test Data
67
+ coco.update({'images': ts,
68
+ 'annotations': filter_annotations(annotations, ts)})
69
+ save_coco(test_save_path, coco)
70
+
71
+ print("Saved {} entries in {} and {} in {}".format(len(tr),
72
+ train_save_path,
73
+ len(ts),
74
+ test_save_path))
75
 
76
 
77
  if __name__ == "__main__":