Spaces:

mckabue
/

document-similarity-matching-using-visual-layout-features-archive

Build error

App Files Files Community

Shannon Shen committed on Jun 20, 2021

Commit

24cbdc6

2 Parent(s): 4d61931 e2c51cc

Merge pull request #2 from nasheedyasin/master

Browse files

Improve the coco split utility and enhance the training script

Files changed (2) hide show

tools/train_net.py +44 -8
utils/cocosplit.py +40 -20

tools/train_net.py CHANGED Viewed

@@ -6,25 +6,49 @@ import logging
 import os
 import json
 from collections import OrderedDict
-import torch
-import sys
 import detectron2.utils.comm as comm
 from detectron2.checkpoint import DetectionCheckpointer
 from detectron2.config import get_cfg
-from detectron2.data import MetadataCatalog, DatasetCatalog
 from detectron2.data.datasets import register_coco_instances
 from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
 from detectron2.evaluation import (
     COCOEvaluator,
-    DatasetEvaluators,
-    SemSegEvaluator,
     verify_results,
 )
 from detectron2.modeling import GeneralizedRCNNWithTTA
 import pandas as pd
 class Trainer(DefaultTrainer):
     """
     We use the "DefaultTrainer" which contains pre-defined default logic for
@@ -32,8 +56,16 @@ class Trainer(DefaultTrainer):
     are working on a new research project. In that case you can use the cleaner
     "SimpleTrainer", or write your own training loop. You can use
     "tools/plain_train_net.py" as an example.
     """
     @classmethod
     def build_evaluator(cls, cfg, dataset_name, output_folder=None):
         """
@@ -78,7 +110,8 @@ def setup(args):
     Create configs and perform basic setups.
     """
     cfg = get_cfg()
-    cfg.merge_from_file(args.config_file)
     cfg.merge_from_list(args.opts)
     with open(args.json_annotation_train, 'r') as fp:
@@ -114,6 +147,9 @@ def main(args):
         pd.DataFrame(res).to_csv(f'{cfg.OUTPUT_DIR}/eval.csv')
         return res
     """
     If you'd like to do anything fancier than the standard training logic,
     consider writing your own training loop (see plain_train_net.py) or
@@ -143,14 +179,14 @@ if __name__ == "__main__":
     args = parser.parse_args()
     print("Command Line Args:", args)
     # Register Datasets
     dataset_name = args.dataset_name
     register_coco_instances(f"{dataset_name}-train", {},
                             args.json_annotation_train,
                             args.image_path_train)
-    register_coco_instances(f"{dataset_name}-val",   {},
                             args.json_annotation_val,
                             args.image_path_val)

 import os
 import json
 from collections import OrderedDict
 import detectron2.utils.comm as comm
+import detectron2.data.transforms as T
 from detectron2.checkpoint import DetectionCheckpointer
 from detectron2.config import get_cfg
+from detectron2.data import DatasetMapper, build_detection_train_loader
 from detectron2.data.datasets import register_coco_instances
 from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
 from detectron2.evaluation import (
     COCOEvaluator,
     verify_results,
 )
 from detectron2.modeling import GeneralizedRCNNWithTTA
 import pandas as pd
+def get_augs(cfg):
+    """Add all the desired augmentations here. A list of available augmentations
+    can be found here:
+       https://detectron2.readthedocs.io/en/latest/modules/data_transforms.html
+    """
+    augs = [
+        T.ResizeShortestEdge(
+            cfg.INPUT.MIN_SIZE_TRAIN, cfg.INPUT.MAX_SIZE_TRAIN, cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
+        )
+    ]
+    if cfg.INPUT.CROP.ENABLED:
+        augs.append(
+            T.RandomCrop_CategoryAreaConstraint(
+                cfg.INPUT.CROP.TYPE,
+                cfg.INPUT.CROP.SIZE,
+                cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA,
+                cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
+            )
+        )
+    horizontal_flip: bool = (cfg.INPUT.RANDOM_FLIP == 'horizontal')
+    augs.append(T.RandomFlip(horizontal=horizontal_flip,
+                             vertical=not horizontal_flip))
+    # Rotate the image around the centre by a random angle between -90 and 0 degrees (positive angles are counter-clockwise, i.e. up to 90 degrees clockwise)
+    augs.append(T.RandomRotation(angle=[-90.0, 0.0]))
+    return augs
 class Trainer(DefaultTrainer):
     """
     We use the "DefaultTrainer" which contains pre-defined default logic for
     are working on a new research project. In that case you can use the cleaner
     "SimpleTrainer", or write your own training loop. You can use
     "tools/plain_train_net.py" as an example.
+    Adapted from:
+        https://github.com/facebookresearch/detectron2/blob/master/projects/DeepLab/train_net.py
     """
+    @classmethod
+    def build_train_loader(cls, cfg):
+        mapper = DatasetMapper(cfg, is_train=True, augmentations=get_augs(cfg))
+        return build_detection_train_loader(cfg, mapper=mapper)
     @classmethod
     def build_evaluator(cls, cfg, dataset_name, output_folder=None):
         """
     Create configs and perform basic setups.
     """
     cfg = get_cfg()
+    if args.config_file != "":
+        cfg.merge_from_file(args.config_file)
     cfg.merge_from_list(args.opts)
     with open(args.json_annotation_train, 'r') as fp:
         pd.DataFrame(res).to_csv(f'{cfg.OUTPUT_DIR}/eval.csv')
         return res
+    # Ensure that the Output directory exists
+    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
     """
     If you'd like to do anything fancier than the standard training logic,
     consider writing your own training loop (see plain_train_net.py) or
     args = parser.parse_args()
     print("Command Line Args:", args)
     # Register Datasets
     dataset_name = args.dataset_name
     register_coco_instances(f"{dataset_name}-train", {},
                             args.json_annotation_train,
                             args.image_path_train)
+    register_coco_instances(f"{dataset_name}-val", {},
                             args.json_annotation_val,
                             args.image_path_val)

utils/cocosplit.py CHANGED Viewed

@@ -6,19 +6,18 @@ import funcy
 from sklearn.model_selection import train_test_split
 parser = argparse.ArgumentParser(description='Splits COCO annotations file into training and test sets.')
-parser.add_argument('annotations', metavar='coco_annotations', type=str,
                     help='Path to COCO annotations file.')
-parser.add_argument('train', type=str, help='Where to store COCO training annotations')
-parser.add_argument('test', type=str, help='Where to store COCO test annotations')
-parser.add_argument('-s', dest='split_ratio', type=float, required=True,
                     help="A percentage of a split; a number in (0, 1)")
 parser.add_argument('--having-annotations', dest='having_annotations', action='store_true',
                     help='Ignore all images without annotations. Keep only these with at least one annotation')
-def save_coco(file, info, licenses, images, annotations, categories):
     with open(file, 'wt', encoding='UTF-8') as coco:
-        json.dump({ 'info': info, 'licenses': licenses, 'images': images,
-            'annotations': annotations, 'categories': categories}, coco, indent=2, sort_keys=True)
 def filter_annotations(annotations, images):
     image_ids = funcy.lmap(lambda i: int(i['id']), images)
@@ -33,25 +32,46 @@ def main(annotation_path,
     with open(annotation_path, 'rt', encoding='UTF-8') as annotations:
         coco = json.load(annotations)
-        info = coco['info']
-        licenses = coco['licenses']
-        images = coco['images']
-        annotations = coco['annotations']
-        categories = coco['categories']
-        number_of_images = len(images)
-        images_with_annotations = funcy.lmap(lambda a: int(a['image_id']), annotations)
-        if having_annotations:
-            images = funcy.lremove(lambda i: i['id'] not in images_with_annotations, images)
-        x, y = train_test_split(images, train_size=split_ratio, random_state=random_state)
-        save_coco(train_save_path, info, licenses, x, filter_annotations(annotations, x), categories)
-        save_coco(test_save_path, info, licenses, y, filter_annotations(annotations, y), categories)
-        print("Saved {} entries in {} and {} in {}".format(len(x), train_save_path, len(y), test_save_path))
 if __name__ == "__main__":

 from sklearn.model_selection import train_test_split
 parser = argparse.ArgumentParser(description='Splits COCO annotations file into training and test sets.')
+parser.add_argument('--annotation_path', metavar='coco_annotations', type=str,
                     help='Path to COCO annotations file.')
+parser.add_argument('--train', type=str, help='Where to store COCO training annotations')
+parser.add_argument('--test', type=str, help='Where to store COCO test annotations')
+parser.add_argument('--split-ratio', dest='split_ratio', type=float, required=True,
                     help="A percentage of a split; a number in (0, 1)")
 parser.add_argument('--having-annotations', dest='having_annotations', action='store_true',
                     help='Ignore all images without annotations. Keep only these with at least one annotation')
+def save_coco(file, tagged_data):
     with open(file, 'wt', encoding='UTF-8') as coco:
+        json.dump(tagged_data, coco, indent=2, sort_keys=True)
 def filter_annotations(annotations, images):
     image_ids = funcy.lmap(lambda i: int(i['id']), images)
     with open(annotation_path, 'rt', encoding='UTF-8') as annotations:
         coco = json.load(annotations)
+    images = coco['images']
+    annotations = coco['annotations']
+    ids_with_annotations = funcy.lmap(lambda a: int(a['image_id']), annotations)
+    # Images with annotations
+    img_ann = funcy.lremove(lambda i: i['id'] not in ids_with_annotations, images)
+    tr_ann, ts_ann = train_test_split(img_ann, train_size=split_ratio,
+                                      random_state=random_state)
+    # Images without annotations
+    img_wo_ann = funcy.lremove(lambda i: i['id'] in ids_with_annotations, images)
+    tr_wo_ann, ts_wo_ann = train_test_split(img_wo_ann, train_size=split_ratio,
+                                            random_state=random_state)
+    if having_annotations:
+        tr, ts = tr_ann, ts_ann
+    else:
+        # Merging the 2 image lists (i.e. with and without annotation)
+        tr_ann.extend(tr_wo_ann)
+        ts_ann.extend(ts_wo_ann)
+        tr, ts = tr_ann, ts_ann
+    # Train Data
+    coco.update({'images': tr,
+                 'annotations': filter_annotations(annotations, tr)})
+    save_coco(train_save_path, coco)
+    # Test Data
+    coco.update({'images': ts,
+                 'annotations': filter_annotations(annotations, ts)})
+    save_coco(test_save_path, coco)
+    print("Saved {} entries in {} and {} in {}".format(len(tr),
+                                                       train_save_path,
+                                                       len(ts),
+                                                       test_save_path))
 if __name__ == "__main__":