loliipopshock committed on
Commit
ea5f6fe
·
1 Parent(s): 0701c1d

Add the cocosplit script

Browse files
Files changed (3) hide show
  1. README.md +5 -0
  2. utils/__init__.py +0 -0
  3. utils/cocosplit.py +65 -0
README.md CHANGED
@@ -1 +1,6 @@
1
  # Scripts for training Layout Detection Models using Detectron2
 
 
 
 
 
 
1
  # Scripts for training Layout Detection Models using Detectron2
2
+
3
+
4
+ ## Reference
5
+
6
+ - **[cocosplit](https://github.com/akarazniewicz/cocosplit)** A script that splits a COCO annotations file into training and test sets.
utils/__init__.py ADDED
File without changes
utils/cocosplit.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Modified based on https://github.com/akarazniewicz/cocosplit/blob/master/cocosplit.py
2
+
3
+ import json
4
+ import argparse
5
+ import funcy
6
+ from sklearn.model_selection import train_test_split
7
+
8
# Command-line interface: three positional paths (input annotations, train
# output, test output), a mandatory split ratio and an optional flag that
# restricts the split to images having at least one annotation.
parser = argparse.ArgumentParser(
    description='Splits COCO annotations file into training and test sets.',
)
parser.add_argument(
    'annotations',
    metavar='coco_annotations',
    type=str,
    help='Path to COCO annotations file.',
)
parser.add_argument(
    'train',
    type=str,
    help='Where to store COCO training annotations',
)
parser.add_argument(
    'test',
    type=str,
    help='Where to store COCO test annotations',
)
parser.add_argument(
    '-s',
    dest='split_ratio',
    type=float,
    required=True,
    help="A percentage of a split; a number in (0, 1)",
)
parser.add_argument(
    '--having-annotations',
    dest='having_annotations',
    action='store_true',
    help='Ignore all images without annotations. Keep only these with at least one annotation',
)
17
+
18
def save_coco(file, info, licenses, images, annotations, categories):
    """Write a COCO-style annotation document to ``file`` as JSON.

    The five sections are stored under the keys ``info``, ``licenses``,
    ``images``, ``annotations`` and ``categories``. Output is UTF-8 text,
    indented by two spaces with keys sorted.
    """
    document = {
        'info': info,
        'licenses': licenses,
        'images': images,
        'annotations': annotations,
        'categories': categories,
    }
    with open(file, 'wt', encoding='UTF-8') as handle:
        json.dump(document, handle, indent=2, sort_keys=True)
22
+
23
def filter_annotations(annotations, images):
    """Return the annotations that belong to one of the given images.

    Args:
        annotations: iterable of dicts, each with an ``image_id`` entry.
        images: iterable of dicts, each with an ``id`` entry.

    Returns:
        A new list with the matching annotations in their original order.
        Ids on both sides are compared after conversion with ``int``.
    """
    # A set gives O(1) membership tests; a list here made the filter
    # O(len(annotations) * len(images)).
    image_ids = {int(image['id']) for image in images}
    return [
        annotation
        for annotation in annotations
        if int(annotation['image_id']) in image_ids
    ]
26
+
27
def main(annotation_path,
         split_ratio,
         having_annotations,
         train_save_path,
         test_save_path,
         random_state=None):
    """Split a COCO annotations file into a training and a test file.

    Args:
        annotation_path: path of the COCO JSON file to read.
        split_ratio: forwarded to ``train_test_split`` as ``train_size``.
        having_annotations: when true, images that no annotation refers to
            are dropped before splitting.
        train_save_path: where the training subset is written.
        test_save_path: where the test subset is written.
        random_state: forwarded to ``train_test_split``.

    Raises:
        KeyError: if the input lacks one of the ``info``, ``licenses``,
            ``images``, ``annotations`` or ``categories`` sections.
    """
    with open(annotation_path, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if having_annotations:
        # Compare ids as ints on both sides (filter_annotations does the
        # same) and use a set so the membership test is O(1) per image.
        annotated_image_ids = {int(a['image_id']) for a in annotations}
        images = [
            image for image in images
            if int(image['id']) in annotated_image_ids
        ]

    x, y = train_test_split(images, train_size=split_ratio, random_state=random_state)

    save_coco(train_save_path, info, licenses, x, filter_annotations(annotations, x), categories)
    save_coco(test_save_path, info, licenses, y, filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(len(x), train_save_path, len(y), test_save_path))
55
+
56
+
57
if __name__ == "__main__":
    args = parser.parse_args()

    # The input path is registered as the positional 'annotations'; its
    # metavar only changes the help text, so the parsed value is exposed as
    # args.annotations (args.annotation_path does not exist).
    main(args.annotations,
         args.split_ratio,
         args.having_annotations,
         args.train,
         args.test,
         random_state=24)  # fixed seed so repeated runs give the same split