File size: 3,136 Bytes
db3da1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import json
import random
random.seed(0)

def minify_dataset(path, num_images=10):
    '''Write a randomly sampled subset of a dataset JSON file.

    Loads the JSON file at ``path`` (expected to hold the keys ``info``,
    ``images``, ``categories`` and ``annotations``), draws ``num_images``
    distinct entries from ``images`` using the module-level ``random`` state,
    and writes a new JSON file containing the original ``info`` and
    ``categories`` plus only the sampled images and the annotations whose
    ``image_id`` matches one of them.

    Args:
        path: Path of the source JSON file. The output path is ``path`` with
            ``.json`` replaced by ``_mini.json``; when ``path`` contains no
            ``.json``, ``_mini.json`` is appended instead so that the source
            file is never overwritten.
        num_images: Number of images to keep.

    Raises:
        ValueError: If ``num_images`` is negative or larger than the number
            of images in the file (raised by ``random.sample``).
    '''
    with open(path, 'r') as f:
        data = json.load(f)

    idx = random.sample(range(len(data['images'])), num_images)
    images = [data['images'][i] for i in idx]
    # Build the id lookup once: rebuilding the id list for every annotation
    # made the filter cost (#annotations x #sampled images).
    # Image ids must be hashable.
    image_ids = {img['id'] for img in images}

    new_file = {
        'info': data['info'],
        'images': images,
        'categories': data['categories'],
        # grab only annotation for the image ids
        'annotations': [ann for ann in data['annotations'] if ann['image_id'] in image_ids],
    }

    out_path = path.replace('.json', '_mini.json')
    if out_path == path:
        # No '.json' in the path: replace() was a no-op and writing to it
        # would clobber the input file.
        out_path = path + '_mini.json'
    with open(out_path, 'w') as f:
        json.dump(new_file, f)

# Category names handed to minify_dataset_cats by the (currently
# commented-out) SUNRGBD calls further down in this script.
cats = {
    'bicycle', 'books', 'bottle', 'chair', 'cup', 'laptop', 'shoes',
    'towel', 'blinds', 'window', 'lamp', 'shelves', 'mirror', 'sink',
    'cabinet', 'bathtub', 'door', 'toilet', 'desk', 'box', 'bookcase',
    'picture', 'table', 'counter', 'bed', 'night stand', 'pillow', 'sofa',
    'television', 'floor mat', 'curtain', 'clothes', 'stationery',
    'refrigerator', 'bin', 'stove', 'oven', 'machine',
}

# Base number of images per split; the test split keeps twice as many.
n_images = 103

# minify_dataset('datasets/Omni3D/SUNRGBD_test.json', n_images*2)
# minify_dataset('datasets/Omni3D/SUNRGBD_train.json', n_images)
# minify_dataset('datasets/Omni3D/SUNRGBD_val.json', n_images)

# Runs at import time: writes a *_mini.json file next to each KITTI split.
minify_dataset('datasets/Omni3D/KITTI_test.json', 2 * n_images)
minify_dataset('datasets/Omni3D/KITTI_train.json', n_images)
minify_dataset('datasets/Omni3D/KITTI_val.json', n_images)

def minify_dataset_cats(path, cats):
    '''make a mini dataset which has all the specified categories

    Loads the JSON file at ``path``, visits its images in a random order
    (module-level ``random`` state, each image at most once) and keeps every
    image that contributes at least one still-missing category, together
    with all annotations of that image. Stops as soon as every requested
    category is covered or the images are exhausted.

    Args:
        path: Path of the source JSON file. The output path is ``path`` with
            ``.json`` replaced by ``_mini.json``; when ``path`` contains no
            ``.json``, ``_mini.json`` is appended instead so that the source
            file is never overwritten.
        cats: Iterable of category names to cover, matched against each
            annotation's ``category_name``. It is not modified.

    Prints the number of images examined and, if some categories never
    occur in the file, the names that could not be covered.
    '''
    with open(path, 'r') as f:
        data = json.load(f)

    # Group annotations by image once instead of rescanning the whole
    # annotation list for every drawn image.
    anns_by_image = {}
    for ann in data['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)

    missing = set(cats)  # private copy; shrinks as categories get covered
    images = []
    annotations = []
    i = 0
    # A full shuffle samples without replacement, so the loop always
    # terminates even when a requested category does not exist in the file.
    n_total = len(data['images'])
    for pos in random.sample(range(n_total), n_total):
        if not missing:
            break
        i += 1
        img = data['images'][pos]
        img_anns = anns_by_image.get(img['id'], [])
        cat_in_img = {ann['category_name'] for ann in img_anns}
        if missing.isdisjoint(cat_in_img):
            # Image adds nothing new; skip it to keep the subset small.
            continue
        # Accumulate rather than overwrite, so the output really contains
        # every covered category.
        images.append(img)
        annotations.extend(img_anns)
        missing -= cat_in_img
    print('num_ ', i)
    if missing:
        print('categories not found: ', sorted(map(str, missing)))

    new_file = {}
    new_file['info'] = data['info']
    new_file['images'] = images
    if 'categories' in data:
        # Keep the category table, as the other minify helpers do.
        new_file['categories'] = data['categories']
    new_file['annotations'] = annotations

    out_path = path.replace('.json', '_mini.json')
    if out_path == path:
        # No '.json' in the path: replace() was a no-op and writing to it
        # would clobber the input file.
        out_path = path + '_mini.json'
    with open(out_path, 'w') as f:
        json.dump(new_file, f)


# minify_dataset_cats('datasets/Omni3D/SUNRGBD_test.json', cats)
# minify_dataset_cats('datasets/Omni3D/SUNRGBD_train.json', cats)
# minify_dataset_cats('datasets/Omni3D/SUNRGBD_val.json', cats)

def minify_dataset_idx(path, idx):
    '''Write a dataset JSON file restricted to the image(s) with id ``idx``.

    Loads the JSON file at ``path`` and writes a new JSON file containing the
    original ``info`` and ``categories``, the entries of ``images`` whose
    ``id`` equals ``idx``, and the annotations whose ``image_id`` matches one
    of those images. If no image has that id, the output holds empty
    ``images`` and ``annotations`` lists.

    Args:
        path: Path of the source JSON file. The output path is ``path`` with
            ``.json`` replaced by ``_mini_<idx>.json``; when ``path``
            contains no ``.json``, that suffix is appended instead so that
            the source file is never overwritten.
        idx: Image id to extract.
    '''
    with open(path, 'r') as f:
        data = json.load(f)

    # find only image with idx
    images = [img for img in data['images'] if img['id'] == idx]
    # Build the id lookup once rather than once per annotation.
    image_ids = {img['id'] for img in images}

    new_file = {
        'info': data['info'],
        'images': images,
        'categories': data['categories'],
        # grab only annotation for the image ids
        'annotations': [ann for ann in data['annotations'] if ann['image_id'] in image_ids],
    }

    suffix = f'_mini_{idx}.json'
    out_path = path.replace('.json', suffix)
    if out_path == path:
        # No '.json' in the path: replace() was a no-op and writing to it
        # would clobber the input file.
        out_path = path + suffix
    with open(out_path, 'w') as f:
        json.dump(new_file, f)

# minify_dataset_idx('datasets/Omni3D/SUNRGBD_test.json', 168509)