Spaces:
Sleeping
Sleeping
import json | |
import random | |
random.seed(0) | |
def minify_dataset(path, num_images=10): | |
with open(path, 'r') as f: | |
data = json.load(f) | |
new_file = {} | |
new_file['info'] = data['info'] | |
idx = random.sample(range(len(data['images'])), num_images) | |
new_file['images'] = [data['images'][i] for i in idx] | |
new_file['categories'] = data['categories'] | |
# grab only annotation for the image ids | |
new_file['annotations'] = [ann for ann in data['annotations'] if ann['image_id'] in [img['id'] for img in new_file['images']]] | |
with open(path.replace('.json', '_mini.json'), 'w') as f: | |
json.dump(new_file, f) | |
cats = set({'bicycle', 'books', 'bottle', 'chair', 'cup', 'laptop', 'shoes', 'towel', 'blinds', 'window', 'lamp', 'shelves', 'mirror', 'sink', 'cabinet', 'bathtub', 'door', 'toilet', 'desk', 'box', 'bookcase', 'picture', 'table', 'counter', 'bed', 'night stand', 'pillow', 'sofa', 'television', 'floor mat', 'curtain', 'clothes', 'stationery', 'refrigerator', 'bin', 'stove', 'oven', 'machine'}) | |
n_images = 103 | |
# minify_dataset('datasets/Omni3D/SUNRGBD_test.json', n_images*2) | |
# minify_dataset('datasets/Omni3D/SUNRGBD_train.json', n_images) | |
# minify_dataset('datasets/Omni3D/SUNRGBD_val.json', n_images) | |
minify_dataset('datasets/Omni3D/KITTI_test.json', n_images*2) | |
minify_dataset('datasets/Omni3D/KITTI_train.json', n_images) | |
minify_dataset('datasets/Omni3D/KITTI_val.json', n_images) | |
def minify_dataset_cats(path, cats): | |
'''make a mini dataset which has all the specified categories''' | |
with open(path, 'r') as f: | |
data = json.load(f) | |
new_file = {} | |
new_file['info'] = data['info'] | |
i = 0 | |
while len(cats) > 0: | |
idx = random.sample(range(len(data['images'])), 1) | |
new_file['images'] = [data['images'][i] for i in idx] | |
# grab only annotation for the image ids | |
new_file['annotations'] = [ann for ann in data['annotations'] if ann['image_id'] in [img['id'] for img in new_file['images']]] | |
# check if all categories are present | |
cat_in_img = set([i['category_name'] for i in new_file['annotations']]) | |
cats = cats - cat_in_img | |
i += 1 | |
print('num_ ', i) | |
with open(path.replace('.json', '_mini.json'), 'w') as f: | |
json.dump(new_file, f) | |
# minify_dataset_cats('datasets/Omni3D/SUNRGBD_test.json', cats) | |
# minify_dataset_cats('datasets/Omni3D/SUNRGBD_train.json', cats) | |
# minify_dataset_cats('datasets/Omni3D/SUNRGBD_val.json', cats) | |
def minify_dataset_idx(path, idx): | |
with open(path, 'r') as f: | |
data = json.load(f) | |
new_file = {} | |
new_file['info'] = data['info'] | |
# find only image with idx | |
new_file['images'] = [i for i in data['images'] if i['id'] == idx] | |
new_file['categories'] = data['categories'] | |
# grab only annotation for the image ids | |
new_file['annotations'] = [ann for ann in data['annotations'] if ann['image_id'] in [img['id'] for img in new_file['images']]] | |
with open(path.replace('.json', f'_mini_{idx}.json'), 'w') as f: | |
json.dump(new_file, f) | |
# minify_dataset_idx('datasets/Omni3D/SUNRGBD_test.json', 168509) |