"""
Chunking Data

Author: Xiaoyang Wu ([email protected])
Please cite our work if the code is helpful to you.
"""

import os
import argparse
import numpy as np
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
from pathlib import Path


def chunking_scene(
    name,
    dataset_root,
    split,
    grid_size=None,
    chunk_range=(6, 6),
    chunk_stride=(3, 3),
    chunk_minimum_size=10000,
):
    print(f"Chunking scene {name} in {split} split")
    dataset_root = Path(dataset_root)
    scene_path = dataset_root / split / name
    assets = os.listdir(scene_path)
    data_dict = dict()
    for asset in assets:
        if not asset.endswith(".npy"):
            continue
        data_dict[asset[:-4]] = np.load(scene_path / asset)
    coord = data_dict["coord"] - data_dict["coord"].min(axis=0)

    if grid_size is not None:
        # optional grid (voxel) sampling: keep one point per occupied grid cell
        grid_coord = np.floor(coord / grid_size).astype(int)
        _, idx = np.unique(grid_coord, axis=0, return_index=True)
        coord = coord[idx]
        for key in data_dict.keys():
            data_dict[key] = data_dict[key][idx]

    # build a BEV (top-down) grid of chunk origins covering the scene's XY extent
    bev_range = coord.max(axis=0)[:2]
    x, y = np.meshgrid(
        np.arange(0, bev_range[0] + chunk_stride[0] - chunk_range[0], chunk_stride[0]),
        np.arange(0, bev_range[1] + chunk_stride[1] - chunk_range[1], chunk_stride[1]),
        indexing="ij",
    )
    chunks = np.concatenate([x.reshape([-1, 1]), y.reshape([-1, 1])], axis=-1)
    chunk_idx = 0
    for chunk in chunks:
        # select points whose XY coordinates fall inside this chunk's window
        mask = (
            (coord[:, 0] >= chunk[0])
            & (coord[:, 0] < chunk[0] + chunk_range[0])
            & (coord[:, 1] >= chunk[1])
            & (coord[:, 1] < chunk[1] + chunk_range[1])
        )
        # skip chunks that contain too few points
        if np.sum(mask) < chunk_minimum_size:
            continue

        chunk_data_name = f"{name}_{chunk_idx}"
        # name the output split after the chunking configuration
        if grid_size is not None:
            chunk_split_name = (
                f"{split}_"
                f"grid{grid_size * 100:.0f}cm_"
                f"chunk{chunk_range[0]}x{chunk_range[1]}_"
                f"stride{chunk_stride[0]}x{chunk_stride[1]}"
            )
        else:
            chunk_split_name = (
                f"{split}_"
                f"chunk{chunk_range[0]}x{chunk_range[1]}_"
                f"stride{chunk_stride[0]}x{chunk_stride[1]}"
            )

        # write each attribute of the chunk as its own .npy file
        chunk_save_path = dataset_root / chunk_split_name / chunk_data_name
        chunk_save_path.mkdir(parents=True, exist_ok=True)
        for key in data_dict.keys():
            np.save(chunk_save_path / f"{key}.npy", data_dict[key][mask])
        chunk_idx += 1


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_root",
        required=True,
        help="Path to the Pointcept processed ScanNet++ dataset.",
    )
    parser.add_argument(
        "--split",
        required=True,
        type=str,
        help="Split to process, e.g. train or val.",
    )
    parser.add_argument(
        "--grid_size",
        default=None,
        type=float,
        help="Grid size for initial grid sampling",
    )
    parser.add_argument(
        "--chunk_range",
        default=[6, 6],
        type=int,
        nargs="+",
        help="Range of each chunk, e.g. --chunk_range 6 6",
    )
    parser.add_argument(
        "--chunk_stride",
        default=[3, 3],
        type=int,
        nargs="+",
        help="Stride of each chunk, e.g. --chunk_stride 3 3",
    )
    parser.add_argument(
        "--chunk_minimum_size",
        default=10000,
        type=int,
        help="Minimum number of points in each chunk",
    )
    parser.add_argument(
        "--num_workers",
        default=mp.cpu_count(),
        type=int,
        help="Number of workers for preprocessing.",
    )

    config = parser.parse_args()
    config.dataset_root = Path(config.dataset_root)
    data_list = os.listdir(config.dataset_root / config.split)

    print("Processing scenes...")
    pool = ProcessPoolExecutor(max_workers=config.num_workers)
    _ = list(
        pool.map(
            chunking_scene,
            data_list,
            repeat(config.dataset_root),
            repeat(config.split),
            repeat(config.grid_size),
            repeat(config.chunk_range),
            repeat(config.chunk_stride),
            repeat(config.chunk_minimum_size),
        )
    )
    pool.shutdown()
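
# A minimal sketch of loading one saved chunk back, assuming the output layout
# produced above (dataset root, split name, and scene id are illustrative):
#
#   from pathlib import Path
#   import numpy as np
#
#   chunk_dir = Path("data/scannetpp/train_chunk6x6_stride3x3/<scene_id>_0")
#   chunk = {p.stem: np.load(p) for p in chunk_dir.glob("*.npy")}
#   print({key: value.shape for key, value in chunk.items()})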