"""
Preprocessing ArkitScenes
"""
import os
import argparse
import glob
import plyfile
import numpy as np
import pandas as pd
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
import torch


def read_plymesh(filepath):
    """Read a ply file and return (vertices, faces) as numpy arrays.

    Returns None if the file is empty.
    """
    with open(filepath, "rb") as f:
        plydata = plyfile.PlyData.read(f)
        if plydata.elements:
            vertices = pd.DataFrame(plydata["vertex"].data).values
            faces = np.stack(plydata["face"].data["vertex_indices"], axis=0)
            return vertices, faces
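

# Illustrative usage (hypothetical path): the vertex array carries the raw
# per-vertex PLY columns, here x, y, z followed by r, g, b.
#   vertices, faces = read_plymesh("/path/to/scene/mesh.ply")
#   vertices.shape  # (N, >=6); faces.shape  # (F, 3)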


def face_normal(vertex, face):
    """Per-face unit normals and areas from the triangle edge cross product."""
    v01 = vertex[face[:, 1]] - vertex[face[:, 0]]
    v02 = vertex[face[:, 2]] - vertex[face[:, 0]]
    vec = np.cross(v01, v02)
    # The cross product's length is twice the triangle area; the epsilon
    # guards against division by zero for degenerate faces.
    length = np.sqrt(np.sum(vec**2, axis=1, keepdims=True)) + 1.0e-8
    nf = vec / length
    area = length * 0.5
    return nf, area
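

# Sanity check with synthetic data: a unit right triangle in the XY plane
# should yield a +Z unit normal and an area of 0.5.
#   v = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
#   f = np.array([[0, 1, 2]])
#   nf, area = face_normal(v, f)  # nf ~ [[0., 0., 1.]], area ~ [[0.5]]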


def vertex_normal(vertex, face):
    """Per-vertex normals: normalized, area-weighted sum of face normals."""
    nf, area = face_normal(vertex, face)
    nf = nf * area
    nv = np.zeros_like(vertex)
    # Accumulate each face's weighted normal onto its three vertices
    for i in range(face.shape[0]):
        nv[face[i]] += nf[i]
    length = np.sqrt(np.sum(nv**2, axis=1, keepdims=True)) + 1.0e-8
    nv = nv / length
    return nv
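

# The accumulation loop above can be vectorized if preprocessing speed matters;
# an equivalent sketch using numpy's unbuffered in-place addition:
#   np.add.at(nv, face.reshape(-1), np.repeat(nf, 3, axis=0))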


def parse_scene(scene_path, output_dir):
    print(f"Parsing scene {scene_path}")
    # Split name (Training/Validation) and scene id are encoded in the path:
    # <dataset_root>/3dod/<split>/<scene_id>/<...>_mesh.ply
    split = os.path.basename(os.path.dirname(os.path.dirname(scene_path)))
    scene_id = os.path.basename(os.path.dirname(scene_path))
    mesh = read_plymesh(scene_path)
    if mesh is None:
        print(f"Skipping empty mesh {scene_path}")
        return
    vertices, faces = mesh
    coords = vertices[:, :3]
    colors = vertices[:, 3:6]
    data_dict = dict(coord=coords, color=colors, scene_id=scene_id)
    data_dict["normal"] = vertex_normal(coords, faces)
    torch.save(data_dict, os.path.join(output_dir, split, f"{scene_id}.pth"))
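

# Each saved .pth holds a plain dict and can be inspected with torch.load,
# e.g. (illustrative path; newer PyTorch may need weights_only=False since
# the values are numpy arrays):
#   data = torch.load("/path/to/output/Training/<scene_id>.pth")
#   data.keys()  # dict_keys(['coord', 'color', 'scene_id', 'normal'])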


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_root",
        required=True,
        help="Path to the ARKitScenes dataset containing scene folders",
    )
    parser.add_argument(
        "--output_root",
        required=True,
        help="Output path where Training/Validation folders will be located",
    )
    opt = parser.parse_args()
    # Create output directories matching the dataset's split names
    train_output_dir = os.path.join(opt.output_root, "Training")
    os.makedirs(train_output_dir, exist_ok=True)
    val_output_dir = os.path.join(opt.output_root, "Validation")
    os.makedirs(val_output_dir, exist_ok=True)

    # Collect scene mesh paths
    scene_paths = sorted(glob.glob(opt.dataset_root + "/3dod/*/*/*_mesh.ply"))
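    # The pattern assumes the standard ARKitScenes 3DoD layout:
    #   <dataset_root>/3dod/<Training|Validation>/<scene_id>/<...>_mesh.ply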

    # Preprocess scenes in parallel, one worker per CPU core
    pool = ProcessPoolExecutor(max_workers=mp.cpu_count())
    # pool = ProcessPoolExecutor(max_workers=1)  # serial fallback for debugging
    print("Processing scenes...")
    _ = list(pool.map(parse_scene, scene_paths, repeat(opt.output_root)))