Spaces:

andreped
/

DDMR

Running

App Files Files Community

DDMR / Brain_study /format_dataset.py

jpdefrutos

Updating latest changes

286a978 over 2 years ago

raw

history blame contribute delete

5.05 kB

	import h5py
	import nibabel as nib
	from nilearn.image import resample_img
	import os, sys
	import re
	import numpy as np
	from scipy.ndimage import zoom
	from tqdm import tqdm

	# Put the directory that contains this file's package on the import path, so
	# the absolute `Brain_study...` import below resolves when this file is run
	# directly as a script.
	currentdir = os.path.split(os.path.realpath(__file__))[0]
	parentdir = os.path.split(currentdir)[0]
	sys.path += [parentdir]

	from Brain_study.split_dataset import split

	# (raw label value, structure name) pairs; both lookup tables below are
	# derived from this single listing so they cannot drift apart.
	_NR_LABEL_PAIRS = (
	    (0, 'background'),
	    (2, 'parietal-right-gm'),
	    (3, 'lateral-ventricle-left'),
	    (4, 'occipital-right-gm'),
	    (6, 'parietal-left-gm'),
	    (8, 'occipital-left-gm'),
	    (9, 'lateral-ventricle-right'),
	    (11, 'globus-pallidus-right'),
	    (12, 'globus-pallidus-left'),
	    (14, 'putamen-left'),
	    (16, 'putamen-right'),
	    (20, 'brain-stem'),
	    (23, 'subthalamic-nucleus-right'),
	    (29, 'fornix-left'),
	    (33, 'subthalamic-nucleus-left'),
	    (39, 'caudate-left'),
	    (53, 'caudate-right'),
	    (67, 'cerebellum-left'),
	    (76, 'cerebellum-right'),
	    (102, 'thalamus-left'),
	    (203, 'thalamus-right'),
	    (210, 'frontal-left-gm'),
	    (211, 'frontal-right-gm'),
	    (218, 'temporal-left-gm'),
	    (219, 'temporal-right-gm'),
	    (232, '3rd-ventricle'),
	    (233, '4th-ventricle'),
	    (254, 'fornix-right'),
	    (255, 'csf'),
	)

	# Raw label value -> structure name.
	SEGMENTATION_NR2LBL_LUT = dict(_NR_LABEL_PAIRS)

	# Structure name -> raw label value (inverse of the table above).
	SEGMENTATION_LBL2NR_LUT = {name: number for number, name in _NR_LABEL_PAIRS}

	# Every non-background raw label value listed for the masks (as floats).
	ALL_LABELS = {2., 3., 4., 6., 8., 9., 11., 12., 14., 16., 20., 23., 29., 33., 39., 53., 67., 76., 102., 203., 210.,
	              211., 218., 219., 232., 233., 254., 255.}
	# Grouping of raw labels into merged output labels: a tuple lists the raw
	# values that share one output label, a bare int keeps a label of its own.
	LABELS_COMBINED = {0, (2, 6), (3, 9), (4, 8), (11, 12), (14, 16), 20, (23, 33), (29, 254), (39, 53), (67, 76), (102, 203), (210, 211), (218, 219), 232, 233, 255}


	def _smallest_raw_label(entry):
	    """Return the smallest raw label value of a LABELS_COMBINED entry (sort key)."""
	    return min(entry) if isinstance(entry, tuple) else entry


	# Raw label value -> contiguous merged label id (0..len(LABELS_COMBINED) - 1).
	# LABELS_COMBINED is a set, so enumerating it directly would number the labels
	# in hash-table order, which is arbitrary and not guaranteed to be the same on
	# every interpreter. Sorting by the smallest raw value of each entry makes the
	# numbering reproducible and pins background (0) to id 0.
	# NOTE(review): files written by earlier runs may carry a different numbering.
	SEGMENTATION_LOC = {}
	for loc, label in enumerate(sorted(LABELS_COMBINED, key=_smallest_raw_label)):
	    if isinstance(label, tuple):
	        SEGMENTATION_LOC.update(dict.fromkeys(label, loc))
	    else:
	        SEGMENTATION_LOC[label] = loc

	# Input locations: the image volumes and, in a sub-directory, their masks.
	IMG_DIRECTORY = '/mnt/EncryptedData1/Users/javier/ext_datasets/IXI_dataset/T1'
	SEG_DIRECTORY = '/mnt/EncryptedData1/Users/javier/ext_datasets/IXI_dataset/T1/anatomical_masks'

	# File-name patterns; group 1 captures the part of the name before the suffix.
	# Raw strings with escaped dots, so '.' only matches a literal dot instead of
	# any character.
	IMG_NAME_PATTERN = r'(.*)\.nii\.gz'
	SEG_NAME_PATTERN = r'(.*)_lobes\.nii\.gz'

	# Destination of the generated .h5 files.
	OUT_DIRECTORY = '/mnt/EncryptedData1/Users/javier/ext_datasets/IXI_dataset/T1/ERASEME_sequential'

	if __name__ == '__main__':
	    # Convert every segmentation/image pair into one 128x128x128 HDF5 sample
	    # in OUT_DIRECTORY, then hand the generated files to split().
	    seg_list = sorted(os.path.join(SEG_DIRECTORY, f) for f in os.listdir(SEG_DIRECTORY) if f.endswith('.nii.gz'))

	    os.makedirs(OUT_DIRECTORY, exist_ok=True)

	    # Element-wise raw label -> merged label id; values that are not keys of
	    # SEGMENTATION_LOC are mapped to 0.
	    vectorize_fnc = np.vectorize(lambda x: SEGMENTATION_LOC.get(x, 0))
	    change_labels = lambda x: np.reshape(vectorize_fnc(x.ravel()), x.shape)
	    out_shape = np.asarray([128] * 3)
	    for seg_file in tqdm(seg_list):
	        seg_name = os.path.split(seg_file)[-1]
	        name_match = re.match(SEG_NAME_PATTERN, seg_name)
	        if name_match is None:
	            # A stray .nii.gz in the mask directory used to abort the whole
	            # run with "'NoneType' object is not subscriptable".
	            tqdm.write('Skipping {}: file name does not match {}'.format(seg_name, SEG_NAME_PATTERN))
	            continue
	        img_name = name_match[1]
	        img_file = os.path.join(IMG_DIRECTORY, img_name + '.nii.gz')

	        # Resample both volumes with the same 3x3 identity target affine; the
	        # mask uses nearest-neighbour so no new label values are interpolated.
	        img = resample_img(nib.load(img_file), np.eye(3))
	        seg = resample_img(nib.load(seg_file), np.eye(3), interpolation='nearest')

	        isot_shape = img.shape
	        # Both volumes are scaled by the image's factors.
	        # NOTE(review): assumes the resampled mask has the same shape as the image.
	        zoom_factors = out_shape / np.asarray(isot_shape)

	        # Resize to 128x128x128 (cubic spline for intensities, order 0 for labels)
	        img = zoom(np.asarray(img.dataobj), zoom_factors, order=3)
	        seg = zoom(np.asarray(seg.dataobj), zoom_factors, order=0)

	        seg = change_labels(seg)  # This way the segmentation numbering is continuous

	        # Labels present in this volume, background excluded. Filtering on the
	        # value (instead of dropping the first entry) stays correct even when a
	        # volume contains no background voxel.
	        unique_lbls = np.unique(seg)
	        unique_lbls = unique_lbls[unique_lbls != 0]

	        # The context manager closes the file even if a write fails.
	        with h5py.File(os.path.join(OUT_DIRECTORY, img_name + '.h5'), 'w') as h5_file:
	            h5_file.create_dataset('image', data=img[..., np.newaxis], dtype=np.float32)
	            h5_file.create_dataset('segmentation', data=seg[..., np.newaxis].astype(np.uint8), dtype=np.uint8)
	            h5_file.create_dataset('segmentation_labels', data=unique_lbls)
	            h5_file.create_dataset('isotropic_shape', data=isot_shape)

	    # We should only have train and test. The val split is done by the batch generator
	    # NOTE(review): the call below still requests a 15% validation split - confirm intent.
	    split(train_perc=0.70, validation_perc=0.15, test_perc=0.15, data_dir=OUT_DIRECTORY, move_files=True)