Spaces:
Paused
Paused
| import h5py | |
| from .common import TestCase | |
def is_aligned(dataset, offset=4096):
    """Return True if the dataset's data starts on a multiple of ``offset``.

    Parameters
    ----------
    dataset
        Object exposing ``dataset.id.get_offset()``, which reports the byte
        offset of the dataset's data within its file.
    offset : int, optional
        Alignment interval in bytes (default 4096).

    Returns
    -------
    bool
        ``True`` when the reported offset is an exact multiple of ``offset``.

    Raises
    ------
    TypeError
        If ``get_offset()`` returns ``None``, i.e. the dataset reports no
        file offset (h5py documents this for e.g. datasets whose storage
        has not been allocated yet).
    """
    file_offset = dataset.id.get_offset()
    if file_offset is None:
        # Without this guard the modulo below fails with an opaque
        # "unsupported operand" TypeError; keep the type, improve the message.
        raise TypeError(
            "dataset has no file offset; its alignment cannot be checked")
    return file_offset % offset == 0
def dataset_name(i):
    """Return the dataset name for index ``i``: ``"data"`` plus ``i``
    zero-padded to at least three characters (e.g. ``7`` -> ``"data007"``).
    """
    return f"data{i:03}"
class TestFileAlignment(TestCase):
    """
    Ensure that setting the file alignment has the desired effect
    in the internal structure.
    """

    def test_no_alignment_set(self):
        # With no alignment options passed to h5py.File, at least one
        # dataset is expected to start off a 4096-byte boundary.
        fname = self.mktemp()
        # 881 is a prime number, so hopefully this helps randomize the
        # offsets enough. A nice even size might give a pathological case
        # where the data ends up aligned even though no alignment was
        # requested.
        shape = (881,)

        with h5py.File(fname, 'w') as h5file:
            # Create up to 1000 datasets; at least one of them should be
            # misaligned. This isn't a perfect check, but the only way all
            # 1000 datasets get created is if every one of them is aligned.
            # Therefore, during correct operation, this test is expected to
            # finish quickly.
            for i in range(1000):
                dataset = h5file.create_dataset(
                    dataset_name(i), shape, dtype='uint8')
                # Assign data so that the dataset is instantiated in
                # the file. Values >= 256 do not fit in uint8, so wrap the
                # index instead of writing an out-of-range value.
                dataset[...] = (i % 256)  # Truncate to uint8
                if not is_aligned(dataset):
                    # Found a misaligned dataset, which is the expected
                    # outcome: stop early.
                    break
            else:
                # The loop never hit ``break``: every dataset was aligned.
                raise RuntimeError("Data was all found to be aligned to 4096")

    def test_alignment_set_above_threshold(self):
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization,
        # so the shapes are looped over by hand.
        alignment_threshold = 1000
        alignment_interval = 4096

        for shape in [
            (1033,),  # A prime number above the threshold
            (1000,),  # Exactly equal to the threshold
            (1001,),  # One above the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets
                # They are all expected to be aligned
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file
                    dataset[...] = (i % 256)  # Truncate to uint8
                    assert is_aligned(dataset, offset=alignment_interval), (
                        f"{dataset_name(i)} with shape {shape} is not "
                        f"aligned to {alignment_interval}")

    def test_alignment_set_below_threshold(self):
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization,
        # so the shapes are looped over by hand.
        alignment_threshold = 1000
        alignment_interval = 1024

        for shape in [
            (881,),  # A prime number below the threshold
            (999,),  # Exactly one below the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets; at least one of them should
                # be misaligned. This isn't a perfect check, but the only
                # way all 1000 datasets get created is if every one of them
                # is aligned. Therefore, during correct operation, this
                # test is expected to finish quickly.
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file. Values >= 256 do not fit in uint8, so wrap
                    # the index instead of writing an out-of-range value.
                    dataset[...] = (i % 256)  # Truncate to uint8
                    if not is_aligned(dataset, offset=alignment_interval):
                        # Found a misaligned dataset, which is the expected
                        # outcome: stop early.
                        break
                else:
                    # The loop never hit ``break``: every dataset was
                    # aligned.
                    raise RuntimeError(
                        "Data was all found to be aligned to "
                        f"{alignment_interval}. This is highly unlikely.")