Spaces:
Configuration error
Configuration error
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# | |
# Copyright (c) 2022 Intel Corporation | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# | |
# SPDX-License-Identifier: Apache-2.0 | |
# | |
import os | |
import pytest | |
import shutil | |
import tempfile | |
from click.testing import CliRunner | |
from unittest.mock import MagicMock, patch | |
from tlt.tools.cli.commands.train import train | |
from tlt.utils.types import FrameworkType | |
def test_train_preprocess_with_image_size(mock_inspect, mock_load_dataset, mock_get_model, model_name, framework): | |
""" | |
Tests the train command with a dataset preprocessing method that has an image_size. Actual calls for the model and | |
dataset are mocked out. The test verifies that the proper args are used for calling preprocess() | |
""" | |
runner = CliRunner() | |
tmp_dir = tempfile.mkdtemp() | |
dataset_dir = os.path.join(tmp_dir, 'data') | |
output_dir = os.path.join(tmp_dir, 'output') | |
dummy_image_size = 100 | |
try: | |
for new_dir in [output_dir, dataset_dir]: | |
os.makedirs(new_dir) | |
model_mock = MagicMock() | |
model_mock.image_size = dummy_image_size | |
data_mock = MagicMock() | |
# Test where the preprocessing command will have an image size | |
inspect_mock = MagicMock() | |
inspect_mock.args = ['image_size', 'batch_size'] | |
mock_inspect.return_value = inspect_mock | |
mock_get_model.return_value = model_mock | |
mock_load_dataset.return_value = data_mock | |
model_mock.export.return_value = output_dir | |
# Call the train command | |
result = runner.invoke(train, | |
["--framework", str(framework), "--model-name", model_name, "--dataset_dir", dataset_dir, | |
"--output-dir", output_dir]) | |
# Verify that the expected calls were made | |
mock_get_model.assert_called_once_with(model_name, str(framework)) | |
mock_load_dataset.assert_called_once_with(dataset_dir, model_mock.use_case, model_mock.framework) | |
assert data_mock.shuffle_split.called | |
assert model_mock.train.called | |
# Verify that preprocess was called with the right arguments | |
data_mock.preprocess.assert_called_once_with(image_size=dummy_image_size, batch_size=32, add_aug=[]) | |
# Verify that the train command exit code is successful | |
assert result.exit_code == 0 | |
finally: | |
if os.path.exists(tmp_dir): | |
shutil.rmtree(tmp_dir) | |
def test_train_preprocess_without_image_size(mock_inspect, mock_load_dataset, mock_get_model, model_name, framework): | |
""" | |
Tests the train command with a dataset preprocessing method that just has a batch size arg. Actual calls for the | |
model and dataset are mocked out. The test verifies that the proper args are used for calling preprocess() | |
""" | |
runner = CliRunner() | |
tmp_dir = tempfile.mkdtemp() | |
dataset_dir = os.path.join(tmp_dir, 'data') | |
output_dir = os.path.join(tmp_dir, 'output') | |
dummy_image_size = 100 | |
try: | |
for new_dir in [output_dir, dataset_dir]: | |
os.makedirs(new_dir) | |
model_mock = MagicMock() | |
model_mock.image_size = dummy_image_size | |
data_mock = MagicMock() | |
# Test where the preprocessing command just has a batch_size arg | |
inspect_mock = MagicMock() | |
inspect_mock.args = ['batch_size'] | |
mock_inspect.return_value = inspect_mock | |
mock_get_model.return_value = model_mock | |
mock_load_dataset.return_value = data_mock | |
model_mock.export.return_value = output_dir | |
# Call the train command | |
result = runner.invoke(train, | |
["--framework", str(framework), "--model-name", model_name, "--dataset_dir", dataset_dir, | |
"--output-dir", output_dir]) | |
# Verify that the expected calls were made | |
mock_get_model.assert_called_once_with(model_name, str(framework)) | |
mock_load_dataset.assert_called_once_with(dataset_dir, model_mock.use_case, model_mock.framework) | |
assert data_mock.shuffle_split.called | |
assert model_mock.train.called | |
# Verify preprocess was called with the right arguments | |
data_mock.preprocess.assert_called_once_with(batch_size=32) | |
# Verify that the train command exit code is successful | |
assert result.exit_code == 0 | |
finally: | |
if os.path.exists(tmp_dir): | |
shutil.rmtree(tmp_dir) | |
def test_train_add_augmentation(mock_inspect, mock_load_dataset, mock_get_model, model_name, framework, add_aug): | |
""" | |
Tests the train command with add augmentation. Actual calls for the model and dataset are mocked out. The test | |
verifies that the proper args are passed to the model train() method. | |
""" | |
runner = CliRunner() | |
tmp_dir = tempfile.mkdtemp() | |
dataset_dir = os.path.join(tmp_dir, 'data') | |
output_dir = os.path.join(tmp_dir, 'output') | |
try: | |
for new_dir in [output_dir, dataset_dir]: | |
os.makedirs(new_dir) | |
model_mock = MagicMock() | |
data_mock = MagicMock() | |
mock_get_model.return_value = model_mock | |
mock_load_dataset.return_value = data_mock | |
model_mock.export.return_value = output_dir | |
# Call the train command | |
result = runner.invoke(train, | |
["--framework", str(framework), "--model-name", model_name, "--dataset_dir", dataset_dir, | |
"--output-dir", output_dir, "--add_aug", add_aug]) | |
# Verify that the expected calls were made | |
mock_get_model.assert_called_once_with(model_name, str(framework)) | |
mock_load_dataset.assert_called_once_with(dataset_dir, model_mock.use_case, model_mock.framework) | |
assert data_mock.shuffle_split.called | |
assert model_mock.train.called | |
# Verify preprocess was called with the right arguments | |
data_mock.preprocess.assert_called_once_with(batch_size=32) | |
# Verify that the train command exit code is successful | |
assert result.exit_code == 0 | |
finally: | |
if os.path.exists(tmp_dir): | |
shutil.rmtree(tmp_dir) | |
def test_train_init_checkpoints(mock_load_dataset, mock_get_model, model_name, framework, init_checkpoints): | |
""" | |
Tests the train command with init checkpoints. Actual calls for the model and dataset are mocked out. The test | |
verifies that the proper args are passed to the model train() method. | |
""" | |
runner = CliRunner() | |
tmp_dir = tempfile.mkdtemp() | |
dataset_dir = os.path.join(tmp_dir, 'data') | |
output_dir = os.path.join(tmp_dir, 'output') | |
try: | |
for new_dir in [output_dir, dataset_dir]: | |
os.makedirs(new_dir) | |
# Setup mocks | |
model_mock = MagicMock() | |
data_mock = MagicMock() | |
mock_get_model.return_value = model_mock | |
mock_load_dataset.return_value = data_mock | |
model_mock.export.return_value = output_dir | |
# Call the train command | |
result = runner.invoke(train, | |
["--framework", str(framework), "--model-name", model_name, "--dataset_dir", dataset_dir, | |
"--output-dir", output_dir, "--init-checkpoints", init_checkpoints, "--epochs", 2]) | |
# Verify that the expected calls were made | |
mock_get_model.assert_called_once_with(model_name, str(framework)) | |
mock_load_dataset.assert_called_once_with(dataset_dir, model_mock.use_case, model_mock.framework) | |
# Verify that train and preprocess were called with the right arguments | |
if framework == FrameworkType.TENSORFLOW: | |
model_mock.train.assert_called_once_with(data_mock, output_dir=output_dir, epochs=2, | |
initial_checkpoints=init_checkpoints, early_stopping=False, | |
lr_decay=False, distributed=False, hostfile=None, nnodes=1, | |
nproc_per_node=1, use_horovod=False) | |
elif framework == FrameworkType.PYTORCH: | |
model_mock.train.assert_called_once_with(data_mock, output_dir=output_dir, epochs=2, | |
initial_checkpoints=init_checkpoints, early_stopping=False, | |
lr_decay=False, ipex_optimize=False, distributed=False, | |
hostfile=None, nnodes=1, nproc_per_node=1, use_horovod=False, | |
hvd_start_timeout=30) | |
data_mock.preprocess.assert_called_once_with(batch_size=32) | |
# Verify that the train command exit code is successful | |
assert result.exit_code == 0 | |
finally: | |
if os.path.exists(tmp_dir): | |
shutil.rmtree(tmp_dir) | |
def test_train_features(mock_inspect, mock_load_dataset, mock_get_model, model_name, framework, epochs, early_stopping, lr_decay): # noqa: E501 | |
""" | |
Tests the train command with early stopping. Actual calls for the model and dataset are mocked out. The test | |
verifies that the proper args are passed to the model train() method. | |
""" | |
runner = CliRunner() | |
tmp_dir = tempfile.mkdtemp() | |
dataset_dir = os.path.join(tmp_dir, 'data') | |
output_dir = os.path.join(tmp_dir, 'output') | |
try: | |
for new_dir in [output_dir, dataset_dir]: | |
os.makedirs(new_dir) | |
model_mock = MagicMock() | |
data_mock = MagicMock() | |
mock_get_model.return_value = model_mock | |
mock_load_dataset.return_value = data_mock | |
model_mock.export.return_value = output_dir | |
# Call the train command | |
if early_stopping and lr_decay: | |
result = runner.invoke(train, | |
["--framework", str(framework), "--model-name", model_name, "--dataset_dir", | |
dataset_dir, "--output-dir", output_dir, "--epochs", epochs, "--early_stopping", | |
"--lr_decay"]) | |
elif early_stopping: | |
result = runner.invoke(train, | |
["--framework", str(framework), "--model-name", model_name, "--dataset_dir", | |
dataset_dir, "--output-dir", output_dir, "--epochs", epochs, "--early_stopping"]) | |
elif lr_decay: | |
result = runner.invoke(train, | |
["--framework", str(framework), "--model-name", model_name, "--dataset_dir", | |
dataset_dir, "--output-dir", output_dir, "--epochs", epochs, "--lr_decay"]) | |
else: | |
result = runner.invoke(train, | |
["--framework", str(framework), "--model-name", model_name, "--dataset_dir", | |
dataset_dir, "--output-dir", output_dir, "--epochs", epochs]) | |
# Verify that the expected calls were made | |
mock_get_model.assert_called_once_with(model_name, str(framework)) | |
mock_load_dataset.assert_called_once_with(dataset_dir, model_mock.use_case, model_mock.framework) | |
assert data_mock.shuffle_split.called | |
assert model_mock.train.called | |
# Verify that train and preprocess were called with the right arguments | |
if framework == FrameworkType.TENSORFLOW: | |
model_mock.train.assert_called_once_with(data_mock, output_dir=output_dir, epochs=15, | |
initial_checkpoints=None, early_stopping=early_stopping, | |
lr_decay=lr_decay, distributed=False, hostfile=None, nnodes=1, | |
nproc_per_node=1, use_horovod=False) | |
elif framework == FrameworkType.PYTORCH: | |
model_mock.train.assert_called_once_with(data_mock, output_dir=output_dir, epochs=15, | |
initial_checkpoints=None, early_stopping=early_stopping, | |
lr_decay=lr_decay, ipex_optimize=False, distributed=False, | |
hostfile=None, nnodes=1, nproc_per_node=1, use_horovod=False, | |
hvd_start_timeout=30) | |
# Verify that the train command exit code is successful | |
assert result.exit_code == 0 | |
finally: | |
if os.path.exists(tmp_dir): | |
shutil.rmtree(tmp_dir) | |
def test_train_dataset_catalog(mock_get_dataset, mock_get_model, model_name, framework, dataset_name, dataset_catalog): | |
""" | |
Tests the train command a named dataset and verifies that get_dataset is called (vs load_dataset, which is used | |
for custom dataset directories in other tests). | |
""" | |
runner = CliRunner() | |
tmp_dir = tempfile.mkdtemp() | |
dataset_dir = os.path.join(tmp_dir, 'data') | |
output_dir = os.path.join(tmp_dir, 'output') | |
try: | |
for new_dir in [output_dir, dataset_dir]: | |
os.makedirs(new_dir) | |
# Setup mocks | |
model_mock = MagicMock() | |
data_mock = MagicMock() | |
mock_get_model.return_value = model_mock | |
mock_get_dataset.return_value = data_mock | |
model_mock.export.return_value = output_dir | |
# Call the train command | |
result = runner.invoke(train, | |
["--framework", str(framework), "--model-name", model_name, "--dataset_dir", dataset_dir, | |
"--output-dir", output_dir, "--dataset-name", dataset_name, | |
"--dataset-catalog", dataset_catalog]) | |
# Verify that the expected calls were made | |
mock_get_model.assert_called_once_with(model_name, str(framework)) | |
mock_get_dataset.assert_called_once_with(dataset_dir, model_mock.use_case, model_mock.framework, | |
dataset_name, dataset_catalog) | |
# Verify that the train command exit code is successful | |
assert model_mock.train.called | |
assert result.exit_code == 0 | |
finally: | |
if os.path.exists(tmp_dir): | |
shutil.rmtree(tmp_dir) | |
class TestTrainArgs: | |
""" | |
Class for tests that are testing bad inputs for training with generic folders for the dataset dir and output dir. | |
""" | |
def setup_class(self): | |
self._runner = CliRunner() | |
self._tmp_dir = tempfile.mkdtemp() | |
self._dataset_dir = os.path.join(self._tmp_dir, 'data') | |
self._output_dir = os.path.join(self._tmp_dir, 'output') | |
def setup_method(self): | |
for new_dir in [self._output_dir, self._dataset_dir]: | |
if not os.path.exists(new_dir): | |
os.makedirs(new_dir) | |
def teardown_method(self): | |
if os.path.exists(self._tmp_dir): | |
shutil.rmtree(self._tmp_dir) | |
def teardown_class(self): | |
if os.path.exists(self._tmp_dir): | |
shutil.rmtree(self._tmp_dir) | |
def test_train_invalid_epochs(self, epochs): | |
""" | |
Verifies that train command fails if the epoch parameter is invalid | |
""" | |
result = self._runner.invoke(train, | |
["--model-name", "foo", | |
"--dataset_dir", self._dataset_dir, | |
"--output-dir", self._output_dir, | |
"--epochs", epochs]) | |
assert result.exit_code == 2 | |
assert "Invalid value for '--epochs'" in result.output | |
def test_train_invalid_framework(self, framework): | |
""" | |
Verifies that train command fails if the framework value is invalid | |
""" | |
result = self._runner.invoke(train, | |
["--model-name", "foo", | |
"--dataset_dir", self._dataset_dir, | |
"--output-dir", self._output_dir, | |
"--framework", framework]) | |
assert result.exit_code == 2 | |
assert "Invalid value for '--framework'" in result.output | |
assert "'{}' is not one of 'tensorflow', 'pytorch'".format(framework) in result.output | |
def test_train_invalid_dataset_catalog(self, dataset_catalog): | |
""" | |
Verifies that train command fails if the dataset catalog value is invalid | |
""" | |
result = self._runner.invoke(train, | |
["--model-name", "foo", | |
"--dataset_dir", self._dataset_dir, | |
"--output-dir", self._output_dir, | |
"--dataset-catalog", dataset_catalog]) | |
assert result.exit_code == 2 | |
assert "Invalid value for '--dataset-catalog'" in result.output | |
assert "'{}' is not one of 'tf_datasets', 'torchvision', 'huggingface'".format(dataset_catalog) in result.output | |