#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#
import pytest
from unittest.mock import MagicMock, patch

from test_utils import platform_config
from tlt.models import model_factory
from tlt.utils.types import FrameworkType, UseCaseType
from tlt.datasets.image_classification.image_classification_dataset import ImageClassificationDataset
from tlt.datasets.text_classification.text_classification_dataset import TextClassificationDataset

# True when all imports are successful, false when an import fails
# This is necessary to protect from import errors when testing in a tensorflow only environment
tf_env = True

try:
    from tensorflow import keras
except ModuleNotFoundError:
    print("WARNING: Unable to import Keras. Tensorflow may not be installed")
    tf_env = False

try:
    # Do TF specific imports in a try/except to prevent pytest test loading from failing when running in a PyTorch env
    from tlt.models.image_classification.tfhub_image_classification_model import TFHubImageClassificationModel
    from tlt.models.image_classification.keras_image_classification_model import KerasImageClassificationModel
    from tlt.models.image_classification.tf_image_classification_model import TFImageClassificationModel
except ModuleNotFoundError:
    # Placeholders so later references to these names don't raise NameError in a PyTorch-only env
    TFHubImageClassificationModel = None
    KerasImageClassificationModel = None
    TFImageClassificationModel = None
    print("WARNING: Unable to import TFHubImageClassificationModel or TFImageClassificationModel. "
          "TensorFlow may not be installed")
    tf_env = False

try:
    # Do TF specific imports in a try/except to prevent pytest test loading from failing when running in a PyTorch env
    from tlt.models.text_classification.tf_hf_text_classification_model import TFHFTextClassificationModel
    from tlt.models.text_classification.tf_text_classification_model import TFTextClassificationModel
except ModuleNotFoundError:
    # Placeholders so later references to these names don't raise NameError in a PyTorch-only env
    TFHFTextClassificationModel = None
    TFTextClassificationModel = None
    print("WARNING: Unable to import TFHFTextClassificationModel. TensorFlow may not be installed")
    tf_env = False
# This is necessary to protect from import errors when testing in a tensorflow only environment
if tf_env:
    # Define a custom model: an AlexNet-style CNN ending in a 3-class softmax head,
    # used by the custom-model tests below
    ALEXNET = keras.models.Sequential()
    ALEXNET.add(keras.layers.Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu',
                                    input_shape=(227, 227, 3)))
    ALEXNET.add(keras.layers.BatchNormalization())
    ALEXNET.add(keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)))
    ALEXNET.add(keras.layers.Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), activation='relu',
                                    padding="same"))
    ALEXNET.add(keras.layers.BatchNormalization())
    ALEXNET.add(keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)))
    ALEXNET.add(keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu',
                                    padding="same"))
    ALEXNET.add(keras.layers.BatchNormalization())
    ALEXNET.add(keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu',
                                    padding="same"))
    ALEXNET.add(keras.layers.BatchNormalization())
    ALEXNET.add(keras.layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), activation='relu',
                                    padding="same"))
    ALEXNET.add(keras.layers.BatchNormalization())
    ALEXNET.add(keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)))
    ALEXNET.add(keras.layers.Flatten())
    ALEXNET.add(keras.layers.Dense(4096, activation='relu'))
    ALEXNET.add(keras.layers.Dropout(0.5))
    ALEXNET.add(keras.layers.Dense(4096, activation='relu'))
    ALEXNET.add(keras.layers.Dropout(0.5))
    ALEXNET.add(keras.layers.Dense(3, activation='softmax'))
def test_tf_model_load(model_name, expected_class, expected_image_size):
    """
    Verify that a TensorFlow model can be downloaded through the factory,
    is exactly the expected class, and (when given) reports the expected
    input image size.
    """
    loaded = model_factory.get_model(model_name, 'tensorflow')
    assert type(loaded) == expected_class
    if expected_image_size:
        assert loaded.image_size == expected_image_size
# This is necessary to protect from import errors when testing in a tensorflow only environment
if tf_env:
    def test_keras_model_load(model_name, expected_class, expected_image_size):
        """
        Verify that a model can be downloaded from Keras.applications through
        the factory: exact class match, expected image size (when given), and
        a callable preprocessor attribute.
        """
        keras_model = model_factory.get_model(model_name, 'tensorflow')
        assert type(keras_model) == expected_class
        if expected_image_size:
            assert keras_model.image_size == expected_image_size
        assert callable(keras_model.preprocessor)
# This is necessary to protect from import errors when testing in a tensorflow only environment
if tf_env:
    def test_custom_model_load(model_name, use_case, expected_class, expected_image_size, expected_num_classes):
        """
        Verify that a user-provided (custom) model can be loaded through the
        factory and reports the expected wrapper class, number of classes,
        and — for image classification — image size.
        """
        custom_model = model_factory.load_model(model_name, ALEXNET, 'tensorflow', use_case)
        assert type(custom_model) == expected_class
        assert custom_model.num_classes == expected_num_classes
        if use_case == 'image_classification':
            assert custom_model.image_size == expected_image_size
def test_get_supported_models(model_name, use_case, hub):
    """
    Call get_supported_models and check that the returned dictionary has a key
    for every use case and that a known supported model is listed under the
    TensorFlow framework with the expected model hub.
    """
    supported = model_factory.get_supported_models()

    # Every use case enum value should appear as a key
    assert all(str(uc) in supported.keys() for uc in UseCaseType)

    # The known model should be present for the given use case
    assert model_name in supported[use_case]
    info = supported[use_case][model_name]
    assert str(FrameworkType.TENSORFLOW) in info
    assert info[str(FrameworkType.TENSORFLOW)]['model_hub'] == hub
def test_get_supported_models_with_filter(framework, use_case):
    """
    Get the supported-models dictionary filtered by framework and/or use case.
    Verify the expected use case keys are present; when filtering by framework,
    also verify that only models for that framework are returned.
    """
    filtered = model_factory.get_supported_models(framework, use_case)

    if use_case is None:
        # No use case filter: one key per use case enum value
        assert len(filtered.keys()) == len(UseCaseType)
        for uc in UseCaseType:
            assert str(uc) in filtered.keys()
    else:
        # A use case filter leaves exactly that one key
        assert len(filtered.keys()) == 1
        assert use_case in filtered

    # With a framework filter, no model may list any other framework
    if framework is not None:
        for uc_key in filtered.keys():
            for model_key in filtered[uc_key].keys():
                assert len(filtered[uc_key][model_key].keys()) == 1
                assert framework in filtered[uc_key][model_key]
def test_get_supported_models_bad_framework(bad_framework):
    """
    Ensure that a ValueError with the expected message is raised when an
    unsupported framework name is passed to get_supported_models.
    """
    with pytest.raises(ValueError) as e:
        model_factory.get_supported_models(bad_framework)
    # Inspect the exception itself; pytest docs recommend str(e.value) over
    # str(e), whose output for ExceptionInfo is not the plain message
    assert "Unsupported framework: {}".format(bad_framework) in str(e.value)
def test_get_supported_models_bad_use_case(bad_use_case):
    """
    Ensure that a ValueError with the expected message is raised when an
    unsupported use case is passed to get_supported_models.
    """
    with pytest.raises(ValueError) as e:
        model_factory.get_supported_models(use_case=bad_use_case)
    # Inspect the exception itself; pytest docs recommend str(e.value) over
    # str(e), whose output for ExceptionInfo is not the plain message
    assert "Unsupported use case: {}".format(bad_use_case) in str(e.value)
# This is necessary to protect from import errors when testing in a tensorflow only environment
if tf_env:
    def test_tf_model_train(mock_tokenizer, model_name, dataset_type, get_hub_model_patch, class_names):
        """
        Tests calling train on a TFHub or Keras model with a mock dataset and mock model and verifies we get
        back the return value from the fit function — with eval, without eval, and with eval requested but no
        validation subset available.

        NOTE(review): the mock_* and parametrized arguments appear to be injected by @patch /
        @pytest.mark.parametrize decorators not visible in this chunk — confirm in the full file.
        """
        model = model_factory.get_model(model_name, 'tensorflow')

        with patch(get_hub_model_patch) as mock_get_hub_model:
            mock_dataset = MagicMock()
            mock_dataset.__class__ = dataset_type
            mock_dataset.validation_subset = [1, 2, 3]
            mock_dataset.class_names = class_names
            mock_model = MagicMock()
            expected_return_value = {"result": True}
            mock_history = MagicMock()
            mock_history.history = expected_return_value

            # Stand-in for fit() that validates the arguments train() passes through.
            # The callbacks default is an immutable () instead of a mutable [] to
            # avoid the shared-mutable-default-argument pitfall; len() still works.
            def mock_fit(x=None, y=None, epochs=1, shuffle=True, callbacks=(), validation_data=None,
                         batch_size=None):
                assert x is not None
                assert isinstance(epochs, int)
                assert isinstance(shuffle, bool)
                assert len(callbacks) > 0
                # eval_expected is rebound by the enclosing test before each train() call
                if eval_expected:
                    assert validation_data is not None
                else:
                    assert validation_data is None
                return mock_history

            # Mock internal function to tokenize input data
            mock_tokenizer.return_value = mock_dataset, []
            mock_model.fit = mock_fit
            mock_get_hub_model.return_value = mock_model

            # Test train with eval
            eval_expected = True
            return_val = model.train(mock_dataset, output_dir="/tmp/output", do_eval=True)
            assert return_val == expected_return_value

            # Test train without eval
            eval_expected = False
            return_val = model.train(mock_dataset, output_dir="/tmp/output", do_eval=False)
            assert return_val == expected_return_value

            # Test train with eval, but no validation subset
            eval_expected = False
            mock_dataset.validation_subset = None
            return_val = model.train(mock_dataset, output_dir="/tmp/output", do_eval=True)
            assert return_val == expected_return_value
# This is necessary to protect from import errors when testing in a tensorflow only environment
if tf_env:
    def test_custom_model_train():
        """
        Train a custom (user-provided) TF model against a mocked dataset and a
        mocked underlying model, and verify train() returns the history dict
        produced by fit().
        """
        custom_model = model_factory.load_model('custom_model', ALEXNET, 'tensorflow', 'image_classification')

        dataset_mock = MagicMock()
        dataset_mock.__class__ = ImageClassificationDataset
        dataset_mock.class_names = ['1', '2', '3']

        fit_result = {"result": True}
        history_mock = MagicMock()
        history_mock.history = fit_result

        # Parameter names must match what train() passes through to fit()
        def mock_fit(dataset, epochs, shuffle, callbacks, validation_data=None):
            assert dataset is not None
            assert isinstance(epochs, int)
            assert isinstance(shuffle, bool)
            assert len(callbacks) > 0
            return history_mock

        custom_model._model = MagicMock()
        custom_model._model.fit = mock_fit
        assert custom_model.train(dataset_mock, output_dir="/tmp/output") == fit_result
def test_tfhub_auto_mixed_precision(mock_tokenizer, mock_subprocess, mock_platform, mock_os, mock_get_cpuset,
                                    mock_set_experimental_options, mock_tf_version, cpu_model,
                                    enable_auto_mixed_precision, expected_auto_mixed_precision_parameter,
                                    tf_version, model_name, dataset_type):
    """
    Verifies that auto mixed precision is enabled by default for SPR (cpu model 85), but disabled by default for other
    CPU types like SKX (cpu model 143). The default auto mixed precision setting is used when
    enable_auto_mixed_precision=None. Auto mixed precision was enabled for TF 2.9.0 and later, so don't expect the call
    to set the config for earlier TF versions.
    If enable_auto_mixed_precision is set to True/False, then that's what should be used, regardless of CPU type.

    NOTE(review): the mock_* parameters appear to be supplied by @patch decorators and the rest by
    @pytest.mark.parametrize; neither is visible in this chunk — confirm in the full file.
    """
    # Make the platform probing code see the canned test platform configuration
    mock_get_cpuset.return_value = platform_config.CPUSET
    platform_config.set_mock_system_type(mock_platform)
    platform_config.set_mock_os_access(mock_os)

    # get the lscpu sample output, but replace in the parameterized cpu model id
    lscpu_value = platform_config.LSCPU_OUTPUT
    original_model_value = "Model: 143\n"  # model test value from the test platform config
    new_model_value = "Model: {}\n".format(cpu_model)
    lscpu_value = lscpu_value.replace(original_model_value, new_model_value)
    mock_subprocess.check_output.return_value = lscpu_value

    # Minimal mock dataset so train() can run without real data
    mock_dataset = MagicMock()
    mock_dataset.__class__ = dataset_type
    mock_dataset.class_names = ['a', 'b']
    mock_tf_version.VERSION = tf_version

    model = model_factory.get_model(model_name, 'tensorflow')
    model._get_hub_model = MagicMock()

    # Mock internal function to tokenize input data
    mock_tokenizer.return_value = mock_dataset, []
    model.train(mock_dataset, output_dir="/tmp/output", enable_auto_mixed_precision=enable_auto_mixed_precision)

    if expected_auto_mixed_precision_parameter is not None:
        expected_parameter = {'auto_mixed_precision_mkl': expected_auto_mixed_precision_parameter}
        mock_set_experimental_options.assert_called_with(expected_parameter)
    else:
        # We expect that the auto mixed precision config is not called (due to TF version unsupported)
        assert not mock_set_experimental_options.called
# This is necessary to protect from import errors when testing in a tensorflow only environment
if tf_env:
    def test_tf_optimizer_loss(mock_tokenizer, model_name, use_case, dataset_type, optimizer, loss):
        """
        Initialize and train a model with configurable optimizer and loss
        classes, verifying the configured classes are kept and that training
        instantiates objects of exactly those classes.
        """
        if model_name == 'custom':
            model = model_factory.load_model(model_name, ALEXNET, 'tensorflow', use_case, optimizer=optimizer, loss=loss)  # noqa: E501
        else:
            model = model_factory.get_model(model_name, 'tensorflow', optimizer=optimizer, loss=loss)
        assert model._optimizer_class == optimizer
        assert model._loss_class == loss

        model._generate_checkpoints = False
        model._get_hub_model = MagicMock()
        model._model = MagicMock()
        model._model.fit = MagicMock()

        fake_dataset = MagicMock()
        fake_dataset.__class__ = dataset_type
        # Text classification datasets get two class names; everything else gets three
        fake_dataset.class_names = ['a', 'b'] if dataset_type == TextClassificationDataset else ['a', 'b', 'c']

        # Mock internal function to tokenize input data
        mock_tokenizer.return_value = fake_dataset, []

        # Train is called and optimizer and loss objects should match the input types
        model.train(fake_dataset, output_dir="/tmp/output/tf")
        assert model._optimizer_class == optimizer
        assert type(model._optimizer) == optimizer
        assert model._loss_class == loss
        assert type(model._loss) == loss
# This is necessary to protect from import errors when testing in a tensorflow only environment
if tf_env:
    def test_tf_loss_wrong_type(model_name, loss):
        """
        A loss object of the wrong type should cause model creation to raise
        a TypeError.
        """
        with pytest.raises(TypeError):
            model_factory.get_model(model_name, 'tensorflow', loss=loss)