#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#
import pytest
from unittest.mock import MagicMock, patch
from test_utils import platform_config
from tlt.models import model_factory
from tlt.utils.types import FrameworkType, UseCaseType
from tlt.datasets.image_classification.image_classification_dataset import ImageClassificationDataset
from tlt.datasets.text_classification.text_classification_dataset import TextClassificationDataset
# True when all imports are successful, false when an import fails
# This is necessary to protect from import errors when testing in a PyTorch-only environment
tf_env = True

try:
    from tensorflow import keras
except ModuleNotFoundError:
    print("WARNING: Unable to import Keras. TensorFlow may not be installed")
    tf_env = False

try:
    # Do TF specific imports in a try/except to prevent pytest test loading from failing when running in a PyTorch env
    from tlt.models.image_classification.tfhub_image_classification_model import TFHubImageClassificationModel
    from tlt.models.image_classification.keras_image_classification_model import KerasImageClassificationModel
    from tlt.models.image_classification.tf_image_classification_model import TFImageClassificationModel
except ModuleNotFoundError:
    TFHubImageClassificationModel = None
    KerasImageClassificationModel = None
    TFImageClassificationModel = None
    print("WARNING: Unable to import TFHubImageClassificationModel, KerasImageClassificationModel, or "
          "TFImageClassificationModel. TensorFlow may not be installed")
    tf_env = False

try:
    # Do TF specific imports in a try/except to prevent pytest test loading from failing when running in a PyTorch env
    from tlt.models.text_classification.tf_hf_text_classification_model import TFHFTextClassificationModel
    from tlt.models.text_classification.tf_text_classification_model import TFTextClassificationModel
except ModuleNotFoundError:
    TFHFTextClassificationModel = None
    TFTextClassificationModel = None
    print("WARNING: Unable to import TFHFTextClassificationModel or TFTextClassificationModel. "
          "TensorFlow may not be installed")
    tf_env = False


# This is necessary to protect from import errors when testing in a PyTorch-only environment
if tf_env:
    # Define a custom model
    ALEXNET = keras.models.Sequential([
        keras.layers.Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu',
                            input_shape=(227, 227, 3)),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
        keras.layers.Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), activation='relu', padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
        keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dense(4096, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(4096, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(3, activation='softmax')
    ])
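
    # ALEXNET expects 227x227x3 input images and has a 3-class output layer; the custom model tests
    # below rely on those dimensions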

    @pytest.mark.tensorflow
    @pytest.mark.parametrize('model_name,expected_class,expected_image_size',
                             [['efficientnet_b0', TFHubImageClassificationModel, 224],
                              ['google/bert_uncased_L-2_H-128_A-2', TFHFTextClassificationModel, None]])
    def test_tf_model_load(model_name, expected_class, expected_image_size):
        """
        Checks that a model can be downloaded
        """
        model = model_factory.get_model(model_name, 'tensorflow')
        assert type(model) == expected_class

        if expected_image_size:
            assert model.image_size == expected_image_size


# This is necessary to protect from import errors when testing in a PyTorch-only environment
if tf_env:
    @pytest.mark.tensorflow
    @pytest.mark.parametrize('model_name,expected_class,expected_image_size',
                             [['ResNet50', KerasImageClassificationModel, 224],
                              ['Xception', KerasImageClassificationModel, 299]])
    def test_keras_model_load(model_name, expected_class, expected_image_size):
        """
        Checks that a model can be downloaded from keras.applications
        """
        model = model_factory.get_model(model_name, 'tensorflow')
        assert type(model) == expected_class

        if expected_image_size:
            assert model.image_size == expected_image_size

        assert callable(model.preprocessor)


# This is necessary to protect from import errors when testing in a PyTorch-only environment
if tf_env:
    @pytest.mark.tensorflow
    @pytest.mark.parametrize('model_name,use_case,expected_class,expected_image_size,expected_num_classes',
                             [['alexnet', 'image_classification', TFImageClassificationModel, 227, 3],
                              ['alexnet', 'text_classification', TFTextClassificationModel, None, 3]])
    def test_custom_model_load(model_name, use_case, expected_class, expected_image_size, expected_num_classes):
        """
        Checks that a custom model can be loaded
        """
        model = model_factory.load_model(model_name, ALEXNET, 'tensorflow', use_case)
        assert type(model) == expected_class
        assert model.num_classes == expected_num_classes

        if use_case == 'image_classification':
            assert model.image_size == expected_image_size


@pytest.mark.tensorflow
@pytest.mark.parametrize('model_name,use_case,hub',
                         [['ResNet50', 'image_classification', 'Keras'],
                          ['efficientnet_b0', 'image_classification', 'TFHub'],
                          ['google/bert_uncased_L-2_H-128_A-2', 'text_classification', 'huggingface']])
def test_get_supported_models(model_name, use_case, hub):
    """
    Calls get_supported_models and checks that the dictionary has a key for each use case,
    then checks for a known supported model.
    """
    model_dict = model_factory.get_supported_models()

    # Ensure there are keys for each use case
    for k in UseCaseType:
        assert str(k) in model_dict.keys()

    # Check for a known model
    assert model_name in model_dict[use_case]
    model_info = model_dict[use_case][model_name]
    assert str(FrameworkType.TENSORFLOW) in model_info
    assert hub == model_info[str(FrameworkType.TENSORFLOW)]['model_hub']


@pytest.mark.tensorflow
@pytest.mark.parametrize('framework,use_case',
                         [['tensorflow', None],
                          ['pytorch', None],
                          [None, 'image_classification'],
                          [None, 'question_answering'],
                          ['tensorflow', 'image_classification'],
                          ['tensorflow', 'text_classification'],
                          ['pytorch', 'text_classification'],
                          ['pytorch', 'question_answering']])
def test_get_supported_models_with_filter(framework, use_case):
    """
    Tests getting the dictionary of supported models while filtering by framework and/or use case.
    Checks to ensure that keys for the expected use cases are there. If filtering by framework, the test
    also checks to make sure we only have models for the specified framework.
    """
    model_dict = model_factory.get_supported_models(framework, use_case)

    if use_case is not None:
        # Model dictionary should only have a key for the specified use case
        assert 1 == len(model_dict.keys())
        assert use_case in model_dict
    else:
        # Model dictionary should have keys for every use case
        assert len(UseCaseType) == len(model_dict.keys())
        for k in UseCaseType:
            assert str(k) in model_dict.keys()

    # If filtering by framework, we should not find models from other frameworks
    if framework is not None:
        for use_case_key in model_dict.keys():
            for model_name_key in model_dict[use_case_key].keys():
                assert 1 == len(model_dict[use_case_key][model_name_key].keys())
                assert framework in model_dict[use_case_key][model_name_key]


@pytest.mark.tensorflow
@pytest.mark.parametrize('bad_framework',
                         ['tensorflowers',
                          'python',
                          'torch',
                          'fantastic-potato'])
def test_get_supported_models_bad_framework(bad_framework):
    """
    Ensure that the proper error is raised when a bad framework is passed in
    """
    with pytest.raises(ValueError) as e:
        model_factory.get_supported_models(bad_framework)

    assert "Unsupported framework: {}".format(bad_framework) in str(e)


@pytest.mark.tensorflow
@pytest.mark.parametrize('bad_use_case',
                         ['tensorflow',
                          'imageclassification',
                          'python',
                          'fantastic-potato'])
def test_get_supported_models_bad_use_case(bad_use_case):
    """
    Ensure that the proper error is raised when a bad use case is passed in
    """
    with pytest.raises(ValueError) as e:
        model_factory.get_supported_models(use_case=bad_use_case)

    assert "Unsupported use case: {}".format(bad_use_case) in str(e)


# This is necessary to protect from import errors when testing in a PyTorch-only environment
if tf_env:
    @pytest.mark.tensorflow
    @pytest.mark.parametrize('model_name,dataset_type,get_hub_model_patch,class_names',
                             [['efficientnet_b0', ImageClassificationDataset,
                               'tlt.models.image_classification.tfhub_image_classification_model.'
                               'TFHubImageClassificationModel._get_hub_model', ['a', 'b', 'c']],
                              ['google/bert_uncased_L-2_H-128_A-2', TextClassificationDataset,
                               'tlt.models.text_classification.tf_hf_text_classification_model.'
                               'TFHFTextClassificationModel._get_hub_model', ['a', 'b']],
                              ['ResNet50', ImageClassificationDataset,
                               'tlt.models.image_classification.keras_image_classification_model.'
                               'KerasImageClassificationModel._get_hub_model', ['a', 'b', 'c']]])
    @patch('tlt.models.text_classification.tf_hf_text_classification_model.prepare_huggingface_input_data')
    def test_tf_model_train(mock_tokenizer, model_name, dataset_type, get_hub_model_patch, class_names):
        """
        Tests calling train on a TFHub or Keras model with a mock dataset and mock model and verifies we get back
        the return value from the fit function.
        """
        model = model_factory.get_model(model_name, 'tensorflow')

        with patch(get_hub_model_patch) as mock_get_hub_model:
            mock_dataset = MagicMock()
            mock_dataset.__class__ = dataset_type
            mock_dataset.validation_subset = [1, 2, 3]
            mock_dataset.class_names = class_names
            mock_model = MagicMock()
            expected_return_value = {"result": True}
            mock_history = MagicMock()
            mock_history.history = expected_return_value
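
            # mock_fit stands in for the Keras model's fit function; it reads eval_expected from the enclosing
            # scope, so each train() call below first sets whether validation data is expected to be passed through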
            def mock_fit(x=None, y=None, epochs=1, shuffle=True, callbacks=[], validation_data=None, batch_size=None):
                assert x is not None
                assert isinstance(epochs, int)
                assert isinstance(shuffle, bool)
                assert len(callbacks) > 0

                if eval_expected:
                    assert validation_data is not None
                else:
                    assert validation_data is None

                return mock_history

            # Mock internal function to tokenize input data
            mock_tokenizer.return_value = mock_dataset, []
            mock_model.fit = mock_fit
            mock_get_hub_model.return_value = mock_model

            # Test train with eval
            eval_expected = True
            return_val = model.train(mock_dataset, output_dir="/tmp/output", do_eval=True)
            assert return_val == expected_return_value

            # Test train without eval
            eval_expected = False
            return_val = model.train(mock_dataset, output_dir="/tmp/output", do_eval=False)
            assert return_val == expected_return_value

            # Test train with eval, but no validation subset
            eval_expected = False
            mock_dataset.validation_subset = None
            return_val = model.train(mock_dataset, output_dir="/tmp/output", do_eval=True)
            assert return_val == expected_return_value


# This is necessary to protect from import errors when testing in a PyTorch-only environment
if tf_env:
    @pytest.mark.tensorflow
    def test_custom_model_train():
        """
        Tests calling train on a custom TF model with a mock dataset and mock model and verifies we get back the
        return value from the fit function.
        """
        model = model_factory.load_model('custom_model', ALEXNET, 'tensorflow', 'image_classification')
        mock_dataset = MagicMock()
        mock_dataset.__class__ = ImageClassificationDataset
        mock_dataset.class_names = ['1', '2', '3']
        model._model = MagicMock()
        expected_return_value = {"result": True}
        mock_history = MagicMock()
        mock_history.history = expected_return_value
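
        # mock_fit stands in for the Keras model's fit function and checks the arguments that train() forwards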
        def mock_fit(dataset, epochs, shuffle, callbacks, validation_data=None):
            assert dataset is not None
            assert isinstance(epochs, int)
            assert isinstance(shuffle, bool)
            assert len(callbacks) > 0
            return mock_history

        model._model.fit = mock_fit
        return_val = model.train(mock_dataset, output_dir="/tmp/output")
        assert return_val == expected_return_value
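
    # Each parametrize row below gives: the CPU model id substituted into the mocked lscpu output, the explicit
    # enable_auto_mixed_precision argument (None means use the default for that CPU), the parameter value expected
    # in the set_experimental_options call (None when no call is expected), the mocked TF version, and the
    # model/dataset to train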
    @pytest.mark.tensorflow
    @pytest.mark.parametrize(
        'cpu_model,enable_auto_mixed_precision,expected_auto_mixed_precision_parameter,tf_version,model_name,'
        'dataset_type',
        [['85', None, False, '2.9.0', 'efficientnet_b0', ImageClassificationDataset],
         ['143', None, True, '2.9.0', 'efficientnet_b0', ImageClassificationDataset],
         ['123', None, False, '2.9.0', 'efficientnet_b0', ImageClassificationDataset],
         ['85', True, True, '2.9.0', 'efficientnet_b0', ImageClassificationDataset],
         ['143', True, True, '2.9.0', 'efficientnet_b0', ImageClassificationDataset],
         ['123', True, True, '2.9.0', 'efficientnet_b0', ImageClassificationDataset],
         ['85', True, True, '2.10.0', 'efficientnet_b0', ImageClassificationDataset],
         ['85', None, False, '2.9.0', 'bert-base-uncased', TextClassificationDataset],
         ['143', None, True, '2.9.0', 'bert-base-uncased', TextClassificationDataset],
         ['123', None, False, '2.9.0', 'bert-base-uncased', TextClassificationDataset],
         ['85', True, True, '2.9.0', 'bert-base-uncased', TextClassificationDataset],
         ['143', True, True, '2.9.0', 'bert-base-uncased', TextClassificationDataset],
         ['123', True, True, '2.9.0', 'bert-base-uncased', TextClassificationDataset],
         ['85', True, True, '2.10.0', 'efficientnet_b0', ImageClassificationDataset],
         ['143', True, True, '2.10.0', 'efficientnet_b0', ImageClassificationDataset],
         ['123', True, True, '2.10.0', 'efficientnet_b0', ImageClassificationDataset],
         ['85', False, False, '2.9.1', 'efficientnet_b0', ImageClassificationDataset],
         ['143', False, False, '2.9.1', 'efficientnet_b0', ImageClassificationDataset],
         ['123', False, False, '2.9.1', 'efficientnet_b0', ImageClassificationDataset],
         ['123', False, None, '2.8.0', 'efficientnet_b0', ImageClassificationDataset],
         ['123', None, None, '2.8.0', 'efficientnet_b0', ImageClassificationDataset],
         ['123', True, None, '2.8.0', 'efficientnet_b0', ImageClassificationDataset],
         ['85', None, None, '2.8.0', 'efficientnet_b0', ImageClassificationDataset],
         ['85', True, None, '2.8.0', 'efficientnet_b0', ImageClassificationDataset],
         ['143', None, True, '3.1.0', 'efficientnet_b0', ImageClassificationDataset]])
@patch("tlt.models.tf_model.tf.version")
@patch("tlt.models.tf_model.tf.config.optimizer.set_experimental_options")
@patch("tlt.utils.platform_util.PlatformUtil._get_cpuset")
@patch("tlt.utils.platform_util.os")
@patch("tlt.utils.platform_util.system_platform")
@patch("tlt.utils.platform_util.subprocess")
@patch('tlt.models.text_classification.tf_hf_text_classification_model.prepare_huggingface_input_data')
def test_tfhub_auto_mixed_precision(mock_tokenizer, mock_subprocess, mock_platform, mock_os, mock_get_cpuset,
mock_set_experimental_options, mock_tf_version, cpu_model,
enable_auto_mixed_precision, expected_auto_mixed_precision_parameter,
tf_version, model_name, dataset_type):
"""
Verifies that auto mixed precision is enabled by default for SPR (cpu model 85), but disabled by default for other
CPU types like SKX (cpu model 143). The default auto mixed precision setting is used when
enable_auto_mixed_precision=None. Auto mixed precision was enabled for TF 2.9.0 and later, so don't expect the call
to set the config for earlier TF versions.
If enable_auto_mixed_precision is set to True/False, then that's what should be used, regardless of CPU type.
"""
mock_get_cpuset.return_value = platform_config.CPUSET
platform_config.set_mock_system_type(mock_platform)
platform_config.set_mock_os_access(mock_os)
# get the lscpu sample output, but replace in the parameterized cpu model id
lscpu_value = platform_config.LSCPU_OUTPUT
original_model_value = "Model: 143\n" # model test value from the test platform config
new_model_value = "Model: {}\n".format(cpu_model)
lscpu_value = lscpu_value.replace(original_model_value, new_model_value)
mock_subprocess.check_output.return_value = lscpu_value
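
        # With subprocess and the system info mocked out, PlatformUtil sees the parameterized CPU model id, so the
        # per-CPU default behavior can be exercised without running on that hardware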

        mock_dataset = MagicMock()
        mock_dataset.__class__ = dataset_type
        mock_dataset.class_names = ['a', 'b']
        mock_tf_version.VERSION = tf_version
        model = model_factory.get_model(model_name, 'tensorflow')
        model._get_hub_model = MagicMock()

        # Mock internal function to tokenize input data
        mock_tokenizer.return_value = mock_dataset, []

        model.train(mock_dataset, output_dir="/tmp/output", enable_auto_mixed_precision=enable_auto_mixed_precision)

        if expected_auto_mixed_precision_parameter is not None:
            expected_parameter = {'auto_mixed_precision_mkl': expected_auto_mixed_precision_parameter}
            mock_set_experimental_options.assert_called_with(expected_parameter)
        else:
            # We expect that the auto mixed precision config is not called (due to the unsupported TF version)
            assert not mock_set_experimental_options.called


# This is necessary to protect from import errors when testing in a PyTorch-only environment
if tf_env:
    @pytest.mark.tensorflow
    @pytest.mark.parametrize('model_name,use_case,dataset_type,optimizer,loss',
                             [['efficientnet_b0', 'image_classification', ImageClassificationDataset,
                               keras.optimizers.Adagrad, keras.losses.MeanSquaredError],
                              ['custom', 'image_classification', ImageClassificationDataset,
                               keras.optimizers.SGD, keras.losses.CategoricalCrossentropy],
                              ['bert-base-uncased', 'text_classification', TextClassificationDataset,
                               keras.optimizers.RMSprop, keras.losses.BinaryCrossentropy]])
    @patch('tlt.models.text_classification.tf_hf_text_classification_model.prepare_huggingface_input_data')
    def test_tf_optimizer_loss(mock_tokenizer, model_name, use_case, dataset_type, optimizer, loss):
        """
        Tests initializing and training a model with configurable optimizers and loss functions
        """
        if model_name == 'custom':
            model = model_factory.load_model(model_name, ALEXNET, 'tensorflow', use_case, optimizer=optimizer,
                                             loss=loss)
        else:
            model = model_factory.get_model(model_name, 'tensorflow', optimizer=optimizer, loss=loss)
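
        # Skip generating checkpoints, since fit is mocked below and no real training runs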
        model._generate_checkpoints = False
        model._get_hub_model = MagicMock()
        model._model = MagicMock()
        model._model.fit = MagicMock()
        assert model._optimizer_class == optimizer
        assert model._loss_class == loss

        mock_dataset = MagicMock()
        mock_dataset.__class__ = dataset_type

        if dataset_type == TextClassificationDataset:
            mock_dataset.class_names = ['a', 'b']
        else:
            mock_dataset.class_names = ['a', 'b', 'c']

        # Mock internal function to tokenize input data
        mock_tokenizer.return_value = mock_dataset, []

        # Train is called and the optimizer and loss objects should match the input types
        model.train(mock_dataset, output_dir="/tmp/output/tf")
        assert model._optimizer_class == optimizer
        assert type(model._optimizer) == optimizer
        assert model._loss_class == loss
        assert type(model._loss) == loss


# This is necessary to protect from import errors when testing in a PyTorch-only environment
if tf_env:
    @pytest.mark.tensorflow
    @pytest.mark.parametrize('model_name,loss',
                             [['efficientnet_b0', 1],
                              ['efficientnet_b0', 'foo'],
                              ['bert-base-uncased', keras.optimizers.Adam]])
    def test_tf_loss_wrong_type(model_name, loss):
        """
        Tests that an exception is thrown when the input loss function is the wrong type
        """
        with pytest.raises(TypeError):
            model_factory.get_model(model_name, 'tensorflow', loss=loss)