Spaces:

aletrn
/

ai-pronunciation-trainer

Running

ai-pronunciation-trainer / tests /test_wordmatching.py

alessandro trinca tornidor

feat: port whisper and faster-whisper support from https://github.com/Thiagohgl/ai-pronunciation-trainer

85b7206 3 months ago

13.8 kB

	import os
	import platform
	import unittest
	import numpy as np
	## make the parent directory of this tests folder importable (it is appended to sys.path below)
	import sys
	from pathlib import Path
	parent = Path(__file__).parent.parent
	sys.path.append(str(parent))
	import WordMatching
	from constants import app_logger
	from tests import set_seed
	from tests import constants_wordmatching as const


	class TestWordMatching(unittest.TestCase):
	    """Unit tests for the matching helpers exposed by the ``WordMatching`` module.

	    Covered functions: ``get_word_distance_matrix``,
	    ``get_best_path_from_distance_matrix``, ``get_best_mapped_words``
	    (always called with ``use_dtw=False``), ``get_resulting_string`` and
	    ``getWhichLettersWereTranscribedCorrectly``.
	    """

	    def setUp(self):
	        """Set the environment variables used while a test runs."""
	        # PYTHONUTF8 is only set on Windows; IS_TESTING is set on every platform.
	        if platform.system() in ("Windows", "Win32"):
	            os.environ["PYTHONUTF8"] = "1"
	        os.environ["IS_TESTING"] = "TRUE"

	    def tearDown(self):
	        """Remove the environment variables set by ``setUp``.

	        ``pop`` with a default is used instead of ``del`` so that a variable
	        that is already missing does not raise ``KeyError`` during cleanup.
	        """
	        if platform.system() in ("Windows", "Win32"):
	            os.environ.pop("PYTHONUTF8", None)
	        os.environ.pop("IS_TESTING", None)

	    def test_get_word_distance_matrix(self):
	        """Distance matrix for two estimated words against two real words."""
	        words_estimated = ["hello", "world"]
	        words_real = ["hello", "word"]
	        expected_matrix = np.array([[0., 5.], [4., 1.], [5., 4.]])
	        result_matrix = WordMatching.get_word_distance_matrix(words_estimated, words_real)
	        np.testing.assert_array_equal(result_matrix, expected_matrix)

	    def test_get_best_path_from_distance_matrix(self):
	        """Check the best path for every (matrix, expected indices) pair in the constants module."""
	        for word_distance_matrix, expected_result_indices in const.get_best_path_from_distance_matrix_constants:
	            # re-seed before every case, as the original test does
	            set_seed()
	            result_indices = WordMatching.get_best_path_from_distance_matrix(word_distance_matrix)
	            np.testing.assert_array_equal(result_indices, expected_result_indices)

	    def test_get_best_path_from_distance_matrix_with_inf_values(self):
	        """Best path on matrices that contain ``inf`` / ``-inf`` / negative entries."""
	        set_seed()
	        try:
	            word_distance_matrix = np.array([[np.inf, 1, 2]])
	            result_indices = WordMatching.get_best_path_from_distance_matrix(word_distance_matrix)
	            app_logger.info(f"result_indices0: {result_indices}, {result_indices.shape} .")
	            self.assertIsInstance(result_indices, np.ndarray)
	            self.assertEqual(result_indices.shape, (3,))
	            self.assertGreater(result_indices[0], 0)
	            self.assertGreater(result_indices[1], 0)
	            self.assertEqual(result_indices[2], 0)

	            word_distance_matrix = np.array([[-1, np.inf, 3]])
	            result_indices = WordMatching.get_best_path_from_distance_matrix(word_distance_matrix)
	            app_logger.info(f"result_indices1: {result_indices}, {result_indices.shape} .")
	            self.assertLess(result_indices[0], 0)
	            self.assertGreater(result_indices[1], 0)
	            self.assertEqual(result_indices[2], 0)

	            word_distance_matrix = np.array([[2, -1, np.inf]])
	            result_indices = WordMatching.get_best_path_from_distance_matrix(word_distance_matrix)
	            app_logger.info(f"result_indices2: {result_indices}, {result_indices.shape} .")
	            self.assertGreater(result_indices[0], 0)
	            self.assertGreater(result_indices[1], -1)
	            self.assertEqual(result_indices[2], 0)

	            word_distance_matrix = np.array([[np.inf, 1, 2], [1, np.inf, 3], [2, 3, np.inf], [-1, -np.inf, 1]])
	            result_indices = WordMatching.get_best_path_from_distance_matrix(word_distance_matrix)
	            app_logger.info(f"result_indices3: {result_indices}, {result_indices.shape} .")
	            self.assertGreater(result_indices[0], 0)
	            self.assertGreater(result_indices[1], 0)
	            self.assertEqual(result_indices[2], 0)
	        except AssertionError as ae:
	            # log the failure, then re-raise it unchanged so the test still fails
	            app_logger.error("ae:")
	            app_logger.error(ae)
	            raise

	    def test_getWhichLettersWereTranscribedCorrectly(self):
	        """One wrong letter out of five is flagged with 0, the others with 1."""
	        real_word = "hello"
	        transcribed_word = list("hxllo")
	        expected_result = [1, 0, 1, 1, 1]
	        result = WordMatching.getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word)
	        self.assertEqual(result, expected_result)

	    def test_get_best_mapped_words_false(self):
	        """Mapping without DTW, first on word lists and then on plain strings."""
	        words_estimated = ["hello", "world"]
	        words_real = ["hello", "word"]
	        expected_words = ["hello", "world"]
	        expected_indices = [0, 1]
	        result_words, result_indices = WordMatching.get_best_mapped_words(words_estimated, words_real, use_dtw=False)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)

	        expected_mapped_letters = ['e', 's', 's', 'e', 'n', '-']
	        expected_mapped_words_indices = [np.int64(0), np.int64(1), np.int64(2), np.int64(3), np.int64(4), -1]
	        output_mapped_letters, output_mapped_words_indices = WordMatching.get_best_mapped_words("essen", "essen?", use_dtw=False)
	        # unittest assertions instead of bare ``assert``: they are not stripped under ``python -O``
	        self.assertEqual(output_mapped_letters, expected_mapped_letters)
	        self.assertEqual(output_mapped_words_indices, expected_mapped_words_indices)

	    def test_get_word_distance_matrix_with_empty_lists(self):
	        """Two empty word lists are expected to give an empty matrix of shape (1, 0)."""
	        words_estimated = []
	        words_real = []
	        expected_matrix = np.arange(0).reshape((1, 0))
	        result_matrix = WordMatching.get_word_distance_matrix(words_estimated, words_real)
	        np.testing.assert_array_equal(result_matrix, expected_matrix)

	    def test_get_word_distance_matrix_with_different_lengths(self):
	        """One estimated word against two real words."""
	        words_estimated = ["hello"]
	        words_real = ["hello", "world"]
	        expected_matrix = np.array([[0., 4.], [5., 5.]])
	        result_matrix = WordMatching.get_word_distance_matrix(words_estimated, words_real)
	        np.testing.assert_array_equal(result_matrix, expected_matrix)

	    def test_get_best_path_from_distance_matrix_with_empty_matrix_indexerror(self):
	        """An empty matrix is expected to raise ``IndexError`` with a known message."""
	        word_distance_matrix = np.array([])
	        with self.assertRaises(IndexError) as raised:
	            WordMatching.get_best_path_from_distance_matrix(word_distance_matrix)
	        self.assertIn("tuple index out of range", str(raised.exception))

	    def test_getWhichLettersWereTranscribedCorrectly_with_empty_strings(self):
	        """An empty real word is expected to give an empty result list."""
	        real_word = ""
	        transcribed_word = [""]
	        expected_result = []
	        result = WordMatching.getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word)
	        self.assertEqual(result, expected_result)

	    def test_getWhichLettersWereTranscribedCorrectly_with_different_lengths(self):
	        """Transcribed letters beyond the length of the real word do not change the result."""
	        real_word = "hello"
	        transcribed_word = list("hello oo")
	        expected_result = [1, 1, 1, 1, 1]
	        result = WordMatching.getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word)
	        self.assertEqual(result, expected_result)

	    def test_getWhichLettersWereTranscribedCorrectly_more_mapped_letters_than_real_word(self):
	        """More mapped letters than real letters: the extra letter is expected to be ignored.

	        NOTE(review): this test used to share its name with the next one, so it
	        was overwritten in the class namespace and never executed. It has been
	        renamed so that both tests run.
	        """
	        word_real = "ich"
	        mapped_letters = ['i', 'c', 'h', "z"]
	        is_letter_correct1 = WordMatching.getWhichLettersWereTranscribedCorrectly(word_real, mapped_letters)
	        self.assertEqual(is_letter_correct1, [1, 1, 1])

	    def test_getWhichLettersWereTranscribedCorrectly_wrong_number_elements_mapped_letters(self):
	        """Fewer mapped letters than real letters is expected to raise ``IndexError``."""
	        word_real = "ichh"
	        mapped_letters = ['i', 'c', 'h']
	        with self.assertRaises(IndexError) as raised:
	            WordMatching.getWhichLettersWereTranscribedCorrectly(word_real, mapped_letters)
	        self.assertIn('list index out of range', str(raised.exception))

	    def test_get_best_mapped_words_with_empty_lists_false(self):
	        """Mapping of single-character and short string inputs without DTW."""
	        expected_words = ["?"]
	        expected_indices = [0]
	        result_words, result_indices = WordMatching.get_best_mapped_words("?", "-", use_dtw=False)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)

	        expected_words = ['b', 'i', 'n', '-']
	        expected_indices = [np.int64(0), np.int64(1), np.int64(2), -1]
	        result_words, result_indices = WordMatching.get_best_mapped_words("bin", "bind", use_dtw=False)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)

	    def test_get_best_mapped_words_with_different_lengths_false(self):
	        """A non-empty estimate against an empty real word is expected to give empty outputs."""
	        result_words, result_indices = WordMatching.get_best_mapped_words("bin", "", use_dtw=False)
	        self.assertEqual(result_words, [])
	        self.assertEqual(result_indices, [])

	    def test_get_best_mapped_words_with_word_estimated_empty_real_word_not_empty_false(self):
	        """An empty estimate against a three-letter real word."""
	        result_words, result_indices = WordMatching.get_best_mapped_words("", "bin", use_dtw=False)
	        self.assertEqual(result_words, ['', '-', '-'])
	        self.assertEqual(result_indices, [-1, -1, -1])

	    def test_get_best_mapped_words_with_word_estimated_real_word_both_empty_false(self):
	        """Two empty inputs are expected to raise ``IndexError`` (or ``AssertionError``, see below)."""
	        try:
	            with self.assertRaises(IndexError) as raised_index_error:
	                WordMatching.get_best_mapped_words("", "", use_dtw=False)
	            ie = raised_index_error.exception
	            app_logger.error(f"raised IndexError, ie.args: {ie.args} => exception: {ie} ##")
	            msg = "index -1 is out of bounds for axis {axis} with size 0"
	            self.assertTrue(msg.format(axis=0) in str(ie) or msg.format(axis=1) in str(ie))
	        except AssertionError:
	            # for some reason executing the test in debug mode from Visual Studio Code raises an AssertionError instead of an IndexError
	            app_logger.error("raised AssertionError instead than IndexError...")
	            with self.assertRaises(AssertionError) as raised_assertion_error:
	                WordMatching.get_best_mapped_words("", "", use_dtw=False)
	            # checked outside the context manager: inside it, a failing message
	            # check would itself be swallowed as the "expected" AssertionError
	            self.assertIn("code object dtw_low at ", str(raised_assertion_error.exception))

	    def test_get_best_mapped_words_survived_false(self):
	        """Letter mapping of several estimates against the real word "habe"."""
	        set_seed()

	        word_real = "habe"
	        cases = [
	            ("habe", ["h", "a", "b", "e"], [0, 1, 2, 3]),
	            ("hobe", ["h", "-", "b", "e"], [0, -1, 2, 3]),
	            ("hone", ["h", "-", "-", "e"], [0, -1, -1, 3]),
	            ("honi", ["h", "-", "-", "-"], [0, -1, -1, -1]),
	            ("koni", ["k", "-", "-", "-"], [0, -1, -1, -1]),
	            ("kabe", ["k", "a", "b", "e"], [0, 1, 2, 3]),
	            ("kane", ["k", "a", "-", "e"], [0, 1, -1, 3]),
	        ]
	        for word_estimated, expected_letters, expected_indices in cases:
	            result_words, result_indices = WordMatching.get_best_mapped_words(word_estimated, word_real, use_dtw=False)
	            try:
	                self.assertEqual(result_words, expected_letters)
	                self.assertEqual(result_indices, expected_indices)
	            except AssertionError as ae:
	                # a single pre-formatted message: extra positional arguments would be
	                # treated as %-format arguments for a message without placeholders
	                app_logger.error(f"ae: {ae} # {word_estimated} # {word_real} # {expected_letters} # {expected_indices} ##")
	                raise

	    def test_get_resulting_string1(self):
	        """Resulting string for an identity mapping over two words."""
	        set_seed()
	        mapped_indices = np.array([0, 1])
	        words_estimated = ["hello", "world"]
	        words_real = ["hello", "word"]
	        expected_words = ["hello", "world"]
	        expected_indices = [0, 1]
	        result_words, result_indices = WordMatching.get_resulting_string(mapped_indices, words_estimated, words_real)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)

	    def test_get_resulting_string2(self):
	        """Resulting string for several different index mappings over the same word lists."""
	        set_seed()
	        mapped_indices = np.array([0, 1])
	        words_estimated = ["hollo", "uorld"]
	        words_real = ["hello", "word"]
	        expected_words = ['hollo', 'uorld']
	        expected_indices = [0, 1]
	        result_words, result_indices = WordMatching.get_resulting_string(mapped_indices, words_estimated, words_real)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)

	        mapped_indices = np.array([1, 1])
	        expected_words = ['-', 'uorld']
	        expected_indices = [-1, 1]
	        result_words, result_indices = WordMatching.get_resulting_string(mapped_indices, words_estimated, words_real)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)

	        mapped_indices = np.array([0, 0])
	        expected_words = ['hollo', '-']
	        expected_indices = [0, -1]
	        result_words, result_indices = WordMatching.get_resulting_string(mapped_indices, words_estimated, words_real)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)

	        mapped_indices = np.array([0, -1])
	        expected_words = ["hollo", "-"]
	        expected_indices = [0, -1]
	        result_words, result_indices = WordMatching.get_resulting_string(mapped_indices, words_estimated, words_real)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)

	        mapped_indices = np.array([-1, -1])
	        expected_words = ["-", "-"]
	        expected_indices = [-1, -1]
	        result_words, result_indices = WordMatching.get_resulting_string(mapped_indices, words_estimated, words_real)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)

	    def test_get_resulting_string_with_empty_lists(self):
	        """Empty indices and empty word lists are expected to give empty outputs."""
	        mapped_indices = np.array([])
	        words_estimated = []
	        words_real = []
	        expected_words = []
	        expected_indices = []
	        result_words, result_indices = WordMatching.get_resulting_string(mapped_indices, words_estimated, words_real)
	        self.assertEqual(result_words, expected_words)
	        self.assertEqual(result_indices, expected_indices)


	# run the whole test module when this file is executed directly as a script
	if __name__ == "__main__":
	    unittest.main()