File size: 4,587 Bytes
85b7206
8838421
85b7206
8838421
85b7206
8838421
85b7206
 
 
 
 
8838421
 
 
85b7206
8838421
85b7206
8838421
 
 
 
85b7206
 
 
8838421
 
 
85b7206
 
 
 
 
 
 
 
 
 
 
 
 
 
8838421
 
 
85b7206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8838421
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import csv
import json
import shutil
import unittest
from unittest.mock import patch

import pandas as pd

import lambdaGetSample
from constants import PROJECT_ROOT_FOLDER, app_logger
from tests import EVENTS_FOLDER, set_seed


def helper_category(cls, category: int, language: str, expected_output: dict):
    """Call the sample lambda for one category/language pair and check its output.

    The seed is reset through ``set_seed()`` before the handler is invoked.
    The handler's response is decoded from JSON and compared to
    ``expected_output`` with ``assertDictEqual``.

    Args:
        cls: object exposing ``assertDictEqual`` (the running test case).
        category: category value placed in the request body.
        language: language value placed in the request body.
        expected_output: dictionary the decoded response must equal.

    Raises:
        AssertionError: when the decoded response differs from
            ``expected_output``; the inputs and both dictionaries are logged
            before the error is propagated.
    """
    set_seed()
    payload = json.dumps({"category": category, "language": language})
    raw_response = lambdaGetSample.lambda_handler({"body": payload}, {})
    actual = json.loads(raw_response)
    try:
        cls.assertDictEqual(actual, expected_output)
    except AssertionError as mismatch:
        # Log the full context so a failing combination can be identified.
        diagnostics = (
            f"category: {category}, language: {language}.",
            f"response_dict: {actual} .",
            f"expected_output: {expected_output} .",
        )
        for line in diagnostics:
            app_logger.error(line)
        raise mismatch


def helper_get_enriched_dataframe_csv(lang: str):
    """Run ``get_enriched_dataframe_csv`` on a fixture CSV and compare the result.

    The fixture ``test_data_{lang}.csv`` in ``EVENTS_FOLDER`` is copied to a
    backup, the enrichment function is called, and the fixture is then
    compared line by line with ``databases/data_{lang}.csv`` under
    ``PROJECT_ROOT_FOLDER``.
    NOTE(review): the backup/restore suggests the call rewrites the fixture
    in place - confirm against ``lambdaGetSample.get_enriched_dataframe_csv``.

    The fixture is restored and the backup removed in a ``finally`` clause,
    so a failing comparison or an exception in the enrichment call cannot
    leave a modified fixture or a stray backup file behind.

    Args:
        lang: language code used to build the CSV file names.

    Raises:
        AssertionError: when the two CSV files differ.
    """
    import os

    input_df = f"test_data_{lang}.csv"
    backup_df = f"test_data2_{lang}.csv"
    shutil.copy2(EVENTS_FOLDER / input_df, EVENTS_FOLDER / backup_df)
    try:
        lambdaGetSample.get_enriched_dataframe_csv(lang, "test_data", EVENTS_FOLDER)
        with open(EVENTS_FOLDER / input_df, 'r') as src1:
            csv1 = src1.readlines()
        with open(PROJECT_ROOT_FOLDER / "databases" / f'data_{lang}.csv', 'r') as src2:
            csv2 = src2.readlines()
        # Explicit raise instead of a bare assert so the check survives -O.
        if csv1 != csv2:
            raise AssertionError(
                f"{input_df} differs from databases/data_{lang}.csv"
            )
    finally:
        # Always put the original fixture back, even when the check fails.
        shutil.copy2(EVENTS_FOLDER / backup_df, EVENTS_FOLDER / input_df)
        os.remove(EVENTS_FOLDER / backup_df)


class TestDataset(unittest.TestCase):
    """Tests for the ``lambdaGetSample`` handler and its helper functions."""

    def test_get_sample_by_category(self):
        """Check the handler output for each language and category 0-3.

        Expected outputs are read from ``test_lambdaGetSample.json`` in
        ``EVENTS_FOLDER``, keyed by language and then by the category as a
        string.
        """
        with open(EVENTS_FOLDER / "test_lambdaGetSample.json") as src:
            json_data = json.load(src)

        for lang in ["de", "en"]:
            for cat in range(4):
                # subTest reports every failing combination, not just the first.
                with self.subTest(lang=lang, cat=cat):
                    expected_output = json_data[lang][str(cat)]
                    helper_category(self, cat, lang, expected_output=expected_output)

    def test_get_sample_using_text(self):
        """Check the handler output when the request carries a transcript."""
        body = {"language": "en", "transcript": "Hi there, how are you?"}
        event = {"body": json.dumps(body)}
        response = lambdaGetSample.lambda_handler(event, {})
        expected_output = {
            'ipa_transcript': 'haɪ ðɛr, haʊ ər ju?',
            'real_transcript': ['Hi there, how are you?'],
            'transcript_translation': ''
        }
        self.assertEqual(json.loads(response), expected_output)

    @patch.object(lambdaGetSample, "get_random_selection")
    def test_get_sample_using_text_exception(self, get_random_selection_mocked):
        """Check that an error raised by ``get_random_selection`` reaches the caller.

        The message is verified after the ``assertRaises`` block. Asserting
        inside it would be vacuous: a failed ``assert`` raises
        ``AssertionError``, which is itself an ``Exception`` and would
        satisfy ``assertRaises(Exception)``.
        """
        msg_ex = "a test exception"
        get_random_selection_mocked.side_effect = Exception(msg_ex)
        body = {"category": 1, "language": "en"}
        event = {"body": json.dumps(body)}
        with self.assertRaises(Exception) as ctx:
            lambdaGetSample.lambda_handler(event, {})
        self.assertEqual(str(ctx.exception), msg_ex)

    def test_get_enriched_dataframe_csv_de(self):
        """Run the enriched-CSV comparison helper for ``"de"``."""
        helper_get_enriched_dataframe_csv("de")

    def test_get_enriched_dataframe_csv_en(self):
        """Run the enriched-CSV comparison helper for ``"en"``."""
        helper_get_enriched_dataframe_csv("en")

    def test_getSentenceCategory(self):
        """Check that a sentence selected for category 1-3 maps back to it."""
        from lambdaGetSample import get_random_selection, getSentenceCategory

        for cat in range(1, 4):
            with self.subTest(cat=cat):
                # Reset the seed before each selection, as the original test did.
                set_seed()
                sentence = get_random_selection("de", cat)
                self.assertEqual(getSentenceCategory(sentence), cat)

    def test_getSentence_ValueError(self):
        """Check the ``ValueError`` and message raised for an empty sentence."""
        from lambdaGetSample import getSentenceCategory

        with self.assertRaises(ValueError) as ctx:
            getSentenceCategory("")
        self.assertEqual(
            str(ctx.exception), "category not assigned for sentence '' ..."
        )

    def test_textdataset_len(self):
        """Check that ``len(TextDataset)`` equals the wrapped dataframe's length."""
        from lambdaGetSample import TextDataset
        lang = "de"
        df = pd.read_csv(PROJECT_ROOT_FOLDER / "databases" / f'data_{lang}.csv', delimiter='|')
        dataset = TextDataset(df, lang)
        self.assertEqual(len(dataset), len(df))

    def test_textdataset_getitem(self):
        """Check that item 0 is a one-element list with the first sentence."""
        from lambdaGetSample import TextDataset
        lang = "de"
        df = pd.read_csv(PROJECT_ROOT_FOLDER / "databases" / f'data_{lang}.csv', delimiter='|')
        dataset = TextDataset(df, lang)
        expected = df["sentence"].iloc[0]
        self.assertListEqual(dataset[0], [expected])


# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()