File size: 5,028 Bytes
73b6e10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import os
import numpy as np
import librosa
from speaker.speaker_identification import assign_speaker_for_audio_list

# Dataset locations, all resolved relative to the directory holding this file.
# Root folder of the test data:
TEST_DATA_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'Test_data_for_clas_Idef',
)
# Sub-folders of the root (audio files, numpy test set, file-path test set):
AUDIO_FILES_DIR, NUMPY_FILES_DIR, FILEPATHS_DIR = (
    os.path.join(TEST_DATA_DIR, subdir)
    for subdir in (
        'enni_audio_files',
        'enni_testset_numpy_minimal',
        'enni_testset_filepaths_minimal',
    )
)

def generate_fake_audio_test_set(num_samples=10, length=16000, random_seed=42):
    """
    Generate a synthetic test set of fake audio signals (numpy arrays).

    Each signal is a 1-D array of standard-normal noise. A private
    ``RandomState`` seeded with ``random_seed`` is used, so the call is
    reproducible without reseeding NumPy's process-wide random generator.
    The values match what ``np.random.seed(random_seed)`` followed by
    ``np.random.randn(length)`` calls would produce.

    Args:
        num_samples (int): Number of audio samples.
        length (int): Length of each audio sample (e.g., 1 second at 16kHz).
        random_seed (int): Seed for reproducibility.
    Returns:
        List[np.ndarray]: List of fake audio signals, each of shape (length,).
    """
    # Local generator: same legacy stream as the global one, but no global
    # side effect on other code that relies on np.random.
    rng = np.random.RandomState(random_seed)
    return [rng.randn(length) for _ in range(num_samples)]

def test_file_paths():
    """Test with all real audio files from the dataset, passed as file paths.

    Lists ``AUDIO_FILES_DIR`` once and keeps the ``child_*.wav`` and
    ``adult_*.wav`` entries, sorted by name so the run order and the printed
    examples are reproducible. All child paths followed by all adult paths
    are sent to ``assign_speaker_for_audio_list``. The first five predictions
    and the overall accuracy are printed.

    The expected label is derived from the file name only: names starting
    with ``child_`` expect ``"Speaker_id_0"``, all others ``"Speaker_id_1"``.

    Returns:
        None. Results are written to stdout.

    Raises:
        OSError: Propagated from ``os.listdir`` if the audio directory
            cannot be read.
    """
    audio_dir = AUDIO_FILES_DIR

    # os.listdir() yields entries in arbitrary order; scan once and sort so
    # every run evaluates (and prints) the files in the same order.
    wav_names = sorted(
        name for name in os.listdir(audio_dir) if name.endswith('.wav')
    )
    child_files = [
        os.path.join(audio_dir, name)
        for name in wav_names
        if name.startswith('child_')
    ]
    adult_files = [
        os.path.join(audio_dir, name)
        for name in wav_names
        if name.startswith('adult_')
    ]

    # Create list with known order: children first, then adults
    audio_list = child_files + adult_files

    # Get speaker IDs
    speaker_ids = assign_speaker_for_audio_list(audio_list)

    # Print results
    print("\n--- Testing with file paths ---")
    print(f"Testing {len(audio_list)} audio files: {len(child_files)} child files and {len(adult_files)} adult files")

    # Count correct predictions
    correct = 0
    for i, (path, speaker_id) in enumerate(zip(audio_list, speaker_ids)):
        filename = os.path.basename(path)
        # Decide the label from the file name, not the full path: a parent
        # directory containing "child_" must not turn every file into a child.
        expected = "Speaker_id_0" if filename.startswith("child_") else "Speaker_id_1"
        is_correct = speaker_id == expected
        correct += 1 if is_correct else 0

        # Print only the first 5 examples to avoid cluttering the output
        if i < 5:
            print(f"{i+1}. {filename}: {speaker_id} (Expected: {expected}) {'✓' if is_correct else '✗'}")

    # Print accuracy (guard against an empty dataset to avoid dividing by zero)
    accuracy = correct / len(audio_list) * 100 if audio_list else 0
    print(f"Accuracy: {correct}/{len(audio_list)} ({accuracy:.2f}%)")

def test_numpy_arrays():
    """Test with NumPy arrays by loading all audio files.

    Lists ``AUDIO_FILES_DIR`` once and keeps the ``child_*.wav`` and
    ``adult_*.wav`` entries, sorted by name so the run order and the printed
    examples are reproducible. Each file is loaded with
    ``librosa.load(..., sr=16000)`` and the resulting arrays (children first,
    then adults) are sent to ``assign_speaker_for_audio_list``. The first
    five predictions and the overall accuracy are printed.

    The expected label is derived from the file name: names starting with
    ``child_`` expect ``"Speaker_id_0"``, all others ``"Speaker_id_1"``.

    Returns:
        None. Results are written to stdout.

    Raises:
        OSError: Propagated from ``os.listdir`` if the audio directory
            cannot be read.
    """
    audio_dir = AUDIO_FILES_DIR

    # os.listdir() yields entries in arbitrary order; scan once and sort so
    # every run evaluates (and prints) the files in the same order.
    wav_names = sorted(
        name for name in os.listdir(audio_dir) if name.endswith('.wav')
    )
    child_files = [
        os.path.join(audio_dir, name)
        for name in wav_names
        if name.startswith('child_')
    ]
    adult_files = [
        os.path.join(audio_dir, name)
        for name in wav_names
        if name.startswith('adult_')
    ]

    # Load as arrays; [0] keeps the first element of librosa.load's result
    child_arrays = [librosa.load(f, sr=16000)[0] for f in child_files]
    adult_arrays = [librosa.load(f, sr=16000)[0] for f in adult_files]

    # Create list with known order, plus the matching file names for labels
    audio_list = child_arrays + adult_arrays
    filenames = [os.path.basename(f) for f in child_files + adult_files]

    # Get speaker IDs
    speaker_ids = assign_speaker_for_audio_list(audio_list)

    # Print results
    print("\n--- Testing with NumPy arrays ---")
    print(f"Testing {len(audio_list)} audio arrays: {len(child_arrays)} child arrays and {len(adult_arrays)} adult arrays")

    # Count correct predictions
    correct = 0
    for i, (filename, speaker_id) in enumerate(zip(filenames, speaker_ids)):
        # Mirror the prefix filter used above: an "adult_*" file whose name
        # merely contains "child_" must still be labelled as an adult.
        expected = "Speaker_id_0" if filename.startswith("child_") else "Speaker_id_1"
        is_correct = speaker_id == expected
        correct += 1 if is_correct else 0

        # Print only the first 5 examples to avoid cluttering the output
        if i < 5:
            print(f"{i+1}. {filename} (as array): {speaker_id} (Expected: {expected}) {'✓' if is_correct else '✗'}")

    # Print accuracy (guard against an empty dataset to avoid dividing by zero)
    accuracy = correct / len(audio_list) * 100 if audio_list else 0
    print(f"Accuracy: {correct}/{len(audio_list)} ({accuracy:.2f}%)")

if __name__ == "__main__":
    # Step 1: run the identifier on five generated noise signals.
    # This step is not wrapped in try/except, so a failure here stops the script.
    print("--- Testing with synthetic data ---")
    speaker_ids = assign_speaker_for_audio_list(
        generate_fake_audio_test_set(num_samples=5)
    )
    print(f"Synthetic data predictions: {speaker_ids}")

    # Step 2: evaluate on the dataset files given as paths.
    # Any exception is reported and the script carries on to the next step.
    try:
        test_file_paths()
    except Exception as e:
        print(f"Error testing file paths: {e}")

    # Step 3: evaluate on the same files loaded as NumPy arrays.
    # Errors are reported in the same way.
    try:
        test_numpy_arrays()
    except Exception as e:
        print(f"Error testing NumPy arrays: {e}")