File size: 11,006 Bytes
d66ab65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
"""

Unit tests for the Validators utility.

"""
import pytest
from app.utils.validators import Validators, ValidationError


class TestValidators:
    """Test cases for the ``Validators`` utility.

    Every test calls a ``validate_*`` method on a fresh ``Validators``
    instance. Returning without raising counts as acceptance; raising
    ``ValidationError`` counts as rejection.
    """

    def setup_method(self):
        """Create a fresh ``Validators`` instance before each test."""
        self.validators = Validators()

    def test_validate_filename_valid(self):
        """Well-formed filenames are accepted without raising."""
        valid_filenames = [
            'test.txt',
            'document.md',
            'script_file.py',
            'My Document.txt',
            'file-name.json',
            'data123.csv',
        ]

        for filename in valid_filenames:
            self.validators.validate_filename(filename)  # Should not raise

    def test_validate_filename_invalid(self):
        """Empty, unsafe and reserved filenames raise ``ValidationError``."""
        invalid_filenames = [
            '',  # Empty
            None,  # Missing
            '../../../etc/passwd',  # Dangerous characters / path traversal
            'file\\with\\backslashes.txt',  # Backslash separators
            'file\x00.txt',  # Null byte
            'file\x01\x02.txt',  # Control characters
            'CON.txt',  # Reserved names on Windows
            'PRN.txt',
            'AUX.txt',
        ]

        for filename in invalid_filenames:
            with pytest.raises(ValidationError):
                self.validators.validate_filename(filename)

    def test_validate_file_extension_valid(self):
        """Filenames whose extension is in the allowed set are accepted."""
        allowed_extensions = {'.txt', '.md', '.py', '.js', '.json'}
        accepted_filenames = [
            'test.txt',
            'document.md',
            'script.py',
            'data.json',
            # Upper-case extensions are expected to match case-insensitively.
            'FILE.TXT',
            'Document.MD',
        ]

        for filename in accepted_filenames:
            # Should not raise
            self.validators.validate_file_extension(filename, allowed_extensions)

    def test_validate_file_extension_invalid(self):
        """Disallowed, missing or empty extensions raise ``ValidationError``."""
        allowed_extensions = {'.txt', '.md', '.py'}
        rejected_filenames = [
            'virus.exe',
            'archive.zip',
            'image.jpg',
            'filename',  # No extension
            '',  # Empty filename
        ]

        for filename in rejected_filenames:
            with pytest.raises(ValidationError):
                self.validators.validate_file_extension(filename, allowed_extensions)

    def test_validate_model_path_valid(self):
        """``owner/model`` paths with an expected-trusted owner are accepted."""
        # Valid HuggingFace model paths
        valid_paths = [
            'microsoft/DialoGPT-medium',
            'google/bert-base-uncased',
            'meta-llama/Llama-2-7b-hf',
            'mistralai/Mistral-7B-Instruct-v0.1',
            'Qwen/Qwen2.5-72B-Instruct',
            'THUDM/chatglm-6b',
            'deepseek-ai/deepseek-coder-6.7b-base',
            'unsloth/llama-2-7b-bnb-4bit',
            'google-bert/bert-base-uncased',
            'bartar/SPLM-2',  # User's specific case
        ]

        for path in valid_paths:
            self.validators.validate_model_path(path)  # Should not raise

    def test_validate_model_path_invalid_format(self):
        """Malformed model paths raise ``ValidationError``."""
        # NOTE(review): most entries use an owner ('user') that is presumably
        # not a trusted prefix, so they may be rejected by the prefix check
        # rather than the format check -- confirm against Validators.
        invalid_paths = [
            '',  # Empty
            'invalid-path',  # No slash
            'user/',  # Empty model name
            '/model-name',  # Empty user
            'user//model',  # Double slash
            'user/model/extra',  # Too many parts
            'user name/model',  # Space in user
            'user/model name',  # Space in model (actually this might be valid)
            'user@domain/model',  # Invalid characters
            '../malicious/path',  # Path traversal
            'user\\model',  # Backslash
        ]

        for path in invalid_paths:
            with pytest.raises(ValidationError):
                self.validators.validate_model_path(path)

    def test_validate_model_path_untrusted_prefix(self):
        """Well-formed paths with an untrusted owner raise ``ValidationError``."""
        untrusted_paths = [
            'random-user/some-model',
            'untrusted/malicious-model',
            'hacker/backdoor-model',
            'suspicious/model',
        ]

        for path in untrusted_paths:
            with pytest.raises(ValidationError):
                self.validators.validate_model_path(path)

    def test_validate_model_path_edge_cases(self):
        """``None`` and over-long paths are rejected; ``-``, ``_``, ``.`` are fine."""
        # None input
        with pytest.raises(ValidationError):
            self.validators.validate_model_path(None)

        # Very long path (trusted owner, 1000-character model name)
        long_path = 'microsoft/' + 'a' * 1000
        with pytest.raises(ValidationError):
            self.validators.validate_model_path(long_path)

        # Special characters in the model name under an allowed prefix
        for path in (
            'microsoft/model-with-dashes',
            'microsoft/model_with_underscores',
            'microsoft/model.with.dots',
        ):
            self.validators.validate_model_path(path)  # Should not raise

    def test_validate_text_input_valid(self):
        """Ordinary, multi-line, Unicode and empty text are accepted."""
        valid_texts = [
            'Hello world!',
            'A' * 1000,  # Long but reasonable text
            'Text with\nnewlines\nand\ttabs',
            'Unicode: 你好世界 🌍',
            '',  # Empty text might be valid
        ]

        for text in valid_texts:
            self.validators.validate_text_input(text)  # Should not raise

    def test_validate_text_input_invalid(self):
        """``None`` and extremely long text raise ``ValidationError``."""
        # None input
        with pytest.raises(ValidationError):
            self.validators.validate_text_input(None)

        # Extremely long text (if there's a limit)
        very_long_text = 'A' * (10 * 1024 * 1024)  # 10MB of text
        with pytest.raises(ValidationError):
            self.validators.validate_text_input(very_long_text)

    def test_validate_text_input_malicious_content(self):
        """Null bytes are rejected; other control characters fail only cleanly."""
        # Null bytes
        with pytest.raises(ValidationError):
            self.validators.validate_text_input('text\x00with\x00nulls')

        # Control characters other than \n and \t: this test does not pin
        # whether they are accepted or rejected. Both outcomes pass; the only
        # thing checked is that no exception other than ValidationError
        # escapes.
        try:
            self.validators.validate_text_input('text\x01with\x02controls')
        except ValidationError:
            pass  # Rejection is an acceptable outcome here.

    def test_validation_error_messages(self):
        """``ValidationError`` messages mention what was being validated."""
        # Filename validation error message
        with pytest.raises(ValidationError) as exc_info:
            self.validators.validate_filename('../../../etc/passwd')
        message = str(exc_info.value).lower()
        assert 'filename' in message or 'path' in message

        # File extension error message
        with pytest.raises(ValidationError) as exc_info:
            self.validators.validate_file_extension('virus.exe', {'.txt'})
        message = str(exc_info.value).lower()
        assert 'extension' in message or 'allowed' in message

        # Model path error message
        with pytest.raises(ValidationError) as exc_info:
            self.validators.validate_model_path('invalid-path')
        message = str(exc_info.value).lower()
        assert 'model' in message or 'path' in message

    def test_allowed_model_prefixes_coverage(self):
        """Every commonly used owner prefix is accepted as trusted."""
        # This test ensures we have good coverage of trusted model prefixes
        common_prefixes = [
            'microsoft/',
            'google/',
            'meta-llama/',
            'mistralai/',
            'openai-community/',
            'Qwen/',
            'THUDM/',
            'deepseek-ai/',
            'unsloth/',
            'google-bert/',
        ]

        # Check every prefix before failing so that one run reports all of
        # the rejected prefixes instead of only the first.
        rejected_prefixes = []
        for prefix in common_prefixes:
            test_path = prefix + 'test-model'
            try:
                self.validators.validate_model_path(test_path)
            except ValidationError:
                rejected_prefixes.append(prefix)

        if rejected_prefixes:
            pytest.fail("; ".join(
                f"Trusted prefix {prefix} should be allowed"
                for prefix in rejected_prefixes
            ))

    def test_case_sensitivity(self):
        """Check how letter case affects extension and model-path validation."""
        # File extensions should be case insensitive
        allowed_extensions = {'.txt', '.md'}
        self.validators.validate_file_extension('FILE.TXT', allowed_extensions)
        self.validators.validate_file_extension('Document.MD', allowed_extensions)

        # A capitalised spelling of a trusted owner is expected to be accepted.
        # NOTE(review): this implies prefix matching is not strictly
        # case-sensitive (or that 'Microsoft/' is itself listed) -- confirm
        # against Validators.
        self.validators.validate_model_path('Microsoft/model')  # Capital M

        # But random capitalization in untrusted prefixes should still fail
        with pytest.raises(ValidationError):
            self.validators.validate_model_path('RANDOM/model')