""" Unit tests for Validators utility """ import pytest from app.utils.validators import Validators, ValidationError class TestValidators: """Test cases for Validators utility.""" def setup_method(self): """Set up test fixtures.""" self.validators = Validators() def test_validate_filename_valid(self): """Test filename validation with valid filenames.""" # Valid filenames should not raise self.validators.validate_filename('test.txt') self.validators.validate_filename('document.md') self.validators.validate_filename('script_file.py') self.validators.validate_filename('My Document.txt') self.validators.validate_filename('file-name.json') self.validators.validate_filename('data123.csv') def test_validate_filename_invalid(self): """Test filename validation with invalid filenames.""" # Empty or None filename with pytest.raises(ValidationError): self.validators.validate_filename('') with pytest.raises(ValidationError): self.validators.validate_filename(None) # Dangerous characters with pytest.raises(ValidationError): self.validators.validate_filename('../../../etc/passwd') with pytest.raises(ValidationError): self.validators.validate_filename('file\\with\\backslashes.txt') # Null bytes with pytest.raises(ValidationError): self.validators.validate_filename('file\x00.txt') # Control characters with pytest.raises(ValidationError): self.validators.validate_filename('file\x01\x02.txt') # Reserved names on Windows with pytest.raises(ValidationError): self.validators.validate_filename('CON.txt') with pytest.raises(ValidationError): self.validators.validate_filename('PRN.txt') with pytest.raises(ValidationError): self.validators.validate_filename('AUX.txt') def test_validate_file_extension_valid(self): """Test file extension validation with valid extensions.""" allowed_extensions = {'.txt', '.md', '.py', '.js', '.json'} # Valid extensions should not raise self.validators.validate_file_extension('test.txt', allowed_extensions) self.validators.validate_file_extension('document.md', allowed_extensions) self.validators.validate_file_extension('script.py', allowed_extensions) self.validators.validate_file_extension('data.json', allowed_extensions) # Case insensitive self.validators.validate_file_extension('FILE.TXT', allowed_extensions) self.validators.validate_file_extension('Document.MD', allowed_extensions) def test_validate_file_extension_invalid(self): """Test file extension validation with invalid extensions.""" allowed_extensions = {'.txt', '.md', '.py'} # Invalid extensions should raise with pytest.raises(ValidationError): self.validators.validate_file_extension('virus.exe', allowed_extensions) with pytest.raises(ValidationError): self.validators.validate_file_extension('archive.zip', allowed_extensions) with pytest.raises(ValidationError): self.validators.validate_file_extension('image.jpg', allowed_extensions) # No extension with pytest.raises(ValidationError): self.validators.validate_file_extension('filename', allowed_extensions) # Empty filename with pytest.raises(ValidationError): self.validators.validate_file_extension('', allowed_extensions) def test_validate_model_path_valid(self): """Test model path validation with valid paths.""" # Valid HuggingFace model paths valid_paths = [ 'microsoft/DialoGPT-medium', 'google/bert-base-uncased', 'meta-llama/Llama-2-7b-hf', 'mistralai/Mistral-7B-Instruct-v0.1', 'Qwen/Qwen2.5-72B-Instruct', 'THUDM/chatglm-6b', 'deepseek-ai/deepseek-coder-6.7b-base', 'unsloth/llama-2-7b-bnb-4bit', 'google-bert/bert-base-uncased', 'bartar/SPLM-2' # User's specific case ] for path in valid_paths: self.validators.validate_model_path(path) # Should not raise def test_validate_model_path_invalid_format(self): """Test model path validation with invalid formats.""" # Invalid formats should raise invalid_paths = [ '', # Empty 'invalid-path', # No slash 'user/', # Empty model name '/model-name', # Empty user 'user//model', # Double slash 'user/model/extra', # Too many parts 'user name/model', # Space in user 'user/model name', # Space in model (actually this might be valid) 'user@domain/model', # Invalid characters '../malicious/path', # Path traversal 'user\\model', # Backslash ] for path in invalid_paths: with pytest.raises(ValidationError): self.validators.validate_model_path(path) def test_validate_model_path_untrusted_prefix(self): """Test model path validation with untrusted prefixes.""" # Paths with untrusted prefixes should raise untrusted_paths = [ 'random-user/some-model', 'untrusted/malicious-model', 'hacker/backdoor-model', 'suspicious/model' ] for path in untrusted_paths: with pytest.raises(ValidationError): self.validators.validate_model_path(path) def test_validate_model_path_edge_cases(self): """Test model path validation edge cases.""" # None input with pytest.raises(ValidationError): self.validators.validate_model_path(None) # Very long path long_path = 'microsoft/' + 'a' * 1000 with pytest.raises(ValidationError): self.validators.validate_model_path(long_path) # Special characters in allowed prefix self.validators.validate_model_path('microsoft/model-with-dashes') self.validators.validate_model_path('microsoft/model_with_underscores') self.validators.validate_model_path('microsoft/model.with.dots') def test_validate_text_input_valid(self): """Test text input validation with valid inputs.""" # Valid text inputs should not raise self.validators.validate_text_input('Hello world!') self.validators.validate_text_input('A' * 1000) # Long but reasonable text self.validators.validate_text_input('Text with\nnewlines\nand\ttabs') self.validators.validate_text_input('Unicode: 你好世界 🌍') self.validators.validate_text_input('') # Empty text might be valid def test_validate_text_input_invalid(self): """Test text input validation with invalid inputs.""" # None input with pytest.raises(ValidationError): self.validators.validate_text_input(None) # Extremely long text (if there's a limit) very_long_text = 'A' * (10 * 1024 * 1024) # 10MB of text with pytest.raises(ValidationError): self.validators.validate_text_input(very_long_text) def test_validate_text_input_malicious_content(self): """Test text input validation with potentially malicious content.""" # Null bytes with pytest.raises(ValidationError): self.validators.validate_text_input('text\x00with\x00nulls') # Control characters (some might be allowed like \n, \t) try: self.validators.validate_text_input('text\x01with\x02controls') except ValidationError: pass # This might be expected def test_validation_error_messages(self): """Test that ValidationError contains meaningful messages.""" # Test filename validation error message try: self.validators.validate_filename('../../../etc/passwd') assert False, "Should have raised ValidationError" except ValidationError as e: assert 'filename' in str(e).lower() or 'path' in str(e).lower() # Test file extension error message try: self.validators.validate_file_extension('virus.exe', {'.txt'}) assert False, "Should have raised ValidationError" except ValidationError as e: assert 'extension' in str(e).lower() or 'allowed' in str(e).lower() # Test model path error message try: self.validators.validate_model_path('invalid-path') assert False, "Should have raised ValidationError" except ValidationError as e: assert 'model' in str(e).lower() or 'path' in str(e).lower() def test_allowed_model_prefixes_coverage(self): """Test that all common model prefixes are covered.""" # This test ensures we have good coverage of trusted model prefixes common_prefixes = [ 'microsoft/', 'google/', 'meta-llama/', 'mistralai/', 'openai-community/', 'Qwen/', 'THUDM/', 'deepseek-ai/', 'unsloth/', 'google-bert/' ] for prefix in common_prefixes: # Should be able to validate models with these prefixes test_path = prefix + 'test-model' try: self.validators.validate_model_path(test_path) except ValidationError: pytest.fail(f"Trusted prefix {prefix} should be allowed") def test_case_sensitivity(self): """Test case sensitivity in various validations.""" # File extensions should be case insensitive allowed_extensions = {'.txt', '.md'} self.validators.validate_file_extension('FILE.TXT', allowed_extensions) self.validators.validate_file_extension('Document.MD', allowed_extensions) # Model path prefixes should be case sensitive (HuggingFace convention) self.validators.validate_model_path('Microsoft/model') # Capital M # But random capitalization in untrusted prefixes should still fail with pytest.raises(ValidationError): self.validators.validate_model_path('RANDOM/model')