""" Integration tests for Flask routes """ import pytest import json import tempfile import os from unittest.mock import patch, Mock from io import BytesIO from werkzeug.datastructures import FileStorage class TestMainRoutes: """Integration tests for main application routes.""" def test_index_get_basic(self, client): """Test basic GET request to index.""" response = client.get('/') assert response.status_code == 200 assert b'Tokenizer Pro' in response.data assert b'Advanced tokenization analysis' in response.data assert b'textarea' in response.data def test_index_get_with_parameters(self, client): """Test GET request with query parameters.""" response = client.get('/?model=gpt2&model_type=predefined') assert response.status_code == 200 assert b'gpt2' in response.data or b'GPT-2' in response.data @patch('app.services.tokenizer_service.tokenizer_service') @patch('app.services.file_service.file_service') def test_index_post_text_analysis(self, mock_file_service, mock_tokenizer_service, client): """Test POST request with text analysis.""" # Mock services mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}} mock_file_service.process_text_for_tokenization.return_value = { 'tokens': [ { 'display': 'Hello', 'original': 'Hello', 'token_id': 15496, 'colors': {'background': '#FF5733', 'text': '#FFFFFF'}, 'newline': False } ], 'stats': { 'basic_stats': { 'total_tokens': 1, 'unique_tokens': 1, 'unique_percentage': '100.0', 'special_tokens': 0, 'space_tokens': 0, 'newline_tokens': 0, 'compression_ratio': '5.0' }, 'length_stats': { 'avg_length': '5.0', 'median_length': '5.0', 'std_dev': '0.0' } }, 'display_limit_reached': False, 'total_tokens': 1, 'preview_only': False, 'tokenizer_info': { 'vocab_size': 50257, 'tokenizer_type': 'GPT2TokenizerFast' } } response = client.post('/', data={ 'text': 'Hello', 'model': 'gpt2', 'model_type': 'predefined' }) assert response.status_code == 200 mock_file_service.process_text_for_tokenization.assert_called_once() @patch('app.services.tokenizer_service.tokenizer_service') @patch('app.services.file_service.file_service') def test_index_post_ajax_request(self, mock_file_service, mock_tokenizer_service, client): """Test AJAX POST request for text analysis.""" # Mock services mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}} expected_response = { 'tokens': [], 'stats': {'basic_stats': {}, 'length_stats': {}}, 'display_limit_reached': False, 'total_tokens': 0 } mock_file_service.process_text_for_tokenization.return_value = expected_response response = client.post('/', data={'text': 'Test', 'model': 'gpt2', 'model_type': 'predefined'}, headers={'X-Requested-With': 'XMLHttpRequest'} ) assert response.status_code == 200 assert response.content_type == 'application/json' data = json.loads(response.data) assert 'tokens' in data assert 'stats' in data @patch('app.services.file_service.file_service') @patch('app.services.tokenizer_service.tokenizer_service') def test_index_post_file_upload(self, mock_tokenizer_service, mock_file_service, client, app): """Test POST request with file upload.""" with app.app_context(): # Mock services mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}} # Mock file processing mock_file_service.save_uploaded_file.return_value = '/tmp/test_file.txt' mock_file_service.process_file_for_tokenization.return_value = { 'tokens': [], 'stats': {'basic_stats': {}, 'length_stats': {}}, 'display_limit_reached': False, 'total_tokens': 0, 'preview_only': True } mock_file_service.cleanup_file.return_value = None # Create test file data file_data = BytesIO(b"Hello world! This is a test file.") response = client.post('/', data={ 'file': (file_data, 'test.txt'), 'model': 'gpt2', 'model_type': 'predefined' }, content_type='multipart/form-data' ) assert response.status_code == 200 mock_file_service.save_uploaded_file.assert_called_once() mock_file_service.process_file_for_tokenization.assert_called_once() mock_file_service.cleanup_file.assert_called_once() @patch('app.utils.validators.validators') def test_index_post_validation_error(self, mock_validators, client): """Test POST request with validation error.""" from app.utils.validators import ValidationError # Mock validation to raise error mock_validators.validate_text_input.side_effect = ValidationError("Invalid input") response = client.post('/', data={'text': 'Invalid text', 'model': 'gpt2'}, headers={'X-Requested-With': 'XMLHttpRequest'} ) assert response.status_code == 400 data = json.loads(response.data) assert 'error' in data assert 'Invalid input' in data['error'] def test_index_post_empty_data(self, client): """Test POST request with empty data.""" response = client.post('/', data={}) assert response.status_code == 200 # Should return the form again without processing class TestTokenizerInfoRoute: """Integration tests for tokenizer info route.""" @patch('app.services.tokenizer_service.tokenizer_service') def test_tokenizer_info_predefined_model(self, mock_tokenizer_service, client): """Test tokenizer info for predefined model.""" # Mock service mock_tokenizer_service.is_predefined_model.return_value = True mock_tokenizer_service.load_tokenizer.return_value = ( Mock(), { 'vocab_size': 50257, 'tokenizer_type': 'GPT2TokenizerFast', 'model_max_length': 1024, 'special_tokens': {'eos_token': ''} }, None ) response = client.get('/tokenizer-info?model_id=gpt2&is_custom=false') assert response.status_code == 200 assert response.content_type == 'application/json' data = json.loads(response.data) assert 'vocab_size' in data assert 'tokenizer_type' in data assert data['vocab_size'] == 50257 @patch('app.services.tokenizer_service.tokenizer_service') @patch('app.utils.validators.validators') def test_tokenizer_info_custom_model(self, mock_validators, mock_tokenizer_service, client): """Test tokenizer info for custom model.""" # Mock validation mock_validators.validate_model_path.return_value = None # Mock service mock_tokenizer_service.is_predefined_model.return_value = False mock_tokenizer_service.load_tokenizer.return_value = ( Mock(), { 'vocab_size': 32000, 'tokenizer_type': 'LlamaTokenizerFast', 'special_tokens': {} }, None ) response = client.get('/tokenizer-info?model_id=meta-llama/Llama-2-7b-hf&is_custom=true') assert response.status_code == 200 data = json.loads(response.data) assert data['vocab_size'] == 32000 def test_tokenizer_info_missing_model_id(self, client): """Test tokenizer info without model_id.""" response = client.get('/tokenizer-info') assert response.status_code == 400 data = json.loads(response.data) assert 'error' in data assert 'No model ID provided' in data['error'] @patch('app.utils.validators.validators') def test_tokenizer_info_validation_error(self, mock_validators, client): """Test tokenizer info with validation error.""" from app.utils.validators import ValidationError # Mock validation to raise error mock_validators.validate_model_path.side_effect = ValidationError("Invalid model path") response = client.get('/tokenizer-info?model_id=invalid/path&is_custom=true') assert response.status_code == 400 data = json.loads(response.data) assert 'error' in data assert 'Invalid model path' in data['error'] @patch('app.services.tokenizer_service.tokenizer_service') def test_tokenizer_info_service_error(self, mock_tokenizer_service, client): """Test tokenizer info with service error.""" # Mock service to return error mock_tokenizer_service.is_predefined_model.return_value = True mock_tokenizer_service.load_tokenizer.return_value = (None, {}, "Failed to load tokenizer") response = client.get('/tokenizer-info?model_id=gpt2&is_custom=false') assert response.status_code == 400 data = json.loads(response.data) assert 'error' in data assert 'Failed to load tokenizer' in data['error'] class TestHealthCheckRoutes: """Integration tests for health check routes.""" def test_basic_health_check(self, client): """Test basic health check endpoint.""" response = client.get('/health') assert response.status_code == 200 assert response.content_type == 'application/json' data = json.loads(response.data) assert 'status' in data assert 'timestamp' in data assert 'version' in data assert data['status'] == 'healthy' @patch('app.services.tokenizer_service.tokenizer_service') @patch('psutil.cpu_percent') @patch('psutil.virtual_memory') @patch('psutil.disk_usage') def test_detailed_health_check(self, mock_disk, mock_memory, mock_cpu, mock_tokenizer_service, client): """Test detailed health check endpoint.""" # Mock system info mock_cpu.return_value = 25.5 mock_memory.return_value = Mock(total=8000000000, available=4000000000, percent=50.0, used=4000000000) mock_disk.return_value = Mock(total=100000000000, used=50000000000, free=50000000000) # Mock tokenizer service mock_tokenizer_service.tokenizers = {} mock_tokenizer_service.custom_tokenizers = {} mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {}} mock_tokenizer_service.load_tokenizer.return_value = (Mock(), {}, None) response = client.get('/health/detailed') assert response.status_code == 200 data = json.loads(response.data) assert 'status' in data assert 'system' in data assert 'services' in data assert 'configuration' in data # Check system info assert 'cpu_percent' in data['system'] assert 'memory' in data['system'] assert 'disk' in data['system'] # Check services info assert 'tokenizer_service' in data['services'] assert 'file_service' in data['services'] @patch('app.services.tokenizer_service.tokenizer_service') def test_readiness_check_ready(self, mock_tokenizer_service, client, app): """Test readiness check when application is ready.""" with app.app_context(): # Mock successful tokenizer loading mock_tokenizer_service.load_tokenizer.return_value = (Mock(), {}, None) response = client.get('/health/ready') assert response.status_code == 200 data = json.loads(response.data) assert 'ready' in data assert 'checks' in data assert isinstance(data['checks'], dict) @patch('app.services.tokenizer_service.tokenizer_service') def test_readiness_check_not_ready(self, mock_tokenizer_service, client): """Test readiness check when application is not ready.""" # Mock failed tokenizer loading mock_tokenizer_service.load_tokenizer.return_value = (None, {}, "Failed to load") response = client.get('/health/ready') assert response.status_code == 503 data = json.loads(response.data) assert data['ready'] is False assert 'checks' in data class TestErrorHandling: """Test error handling across routes.""" def test_404_handling(self, client): """Test 404 error handling.""" response = client.get('/nonexistent-route') assert response.status_code == 404 def test_405_method_not_allowed(self, client): """Test 405 error for wrong HTTP method.""" response = client.put('/') # PUT not allowed assert response.status_code == 405 @patch('app.services.tokenizer_service.tokenizer_service') def test_500_internal_error(self, mock_tokenizer_service, client): """Test 500 error handling.""" # Mock service to raise unexpected exception mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}} with patch('app.services.file_service.file_service') as mock_file_service: mock_file_service.process_text_for_tokenization.side_effect = Exception("Unexpected error") response = client.post('/', data={'text': 'Test', 'model': 'gpt2', 'model_type': 'predefined'}, headers={'X-Requested-With': 'XMLHttpRequest'} ) assert response.status_code == 400 # Our app returns 400 for processing errors data = json.loads(response.data) assert 'error' in data class TestSecurityFeatures: """Test security features in routes.""" @patch('app.utils.validators.validators') def test_malicious_filename_blocked(self, mock_validators, client): """Test that malicious filenames are blocked.""" from app.utils.validators import ValidationError # Mock validation to detect malicious filename mock_validators.validate_filename.side_effect = ValidationError("Malicious filename detected") file_data = BytesIO(b"test content") response = client.post('/', data={ 'file': (file_data, '../../../etc/passwd'), 'model': 'gpt2', 'model_type': 'predefined' }, content_type='multipart/form-data', headers={'X-Requested-With': 'XMLHttpRequest'} ) assert response.status_code == 400 data = json.loads(response.data) assert 'error' in data @patch('app.utils.validators.validators') def test_malicious_model_path_blocked(self, mock_validators, client): """Test that malicious model paths are blocked.""" from app.utils.validators import ValidationError # Mock validation to detect malicious model path mock_validators.validate_model_path.side_effect = ValidationError("Untrusted model path") response = client.post('/', data={ 'text': 'Test', 'custom_model': 'malicious/backdoor-model', 'model_type': 'custom' }, headers={'X-Requested-With': 'XMLHttpRequest'} ) assert response.status_code == 400 data = json.loads(response.data) assert 'error' in data assert 'Untrusted model path' in data['error']