"""
Integration tests for Flask routes
"""
import json
import os
import tempfile
from io import BytesIO
from unittest.mock import Mock, patch

import pytest
from werkzeug.datastructures import FileStorage
class TestMainRoutes:
    """Integration tests for main application routes (GET/POST on '/').

    All tests use the ``client`` fixture (Flask test client) and the
    ``mock_*`` service fixtures — presumably patched module singletons
    from conftest; verify against the fixture definitions.
    """

    def test_index_get_basic(self, client):
        """GET / returns 200 and renders the main page content."""
        response = client.get('/')
        assert response.status_code == 200
        assert b'Tokenizer Pro' in response.data
        assert b'Advanced tokenization analysis' in response.data
        assert b'textarea' in response.data

    def test_index_get_with_parameters(self, client):
        """GET / with model query parameters still renders successfully."""
        response = client.get('/?model=gpt2&model_type=predefined')
        assert response.status_code == 200
        # The page may render either the raw model id or its display alias.
        assert b'gpt2' in response.data or b'GPT-2' in response.data

    def test_index_post_text_analysis(self, mock_file_service, mock_tokenizer_service, client):
        """POST / with text triggers the text-tokenization pipeline exactly once."""
        # Mock services: one known model, one fully-populated analysis result.
        mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}}
        mock_file_service.process_text_for_tokenization.return_value = {
            'tokens': [
                {
                    'display': 'Hello',
                    'original': 'Hello',
                    'token_id': 15496,
                    'colors': {'background': '#FF5733', 'text': '#FFFFFF'},
                    'newline': False
                }
            ],
            'stats': {
                'basic_stats': {
                    'total_tokens': 1,
                    'unique_tokens': 1,
                    'unique_percentage': '100.0',
                    'special_tokens': 0,
                    'space_tokens': 0,
                    'newline_tokens': 0,
                    'compression_ratio': '5.0'
                },
                'length_stats': {
                    'avg_length': '5.0',
                    'median_length': '5.0',
                    'std_dev': '0.0'
                }
            },
            'display_limit_reached': False,
            'total_tokens': 1,
            'preview_only': False,
            'tokenizer_info': {
                'vocab_size': 50257,
                'tokenizer_type': 'GPT2TokenizerFast'
            }
        }
        response = client.post('/', data={
            'text': 'Hello',
            'model': 'gpt2',
            'model_type': 'predefined'
        })
        assert response.status_code == 200
        mock_file_service.process_text_for_tokenization.assert_called_once()

    def test_index_post_ajax_request(self, mock_file_service, mock_tokenizer_service, client):
        """AJAX POST (X-Requested-With header) returns a JSON payload."""
        # Mock services
        mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}}
        expected_response = {
            'tokens': [],
            'stats': {'basic_stats': {}, 'length_stats': {}},
            'display_limit_reached': False,
            'total_tokens': 0
        }
        mock_file_service.process_text_for_tokenization.return_value = expected_response
        response = client.post(
            '/',
            data={'text': 'Test', 'model': 'gpt2', 'model_type': 'predefined'},
            headers={'X-Requested-With': 'XMLHttpRequest'}
        )
        assert response.status_code == 200
        assert response.content_type == 'application/json'
        data = json.loads(response.data)
        assert 'tokens' in data
        assert 'stats' in data

    def test_index_post_file_upload(self, mock_tokenizer_service, mock_file_service, client, app):
        """POST / with a multipart file upload saves, processes, and cleans up the file."""
        with app.app_context():
            # Mock services
            mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}}
            # Mock file processing: save path, empty analysis, no-op cleanup.
            mock_file_service.save_uploaded_file.return_value = '/tmp/test_file.txt'
            mock_file_service.process_file_for_tokenization.return_value = {
                'tokens': [],
                'stats': {'basic_stats': {}, 'length_stats': {}},
                'display_limit_reached': False,
                'total_tokens': 0,
                'preview_only': True
            }
            mock_file_service.cleanup_file.return_value = None
            # Create test file data
            file_data = BytesIO(b"Hello world! This is a test file.")
            response = client.post(
                '/',
                data={
                    'file': (file_data, 'test.txt'),
                    'model': 'gpt2',
                    'model_type': 'predefined'
                },
                content_type='multipart/form-data'
            )
            assert response.status_code == 200
            # The full save -> process -> cleanup lifecycle must run exactly once each.
            mock_file_service.save_uploaded_file.assert_called_once()
            mock_file_service.process_file_for_tokenization.assert_called_once()
            mock_file_service.cleanup_file.assert_called_once()

    def test_index_post_validation_error(self, mock_validators, client):
        """A ValidationError from input validation yields a 400 JSON error."""
        from app.utils.validators import ValidationError
        # Mock validation to raise error
        mock_validators.validate_text_input.side_effect = ValidationError("Invalid input")
        response = client.post(
            '/',
            data={'text': 'Invalid text', 'model': 'gpt2'},
            headers={'X-Requested-With': 'XMLHttpRequest'}
        )
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'Invalid input' in data['error']

    def test_index_post_empty_data(self, client):
        """POST / with no data re-renders the form instead of failing."""
        response = client.post('/', data={})
        assert response.status_code == 200
        # Should return the form again without processing
class TestTokenizerInfoRoute:
    """Integration tests for the /tokenizer-info endpoint.

    The tokenizer service's ``load_tokenizer`` is mocked to return a
    ``(tokenizer, info_dict, error)`` triple; a non-None third element
    signals a load failure.
    """

    def test_tokenizer_info_predefined_model(self, mock_tokenizer_service, client):
        """Info for a predefined model returns its vocab size and type as JSON."""
        # Mock service
        mock_tokenizer_service.is_predefined_model.return_value = True
        mock_tokenizer_service.load_tokenizer.return_value = (
            Mock(),
            {
                'vocab_size': 50257,
                'tokenizer_type': 'GPT2TokenizerFast',
                'model_max_length': 1024,
                'special_tokens': {'eos_token': '</s>'}
            },
            None
        )
        response = client.get('/tokenizer-info?model_id=gpt2&is_custom=false')
        assert response.status_code == 200
        assert response.content_type == 'application/json'
        data = json.loads(response.data)
        assert 'vocab_size' in data
        assert 'tokenizer_type' in data
        assert data['vocab_size'] == 50257

    def test_tokenizer_info_custom_model(self, mock_validators, mock_tokenizer_service, client):
        """Info for a custom (hub-path) model passes validation and loads."""
        # Mock validation
        mock_validators.validate_model_path.return_value = None
        # Mock service
        mock_tokenizer_service.is_predefined_model.return_value = False
        mock_tokenizer_service.load_tokenizer.return_value = (
            Mock(),
            {
                'vocab_size': 32000,
                'tokenizer_type': 'LlamaTokenizerFast',
                'special_tokens': {}
            },
            None
        )
        response = client.get('/tokenizer-info?model_id=meta-llama/Llama-2-7b-hf&is_custom=true')
        assert response.status_code == 200
        data = json.loads(response.data)
        assert data['vocab_size'] == 32000

    def test_tokenizer_info_missing_model_id(self, client):
        """Omitting model_id returns a 400 with an explanatory error."""
        response = client.get('/tokenizer-info')
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'No model ID provided' in data['error']

    def test_tokenizer_info_validation_error(self, mock_validators, client):
        """A ValidationError on the model path is surfaced as a 400 JSON error."""
        from app.utils.validators import ValidationError
        # Mock validation to raise error
        mock_validators.validate_model_path.side_effect = ValidationError("Invalid model path")
        response = client.get('/tokenizer-info?model_id=invalid/path&is_custom=true')
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'Invalid model path' in data['error']

    def test_tokenizer_info_service_error(self, mock_tokenizer_service, client):
        """A tokenizer-load failure (error string in the triple) yields a 400."""
        # Mock service to return error
        mock_tokenizer_service.is_predefined_model.return_value = True
        mock_tokenizer_service.load_tokenizer.return_value = (None, {}, "Failed to load tokenizer")
        response = client.get('/tokenizer-info?model_id=gpt2&is_custom=false')
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'Failed to load tokenizer' in data['error']
class TestHealthCheckRoutes:
    """Integration tests for the /health, /health/detailed, and /health/ready routes."""

    def test_basic_health_check(self, client):
        """GET /health returns a healthy JSON status payload."""
        response = client.get('/health')
        assert response.status_code == 200
        assert response.content_type == 'application/json'
        data = json.loads(response.data)
        assert 'status' in data
        assert 'timestamp' in data
        assert 'version' in data
        assert data['status'] == 'healthy'

    def test_detailed_health_check(self, mock_disk, mock_memory, mock_cpu, mock_tokenizer_service, client):
        """GET /health/detailed includes system, services, and configuration sections."""
        # Mock system info (presumably psutil patches — verify in conftest).
        mock_cpu.return_value = 25.5
        mock_memory.return_value = Mock(total=8000000000, available=4000000000, percent=50.0, used=4000000000)
        mock_disk.return_value = Mock(total=100000000000, used=50000000000, free=50000000000)
        # Mock tokenizer service
        mock_tokenizer_service.tokenizers = {}
        mock_tokenizer_service.custom_tokenizers = {}
        mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {}}
        mock_tokenizer_service.load_tokenizer.return_value = (Mock(), {}, None)
        response = client.get('/health/detailed')
        assert response.status_code == 200
        data = json.loads(response.data)
        assert 'status' in data
        assert 'system' in data
        assert 'services' in data
        assert 'configuration' in data
        # Check system info
        assert 'cpu_percent' in data['system']
        assert 'memory' in data['system']
        assert 'disk' in data['system']
        # Check services info
        assert 'tokenizer_service' in data['services']
        assert 'file_service' in data['services']

    def test_readiness_check_ready(self, mock_tokenizer_service, client, app):
        """GET /health/ready returns 200 with per-check results when loading succeeds."""
        with app.app_context():
            # Mock successful tokenizer loading
            mock_tokenizer_service.load_tokenizer.return_value = (Mock(), {}, None)
            response = client.get('/health/ready')
            assert response.status_code == 200
            data = json.loads(response.data)
            assert 'ready' in data
            assert 'checks' in data
            assert isinstance(data['checks'], dict)

    def test_readiness_check_not_ready(self, mock_tokenizer_service, client):
        """GET /health/ready returns 503 when tokenizer loading fails."""
        # Mock failed tokenizer loading
        mock_tokenizer_service.load_tokenizer.return_value = (None, {}, "Failed to load")
        response = client.get('/health/ready')
        assert response.status_code == 503
        data = json.loads(response.data)
        assert data['ready'] is False
        assert 'checks' in data
class TestErrorHandling:
    """Test error handling across routes."""

    def test_404_handling(self, client):
        """Unknown routes return 404."""
        response = client.get('/nonexistent-route')
        assert response.status_code == 404

    def test_405_method_not_allowed(self, client):
        """Unsupported HTTP methods on '/' return 405."""
        response = client.put('/')  # PUT not allowed
        assert response.status_code == 405

    def test_500_internal_error(self, mock_tokenizer_service, client):
        """Unexpected service exceptions are converted to a 400 JSON error."""
        # Mock service to raise unexpected exception
        mock_tokenizer_service.TOKENIZER_MODELS = {'gpt2': {'name': 'gpt2', 'alias': 'GPT-2'}}
        with patch('app.services.file_service.file_service') as mock_file_service:
            mock_file_service.process_text_for_tokenization.side_effect = Exception("Unexpected error")
            response = client.post(
                '/',
                data={'text': 'Test', 'model': 'gpt2', 'model_type': 'predefined'},
                headers={'X-Requested-With': 'XMLHttpRequest'}
            )
            assert response.status_code == 400  # Our app returns 400 for processing errors
            data = json.loads(response.data)
            assert 'error' in data
class TestSecurityFeatures:
    """Test security features in routes."""

    def test_malicious_filename_blocked(self, mock_validators, client):
        """Path-traversal filenames are rejected with a 400 JSON error."""
        from app.utils.validators import ValidationError
        # Mock validation to detect malicious filename
        mock_validators.validate_filename.side_effect = ValidationError("Malicious filename detected")
        file_data = BytesIO(b"test content")
        response = client.post(
            '/',
            data={
                'file': (file_data, '../../../etc/passwd'),
                'model': 'gpt2',
                'model_type': 'predefined'
            },
            content_type='multipart/form-data',
            headers={'X-Requested-With': 'XMLHttpRequest'}
        )
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data

    def test_malicious_model_path_blocked(self, mock_validators, client):
        """Untrusted custom model paths are rejected with a 400 JSON error."""
        from app.utils.validators import ValidationError
        # Mock validation to detect malicious model path
        mock_validators.validate_model_path.side_effect = ValidationError("Untrusted model path")
        response = client.post(
            '/',
            data={
                'text': 'Test',
                'custom_model': 'malicious/backdoor-model',
                'model_type': 'custom'
            },
            headers={'X-Requested-With': 'XMLHttpRequest'}
        )
        assert response.status_code == 400
        data = json.loads(response.data)
        assert 'error' in data
        assert 'Untrusted model path' in data['error']