# File size: 6,749 Bytes
# a963d65
"""
Tests for Local Mock Processor
Simple tests to verify mock processing functionality
"""
import pytest
import asyncio
import os
from unittest.mock import patch, Mock
from src.file_processor import LocalProcessor
class TestLocalProcessor:
    """Test suite for the local processor.

    Covers ``LocalProcessor.process_document`` end to end as well as the
    private helpers for mock text selection, medical entity extraction,
    processing-mode detection, text-extraction fallback and FHIR bundle
    creation.
    """

    # Environment variables that the cases below use to toggle fallbacks.
    # NOTE(review): the list is taken from this test module only; confirm
    # against LocalProcessor that no other variable influences the mode.
    _FALLBACK_ENV_VARS = (
        "USE_MISTRAL_FALLBACK",
        "MISTRAL_API_KEY",
        "USE_MULTIMODAL_FALLBACK",
    )

    @classmethod
    def _fallback_env(cls, **overrides):
        """Return a ``patch.dict`` context with a controlled fallback config.

        Inside the context ``os.environ`` keeps every ambient variable except
        those named in ``_FALLBACK_ENV_VARS``; *overrides* are then applied on
        top. The original environment is restored when the context exits.

        Args:
            **overrides: Environment variables (name -> string value) to set
                for the duration of the context.

        Returns:
            The ``patch.dict`` patcher, to be used in a ``with`` statement.
        """
        scrubbed = {
            name: value
            for name, value in os.environ.items()
            if name not in cls._FALLBACK_ENV_VARS
        }
        scrubbed.update(overrides)
        # clear=True is required: patch.dict can only add/replace keys, so the
        # fallback variables are removed by rebuilding the mapping from scratch.
        return patch.dict(os.environ, scrubbed, clear=True)

    @pytest.fixture
    def local_processor(self):
        """Create a local processor instance."""
        return LocalProcessor()

    @pytest.fixture
    def sample_document_bytes(self):
        """Sample document bytes for testing."""
        return b"Mock PDF document content"

    @pytest.mark.asyncio
    async def test_basic_document_processing(self, local_processor, sample_document_bytes):
        """Test basic document processing without fallbacks."""
        result = await local_processor.process_document(
            document_bytes=sample_document_bytes,
            user_id="test-user-123",
            filename="test_document.pdf",
        )

        # Verify response structure
        assert result["status"] == "success"
        assert result["filename"] == "test_document.pdf"
        assert result["processed_by"] == "test-user-123"
        for required_key in ("entities_found", "fhir_bundle", "extracted_text"):
            assert required_key in result

        # Verify FHIR bundle structure
        fhir_bundle = result["fhir_bundle"]
        assert fhir_bundle["resourceType"] == "Bundle"
        assert fhir_bundle["type"] == "document"
        assert len(fhir_bundle["entry"]) >= 2  # Patient + Observation

        # Check for required FHIR resources
        resource_types = {
            entry["resource"]["resourceType"] for entry in fhir_bundle["entry"]
        }
        assert "Patient" in resource_types
        assert "Observation" in resource_types

    def test_mock_text_extraction_by_file_type(self, local_processor):
        """Test text extraction based on file types."""
        # PDF/DOC files
        pdf_text = local_processor._get_mock_text_by_type("medical_record.pdf")
        assert "MEDICAL RECORD" in pdf_text
        assert "John Doe" in pdf_text

        # Image files
        image_text = local_processor._get_mock_text_by_type("lab_results.jpg")
        assert "LAB REPORT" in image_text
        assert "Jane Smith" in image_text

        # Any other file type
        other_text = local_processor._get_mock_text_by_type("notes.txt")
        assert "CLINICAL NOTE" in other_text

    def test_medical_entity_extraction(self, local_processor):
        """Test medical entity extraction."""
        # Spelled out line by line so the exact characters handed to the
        # extractor do not depend on how this method happens to be indented.
        test_text = (
            "\n"
            "Patient: John Doe\n"
            "Diagnosis: Hypertension\n"
            "Medication: Lisinopril\n"
            "Blood Pressure: 140/90\n"
        )

        entities = local_processor._extract_medical_entities(test_text)

        # Should find multiple entities
        assert len(entities) > 0

        # Check entity types
        entity_types = {entity["type"] for entity in entities}
        for expected_type in ("PERSON", "CONDITION", "MEDICATION", "VITAL"):
            assert expected_type in entity_types

        # Verify entity structure
        for entity in entities:
            for field in ("text", "type", "confidence", "start", "end"):
                assert field in entity

    def test_processing_mode_detection(self, local_processor):
        """Test processing mode detection.

        Every case runs inside ``_fallback_env`` so that fallback variables
        exported in the developer's or CI environment cannot change the
        outcome. A fresh ``LocalProcessor`` is built inside each context
        rather than reusing the ``local_processor`` fixture, because the
        fixture instance is created under the ambient environment.
        """
        # Default mode: no fallback variables present at all
        with self._fallback_env():
            processor = LocalProcessor()
            assert processor._get_processing_mode() == "local_mock_only"

        # Mistral fallback enabled (and only that one)
        with self._fallback_env(USE_MISTRAL_FALLBACK="true", MISTRAL_API_KEY="test-key"):
            processor = LocalProcessor()
            mode = processor._get_processing_mode()
            assert mode == "local_mock_with_mistral_fallback"

        # Multimodal fallback enabled (and only that one)
        with self._fallback_env(USE_MULTIMODAL_FALLBACK="true"):
            processor = LocalProcessor()
            mode = processor._get_processing_mode()
            assert mode == "local_mock_with_multimodal_fallback"

    @pytest.mark.asyncio
    async def test_fallback_handling(self, local_processor, sample_document_bytes):
        """Test fallback mechanisms."""
        # Test with fallbacks disabled (default)
        # NOTE(review): relies on the ambient environment not enabling a
        # fallback when the fixture instance is created - confirm for CI.
        text = await local_processor._extract_text_with_fallback(
            sample_document_bytes, "test.pdf"
        )
        assert isinstance(text, str)
        assert len(text) > 0

    @pytest.mark.asyncio
    @pytest.mark.skipif(not os.getenv("MISTRAL_API_KEY"), reason="Mistral API key not available")
    async def test_mistral_fallback(self, local_processor, sample_document_bytes):
        """Test Mistral API fallback (requires API key)."""
        with patch.dict(os.environ, {"USE_MISTRAL_FALLBACK": "true"}):
            processor = LocalProcessor()

            # Mock the Mistral API response so no real HTTP request is sent
            with patch('httpx.AsyncClient.post') as mock_post:
                mock_response = Mock()
                mock_response.status_code = 200
                mock_response.json.return_value = {
                    "choices": [{"message": {"content": "Extracted medical text from Mistral"}}]
                }
                mock_post.return_value = mock_response

                text = await processor._extract_with_mistral(sample_document_bytes)
                assert text == "Extracted medical text from Mistral"

    def test_fhir_bundle_creation(self, local_processor):
        """Test FHIR bundle creation."""
        test_entities = [
            {"text": "John Doe", "type": "PERSON", "confidence": 0.95},
            {"text": "Hypertension", "type": "CONDITION", "confidence": 0.89},
        ]

        bundle = local_processor._create_simple_fhir_bundle(test_entities, "test-user")

        # Verify bundle structure
        assert bundle["resourceType"] == "Bundle"
        assert bundle["type"] == "document"
        assert "timestamp" in bundle
        assert "entry" in bundle

        # Verify metadata
        assert bundle["_metadata"]["entities_found"] == 2
        assert bundle["_metadata"]["processed_by"] == "test-user"

        # Verify LOINC codes in observations. A default is passed to next() so
        # a bundle without an Observation fails with a readable assertion
        # message instead of a bare StopIteration.
        observation_entry = next(
            (
                entry
                for entry in bundle["entry"]
                if entry["resource"]["resourceType"] == "Observation"
            ),
            None,
        )
        assert observation_entry is not None, "bundle contains no Observation entry"

        coding = observation_entry["resource"]["code"]["coding"][0]
        assert coding["system"] == "http://loinc.org"
        assert coding["code"] == "85354-9"
# Allow running this test module directly as a script.
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; hand it to the interpreter so
    # a failing run produces a non-zero process exit status.
    raise SystemExit(pytest.main([__file__, "-v"]))