File size: 6,749 Bytes
a963d65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
"""
Tests for Local Mock Processor
Simple tests to verify mock processing functionality
"""

import pytest
import asyncio
import os
from unittest.mock import patch, Mock
from src.file_processor import LocalProcessor

class TestLocalProcessor:
    """Test suite for the local (mock) document processor.

    Exercises the public ``process_document`` coroutine as well as the private
    helpers used for mock text selection, entity extraction, processing-mode
    detection, text-extraction fallback and FHIR bundle creation.
    """

    # Opt-in fallback switches toggled by the tests below. They are removed
    # from the environment while the ``local_processor`` fixture is active so
    # that "default" assertions do not depend on the developer's/CI shell.
    # MISTRAL_API_KEY is intentionally NOT listed: test_mistral_fallback is
    # gated on it being available.
    _FALLBACK_FLAGS = ("USE_MISTRAL_FALLBACK", "USE_MULTIMODAL_FALLBACK")

    @pytest.fixture
    def local_processor(self):
        """Yield a processor created and used with the fallback flags unset.

        The environment is patched for the whole duration of the test (not
        only during construction), so the result is the same whether the
        processor reads the flags at init time or at call time. The original
        environment is restored on teardown.
        """
        baseline_env = {
            name: value
            for name, value in os.environ.items()
            if name not in self._FALLBACK_FLAGS
        }
        with patch.dict(os.environ, baseline_env, clear=True):
            yield LocalProcessor()

    @pytest.fixture
    def sample_document_bytes(self):
        """Sample document bytes for testing"""
        return b"Mock PDF document content"

    @pytest.mark.asyncio
    async def test_basic_document_processing(self, local_processor, sample_document_bytes):
        """Test basic document processing without fallbacks"""
        result = await local_processor.process_document(
            document_bytes=sample_document_bytes,
            user_id="test-user-123",
            filename="test_document.pdf"
        )

        # Verify response structure
        assert result["status"] == "success"
        assert result["filename"] == "test_document.pdf"
        assert result["processed_by"] == "test-user-123"
        assert "entities_found" in result
        assert "fhir_bundle" in result
        assert "extracted_text" in result

        # Verify FHIR bundle structure
        fhir_bundle = result["fhir_bundle"]
        assert fhir_bundle["resourceType"] == "Bundle"
        assert fhir_bundle["type"] == "document"
        assert len(fhir_bundle["entry"]) >= 2  # Patient + Observation

        # Check for required FHIR resources
        resource_types = {entry["resource"]["resourceType"] for entry in fhir_bundle["entry"]}
        assert "Patient" in resource_types
        assert "Observation" in resource_types

    def test_mock_text_extraction_by_file_type(self, local_processor):
        """Test that the mock text returned depends on the file extension"""
        # Test PDF/DOC files
        pdf_text = local_processor._get_mock_text_by_type("medical_record.pdf")
        assert "MEDICAL RECORD" in pdf_text
        assert "John Doe" in pdf_text

        # Test image files
        image_text = local_processor._get_mock_text_by_type("lab_results.jpg")
        assert "LAB REPORT" in image_text
        assert "Jane Smith" in image_text

        # Test other files
        other_text = local_processor._get_mock_text_by_type("notes.txt")
        assert "CLINICAL NOTE" in other_text

    def test_medical_entity_extraction(self, local_processor):
        """Test medical entity extraction"""
        test_text = """
        Patient: John Doe
        Diagnosis: Hypertension
        Medication: Lisinopril
        Blood Pressure: 140/90
        """

        entities = local_processor._extract_medical_entities(test_text)

        # Should find multiple entities
        assert len(entities) > 0

        # Check entity types
        entity_types = {entity["type"] for entity in entities}
        for expected_type in ("PERSON", "CONDITION", "MEDICATION", "VITAL"):
            assert expected_type in entity_types

        # Verify entity structure
        for entity in entities:
            for required_key in ("text", "type", "confidence", "start", "end"):
                assert required_key in entity

    def test_processing_mode_detection(self, local_processor):
        """Test processing mode detection for the default and both fallback setups.

        The ``local_processor`` fixture has already removed the fallback flags
        from the environment, so each ``patch.dict`` below enables exactly one
        fallback on top of a clean baseline.
        """
        # Test default mode
        mode = local_processor._get_processing_mode()
        assert mode == "local_mock_only"

        # Test with environment variables
        with patch.dict(os.environ, {"USE_MISTRAL_FALLBACK": "true", "MISTRAL_API_KEY": "test-key"}):
            processor = LocalProcessor()
            mode = processor._get_processing_mode()
            assert mode == "local_mock_with_mistral_fallback"

        with patch.dict(os.environ, {"USE_MULTIMODAL_FALLBACK": "true"}):
            processor = LocalProcessor()
            mode = processor._get_processing_mode()
            assert mode == "local_mock_with_multimodal_fallback"

    @pytest.mark.asyncio
    async def test_fallback_handling(self, local_processor, sample_document_bytes):
        """Test text extraction when no fallback is enabled"""
        # Test with fallbacks disabled (guaranteed by the local_processor fixture)
        text = await local_processor._extract_text_with_fallback(sample_document_bytes, "test.pdf")
        assert isinstance(text, str)
        assert len(text) > 0

    @pytest.mark.asyncio
    @pytest.mark.skipif(not os.getenv("MISTRAL_API_KEY"), reason="Mistral API key not available")
    async def test_mistral_fallback(self, local_processor, sample_document_bytes):
        """Test Mistral API fallback (requires API key)"""
        with patch.dict(os.environ, {"USE_MISTRAL_FALLBACK": "true"}):
            processor = LocalProcessor()

            # Mock the Mistral API response so no real HTTP request is sent
            with patch('httpx.AsyncClient.post') as mock_post:
                mock_response = Mock()
                mock_response.status_code = 200
                mock_response.json.return_value = {
                    "choices": [{"message": {"content": "Extracted medical text from Mistral"}}]
                }
                mock_post.return_value = mock_response

                text = await processor._extract_with_mistral(sample_document_bytes)
                assert text == "Extracted medical text from Mistral"

    def test_fhir_bundle_creation(self, local_processor):
        """Test FHIR bundle creation"""
        test_entities = [
            {"text": "John Doe", "type": "PERSON", "confidence": 0.95},
            {"text": "Hypertension", "type": "CONDITION", "confidence": 0.89}
        ]

        bundle = local_processor._create_simple_fhir_bundle(test_entities, "test-user")

        # Verify bundle structure
        assert bundle["resourceType"] == "Bundle"
        assert bundle["type"] == "document"
        assert "timestamp" in bundle
        assert "entry" in bundle

        # Verify metadata
        assert bundle["_metadata"]["entities_found"] == 2
        assert bundle["_metadata"]["processed_by"] == "test-user"

        # Verify LOINC codes in observations. A default is passed to next() so
        # a missing Observation is reported as a readable assertion failure
        # instead of an unhandled StopIteration error.
        observation_entry = next(
            (
                entry for entry in bundle["entry"]
                if entry["resource"]["resourceType"] == "Observation"
            ),
            None,
        )
        assert observation_entry is not None, "bundle contains no Observation resource"
        coding = observation_entry["resource"]["code"]["coding"][0]
        assert coding["system"] == "http://loinc.org"
        assert coding["code"] == "85354-9"

if __name__ == "__main__":
    # pytest.main() returns the run's exit code; propagate it so that running
    # this file directly reports failures to the shell/CI instead of always
    # exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))