File size: 7,412 Bytes
a963d65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
#!/usr/bin/env python3
"""
FhirFlame Mistral OCR API Integration Test
Testing real Mistral Pixtral-12B OCR with medical document processing
"""
import asyncio
import os
import sys
import base64
import time
from datetime import datetime
# Add src to path (from tests directory)
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from src.file_processor import local_processor
from src.monitoring import monitor
def create_mock_medical_image() -> bytes:
    """Create a mock medical document image (PNG format).

    Builds a minimal 1x1 fully transparent RGBA PNG in memory. In real
    scenarios this would be actual medical document image bytes.

    Returns:
        The complete PNG file contents: the 8-byte signature followed by
        IHDR, IDAT and IEND chunks.
    """
    # Function-scope imports keep this helper self-contained.
    import struct
    import zlib

    def png_chunk(chunk_type: bytes, payload: bytes) -> bytes:
        """Frame *payload* as a PNG chunk: length, type, data, CRC-32."""
        checksum = zlib.crc32(chunk_type + payload) & 0xFFFFFFFF
        return (
            struct.pack(">I", len(payload))
            + chunk_type
            + payload
            + struct.pack(">I", checksum)
        )

    # Chunk lengths and CRCs are computed instead of hand-typed: a single
    # wrong length byte misaligns every following chunk and makes the file
    # undecodable.
    # IHDR: width=1, height=1, bit depth 8, colour type 6 (RGBA),
    # compression 0, filter 0, no interlace.
    header = struct.pack(">IIBBBBB", 1, 1, 8, 6, 0, 0, 0)
    # One scanline: filter-type byte 0, then a single all-zero RGBA pixel.
    scanline = b"\x00" + b"\x00\x00\x00\x00"

    return b"".join((
        b"\x89PNG\r\n\x1a\n",
        png_chunk(b"IHDR", header),
        png_chunk(b"IDAT", zlib.compress(scanline)),
        png_chunk(b"IEND", b""),
    ))
def create_mock_medical_pdf_text() -> str:
    """Create realistic medical document text for simulation.

    Returns:
        A fictional patient record as newline-separated text. The string
        starts and ends with a newline.
    """
    # Listing the lines explicitly keeps the returned text independent of
    # how this function happens to be indented in the source file.
    record_lines = (
        "",
        "MEDICAL RECORD - CONFIDENTIAL",
        "Patient: Sarah Johnson",
        "DOB: 1985-07-20",
        "MRN: MR456789",
        "CHIEF COMPLAINT:",
        "Follow-up visit for Type 2 Diabetes Mellitus",
        "CURRENT MEDICATIONS:",
        "- Metformin 1000mg twice daily",
        "- Glipizide 5mg once daily",
        "- Lisinopril 10mg once daily for hypertension",
        "VITAL SIGNS:",
        "- Blood Pressure: 130/85 mmHg",
        "- Weight: 168 lbs",
        "- BMI: 26.8",
        "- Glucose: 145 mg/dL",
        "ASSESSMENT:",
        "Type 2 Diabetes - adequately controlled",
        "Hypertension - stable",
        "PLAN:",
        "Continue current medications",
        "Follow-up in 3 months",
        "Annual eye exam recommended",
        "",
    )
    return "\n".join(record_lines)
async def test_mistral_ocr_integration():
    """Test complete Mistral OCR integration with monitoring.

    Prints the relevant configuration, sends a mock PNG through
    ``local_processor.process_document`` and reports the outcome. If that
    attempt raises, the document is processed a second time with
    ``MISTRAL_API_KEY`` blanked out to exercise the fallback path.

    Returns:
        The value returned by ``process_document``: from the first attempt
        when it succeeds, otherwise from the fallback attempt.

    Raises:
        Exception: The error from the first attempt, re-raised when the
            fallback attempt fails as well.
    """
    print("π FhirFlame Mistral OCR API Integration Test")
    print("=" * 55)
    print(f"π Starting at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Check configuration
    api_key_status = '✅ Set' if os.getenv('MISTRAL_API_KEY') else '❌ Missing'
    langfuse_status = '✅ Active' if monitor.langfuse else '❌ Disabled'
    print("\nπ§ Configuration:")
    print(f" USE_MISTRAL_FALLBACK: {os.getenv('USE_MISTRAL_FALLBACK', 'false')}")
    print(f" MISTRAL_API_KEY: {api_key_status}")
    print(f" Langfuse Monitoring: {langfuse_status}")

    # Create test medical document image
    print("\nπ Creating test medical document...")
    document_bytes = create_mock_medical_image()
    print(f" Document size: {len(document_bytes)} bytes")
    print(" Format: PNG medical document simulation")

    # Test Mistral OCR processing
    try:
        print("\nπ Testing Mistral Pixtral-12B OCR...")
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments.
        start_time = time.perf_counter()

        # Process document with Mistral OCR
        result = await local_processor.process_document(
            document_bytes=document_bytes,
            user_id="test-user-mistral",
            filename="medical_record.png",
        )
        processing_time = time.perf_counter() - start_time

        # Display results
        print(f"✅ Processing completed in {processing_time:.2f}s")
        print(f"π Processing mode: {result['processing_mode']}")
        print(f"π― Entities found: {result['entities_found']}")

        # Show extracted text (first 300 chars)
        extracted_text = result.get('extracted_text', '')
        if extracted_text:
            print("\nπ Extracted Text (preview):")
            print(f" {extracted_text[:300]}{'...' if len(extracted_text) > 300 else ''}")

        # Validate FHIR bundle
        if 'fhir_bundle' in result:
            # Imported lazily: only needed when a bundle was produced.
            from src.fhir_validator import FhirValidator

            validator = FhirValidator()
            print("\nπ Validating FHIR bundle...")
            validation_result = validator.validate_fhir_bundle(result['fhir_bundle'])
            print(f" FHIR R4 Valid: {validation_result['is_valid']}")
            print(f" Compliance Score: {validation_result['compliance_score']:.1%}")
            print(f" Resources: {', '.join(validation_result.get('detected_resources', []))}")

        # Log monitoring summary
        if monitor.langfuse:
            print("\nπ Monitoring Summary:")
            print(f" Session ID: {monitor.session_id}")
            print(" Mistral API called: ✅")
            print(" Langfuse events logged: ✅")

        return result
    except Exception as e:
        print(f"❌ Mistral OCR test failed: {e}")

        # Test fallback behavior
        print("\nπ Testing fallback behavior...")

        # Temporarily disable Mistral to test fallback
        original_api_key = os.environ.get('MISTRAL_API_KEY')
        os.environ['MISTRAL_API_KEY'] = ''
        try:
            fallback_result = await local_processor.process_document(
                document_bytes=document_bytes,
                user_id="test-user-fallback",
                filename="medical_record.png",
            )
            print("✅ Fallback processing successful")
            print(f"π Fallback mode: {fallback_result['processing_mode']}")
            return fallback_result
        except Exception as fallback_error:
            print(f"❌ Fallback also failed: {fallback_error}")
            # Surface the original failure, not the fallback's.
            raise e
        finally:
            # Put the environment back exactly as it was, whether the
            # fallback succeeded or not, so the blanked key never leaks
            # into later code in this process.
            if original_api_key is None:
                os.environ.pop('MISTRAL_API_KEY', None)
            else:
                os.environ['MISTRAL_API_KEY'] = original_api_key
async def test_with_simulated_medical_text():
    """Test with simulated OCR output for demonstration.

    Passes the text from ``create_mock_medical_pdf_text`` to
    ``local_processor._extract_medical_entities``, builds a bundle from the
    entities with ``local_processor._create_simple_fhir_bundle`` and prints
    both results. Nothing is returned.
    """
    separator = "=" * 55
    print("\n" + separator)
    print("π§ͺ SIMULATION: Testing with realistic medical text")
    print(separator)

    # Simulate what Mistral OCR would extract
    simulated_text = create_mock_medical_pdf_text()
    print("π Simulated OCR Text:")
    print(f" {simulated_text[:200]}...")

    # Process with the local processor's entity extraction
    entities = local_processor._extract_medical_entities(simulated_text)
    print("\nπ₯ Extracted Medical Entities:")
    for entity in entities:
        print(f" β’ {entity['type']}: {entity['text']} ({entity['confidence']:.0%})")

    # Create FHIR bundle
    fhir_bundle = local_processor._create_simple_fhir_bundle(entities, "simulated-user")
    print("\nπ FHIR Bundle Created:")
    print(f" Resource Type: {fhir_bundle['resourceType']}")
    print(f" Entries: {len(fhir_bundle['entry'])}")
    print(f" Processing Mode: {fhir_bundle['_metadata']['processing_mode']}")
async def main():
    """Main test execution.

    Runs the Mistral OCR integration test, then the simulated-text test,
    and logs a workflow summary when Langfuse monitoring is active.

    Returns:
        0 when both tests finish without raising, 1 otherwise.
    """
    try:
        # Test 1: Real Mistral OCR Integration
        started = time.perf_counter()
        await test_mistral_ocr_integration()
        # Measured here so the workflow summary reports the real duration
        # of the single processed document, not a guessed constant.
        ocr_elapsed = time.perf_counter() - started

        # Test 2: Simulation with realistic medical text
        await test_with_simulated_medical_text()

        print("\nπ Mistral OCR integration test completed successfully!")

        # Log final workflow summary
        if monitor.langfuse:
            monitor.log_workflow_summary(
                documents_processed=1,
                successful_documents=1,
                total_time=ocr_elapsed,
                # One document processed, so the average equals the total.
                average_time=ocr_elapsed,
                monitoring_active=True,
            )

        return 0
    except Exception as e:
        # Top-level boundary: report the failure and turn it into a
        # non-zero exit code.
        print(f"\nπ₯ Test failed: {e}")
        return 1
if __name__ == "__main__":
    # Run the async entry point and use its return value (0 on success,
    # 1 on failure) as the process exit status.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)