fhirflame / tests /test_mistral_ocr.py
leksval
initial commit
a963d65
#!/usr/bin/env python3
"""
πŸ” FhirFlame Mistral OCR API Integration Test
Testing real Mistral Pixtral-12B OCR with medical document processing
"""
import asyncio
import os
import sys
import base64
import time
from datetime import datetime
# Add src to path (from tests directory)
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from src.file_processor import local_processor
from src.monitoring import monitor
def create_mock_medical_image() -> bytes:
"""Create a mock medical document image (PNG format)"""
# This is a minimal PNG header for a 1x1 pixel transparent image
# In real scenarios, this would be actual medical document image bytes
png_header = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\rIDATx\xdac\x00\x01\x00\x00\x05\x00\x01\r\n-\xdb\x00\x00\x00\x00IEND\xaeB`\x82'
return png_header
def create_mock_medical_pdf_text() -> str:
"""Create realistic medical document text for simulation"""
return """
MEDICAL RECORD - CONFIDENTIAL
Patient: Sarah Johnson
DOB: 1985-07-20
MRN: MR456789
CHIEF COMPLAINT:
Follow-up visit for Type 2 Diabetes Mellitus
CURRENT MEDICATIONS:
- Metformin 1000mg twice daily
- Glipizide 5mg once daily
- Lisinopril 10mg once daily for hypertension
VITAL SIGNS:
- Blood Pressure: 130/85 mmHg
- Weight: 168 lbs
- BMI: 26.8
- Glucose: 145 mg/dL
ASSESSMENT:
Type 2 Diabetes - adequately controlled
Hypertension - stable
PLAN:
Continue current medications
Follow-up in 3 months
Annual eye exam recommended
"""
async def test_mistral_ocr_integration():
"""Test complete Mistral OCR integration with monitoring"""
print("πŸ” FhirFlame Mistral OCR API Integration Test")
print("=" * 55)
print(f"πŸ• Starting at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
# Check configuration
print(f"\nπŸ”§ Configuration:")
print(f" USE_MISTRAL_FALLBACK: {os.getenv('USE_MISTRAL_FALLBACK', 'false')}")
print(f" MISTRAL_API_KEY: {'βœ… Set' if os.getenv('MISTRAL_API_KEY') else '❌ Missing'}")
print(f" Langfuse Monitoring: {'βœ… Active' if monitor.langfuse else '❌ Disabled'}")
# Create test medical document image
print(f"\nπŸ“„ Creating test medical document...")
document_bytes = create_mock_medical_image()
print(f" Document size: {len(document_bytes)} bytes")
print(f" Format: PNG medical document simulation")
# Test Mistral OCR processing
try:
print(f"\nπŸš€ Testing Mistral Pixtral-12B OCR...")
start_time = time.time()
# Process document with Mistral OCR
result = await local_processor.process_document(
document_bytes=document_bytes,
user_id="test-user-mistral",
filename="medical_record.png"
)
processing_time = time.time() - start_time
# Display results
print(f"βœ… Processing completed in {processing_time:.2f}s")
print(f"πŸ“Š Processing mode: {result['processing_mode']}")
print(f"🎯 Entities found: {result['entities_found']}")
# Show extracted text (first 300 chars)
extracted_text = result.get('extracted_text', '')
if extracted_text:
print(f"\nπŸ“ Extracted Text (preview):")
print(f" {extracted_text[:300]}{'...' if len(extracted_text) > 300 else ''}")
# Validate FHIR bundle
if 'fhir_bundle' in result:
from src.fhir_validator import FhirValidator
validator = FhirValidator()
print(f"\nπŸ“‹ Validating FHIR bundle...")
validation_result = validator.validate_fhir_bundle(result['fhir_bundle'])
print(f" FHIR R4 Valid: {validation_result['is_valid']}")
print(f" Compliance Score: {validation_result['compliance_score']:.1%}")
print(f" Resources: {', '.join(validation_result.get('detected_resources', []))}")
# Log monitoring summary
if monitor.langfuse:
print(f"\nπŸ” Monitoring Summary:")
print(f" Session ID: {monitor.session_id}")
print(f" Mistral API called: βœ…")
print(f" Langfuse events logged: βœ…")
return result
except Exception as e:
print(f"❌ Mistral OCR test failed: {e}")
# Test fallback behavior
print(f"\nπŸ”„ Testing fallback behavior...")
try:
# Temporarily disable Mistral to test fallback
original_api_key = os.environ.get('MISTRAL_API_KEY')
os.environ['MISTRAL_API_KEY'] = ''
fallback_result = await local_processor.process_document(
document_bytes=document_bytes,
user_id="test-user-fallback",
filename="medical_record.png"
)
print(f"βœ… Fallback processing successful")
print(f"πŸ“Š Fallback mode: {fallback_result['processing_mode']}")
# Restore API key
if original_api_key:
os.environ['MISTRAL_API_KEY'] = original_api_key
return fallback_result
except Exception as fallback_error:
print(f"❌ Fallback also failed: {fallback_error}")
raise e
async def test_with_simulated_medical_text():
"""Test with simulated OCR output for demonstration"""
print(f"\n" + "=" * 55)
print(f"πŸ§ͺ SIMULATION: Testing with realistic medical text")
print(f"=" * 55)
# Simulate what Mistral OCR would extract
simulated_text = create_mock_medical_pdf_text()
print(f"πŸ“ Simulated OCR Text:")
print(f" {simulated_text[:200]}...")
# Process with the local processor's entity extraction
entities = local_processor._extract_medical_entities(simulated_text)
print(f"\nπŸ₯ Extracted Medical Entities:")
for entity in entities:
print(f" β€’ {entity['type']}: {entity['text']} ({entity['confidence']:.0%})")
# Create FHIR bundle
fhir_bundle = local_processor._create_simple_fhir_bundle(entities, "simulated-user")
print(f"\nπŸ“‹ FHIR Bundle Created:")
print(f" Resource Type: {fhir_bundle['resourceType']}")
print(f" Entries: {len(fhir_bundle['entry'])}")
print(f" Processing Mode: {fhir_bundle['_metadata']['processing_mode']}")
async def main():
"""Main test execution"""
try:
# Test 1: Real Mistral OCR Integration
result = await test_mistral_ocr_integration()
# Test 2: Simulation with realistic medical text
await test_with_simulated_medical_text()
print(f"\nπŸŽ‰ Mistral OCR integration test completed successfully!")
# Log final workflow summary
if monitor.langfuse:
monitor.log_workflow_summary(
documents_processed=1,
successful_documents=1,
total_time=10.0, # Approximate
average_time=10.0,
monitoring_active=True
)
return 0
except Exception as e:
print(f"\nπŸ’₯ Test failed: {e}")
return 1
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)