#!/usr/bin/env python3 """ ๐Ÿ” FhirFlame Mistral OCR API Integration Test Testing real Mistral Pixtral-12B OCR with medical document processing """ import asyncio import os import sys import base64 import time from datetime import datetime # Add src to path (from tests directory) sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) from src.file_processor import local_processor from src.monitoring import monitor def create_mock_medical_image() -> bytes: """Create a mock medical document image (PNG format)""" # This is a minimal PNG header for a 1x1 pixel transparent image # In real scenarios, this would be actual medical document image bytes png_header = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\rIDATx\xdac\x00\x01\x00\x00\x05\x00\x01\r\n-\xdb\x00\x00\x00\x00IEND\xaeB`\x82' return png_header def create_mock_medical_pdf_text() -> str: """Create realistic medical document text for simulation""" return """ MEDICAL RECORD - CONFIDENTIAL Patient: Sarah Johnson DOB: 1985-07-20 MRN: MR456789 CHIEF COMPLAINT: Follow-up visit for Type 2 Diabetes Mellitus CURRENT MEDICATIONS: - Metformin 1000mg twice daily - Glipizide 5mg once daily - Lisinopril 10mg once daily for hypertension VITAL SIGNS: - Blood Pressure: 130/85 mmHg - Weight: 168 lbs - BMI: 26.8 - Glucose: 145 mg/dL ASSESSMENT: Type 2 Diabetes - adequately controlled Hypertension - stable PLAN: Continue current medications Follow-up in 3 months Annual eye exam recommended """ async def test_mistral_ocr_integration(): """Test complete Mistral OCR integration with monitoring""" print("๐Ÿ” FhirFlame Mistral OCR API Integration Test") print("=" * 55) print(f"๐Ÿ• Starting at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") # Check configuration print(f"\n๐Ÿ”ง Configuration:") print(f" USE_MISTRAL_FALLBACK: {os.getenv('USE_MISTRAL_FALLBACK', 'false')}") print(f" MISTRAL_API_KEY: {'โœ… Set' if os.getenv('MISTRAL_API_KEY') else 'โŒ Missing'}") print(f" Langfuse Monitoring: {'โœ… Active' if monitor.langfuse else 'โŒ Disabled'}") # Create test medical document image print(f"\n๐Ÿ“„ Creating test medical document...") document_bytes = create_mock_medical_image() print(f" Document size: {len(document_bytes)} bytes") print(f" Format: PNG medical document simulation") # Test Mistral OCR processing try: print(f"\n๐Ÿš€ Testing Mistral Pixtral-12B OCR...") start_time = time.time() # Process document with Mistral OCR result = await local_processor.process_document( document_bytes=document_bytes, user_id="test-user-mistral", filename="medical_record.png" ) processing_time = time.time() - start_time # Display results print(f"โœ… Processing completed in {processing_time:.2f}s") print(f"๐Ÿ“Š Processing mode: {result['processing_mode']}") print(f"๐ŸŽฏ Entities found: {result['entities_found']}") # Show extracted text (first 300 chars) extracted_text = result.get('extracted_text', '') if extracted_text: print(f"\n๐Ÿ“ Extracted Text (preview):") print(f" {extracted_text[:300]}{'...' if len(extracted_text) > 300 else ''}") # Validate FHIR bundle if 'fhir_bundle' in result: from src.fhir_validator import FhirValidator validator = FhirValidator() print(f"\n๐Ÿ“‹ Validating FHIR bundle...") validation_result = validator.validate_fhir_bundle(result['fhir_bundle']) print(f" FHIR R4 Valid: {validation_result['is_valid']}") print(f" Compliance Score: {validation_result['compliance_score']:.1%}") print(f" Resources: {', '.join(validation_result.get('detected_resources', []))}") # Log monitoring summary if monitor.langfuse: print(f"\n๐Ÿ” Monitoring Summary:") print(f" Session ID: {monitor.session_id}") print(f" Mistral API called: โœ…") print(f" Langfuse events logged: โœ…") return result except Exception as e: print(f"โŒ Mistral OCR test failed: {e}") # Test fallback behavior print(f"\n๐Ÿ”„ Testing fallback behavior...") try: # Temporarily disable Mistral to test fallback original_api_key = os.environ.get('MISTRAL_API_KEY') os.environ['MISTRAL_API_KEY'] = '' fallback_result = await local_processor.process_document( document_bytes=document_bytes, user_id="test-user-fallback", filename="medical_record.png" ) print(f"โœ… Fallback processing successful") print(f"๐Ÿ“Š Fallback mode: {fallback_result['processing_mode']}") # Restore API key if original_api_key: os.environ['MISTRAL_API_KEY'] = original_api_key return fallback_result except Exception as fallback_error: print(f"โŒ Fallback also failed: {fallback_error}") raise e async def test_with_simulated_medical_text(): """Test with simulated OCR output for demonstration""" print(f"\n" + "=" * 55) print(f"๐Ÿงช SIMULATION: Testing with realistic medical text") print(f"=" * 55) # Simulate what Mistral OCR would extract simulated_text = create_mock_medical_pdf_text() print(f"๐Ÿ“ Simulated OCR Text:") print(f" {simulated_text[:200]}...") # Process with the local processor's entity extraction entities = local_processor._extract_medical_entities(simulated_text) print(f"\n๐Ÿฅ Extracted Medical Entities:") for entity in entities: print(f" โ€ข {entity['type']}: {entity['text']} ({entity['confidence']:.0%})") # Create FHIR bundle fhir_bundle = local_processor._create_simple_fhir_bundle(entities, "simulated-user") print(f"\n๐Ÿ“‹ FHIR Bundle Created:") print(f" Resource Type: {fhir_bundle['resourceType']}") print(f" Entries: {len(fhir_bundle['entry'])}") print(f" Processing Mode: {fhir_bundle['_metadata']['processing_mode']}") async def main(): """Main test execution""" try: # Test 1: Real Mistral OCR Integration result = await test_mistral_ocr_integration() # Test 2: Simulation with realistic medical text await test_with_simulated_medical_text() print(f"\n๐ŸŽ‰ Mistral OCR integration test completed successfully!") # Log final workflow summary if monitor.langfuse: monitor.log_workflow_summary( documents_processed=1, successful_documents=1, total_time=10.0, # Approximate average_time=10.0, monitoring_active=True ) return 0 except Exception as e: print(f"\n๐Ÿ’ฅ Test failed: {e}") return 1 if __name__ == "__main__": exit_code = asyncio.run(main()) sys.exit(exit_code)