"""
FhirFlame Mistral OCR API Integration Test

Testing real Mistral Pixtral-12B OCR with medical document processing
"""
|
|
|
import asyncio
import base64
import os
import struct
import sys
import time
import zlib
from datetime import datetime

# Put the sibling ``../src`` directory at the front of sys.path before the
# project imports below.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))

from src.file_processor import local_processor
from src.monitoring import monitor
|
|
|
def create_mock_medical_image() -> bytes:
    """Create a mock medical document image (PNG format).

    Builds a minimal PNG holding a single fully transparent RGBA pixel.
    Chunk lengths and CRC-32 checksums are computed instead of hard-coded,
    so every chunk is internally consistent (the previous literal declared
    a 13-byte IDAT payload but only carried 10 bytes).

    Returns:
        The encoded PNG file contents.
    """

    def _chunk(chunk_type: bytes, payload: bytes) -> bytes:
        # PNG chunk layout: 4-byte big-endian payload length, 4-byte type,
        # payload, then a CRC-32 computed over type + payload.
        checksum = zlib.crc32(chunk_type + payload) & 0xFFFFFFFF
        return (
            struct.pack(">I", len(payload))
            + chunk_type
            + payload
            + struct.pack(">I", checksum)
        )

    # IHDR: width=1, height=1, bit depth 8, colour type 6 (RGBA),
    # compression 0, filter 0, interlace 0.
    header = struct.pack(">IIBBBBB", 1, 1, 8, 6, 0, 0, 0)
    # One scanline: filter-type byte 0 followed by one transparent RGBA pixel.
    scanline = b"\x00" + b"\x00\x00\x00\x00"

    return b"".join((
        b"\x89PNG\r\n\x1a\n",
        _chunk(b"IHDR", header),
        _chunk(b"IDAT", zlib.compress(scanline)),
        _chunk(b"IEND", b""),
    ))
|
|
|
def create_mock_medical_pdf_text() -> str:
    """Return canned medical-record text used to simulate OCR output.

    The text starts and ends with a newline and contains a patient header
    followed by complaint, medication, vital-sign, assessment and plan
    sections separated by blank lines.
    """
    record_lines = (
        "",
        "MEDICAL RECORD - CONFIDENTIAL",
        "Patient: Sarah Johnson",
        "DOB: 1985-07-20",
        "MRN: MR456789",
        "",
        "CHIEF COMPLAINT:",
        "Follow-up visit for Type 2 Diabetes Mellitus",
        "",
        "CURRENT MEDICATIONS:",
        "- Metformin 1000mg twice daily",
        "- Glipizide 5mg once daily",
        "- Lisinopril 10mg once daily for hypertension",
        "",
        "VITAL SIGNS:",
        "- Blood Pressure: 130/85 mmHg",
        "- Weight: 168 lbs",
        "- BMI: 26.8",
        "- Glucose: 145 mg/dL",
        "",
        "ASSESSMENT:",
        "Type 2 Diabetes - adequately controlled",
        "Hypertension - stable",
        "",
        "PLAN:",
        "Continue current medications",
        "Follow-up in 3 months",
        "Annual eye exam recommended",
        "",
    )
    return "\n".join(record_lines)
|
|
|
async def test_mistral_ocr_integration():
    """Test complete Mistral OCR integration with monitoring.

    Prints the active configuration, sends a mock PNG through
    ``local_processor.process_document`` and reports the outcome (processing
    mode, entity count, text preview, FHIR validation, monitoring summary).
    If anything in that attempt raises, the document is processed a second
    time with ``MISTRAL_API_KEY`` blanked to exercise the fallback path.

    Returns:
        The result of the primary attempt, or the result of the fallback
        attempt when the primary attempt failed.

    Raises:
        Exception: The primary attempt's error, re-raised when the fallback
            attempt fails as well.
    """
    # NOTE(review): the "π…" prefixes in the messages below are mis-decoded
    # emoji. They are left untouched because the original glyphs cannot be
    # recovered from this file; only the success/failure ticks, whose
    # corruption split string literals across lines, have been restored.
    print("π FhirFlame Mistral OCR API Integration Test")
    print("=" * 55)
    print(f"π Starting at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Report the configuration the run will use.
    print("\nπ§ Configuration:")
    print(f" USE_MISTRAL_FALLBACK: {os.getenv('USE_MISTRAL_FALLBACK', 'false')}")
    print(f" MISTRAL_API_KEY: {'✅ Set' if os.getenv('MISTRAL_API_KEY') else '❌ Missing'}")
    print(f" Langfuse Monitoring: {'✅ Active' if monitor.langfuse else '❌ Disabled'}")

    # Build the test document.
    print("\nπ Creating test medical document...")
    document_bytes = create_mock_medical_image()
    print(f" Document size: {len(document_bytes)} bytes")
    print(" Format: PNG medical document simulation")

    try:
        print("\nπ Testing Mistral Pixtral-12B OCR...")
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # if the wall clock is adjusted mid-run.
        start_time = time.perf_counter()

        result = await local_processor.process_document(
            document_bytes=document_bytes,
            user_id="test-user-mistral",
            filename="medical_record.png",
        )

        processing_time = time.perf_counter() - start_time

        print(f"✅ Processing completed in {processing_time:.2f}s")
        print(f"π Processing mode: {result['processing_mode']}")
        print(f"π― Entities found: {result['entities_found']}")

        # Show at most the first 300 characters of the extracted text.
        extracted_text = result.get('extracted_text', '')
        if extracted_text:
            print("\nπ Extracted Text (preview):")
            print(f" {extracted_text[:300]}{'...' if len(extracted_text) > 300 else ''}")

        # Validate the FHIR bundle when the processor produced one.
        if 'fhir_bundle' in result:
            from src.fhir_validator import FhirValidator

            validator = FhirValidator()

            print("\nπ Validating FHIR bundle...")
            validation_result = validator.validate_fhir_bundle(result['fhir_bundle'])
            print(f" FHIR R4 Valid: {validation_result['is_valid']}")
            print(f" Compliance Score: {validation_result['compliance_score']:.1%}")
            print(f" Resources: {', '.join(validation_result.get('detected_resources', []))}")

        if monitor.langfuse:
            print("\nπ Monitoring Summary:")
            print(f" Session ID: {monitor.session_id}")
            print(" Mistral API called: ✅")
            print(" Langfuse events logged: ✅")

        return result

    except Exception as e:
        print(f"❌ Mistral OCR test failed: {e}")

        print("\nπ Testing fallback behavior...")
        # Blank the API key for the duration of the fallback attempt only.
        original_api_key = os.environ.get('MISTRAL_API_KEY')
        os.environ['MISTRAL_API_KEY'] = ''
        try:
            fallback_result = await local_processor.process_document(
                document_bytes=document_bytes,
                user_id="test-user-fallback",
                filename="medical_record.png",
            )

            print("✅ Fallback processing successful")
            print(f"π Fallback mode: {fallback_result['processing_mode']}")

            return fallback_result

        except Exception as fallback_error:
            print(f"❌ Fallback also failed: {fallback_error}")
            # Surface the primary failure; the fallback error stays attached
            # as the implicit exception context.
            raise e

        finally:
            # Put the environment back exactly as it was, on success and on
            # failure alike, including the "variable was not set" case.
            if original_api_key is None:
                os.environ.pop('MISTRAL_API_KEY', None)
            else:
                os.environ['MISTRAL_API_KEY'] = original_api_key
|
|
|
async def test_with_simulated_medical_text():
    """Demonstrate entity extraction and FHIR bundling on canned OCR text.

    Feeds the mock medical text to the processor's entity extractor, prints
    each extracted entity, then builds a FHIR bundle from those entities and
    prints its resource type, entry count and processing mode.
    """
    banner = "=" * 55

    print("\n" + banner)
    print("π§ͺ SIMULATION: Testing with realistic medical text")
    print(banner)

    # Canned text stands in for what an OCR pass would have returned.
    ocr_text = create_mock_medical_pdf_text()

    print("π Simulated OCR Text:")
    preview = ocr_text[:200]
    print(f" {preview}...")

    # Run the processor's entity extraction over the text.
    found_entities = local_processor._extract_medical_entities(ocr_text)

    print("\nπ₯ Extracted Medical Entities:")
    for item in found_entities:
        kind = item['type']
        value = item['text']
        score = item['confidence']
        print(f" β’ {kind}: {value} ({score:.0%})")

    # Turn the entities into a FHIR bundle and summarise it.
    bundle = local_processor._create_simple_fhir_bundle(found_entities, "simulated-user")

    print("\nπ FHIR Bundle Created:")
    print(f" Resource Type: {bundle['resourceType']}")
    entry_count = len(bundle['entry'])
    print(f" Entries: {entry_count}")
    print(f" Processing Mode: {bundle['_metadata']['processing_mode']}")
|
|
|
async def main():
    """Main test execution.

    Runs the Mistral OCR integration test followed by the simulated-text
    demonstration. When Langfuse monitoring is active, a workflow summary is
    logged with the measured wall time of the run (previously a fixed 10.0
    was reported regardless of how long the run took).

    Returns:
        0 when every step completes, 1 when any step raises.
    """
    run_started = time.perf_counter()

    try:
        # The OCR result itself is not needed here; each step prints its
        # own report.
        await test_mistral_ocr_integration()
        await test_with_simulated_medical_text()

        print("\nπ Mistral OCR integration test completed successfully!")

        if monitor.langfuse:
            elapsed = time.perf_counter() - run_started
            monitor.log_workflow_summary(
                documents_processed=1,
                successful_documents=1,
                total_time=elapsed,
                # One document was processed, so average equals total.
                average_time=elapsed,
                monitoring_active=True,
            )

        return 0

    except Exception as e:
        print(f"\nπ₯ Test failed: {e}")
        return 1
|
|
|
if __name__ == "__main__":
    # Run the async entry point and use its return value as the exit status.
    sys.exit(asyncio.run(main()))