#!/usr/bin/env python3 """ Quick Test: Modal Scaling Implementation Test the key components of our 3-prompt implementation """ import asyncio import os import sys import time def test_environment_config(): """Test 1: Environment configuration""" print("šŸ” Test 1: Environment Configuration") # Test cost configuration loading a100_rate = float(os.getenv("MODAL_A100_HOURLY_RATE", "1.32")) t4_rate = float(os.getenv("MODAL_T4_HOURLY_RATE", "0.51")) platform_fee = float(os.getenv("MODAL_PLATFORM_FEE", "15")) print(f"āœ… A100 Rate: ${a100_rate}/hour") print(f"āœ… T4 Rate: ${t4_rate}/hour") print(f"āœ… Platform Fee: {platform_fee}%") assert a100_rate > 0 and t4_rate > 0 and platform_fee > 0 return True def test_cost_calculation(): """Test 2: Real cost calculation""" print("\nšŸ” Test 2: Cost Calculation") try: from src.enhanced_codellama_processor import EnhancedCodeLlamaProcessor, InferenceProvider processor = EnhancedCodeLlamaProcessor() # Test different scenarios test_cases = [ ("Short text", "Patient has diabetes", 0.5, "T4"), ("Long text", "Patient has diabetes. " * 100, 1.2, "A100"), ("Ollama local", "Test text", 0.8, None) ] for name, text, proc_time, gpu_type in test_cases: # Test Modal cost modal_cost = processor._calculate_cost( InferenceProvider.MODAL, len(text), proc_time, gpu_type ) # Test Ollama cost ollama_cost = processor._calculate_cost( InferenceProvider.OLLAMA, len(text) ) # Test HuggingFace cost hf_cost = processor._calculate_cost( InferenceProvider.HUGGINGFACE, len(text) ) print(f" {name}:") print(f" Modal ({gpu_type}): ${modal_cost:.6f}") print(f" Ollama: ${ollama_cost:.6f}") print(f" HuggingFace: ${hf_cost:.6f}") return True except Exception as e: print(f"āŒ Cost calculation test failed: {e}") return False async def test_modal_integration(): """Test 3: Modal integration""" print("\nšŸ” Test 3: Modal Integration") try: from src.enhanced_codellama_processor import EnhancedCodeLlamaProcessor processor = EnhancedCodeLlamaProcessor() # Test with simulation (since Modal endpoint may not be deployed) test_text = """ Patient John Doe, 45 years old, presents with chest pain. Diagnosed with acute myocardial infarction. Treatment: Aspirin 325mg, Metoprolol 25mg BID. """ result = await processor._call_modal_api( text=test_text, document_type="clinical_note", extract_entities=True, generate_fhir=False ) print("āœ… Modal API call completed") # Check result structure if "scaling_metadata" in result: scaling = result["scaling_metadata"] print(f"āœ… Provider: {scaling.get('provider', 'unknown')}") print(f"āœ… Cost: ${scaling.get('cost_estimate', 0):.6f}") print(f"āœ… Container: {scaling.get('container_id', 'N/A')}") return True except Exception as e: print(f"āŒ Modal integration test failed: {e}") return False def test_modal_deployment(): """Test 4: Modal deployment file""" print("\nšŸ” Test 4: Modal Deployment") try: import sys import os sys.path.append(os.path.join(os.path.dirname(__file__), '..')) from modal.functions import calculate_real_modal_cost # Test cost calculation function for L4 (RTX 4090 equivalent) cost_l4 = calculate_real_modal_cost(1.0, "L4") cost_cpu = calculate_real_modal_cost(1.0, "CPU") print(f"āœ… L4 GPU 1s cost: ${cost_l4:.6f}") print(f"āœ… CPU 1s cost: ${cost_cpu:.6f}") # Verify L4 is more expensive than CPU if cost_l4 > cost_cpu: print("āœ… Cost hierarchy correct (L4 > CPU)") return True else: print("āš ļø Cost hierarchy issue") return False except Exception as e: print(f"āŒ Modal deployment test failed: {e}") return False async def test_end_to_end(): """Test 5: End-to-end scaling demo""" print("\nšŸ” Test 5: End-to-End Demo") try: from src.enhanced_codellama_processor import EnhancedCodeLlamaProcessor processor = EnhancedCodeLlamaProcessor() # Test auto-selection logic short_text = "Patient has hypertension" long_text = "Patient John Doe presents with chest pain. " * 30 # Test provider selection short_provider = processor.router.select_optimal_provider(short_text) long_provider = processor.router.select_optimal_provider(long_text) print(f"āœ… Short text → {short_provider.value}") print(f"āœ… Long text → {long_provider.value}") # Test processing with cost calculation result = await processor.process_document( medical_text=long_text, document_type="clinical_note", extract_entities=True, generate_fhir=False, complexity="medium" ) if result and "provider_metadata" in result: meta = result["provider_metadata"] print(f"āœ… Processed with: {meta.get('provider_used', 'unknown')}") print(f"āœ… Cost estimate: ${meta.get('cost_estimate', 0):.6f}") print(f"āœ… Processing time: {meta.get('processing_time', 0):.2f}s") return True except Exception as e: print(f"āŒ End-to-end test failed: {e}") return False async def main(): """Run focused tests""" print("šŸš€ Testing Modal Scaling Implementation") print("=" * 50) tests = [ ("Environment Config", test_environment_config), ("Cost Calculation", test_cost_calculation), ("Modal Integration", test_modal_integration), ("Modal Deployment", test_modal_deployment), ("End-to-End Demo", test_end_to_end) ] results = {} for test_name, test_func in tests: try: if asyncio.iscoroutinefunction(test_func): result = await test_func() else: result = test_func() results[test_name] = result except Exception as e: print(f"āŒ Test {test_name} crashed: {e}") results[test_name] = False # Summary print("\n" + "=" * 50) print("šŸ“Š Test Results") print("=" * 50) passed = sum(1 for r in results.values() if r) total = len(results) for test_name, result in results.items(): status = "āœ… PASS" if result else "āŒ FAIL" print(f"{test_name}: {status}") print(f"\nOverall: {passed}/{total} tests passed") if passed == total: print("šŸŽ‰ Modal scaling implementation is working!") print("\nšŸ“‹ Next Steps:") print("1. Set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in .env") print("2. Deploy: modal deploy modal_deployment.py") print("3. Set MODAL_ENDPOINT_URL in .env") print("4. Test Dynamic Scaling tab in Gradio UI") else: print("āš ļø Some tests failed. Check the details above.") return passed == total if __name__ == "__main__": asyncio.run(main())