"""
Quick Test: Modal Scaling Implementation

Test the key components of our 3-prompt implementation.
"""

import asyncio
import os
import sys

def test_environment_config():
    """Test 1: Environment configuration"""
    print("Test 1: Environment Configuration")

    # Pricing inputs come from the environment, with defaults used when unset.
    a100_rate = float(os.getenv("MODAL_A100_HOURLY_RATE", "1.32"))
    t4_rate = float(os.getenv("MODAL_T4_HOURLY_RATE", "0.51"))
    platform_fee = float(os.getenv("MODAL_PLATFORM_FEE", "15"))

    print(f"✅ A100 Rate: ${a100_rate}/hour")
    print(f"✅ T4 Rate: ${t4_rate}/hour")
    print(f"✅ Platform Fee: {platform_fee}%")

    assert a100_rate > 0 and t4_rate > 0 and platform_fee > 0
    return True

def test_cost_calculation():
    """Test 2: Real cost calculation"""
    print("\nTest 2: Cost Calculation")

    try:
        from src.enhanced_codellama_processor import EnhancedCodeLlamaProcessor, InferenceProvider

        processor = EnhancedCodeLlamaProcessor()

        # Each case: (label, text, processing time in seconds, GPU type).
        test_cases = [
            ("Short text", "Patient has diabetes", 0.5, "T4"),
            ("Long text", "Patient has diabetes. " * 100, 1.2, "A100"),
            ("Ollama local", "Test text", 0.8, None)
        ]

        for name, text, proc_time, gpu_type in test_cases:
            modal_cost = processor._calculate_cost(
                InferenceProvider.MODAL, len(text), proc_time, gpu_type
            )
            ollama_cost = processor._calculate_cost(
                InferenceProvider.OLLAMA, len(text)
            )
            hf_cost = processor._calculate_cost(
                InferenceProvider.HUGGINGFACE, len(text)
            )

            print(f"  {name}:")
            print(f"    Modal ({gpu_type}): ${modal_cost:.6f}")
            print(f"    Ollama: ${ollama_cost:.6f}")
            print(f"    HuggingFace: ${hf_cost:.6f}")

        return True

    except Exception as e:
        print(f"❌ Cost calculation test failed: {e}")
        return False

async def test_modal_integration():
    """Test 3: Modal integration"""
    print("\nTest 3: Modal Integration")

    try:
        from src.enhanced_codellama_processor import EnhancedCodeLlamaProcessor

        processor = EnhancedCodeLlamaProcessor()

        test_text = """
        Patient John Doe, 45 years old, presents with chest pain.
        Diagnosed with acute myocardial infarction.
        Treatment: Aspirin 325mg, Metoprolol 25mg BID.
        """

        result = await processor._call_modal_api(
            text=test_text,
            document_type="clinical_note",
            extract_entities=True,
            generate_fhir=False
        )

        print("✅ Modal API call completed")

        if "scaling_metadata" in result:
            scaling = result["scaling_metadata"]
            print(f"✅ Provider: {scaling.get('provider', 'unknown')}")
            print(f"✅ Cost: ${scaling.get('cost_estimate', 0):.6f}")
            print(f"✅ Container: {scaling.get('container_id', 'N/A')}")

        return True

    except Exception as e:
        print(f"❌ Modal integration test failed: {e}")
        return False

def test_modal_deployment():
    """Test 4: Modal deployment file"""
    print("\nTest 4: Modal Deployment")

    try:
        # Make the project root importable so the deployment helpers resolve.
        sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
        from modal.functions import calculate_real_modal_cost

        cost_l4 = calculate_real_modal_cost(1.0, "L4")
        cost_cpu = calculate_real_modal_cost(1.0, "CPU")

        print(f"✅ L4 GPU 1s cost: ${cost_l4:.6f}")
        print(f"✅ CPU 1s cost: ${cost_cpu:.6f}")

        if cost_l4 > cost_cpu:
            print("✅ Cost hierarchy correct (L4 > CPU)")
            return True
        else:
            print("⚠️ Cost hierarchy issue")
            return False

    except Exception as e:
        print(f"❌ Modal deployment test failed: {e}")
        return False

async def test_end_to_end():
    """Test 5: End-to-end scaling demo"""
    print("\nTest 5: End-to-End Demo")

    try:
        from src.enhanced_codellama_processor import EnhancedCodeLlamaProcessor

        processor = EnhancedCodeLlamaProcessor()

        short_text = "Patient has hypertension"
        long_text = "Patient John Doe presents with chest pain. " * 30

        # Ask the router which provider it would pick for each document size.
        short_provider = processor.router.select_optimal_provider(short_text)
        long_provider = processor.router.select_optimal_provider(long_text)

        print(f"✅ Short text → {short_provider.value}")
        print(f"✅ Long text → {long_provider.value}")

        result = await processor.process_document(
            medical_text=long_text,
            document_type="clinical_note",
            extract_entities=True,
            generate_fhir=False,
            complexity="medium"
        )

        if result and "provider_metadata" in result:
            meta = result["provider_metadata"]
            print(f"✅ Processed with: {meta.get('provider_used', 'unknown')}")
            print(f"✅ Cost estimate: ${meta.get('cost_estimate', 0):.6f}")
            print(f"✅ Processing time: {meta.get('processing_time', 0):.2f}s")

        return True

    except Exception as e:
        print(f"❌ End-to-end test failed: {e}")
        return False

async def main():
    """Run focused tests"""
    print("Testing Modal Scaling Implementation")
    print("=" * 50)

    tests = [
        ("Environment Config", test_environment_config),
        ("Cost Calculation", test_cost_calculation),
        ("Modal Integration", test_modal_integration),
        ("Modal Deployment", test_modal_deployment),
        ("End-to-End Demo", test_end_to_end)
    ]

    results = {}

    for test_name, test_func in tests:
        try:
            if asyncio.iscoroutinefunction(test_func):
                result = await test_func()
            else:
                result = test_func()
            results[test_name] = result
        except Exception as e:
            print(f"❌ Test {test_name} crashed: {e}")
            results[test_name] = False

    print("\n" + "=" * 50)
    print("Test Results")
    print("=" * 50)

    passed = sum(1 for r in results.values() if r)
    total = len(results)

    for test_name, result in results.items():
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{test_name}: {status}")

    print(f"\nOverall: {passed}/{total} tests passed")

    if passed == total:
        print("Modal scaling implementation is working!")
        print("\nNext Steps:")
        print("1. Set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in .env")
        print("2. Deploy: modal deploy modal_deployment.py")
        print("3. Set MODAL_ENDPOINT_URL in .env")
        print("4. Test Dynamic Scaling tab in Gradio UI")
    else:
        print("⚠️ Some tests failed. Check the details above.")

    return passed == total

if __name__ == "__main__":
    # Exit nonzero when any test fails so callers can detect the failure.
    success = asyncio.run(main())
    sys.exit(0 if success else 1)