ndc8 committed on
Commit
4ecf54e
·
1 Parent(s): 375ade4
Files changed (2) hide show
  1. monitor_service.py +80 -0
  2. test_mistral_service.py +144 -0
monitor_service.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Monitor the Mistral Nemo service startup and run tests when ready.
4
+ """
5
+ import time
6
+ import requests
7
+ import json
8
+ import sys
9
+
10
def check_service_health(base_url="http://localhost:8001"):
    """Report whether the service's health endpoint says it is ready.

    Sends ``GET {base_url}/health`` with a 5-second timeout. The service
    counts as ready only when the response is HTTP 200 and its JSON body is
    an object whose ``"status"`` field equals ``"healthy"``.

    Args:
        base_url: Root URL of the service. Defaults to the address this
            script has always polled.

    Returns:
        True when the service reports itself healthy. False for any request
        failure, non-200 status, or unexpected response body.
    """
    try:
        response = requests.get(f"{base_url}/health", timeout=5)
    except requests.exceptions.RequestException:
        # A failed request simply means "not ready yet" for the caller's
        # polling loop; it is not reported as an error.
        return False

    if response.status_code != 200:
        return False

    try:
        data = response.json()
    except ValueError:
        # A 200 with a non-JSON body must not raise out of the polling loop.
        # ValueError covers the decode errors raised by Response.json().
        return False

    # Guard against a JSON array/string/number body, which has no .get().
    return isinstance(data, dict) and data.get("status") == "healthy"
20
+
21
def test_chat_completion():
    """Send one small chat request and report whether a reply came back.

    Posts a single user message to ``/v1/chat/completions`` on the local
    service with a 30-second timeout and prints the outcome. Request
    failures, non-200 statuses and malformed response bodies are all
    reported as a failed test rather than raised.

    Returns:
        True when the service answers HTTP 200 with a JSON body containing
        ``choices[0]["message"]["content"]``; False otherwise.
    """
    try:
        response = requests.post(
            "http://localhost:8001/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json={
                "model": "unsloth/Mistral-Nemo-Instruct-2407",
                "messages": [
                    {"role": "user", "content": "Hello! Please say 'Service is working correctly' if you can read this."}
                ],
                "max_tokens": 50,
                "temperature": 0.7
            },
            timeout=30
        )
    except requests.exceptions.RequestException as e:
        print(f"❌ Chat completion error: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ Chat completion failed: {response.status_code} - {response.text}")
        return False

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 whose body is not JSON, or lacks the expected completion
        # structure, is a failed test -- it must not crash the monitor.
        print(f"❌ Chat completion error: unexpected response body ({e!r})")
        return False

    print(f"βœ… Chat completion successful: {content}")
    return True
49
+
50
def monitor_service(max_checks=300, interval=5):
    """Poll the service until it is healthy, then run the chat test once.

    Calls ``check_service_health()`` up to ``max_checks`` times, sleeping
    ``interval`` seconds after each unsuccessful check and redrawing a
    one-line progress indicator. As soon as a health check succeeds,
    ``test_chat_completion()`` is run and its result decides the outcome.

    Args:
        max_checks: Maximum number of health checks before giving up.
        interval: Seconds to sleep between checks. With the defaults the
            sleeps alone add up to 25 minutes; time spent inside each
            health request comes on top of that.

    Returns:
        True when the service became healthy and the chat completion test
        passed; False when the test failed or the service never became
        healthy within ``max_checks`` checks.
    """
    print("πŸ” Monitoring Mistral Nemo service startup...")
    print("πŸ“₯ Waiting for model download and loading to complete...")

    for check_count in range(1, max_checks + 1):
        if check_service_health():
            print("\nπŸŽ‰ Service is healthy! Running tests...")

            # Test chat completion
            if test_chat_completion():
                print("\nβœ… All tests passed! Mistral Nemo service is fully operational.")
                return True
            print("\n⚠️ Service health check passed but chat completion failed.")
            return False

        dots = "." * (check_count % 4)
        # The line ends without a newline (it is redrawn in place via "\r"),
        # so it has to be flushed explicitly or it can sit in the stdout
        # buffer and never appear while waiting.
        print(
            f"\r⏳ Waiting for service to be ready{dots:<3} ({check_count}/{max_checks})",
            end="",
            flush=True,
        )
        time.sleep(interval)

    print(f"\n❌ Service didn't become ready after {max_checks * interval} seconds")
    return False
77
+
78
if __name__ == "__main__":
    # The process exit status mirrors the monitoring outcome:
    # 0 when monitor_service() reports success, 1 otherwise.
    exit_code = 0 if monitor_service() else 1
    sys.exit(exit_code)
test_mistral_service.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script for the Mistral Nemo Backend Service
4
+ """
5
+
6
+ import requests
7
+ import json
8
+ import time
9
+
10
# Service configuration: root URL of the service under test. The functions
# in this script build their endpoint URLs from it.
BASE_URL = "http://localhost:8001"
12
+
13
def test_health():
    """Probe ``/health`` and report whether it answered with HTTP 200.

    Prints the decoded JSON body on success, or the status code / error on
    failure. Returns True on HTTP 200, False otherwise (including when any
    exception is raised while requesting or decoding).
    """
    print("πŸ₯ Testing health endpoint...")
    try:
        reply = requests.get(f"{BASE_URL}/health", timeout=5)
        # Guard clause: anything other than 200 is a failed check.
        if reply.status_code != 200:
            print(f"❌ Health check failed: {reply.status_code}")
            return False
        print(f"βœ… Health check passed: {reply.json()}")
        return True
    except Exception as exc:
        print(f"❌ Health check error: {exc}")
        return False
27
+
28
def test_root():
    """Probe the root endpoint and report whether it answered with HTTP 200.

    Prints the decoded JSON body on success, or the status code / error on
    failure. Returns True on HTTP 200, False otherwise (including when any
    exception is raised while requesting or decoding).
    """
    print("🏠 Testing root endpoint...")
    try:
        reply = requests.get(f"{BASE_URL}/", timeout=5)
        # Guard clause: anything other than 200 is a failed check.
        if reply.status_code != 200:
            print(f"❌ Root endpoint failed: {reply.status_code}")
            return False
        print(f"βœ… Root endpoint: {reply.json()}")
        return True
    except Exception as exc:
        print(f"❌ Root endpoint error: {exc}")
        return False
43
+
44
def test_models():
    """Probe ``/v1/models`` and list the model ids it reports.

    On HTTP 200 the ``id`` of every entry under the body's ``"data"`` key is
    printed. Returns True on success, False on a non-200 status or when any
    exception is raised (request failure, undecodable or unexpected body).
    """
    print("πŸ“‹ Testing models endpoint...")
    try:
        reply = requests.get(f"{BASE_URL}/v1/models", timeout=5)
        # Guard clause: anything other than 200 is a failed check.
        if reply.status_code != 200:
            print(f"❌ Models endpoint failed: {reply.status_code}")
            return False
        model_ids = [entry['id'] for entry in reply.json()['data']]
        print(f"βœ… Available models: {model_ids}")
        return True
    except Exception as exc:
        print(f"❌ Models endpoint error: {exc}")
        return False
59
+
60
def test_chat_completion():
    """Run one system+user chat exchange against ``/v1/chat/completions``.

    Prints the assistant's reply on success, or the status code and raw
    response text on failure. Returns True when the service answers HTTP 200
    with a body containing ``choices[0]["message"]["content"]``; False on a
    non-200 status or when any exception is raised.
    """
    print("πŸ’¬ Testing chat completion...")
    try:
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello! Tell me a fun fact about AI."},
        ]
        request_body = {
            "model": "unsloth/Mistral-Nemo-Instruct-2407",
            "messages": conversation,
            "max_tokens": 100,
            "temperature": 0.7,
        }

        reply = requests.post(
            f"{BASE_URL}/v1/chat/completions",
            json=request_body,
            timeout=30,
        )

        # Guard clause: report the failure details and stop on non-200.
        if reply.status_code != 200:
            print(f"❌ Chat completion failed: {reply.status_code}")
            print(f"Response: {reply.text}")
            return False

        answer = reply.json()["choices"][0]["message"]["content"]
        print("βœ… Chat completion successful!")
        print(f"πŸ€– Assistant: {answer}")
        return True
    except Exception as exc:
        print(f"❌ Chat completion error: {exc}")
        return False
90
+
91
def wait_for_service(max_attempts=60, interval=5):
    """Poll ``/health`` until it answers HTTP 200 or the attempts run out.

    Args:
        max_attempts: Number of health requests to make before giving up.
        interval: Seconds to sleep between attempts. The defaults give
            roughly five minutes of waiting.

    Returns:
        True as soon as a health request returns HTTP 200; False when no
        attempt succeeded.
    """
    print("⏳ Waiting for service to be ready...")

    for attempt in range(max_attempts):
        try:
            response = requests.get(f"{BASE_URL}/health", timeout=5)
        except requests.exceptions.RequestException:
            # Only request failures mean "not up yet". A bare ``except`` here
            # would also swallow KeyboardInterrupt/SystemExit, making the
            # wait impossible to interrupt while a request is in flight.
            response = None

        if response is not None and response.status_code == 200:
            print(f"βœ… Service is ready after {attempt * interval} seconds!")
            return True

        # No point announcing or sleeping after the final attempt.
        if attempt < max_attempts - 1:
            print(f"⏳ Attempt {attempt + 1}/{max_attempts} - waiting {interval} seconds...")
            time.sleep(interval)

    print("❌ Service did not become ready within the timeout period")
    return False
111
+
112
def main():
    """Wait for the service, run every endpoint test, and print a summary.

    Each test is run even if an earlier one failed; a test that raises is
    counted as failed rather than aborting the run.

    Returns:
        True when the service became ready and every test passed; False when
        the service never became ready or at least one test failed.
    """
    print("πŸš€ Testing Mistral Nemo Backend Service")
    print("=" * 50)

    # Wait for service to be ready
    if not wait_for_service():
        print("❌ Service is not ready. Exiting.")
        return False

    # Run tests
    tests = [test_root, test_health, test_models, test_chat_completion]
    passed = 0

    for test in tests:
        try:
            if test():
                passed += 1
            print()
        except Exception as e:
            print(f"❌ Test failed with exception: {e}")
            print()

    print("=" * 50)
    print(f"πŸ“Š Test Results: {passed}/{len(tests)} tests passed")

    all_passed = passed == len(tests)
    if all_passed:
        print("πŸŽ‰ All tests passed! Your Mistral Nemo service is working perfectly!")
    else:
        print("⚠️ Some tests failed. Check the logs above for details.")
    return all_passed


if __name__ == "__main__":
    # Propagate the outcome as the process exit status (0 = all passed) so
    # shells and CI can detect failures. SystemExit is a builtin, so no
    # additional import is required.
    raise SystemExit(0 if main() else 1)