# firstAI / test_integration.py
#!/usr/bin/env python3
"""
Test Client for Gemma 3n GGUF Backend
Demonstrates the complete integration working
"""
import requests
import json
import time
def _test_health(base_url, timeout):
    """Test 1: GET /health and report the backend's status fields."""
    print("\n1. 🔍 Testing Health Endpoint")
    try:
        response = requests.get(f"{base_url}/health", timeout=timeout)
        health_data = response.json()
        print(f"✅ Health Status: {health_data['status']}")
        print(f"🤖 Model: {health_data['model']}")
        print(f"🛠️ Backend: {health_data['backend']}")
        print(f"📊 Version: {health_data['version']}")
        return True
    except Exception as e:
        # Connection refused / timeout / bad JSON all land here; the suite
        # cannot continue without a healthy backend.
        print(f"❌ Health check failed: {e}")
        return False


def _test_root_info(base_url, timeout):
    """Test 2: GET / and report the service banner and advertised endpoints."""
    print("\n2. 📋 Testing Root Info Endpoint")
    try:
        response = requests.get(f"{base_url}/", timeout=timeout)
        root_data = response.json()
        print(f"✅ Service: {root_data['message']}")
        # 'model_loaded' may be absent on older backends — don't KeyError.
        print(f"📈 Model Loaded: {root_data.get('model_loaded', 'unknown')}")
        print(f"🎯 Available Endpoints: {', '.join(root_data['endpoints'].keys())}")
        return True
    except Exception as e:
        print(f"❌ Root info failed: {e}")
        return False


def _test_chat_completion(base_url, timeout):
    """Test 3: single-turn chat via the OpenAI-compatible completions endpoint."""
    print("\n3. 💬 Testing Chat Completion")
    chat_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "Hello! What is your name and what can you help me with?"}
        ],
        "max_tokens": 150,
        "temperature": 0.7,
    }
    try:
        start_time = time.time()
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=chat_request,
            timeout=timeout,
        )
        end_time = time.time()
        if response.status_code == 200:
            chat_data = response.json()
            print("✅ Chat completion successful!")
            print(f"⚡ Response time: {end_time - start_time:.2f}s")
            print(f"🎯 Model: {chat_data['model']}")
            print(f"🔢 Completion ID: {chat_data['id']}")
            # Display the response
            assistant_message = chat_data['choices'][0]['message']['content']
            print(f"\n🤖 Assistant Response:")
            print(f"   {assistant_message}")
            print(f"🏁 Finish Reason: {chat_data['choices'][0]['finish_reason']}")
            return True
        print(f"❌ Chat completion failed with status: {response.status_code}")
        print(f"📄 Response: {response.text}")
        return False
    except Exception as e:
        print(f"❌ Chat completion failed: {e}")
        return False


def _test_multi_turn(base_url, timeout):
    """Test 4 (best-effort): multi-turn conversation with prior context."""
    print("\n4. 🔄 Testing Multi-turn Conversation")
    multi_turn_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "What is AI?"},
            {"role": "assistant", "content": "AI stands for Artificial Intelligence. It refers to the simulation of human intelligence in machines."},
            {"role": "user", "content": "What are some practical applications?"}
        ],
        "max_tokens": 100,
        "temperature": 0.5,
    }
    try:
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=multi_turn_request,
            timeout=timeout,
        )
        if response.status_code == 200:
            chat_data = response.json()
            print("✅ Multi-turn conversation successful!")
            assistant_response = chat_data['choices'][0]['message']['content']
            # Truncate to keep console output compact.
            print(f"🤖 Follow-up Response: {assistant_response[:100]}...")
        else:
            print(f"❌ Multi-turn failed with status: {response.status_code}")
    except Exception as e:
        print(f"❌ Multi-turn conversation failed: {e}")


def test_gemma_backend(base_url="http://localhost:8000", timeout=30):
    """Run the Gemma 3n GGUF backend integration test suite.

    Args:
        base_url: Root URL of the running backend (defaults to the local
            dev server).
        timeout: Per-request timeout in seconds. Without it every request
            could hang indefinitely if the backend stalls.

    Returns:
        True when the critical tests (health, root info, chat completion)
        all pass; False as soon as one of them fails. The multi-turn test
        is best-effort and never fails the suite (original behavior).
    """
    print("🧪 Testing Gemma 3n GGUF Backend Integration")
    print("=" * 50)

    if not _test_health(base_url, timeout):
        return False
    if not _test_root_info(base_url, timeout):
        return False
    if not _test_chat_completion(base_url, timeout):
        return False

    # Best-effort: report but do not fail the suite on a multi-turn error.
    _test_multi_turn(base_url, timeout)

    print("\n" + "=" * 50)
    print("🎉 Gemma 3n GGUF Backend Integration Test Complete!")
    print("✅ Your app is successfully using the Gemma-3n-E4B-it-GGUF model!")
    return True
if __name__ == "__main__":
    # Announce the run and give the user a short grace period to start the
    # backend process before the suite begins probing it.
    print("🚀 Starting Gemma 3n Integration Test...")
    print("📋 Make sure the backend is running: python3 gemma_gguf_backend.py")
    print("⏳ Waiting 2 seconds for you to start the backend if needed...")
    time.sleep(2)

    if test_gemma_backend():
        # All critical tests passed — print the success summary.
        for summary_line in (
            "\n🎯 Integration Summary:",
            " ✅ Backend is running correctly",
            " ✅ OpenAI-compatible API working",
            " ✅ Gemma 3n model integration successful",
            " ✅ Ready for production use!",
        ):
            print(summary_line)
    else:
        print("\n❌ Some tests failed. Check the backend logs.")
        print("💡 Make sure to run: python3 gemma_gguf_backend.py")