# NOTE(review): removed non-Python extraction artifacts (file-size/commit/line-number dump)
# that preceded the shebang and made the file unparseable.
#!/usr/bin/env python3
"""
Test Client for Gemma 3n GGUF Backend
Demonstrates the complete integration working
"""
import requests
import json
import time
def test_gemma_backend() -> bool:
    """Smoke-test the local Gemma 3n GGUF backend over HTTP.

    Runs four checks in order against http://localhost:8000:
      1. GET /health               (required)
      2. GET /                     (required)
      3. POST /v1/chat/completions single-turn  (required)
      4. POST /v1/chat/completions multi-turn   (best-effort: a failure
         here is reported but does not fail the run, matching the
         original behavior)

    Returns:
        bool: True if checks 1-3 succeeded, False otherwise.
    """
    base_url = "http://localhost:8000"
    # NOTE(review): original output used emoji that arrived mojibake-garbled
    # (some splitting string literals across lines); reconstructed as ASCII tags.
    print("[TEST] Testing Gemma 3n GGUF Backend Integration")
    print("=" * 50)

    # Test 1: Health Check
    print("\n1. [HEALTH] Testing Health Endpoint")
    try:
        # timeout added: a missing timeout makes requests block indefinitely
        response = requests.get(f"{base_url}/health", timeout=30)
        health_data = response.json()
        print(f"[OK] Health Status: {health_data['status']}")
        print(f"[MODEL] Model: {health_data['model']}")
        print(f"[BACKEND] Backend: {health_data['backend']}")
        print(f"[VERSION] Version: {health_data['version']}")
    except Exception as e:
        print(f"[FAIL] Health check failed: {e}")
        return False

    # Test 2: Root Info
    print("\n2. [INFO] Testing Root Info Endpoint")
    try:
        response = requests.get(f"{base_url}/", timeout=30)
        root_data = response.json()
        print(f"[OK] Service: {root_data['message']}")
        print(f"[LOADED] Model Loaded: {root_data.get('model_loaded', 'unknown')}")
        print(f"[ENDPOINTS] Available Endpoints: {', '.join(root_data['endpoints'].keys())}")
    except Exception as e:
        print(f"[FAIL] Root info failed: {e}")
        return False

    # Test 3: Chat Completion (single turn)
    print("\n3. [CHAT] Testing Chat Completion")
    chat_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "Hello! What is your name and what can you help me with?"}
        ],
        "max_tokens": 150,
        "temperature": 0.7
    }
    try:
        start_time = time.time()
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=chat_request,
            timeout=120,  # generation can be slow on CPU-only GGUF backends
        )
        end_time = time.time()
        if response.status_code == 200:
            chat_data = response.json()
            print("[OK] Chat completion successful!")
            print(f"[TIME] Response time: {end_time - start_time:.2f}s")
            print(f"[MODEL] Model: {chat_data['model']}")
            print(f"[ID] Completion ID: {chat_data['id']}")
            # Display the response
            assistant_message = chat_data['choices'][0]['message']['content']
            print("\n[ASSISTANT] Assistant Response:")
            print(f"   {assistant_message}")
            print(f"[FINISH] Finish Reason: {chat_data['choices'][0]['finish_reason']}")
        else:
            print(f"[FAIL] Chat completion failed with status: {response.status_code}")
            print(f"[BODY] Response: {response.text}")
            return False
    except Exception as e:
        print(f"[FAIL] Chat completion failed: {e}")
        return False

    # Test 4: Multiple Conversation Turns (best-effort; does not fail the run)
    print("\n4. [MULTI] Testing Multi-turn Conversation")
    multi_turn_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "What is AI?"},
            {"role": "assistant", "content": "AI stands for Artificial Intelligence. It refers to the simulation of human intelligence in machines."},
            {"role": "user", "content": "What are some practical applications?"}
        ],
        "max_tokens": 100,
        "temperature": 0.5
    }
    try:
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=multi_turn_request,
            timeout=120,
        )
        if response.status_code == 200:
            chat_data = response.json()
            print("[OK] Multi-turn conversation successful!")
            assistant_response = chat_data['choices'][0]['message']['content']
            print(f"[ASSISTANT] Follow-up Response: {assistant_response[:100]}...")
        else:
            print(f"[FAIL] Multi-turn failed with status: {response.status_code}")
    except Exception as e:
        print(f"[FAIL] Multi-turn conversation failed: {e}")

    print("\n" + "=" * 50)
    print("[DONE] Gemma 3n GGUF Backend Integration Test Complete!")
    print("[OK] Your app is successfully using the Gemma-3n-E4B-it-GGUF model!")
    return True
if __name__ == "__main__":
    # Script entry point: give the operator a moment to start the backend,
    # then run the integration smoke test and print a summary.
    # NOTE(review): mojibake emoji (some splitting string literals across
    # lines) reconstructed as ASCII tags.
    print("[START] Starting Gemma 3n Integration Test...")
    print("[INFO] Make sure the backend is running: python3 gemma_gguf_backend.py")
    print("[WAIT] Waiting 2 seconds for you to start the backend if needed...")
    time.sleep(2)
    success = test_gemma_backend()
    if success:
        print("\n[SUMMARY] Integration Summary:")
        print("   [OK] Backend is running correctly")
        print("   [OK] OpenAI-compatible API working")
        print("   [OK] Gemma 3n model integration successful")
        print("   [OK] Ready for production use!")
    else:
        print("\n[FAIL] Some tests failed. Check the backend logs.")
        print("[HINT] Make sure to run: python3 gemma_gguf_backend.py")
|