|
|
|
""" |
|
Test Client for Gemma 3n GGUF Backend |
|
Demonstrates the complete integration working |
|
""" |
|
|
|
import requests |
|
import json |
|
import time |
|
|
|
def test_gemma_backend(base_url="http://localhost:8000", timeout=30.0):
    """Run an end-to-end smoke test against the Gemma 3n GGUF backend.

    Exercises four scenarios in order: health check, root info, a
    single-turn chat completion, and a multi-turn conversation.

    Args:
        base_url: Root URL of the running backend. Defaults to the local
            development server, matching the original hard-coded value.
        timeout: Per-request timeout in seconds. Without a timeout,
            requests.get/post would block forever on a hung server.

    Returns:
        True when the required checks (health, root info, single-turn
        chat) all pass; False on the first required failure. The
        multi-turn check is best-effort: its failure is reported but
        does not change the return value.
    """
    print("[TEST] Testing Gemma 3n GGUF Backend Integration")
    print("=" * 50)

    # 1. Health endpoint -- fail fast if the server is not reachable.
    print("\n1. Testing Health Endpoint")
    try:
        response = requests.get(f"{base_url}/health", timeout=timeout)
        response.raise_for_status()  # surface non-2xx instead of parsing an error body
        health_data = response.json()
        print(f"[OK] Health Status: {health_data['status']}")
        print(f"  Model: {health_data['model']}")
        print(f"  Backend: {health_data['backend']}")
        print(f"  Version: {health_data['version']}")
    except Exception as e:
        print(f"[FAIL] Health check failed: {e}")
        return False

    # 2. Root info endpoint -- reports service metadata and endpoint list.
    print("\n2. Testing Root Info Endpoint")
    try:
        response = requests.get(f"{base_url}/", timeout=timeout)
        response.raise_for_status()
        root_data = response.json()
        print(f"[OK] Service: {root_data['message']}")
        print(f"  Model Loaded: {root_data.get('model_loaded', 'unknown')}")
        print(f"  Available Endpoints: {', '.join(root_data['endpoints'].keys())}")
    except Exception as e:
        print(f"[FAIL] Root info failed: {e}")
        return False

    # 3. Single-turn chat completion (OpenAI-compatible request schema).
    print("\n3. Testing Chat Completion")
    chat_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "Hello! What is your name and what can you help me with?"}
        ],
        "max_tokens": 150,
        "temperature": 0.7,
    }

    try:
        start_time = time.time()
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=chat_request,
            timeout=timeout,
        )
        end_time = time.time()

        if response.status_code == 200:
            chat_data = response.json()
            print("[OK] Chat completion successful!")
            print(f"  Response time: {end_time - start_time:.2f}s")
            print(f"  Model: {chat_data['model']}")
            print(f"  Completion ID: {chat_data['id']}")

            assistant_message = chat_data['choices'][0]['message']['content']
            print("\nAssistant Response:")
            print(f"   {assistant_message}")
            print(f"  Finish Reason: {chat_data['choices'][0]['finish_reason']}")
        else:
            print(f"[FAIL] Chat completion failed with status: {response.status_code}")
            print(f"  Response: {response.text}")
            return False

    except Exception as e:
        print(f"[FAIL] Chat completion failed: {e}")
        return False

    # 4. Multi-turn conversation -- best-effort: a failure here is printed
    # but intentionally does not fail the overall test (matches original
    # control flow, which fell through to the success summary).
    print("\n4. Testing Multi-turn Conversation")
    multi_turn_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "What is AI?"},
            {"role": "assistant", "content": "AI stands for Artificial Intelligence. It refers to the simulation of human intelligence in machines."},
            {"role": "user", "content": "What are some practical applications?"},
        ],
        "max_tokens": 100,
        "temperature": 0.5,
    }

    try:
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=multi_turn_request,
            timeout=timeout,
        )

        if response.status_code == 200:
            chat_data = response.json()
            print("[OK] Multi-turn conversation successful!")
            assistant_response = chat_data['choices'][0]['message']['content']
            # Truncate the reply so long completions don't flood the console.
            print(f"  Follow-up Response: {assistant_response[:100]}...")
        else:
            print(f"[FAIL] Multi-turn failed with status: {response.status_code}")

    except Exception as e:
        print(f"[FAIL] Multi-turn conversation failed: {e}")

    print("\n" + "=" * 50)
    print("Gemma 3n GGUF Backend Integration Test Complete!")
    print("Your app is successfully using the Gemma-3n-E4B-it-GGUF model!")

    return True
|
|
|
if __name__ == "__main__": |
|
print("π Starting Gemma 3n Integration Test...") |
|
print("π Make sure the backend is running: python3 gemma_gguf_backend.py") |
|
print("β³ Waiting 2 seconds for you to start the backend if needed...") |
|
time.sleep(2) |
|
|
|
success = test_gemma_backend() |
|
|
|
if success: |
|
print("\nπ― Integration Summary:") |
|
print(" β
Backend is running correctly") |
|
print(" β
OpenAI-compatible API working") |
|
print(" β
Gemma 3n model integration successful") |
|
print(" β
Ready for production use!") |
|
else: |
|
print("\nβ Some tests failed. Check the backend logs.") |
|
print("π‘ Make sure to run: python3 gemma_gguf_backend.py") |
|
|