#!/usr/bin/env python3
"""
Test Client for Gemma 3n GGUF Backend
Demonstrates the complete integration working
"""

import requests
import json
import time

def test_gemma_backend():
    """Smoke-test the Gemma 3n GGUF backend integration.

    Runs four checks against an OpenAI-compatible server expected at
    http://localhost:8000: the health endpoint, the root info endpoint,
    a single-turn chat completion, and a multi-turn conversation.

    Returns:
        bool: True when the required checks (1-3) succeed; False on the
        first failure. Test 4 is best-effort and never fails the run.
    """
    base_url = "http://localhost:8000"
    # Bound every request so the script cannot hang forever when the
    # backend is down or generation stalls. Generous, since local GGUF
    # inference can legitimately take a while.
    timeout = 120

    print("πŸ§ͺ Testing Gemma 3n GGUF Backend Integration")
    print("=" * 50)

    # Test 1: Health Check
    print("\n1. πŸ” Testing Health Endpoint")
    try:
        response = requests.get(f"{base_url}/health", timeout=timeout)
        # Fix: previously a non-200 response was parsed as if healthy.
        response.raise_for_status()
        health_data = response.json()
        print(f"βœ… Health Status: {health_data['status']}")
        print(f"πŸ€– Model: {health_data['model']}")
        print(f"πŸ› οΈ Backend: {health_data['backend']}")
        print(f"πŸ“Š Version: {health_data['version']}")
    except Exception as e:
        print(f"❌ Health check failed: {e}")
        return False

    # Test 2: Root Info
    print("\n2. πŸ“‹ Testing Root Info Endpoint")
    try:
        response = requests.get(f"{base_url}/", timeout=timeout)
        response.raise_for_status()
        root_data = response.json()
        print(f"βœ… Service: {root_data['message']}")
        print(f"πŸ“ˆ Model Loaded: {root_data.get('model_loaded', 'unknown')}")
        print(f"🎯 Available Endpoints: {', '.join(root_data['endpoints'].keys())}")
    except Exception as e:
        print(f"❌ Root info failed: {e}")
        return False

    # Test 3: Chat Completion (single turn)
    print("\n3. πŸ’¬ Testing Chat Completion")
    chat_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "Hello! What is your name and what can you help me with?"}
        ],
        "max_tokens": 150,
        "temperature": 0.7
    }

    try:
        start_time = time.time()
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=chat_request,
            timeout=timeout
        )
        end_time = time.time()

        if response.status_code == 200:
            chat_data = response.json()
            # Fix: was an f-string with no placeholders.
            print("βœ… Chat completion successful!")
            print(f"⚑ Response time: {end_time - start_time:.2f}s")
            print(f"🎯 Model: {chat_data['model']}")
            print(f"πŸ”’ Completion ID: {chat_data['id']}")

            # Display the response
            assistant_message = chat_data['choices'][0]['message']['content']
            print(f"\nπŸ€– Assistant Response:")
            print(f"   {assistant_message}")
            print(f"🏁 Finish Reason: {chat_data['choices'][0]['finish_reason']}")
        else:
            print(f"❌ Chat completion failed with status: {response.status_code}")
            print(f"πŸ“„ Response: {response.text}")
            return False

    except Exception as e:
        print(f"❌ Chat completion failed: {e}")
        return False

    # Test 4: Multiple Conversation Turns (best-effort: failure is
    # reported but does not abort the run, matching the original intent).
    print("\n4. πŸ”„ Testing Multi-turn Conversation")
    multi_turn_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "What is AI?"},
            {"role": "assistant", "content": "AI stands for Artificial Intelligence. It refers to the simulation of human intelligence in machines."},
            {"role": "user", "content": "What are some practical applications?"}
        ],
        "max_tokens": 100,
        "temperature": 0.5
    }

    try:
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=multi_turn_request,
            timeout=timeout
        )

        if response.status_code == 200:
            chat_data = response.json()
            print("βœ… Multi-turn conversation successful!")
            assistant_response = chat_data['choices'][0]['message']['content']
            print(f"πŸ€– Follow-up Response: {assistant_response[:100]}...")
        else:
            print(f"❌ Multi-turn failed with status: {response.status_code}")

    except Exception as e:
        print(f"❌ Multi-turn conversation failed: {e}")

    print("\n" + "=" * 50)
    print("πŸŽ‰ Gemma 3n GGUF Backend Integration Test Complete!")
    print("βœ… Your app is successfully using the Gemma-3n-E4B-it-GGUF model!")

    return True

if __name__ == "__main__":
    # Announce the run and give the user a moment to launch the backend.
    for banner_line in (
        "πŸš€ Starting Gemma 3n Integration Test...",
        "πŸ“‹ Make sure the backend is running: python3 gemma_gguf_backend.py",
        "⏳ Waiting 2 seconds for you to start the backend if needed...",
    ):
        print(banner_line)
    time.sleep(2)

    # Pick the summary lines based on the overall test outcome.
    if test_gemma_backend():
        summary = (
            "\n🎯 Integration Summary:",
            "   βœ… Backend is running correctly",
            "   βœ… OpenAI-compatible API working",
            "   βœ… Gemma 3n model integration successful",
            "   βœ… Ready for production use!",
        )
    else:
        summary = (
            "\n❌ Some tests failed. Check the backend logs.",
            "πŸ’‘ Make sure to run: python3 gemma_gguf_backend.py",
        )
    for summary_line in summary:
        print(summary_line)