#!/usr/bin/env python3
"""
Test Client for Gemma 3n GGUF Backend
Demonstrates the complete integration working
"""

import requests
import json
import time

def test_gemma_backend():
    """Smoke-test the Gemma 3n GGUF backend integration.

    Runs four checks against an OpenAI-compatible server expected at
    http://localhost:8000: the health endpoint, the root info endpoint,
    a single-turn chat completion, and a multi-turn conversation.

    Returns:
        bool: True when the required checks (1-3) succeed; False on the
        first failure. Test 4 is best-effort and never fails the run.
    """
    base_url = "http://localhost:8000"
    # Bound every request so the script cannot hang forever when the
    # backend is down or generation stalls. Generous, since local GGUF
    # inference can legitimately take a while.
    timeout = 120

    print("πŸ§ͺ Testing Gemma 3n GGUF Backend Integration")
    print("=" * 50)

    # Test 1: Health Check
    print("\n1. πŸ” Testing Health Endpoint")
    try:
        response = requests.get(f"{base_url}/health", timeout=timeout)
        # Fix: previously a non-200 response was parsed as if healthy.
        response.raise_for_status()
        health_data = response.json()
        print(f"βœ… Health Status: {health_data['status']}")
        print(f"πŸ€– Model: {health_data['model']}")
        print(f"πŸ› οΈ Backend: {health_data['backend']}")
        print(f"πŸ“Š Version: {health_data['version']}")
    except Exception as e:
        print(f"❌ Health check failed: {e}")
        return False

    # Test 2: Root Info
    print("\n2. πŸ“‹ Testing Root Info Endpoint")
    try:
        response = requests.get(f"{base_url}/", timeout=timeout)
        response.raise_for_status()
        root_data = response.json()
        print(f"βœ… Service: {root_data['message']}")
        print(f"πŸ“ˆ Model Loaded: {root_data.get('model_loaded', 'unknown')}")
        print(f"🎯 Available Endpoints: {', '.join(root_data['endpoints'].keys())}")
    except Exception as e:
        print(f"❌ Root info failed: {e}")
        return False

    # Test 3: Chat Completion (single turn)
    print("\n3. πŸ’¬ Testing Chat Completion")
    chat_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "Hello! What is your name and what can you help me with?"}
        ],
        "max_tokens": 150,
        "temperature": 0.7
    }

    try:
        start_time = time.time()
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=chat_request,
            timeout=timeout
        )
        end_time = time.time()

        if response.status_code == 200:
            chat_data = response.json()
            # Fix: was an f-string with no placeholders.
            print("βœ… Chat completion successful!")
            print(f"⚑ Response time: {end_time - start_time:.2f}s")
            print(f"🎯 Model: {chat_data['model']}")
            print(f"πŸ”’ Completion ID: {chat_data['id']}")

            # Display the response
            assistant_message = chat_data['choices'][0]['message']['content']
            print(f"\nπŸ€– Assistant Response:")
            print(f"   {assistant_message}")
            print(f"🏁 Finish Reason: {chat_data['choices'][0]['finish_reason']}")
        else:
            print(f"❌ Chat completion failed with status: {response.status_code}")
            print(f"πŸ“„ Response: {response.text}")
            return False

    except Exception as e:
        print(f"❌ Chat completion failed: {e}")
        return False

    # Test 4: Multiple Conversation Turns (best-effort: failure is
    # reported but does not abort the run, matching the original intent).
    print("\n4. πŸ”„ Testing Multi-turn Conversation")
    multi_turn_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "What is AI?"},
            {"role": "assistant", "content": "AI stands for Artificial Intelligence. It refers to the simulation of human intelligence in machines."},
            {"role": "user", "content": "What are some practical applications?"}
        ],
        "max_tokens": 100,
        "temperature": 0.5
    }

    try:
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=multi_turn_request,
            timeout=timeout
        )

        if response.status_code == 200:
            chat_data = response.json()
            print("βœ… Multi-turn conversation successful!")
            assistant_response = chat_data['choices'][0]['message']['content']
            print(f"πŸ€– Follow-up Response: {assistant_response[:100]}...")
        else:
            print(f"❌ Multi-turn failed with status: {response.status_code}")

    except Exception as e:
        print(f"❌ Multi-turn conversation failed: {e}")

    print("\n" + "=" * 50)
    print("πŸŽ‰ Gemma 3n GGUF Backend Integration Test Complete!")
    print("βœ… Your app is successfully using the Gemma-3n-E4B-it-GGUF model!")

    return True

if __name__ == "__main__":
    # Announce the run and give the user a moment to launch the backend.
    for banner_line in (
        "πŸš€ Starting Gemma 3n Integration Test...",
        "πŸ“‹ Make sure the backend is running: python3 gemma_gguf_backend.py",
        "⏳ Waiting 2 seconds for you to start the backend if needed...",
    ):
        print(banner_line)
    time.sleep(2)

    # Pick the summary lines based on the overall test outcome.
    if test_gemma_backend():
        summary = (
            "\n🎯 Integration Summary:",
            "   βœ… Backend is running correctly",
            "   βœ… OpenAI-compatible API working",
            "   βœ… Gemma 3n model integration successful",
            "   βœ… Ready for production use!",
        )
    else:
        summary = (
            "\n❌ Some tests failed. Check the backend logs.",
            "πŸ’‘ Make sure to run: python3 gemma_gguf_backend.py",
        )
    for summary_line in summary:
        print(summary_line)