#!/usr/bin/env python3
"""Simple test script to check if llama-cpp-python works."""
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def test_llama_cpp():
    """Test llama-cpp-python import and basic GGUF model loading.

    Returns:
        bool: True when llama-cpp-python imports cleanly (model loading is
        allowed to fail — that just means the GGUF weights are not downloaded
        yet); False when the import itself fails or something unexpected
        happens.
    """
    try:
        logger.info("๐Ÿงช Testing llama-cpp-python import...")
        from llama_cpp import Llama
        logger.info("โœ… llama-cpp-python imported successfully")

        # Model loading is best-effort: it needs the GGUF weights locally (or
        # downloadable), so a failure here is expected and non-fatal.
        logger.info("๐Ÿงช Testing Gemma 3n GGUF model loading...")
        try:
            llm = Llama.from_pretrained(
                repo_id="unsloth/gemma-3n-E4B-it-GGUF",
                filename="*q4_k_m.gguf",  # Try Q4_K_M first
                verbose=True,
                n_ctx=1024,      # Small context for testing
                n_threads=2,     # Fewer threads for testing
                n_gpu_layers=0,  # CPU only for testing
            )
            logger.info("โœ… Successfully loaded Gemma 3n GGUF model!")

            # Test a simple generation
            logger.info("๐Ÿงช Testing simple generation...")
            response = llm("Hello", max_tokens=10, echo=False)
            logger.info("โœ… Generation test successful: %s", response)
        except Exception as model_error:
            logger.warning("โš ๏ธ Model loading failed (expected): %s", model_error)
            logger.info("๐Ÿ’ก This is normal if you haven't downloaded the GGUF model file yet")
            logger.info("๐Ÿ’ก To download, visit: https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF")

        return True

    except ImportError as e:
        logger.error("โŒ llama-cpp-python import failed: %s", e)
        return False
    except Exception as e:
        logger.error("โŒ Unexpected error: %s", e)
        return False


def test_minimal_fastapi():
    """Build a minimal FastAPI app (no transformers) to verify the web stack.

    Returns:
        The FastAPI application on success, or None when FastAPI/uvicorn are
        unavailable or app construction fails.
    """
    try:
        logger.info("๐Ÿงช Testing minimal FastAPI setup...")
        from fastapi import FastAPI
        import uvicorn  # noqa: F401 -- only verifies the ASGI server is installed

        logger.info("โœ… FastAPI imports successful")

        app = FastAPI(title="Test API")

        @app.get("/")
        def root():
            return {"message": "Hello from test API!"}

        @app.get("/health")
        def health():
            return {"status": "healthy", "llama_cpp_available": True}

        logger.info("โœ… Minimal FastAPI app created successfully")
        logger.info("๐Ÿš€ You can test this by running: uvicorn test_gguf:app --reload")
        return app

    except Exception as e:
        logger.error("โŒ FastAPI test failed: %s", e)
        return None


# BUG FIX: `app` must be created at module level so the advertised command
# `uvicorn test_gguf:app --reload` can find it. Previously it was assigned
# only under the __main__ guard, which does not execute when uvicorn imports
# this module, so the command failed with "Attribute 'app' not found".
app = test_minimal_fastapi()

if __name__ == "__main__":
    logger.info("๐Ÿงช Starting GGUF model integration tests...")

    # Test llama-cpp-python
    llama_success = test_llama_cpp()

    if llama_success and app:
        logger.info("โœ… All tests passed! Ready for GGUF model integration")
        logger.info("๐Ÿ’ก Next steps:")
        logger.info("๐Ÿ’ก 1. Download GGUF model: https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF")
        logger.info("๐Ÿ’ก 2. Run: uvicorn test_gguf:app --reload")
        logger.info("๐Ÿ’ก 3. Test at: http://localhost:8000/health")
    else:
        logger.error("โŒ Some tests failed. Check the logs above.")