#!/usr/bin/env python3
"""
Simple test script to check if llama-cpp-python works
"""

import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def test_llama_cpp():
    """Test llama-cpp-python import and basic functionality"""
    try:
        logger.info("πŸ§ͺ Testing llama-cpp-python import...")
        from llama_cpp import Llama
        logger.info("βœ… llama-cpp-python imported successfully")
        
        # Test model loading (from_pretrained downloads the file from Hugging Face,
        # so this fails without network access or the huggingface_hub package)
        logger.info("πŸ§ͺ Testing Gemma 3n GGUF model loading...")
        try:
            llm = Llama.from_pretrained(
                repo_id="unsloth/gemma-3n-E4B-it-GGUF",
                filename="*q4_k_m.gguf",  # Try Q4_K_M first
                verbose=True,
                n_ctx=1024,  # Small context for testing
                n_threads=2,  # Fewer threads for testing
                n_gpu_layers=0,  # CPU only for testing
            )
            logger.info("βœ… Successfully loaded Gemma 3n GGUF model!")
            
            # Test a simple generation
            logger.info("πŸ§ͺ Testing simple generation...")
            response = llm("Hello", max_tokens=10, echo=False)
            logger.info(f"βœ… Generation test successful: {response}")
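
            # Optional sketch (not part of the original script): llama-cpp-python
            # also exposes a chat-completion API that applies the model's chat
            # template; the prompt here is just an arbitrary smoke-test message.
            logger.info("πŸ§ͺ Testing chat-completion API...")
            chat = llm.create_chat_completion(
                messages=[{"role": "user", "content": "Say hello in one word."}],
                max_tokens=10,
            )
            logger.info(f"βœ… Chat completion successful: {chat['choices'][0]['message']['content']}")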
            
        except Exception as model_error:
            logger.warning(f"⚠️ Model loading failed (expected on a machine without the model): {model_error}")
            logger.info("πŸ’‘ This is normal if the GGUF file hasn't been downloaded yet; from_pretrained needs network access and the huggingface_hub package")
            logger.info("πŸ’‘ Model repo: https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF")
            
        return True
        
    except ImportError as e:
        logger.error(f"❌ llama-cpp-python import failed: {e}")
        return False
    except Exception as e:
        logger.error(f"❌ Unexpected error: {e}")
        return False
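
def load_local_gguf(model_path: str):
    """Load an already-downloaded GGUF file directly from disk.

    A sketch, not part of the original tests: constructing Llama with
    model_path skips the Hugging Face download step entirely. The example
    path in the comment below is hypothetical.
    """
    from llama_cpp import Llama
    return Llama(
        model_path=model_path,  # e.g. "./models/gemma-3n-E4B-it-Q4_K_M.gguf" (hypothetical path)
        n_ctx=1024,       # same small test context as above
        n_threads=2,
        n_gpu_layers=0,   # CPU only, as in test_llama_cpp
    )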

def test_minimal_fastapi():
    """Test a minimal FastAPI setup without transformers"""
    try:
        logger.info("πŸ§ͺ Testing minimal FastAPI setup...")
        from fastapi import FastAPI
        import uvicorn  # imported only to confirm uvicorn is installed
        logger.info("βœ… FastAPI and uvicorn imports successful")
        
        app = FastAPI(title="Test API")
        
        @app.get("/")
        def root():
            return {"message": "Hello from test API!"}
        
        @app.get("/health")
        def health():
            return {"status": "healthy", "llama_cpp_available": True}  # hardcoded for this smoke test
        
        logger.info("βœ… Minimal FastAPI app created successfully")
        logger.info("πŸš€ You can test this by running: uvicorn test_gguf:app --reload")
        
        return app
        
    except Exception as e:
        logger.error(f"❌ FastAPI test failed: {e}")
        return None
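
# A sketch (not in the original script) of how a /generate endpoint could be
# wired to a loaded model; the helper name and request shape are assumptions.
def add_generate_endpoint(app, llm):
    from pydantic import BaseModel

    class GenerateRequest(BaseModel):
        prompt: str
        max_tokens: int = 32

    @app.post("/generate")
    def generate(req: GenerateRequest):
        # llm(...) runs a plain completion, as in test_llama_cpp above
        out = llm(req.prompt, max_tokens=req.max_tokens, echo=False)
        return {"completion": out["choices"][0]["text"]}

    return app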

# Build the app at import time so `uvicorn test_gguf:app --reload` can find it;
# a name created only inside the __main__ guard would not be importable.
app = test_minimal_fastapi()

if __name__ == "__main__":
    logger.info("πŸ§ͺ Starting GGUF model integration tests...")

    # Test llama-cpp-python
    llama_success = test_llama_cpp()

    if llama_success and app is not None:
        logger.info("βœ… All tests passed! Ready for GGUF model integration")
        logger.info("πŸ’‘ Next steps:")
        logger.info("πŸ’‘ 1. Download GGUF model: https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF")
        logger.info("πŸ’‘ 2. Run: uvicorn test_gguf:app --reload")
        logger.info("πŸ’‘ 3. Test at: http://localhost:8000/health")
    else:
        logger.error("❌ Some tests failed. Check the logs above.")