# Residue from a GitHub web capture — repo "firstAI", file "test_gguf.py",
# branch "ndc8", commit "chg model" (375ade4). Commented out so the file parses.
#!/usr/bin/env python3
"""
Simple test script to check if llama-cpp-python works
"""
import os
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def test_llama_cpp():
"""Test llama-cpp-python import and basic functionality"""
try:
logger.info("πŸ§ͺ Testing llama-cpp-python import...")
from llama_cpp import Llama
logger.info("βœ… llama-cpp-python imported successfully")
# Test model loading (this will fail without actual model file)
logger.info("πŸ§ͺ Testing Gemma 3n GGUF model loading...")
try:
llm = Llama.from_pretrained(
repo_id="unsloth/gemma-3n-E4B-it-GGUF",
filename="*q4_k_m.gguf", # Try Q4_K_M first
verbose=True,
n_ctx=1024, # Small context for testing
n_threads=2, # Fewer threads for testing
n_gpu_layers=0, # CPU only for testing
)
logger.info("βœ… Successfully loaded Gemma 3n GGUF model!")
# Test a simple generation
logger.info("πŸ§ͺ Testing simple generation...")
response = llm("Hello", max_tokens=10, echo=False)
logger.info(f"βœ… Generation test successful: {response}")
except Exception as model_error:
logger.warning(f"⚠️ Model loading failed (expected): {model_error}")
logger.info("πŸ’‘ This is normal if you haven't downloaded the GGUF model file yet")
logger.info("πŸ’‘ To download, visit: https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF")
return True
except ImportError as e:
logger.error(f"❌ llama-cpp-python import failed: {e}")
return False
except Exception as e:
logger.error(f"❌ Unexpected error: {e}")
return False
def test_minimal_fastapi():
"""Test a minimal FastAPI setup without transformers"""
try:
logger.info("πŸ§ͺ Testing minimal FastAPI setup...")
from fastapi import FastAPI
from uvicorn import run
logger.info("βœ… FastAPI imports successful")
app = FastAPI(title="Test API")
@app.get("/")
def root():
return {"message": "Hello from test API!"}
@app.get("/health")
def health():
return {"status": "healthy", "llama_cpp_available": True}
logger.info("βœ… Minimal FastAPI app created successfully")
logger.info("πŸš€ You can test this by running: uvicorn test_gguf:app --reload")
return app
except Exception as e:
logger.error(f"❌ FastAPI test failed: {e}")
return None
if __name__ == "__main__":
logger.info("πŸ§ͺ Starting GGUF model integration tests...")
# Test llama-cpp-python
llama_success = test_llama_cpp()
# Test FastAPI
app = test_minimal_fastapi()
if llama_success and app:
logger.info("βœ… All tests passed! Ready for GGUF model integration")
logger.info("πŸ’‘ Next steps:")
logger.info("πŸ’‘ 1. Download GGUF model: https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF")
logger.info("πŸ’‘ 2. Run: uvicorn test_gguf:app --reload")
logger.info("πŸ’‘ 3. Test at: http://localhost:8000/health")
else:
logger.error("❌ Some tests failed. Check the logs above.")