#!/usr/bin/env python3
"""
Simple test script to check whether llama-cpp-python works.
"""
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def test_llama_cpp():
    """Test llama-cpp-python import and basic functionality."""
    try:
        logger.info("🧪 Testing llama-cpp-python import...")
        from llama_cpp import Llama
        logger.info("✅ llama-cpp-python imported successfully")

        # Test model loading (this will fail without an actual model file)
        logger.info("🧪 Testing Gemma 3n GGUF model loading...")
        try:
            llm = Llama.from_pretrained(
                repo_id="unsloth/gemma-3n-E4B-it-GGUF",
                filename="*q4_k_m.gguf",  # Try Q4_K_M first
                verbose=True,
                n_ctx=1024,       # Small context for testing
                n_threads=2,      # Fewer threads for testing
                n_gpu_layers=0,   # CPU only for testing
            )
            logger.info("✅ Successfully loaded Gemma 3n GGUF model!")

            # Test a simple generation
            logger.info("🧪 Testing simple generation...")
            response = llm("Hello", max_tokens=10, echo=False)
            logger.info(f"✅ Generation test successful: {response}")
        except Exception as model_error:
            logger.warning(f"⚠️ Model loading failed (expected): {model_error}")
            logger.info("💡 This is normal if you haven't downloaded the GGUF model file yet")
            logger.info("💡 To download, visit: https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF")
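            # A hedged alternative, assuming huggingface_hub is installed: the
            # weights can also be fetched programmatically. The exact filename
            # below is an assumption; check the repo's file list first.
            #
            #   from huggingface_hub import hf_hub_download
            #   hf_hub_download(
            #       repo_id="unsloth/gemma-3n-E4B-it-GGUF",
            #       filename="gemma-3n-E4B-it-Q4_K_M.gguf",  # assumed name
            #   )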
        return True
    except ImportError as e:
        logger.error(f"❌ llama-cpp-python import failed: {e}")
        return False
    except Exception as e:
        logger.error(f"❌ Unexpected error: {e}")
        return False


def test_minimal_fastapi():
    """Test a minimal FastAPI setup without transformers."""
    try:
        logger.info("🧪 Testing minimal FastAPI setup...")
        from fastapi import FastAPI
        from uvicorn import run  # noqa: F401 (imported only to verify uvicorn is installed)
        logger.info("✅ FastAPI imports successful")

        app = FastAPI(title="Test API")

        @app.get("/")
        def root():
            return {"message": "Hello from test API!"}

        @app.get("/health")
        def health():
            # Hardcoded for this smoke test; a real app would check the import.
            return {"status": "healthy", "llama_cpp_available": True}

        logger.info("✅ Minimal FastAPI app created successfully")
        logger.info("🚀 You can test this by running: uvicorn test_gguf:app --reload")
        return app
    except Exception as e:
        logger.error(f"❌ FastAPI test failed: {e}")
        return None
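

# A hedged sketch, not exercised by the tests above: once a GGUF model has
# loaded, it could be wired into the FastAPI app roughly like this. The route
# name and payload shape are illustrative assumptions, not a fixed API.
def add_generate_endpoint(app, llm):
    """Attach a minimal /generate endpoint that proxies prompts to the model."""
    @app.post("/generate")
    def generate(payload: dict):
        # Expects a body like {"prompt": "...", "max_tokens": 32} (assumed shape).
        prompt = payload.get("prompt", "")
        max_tokens = int(payload.get("max_tokens", 32))
        return llm(prompt, max_tokens=max_tokens, echo=False)
    return app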


# Create the FastAPI app at import time so `uvicorn test_gguf:app --reload`
# can find it (the log messages assume this module is saved as test_gguf.py).
app = test_minimal_fastapi()

if __name__ == "__main__":
    logger.info("🧪 Starting GGUF model integration tests...")
    # Test llama-cpp-python
    llama_success = test_llama_cpp()

    if llama_success and app:
        logger.info("✅ All tests passed! Ready for GGUF model integration")
        logger.info("💡 Next steps:")
        logger.info("💡 1. Download GGUF model: https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF")
        logger.info("💡 2. Run: uvicorn test_gguf:app --reload")
        logger.info("💡 3. Test at: http://localhost:8000/health")
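        # A hedged manual check (assumes uvicorn's default port, 8000):
        #   curl http://localhost:8000/
        #   curl http://localhost:8000/health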
    else:
        logger.error("❌ Some tests failed. Check the logs above.")