from llama_cpp import Llama
import os

model_path = "llama-3.2-1B-it-Ecommerce-ChatBot-merged-F16.gguf"

# Detect the number of available CPU cores for threading.
n_threads = os.cpu_count()
print(f"Number of CPU cores: {n_threads}")

llm = Llama(
    model_path=model_path,
    n_ctx=512,            # context window size in tokens
    n_batch=512,          # prompt batch size
    n_threads=n_threads,  # use the detected core count rather than a hardcoded value
    n_gpu_layers=-1,      # offload all layers to the GPU if one is available
    chat_format="llama-3"
)
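
# A minimal usage sketch showing how the loaded model can serve a chat turn.
# Assumptions: the GGUF file above exists locally, and the system/user messages
# here are illustrative placeholders, not part of the original setup.
response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful e-commerce support assistant."},
        {"role": "user", "content": "Where is my order?"},
    ],
    max_tokens=256,
    temperature=0.7,
)

# The response follows an OpenAI-style schema; print the assistant's reply.
print(response["choices"][0]["message"]["content"])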