Update app.py
app.py
@@ -37,9 +37,8 @@ recommended_threads = 4
 # === Load the model ===
 llm = Llama(
     model_path=model_path,
-    n_ctx=
+    n_ctx=2048,            # Can increase depending on memory
     n_threads=recommended_threads,
-    n_batch=32,            # adjust depending on RAM
     use_mlock=True,        # lock model in RAM for faster access
     n_gpu_layers=0,        # CPU only, use >0 if GPU is present
     chat_format="chatml",  # for Hermes 2
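For context, the patched constructor would be used roughly as follows. This is a minimal, self-contained sketch rather than the app's actual code: the model path below is a placeholder (the real model_path is defined earlier in app.py and is not shown in this hunk), and the chat call assumes llama-cpp-python's standard create_chat_completion API.

# Minimal usage sketch of the updated configuration.
# Assumptions: llama-cpp-python is installed and a Hermes 2 GGUF file exists locally.
from llama_cpp import Llama

model_path = "models/hermes-2.gguf"  # placeholder path, not the repo's actual value
recommended_threads = 4

llm = Llama(
    model_path=model_path,
    n_ctx=2048,            # context window; can increase depending on memory
    n_threads=recommended_threads,
    use_mlock=True,        # lock model in RAM for faster access
    n_gpu_layers=0,        # CPU only, use >0 if GPU is present
    chat_format="chatml",  # Hermes 2 uses the ChatML prompt format
)

# One round-trip through the chat API to confirm the model loads and responds.
response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=64,
)
print(response["choices"][0]["message"]["content"])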