Priyanshukr-1 committed on
Commit
c1d1082
·
verified ·
1 Parent(s): 676ed5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -2
app.py CHANGED
@@ -37,9 +37,8 @@ recommended_threads = 4
37
  # === Load the model ===
38
  llm = Llama(
39
  model_path=model_path,
40
- n_ctx=8192, # Can increase depending on memory
41
  n_threads=recommended_threads,
42
- n_batch=32, # adjust depending on RAM
43
  use_mlock=True, # lock model in RAM for faster access
44
  n_gpu_layers=0, # CPU only, use >0 if GPU is present
45
  chat_format="chatml", # for Hermes 2
 
37
  # === Load the model ===
38
  llm = Llama(
39
  model_path=model_path,
40
+ n_ctx=2048, # Can increase depending on memory
41
  n_threads=recommended_threads,
 
42
  use_mlock=True, # lock model in RAM for faster access
43
  n_gpu_layers=0, # CPU only, use >0 if GPU is present
44
  chat_format="chatml", # for Hermes 2