Priyanshukr-1 committed on
Commit
676ed5b
·
verified ·
1 Parent(s): af4a857

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -10,8 +10,8 @@ import time
10
  app = FastAPI()
11
 
12
  # === Model Config ===
13
- REPO_ID = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
14
- FILENAME = "openhermes-2-mistral-7b.Q4_K_M.gguf"
15
  MODEL_DIR = "models"
16
  MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
17
 
@@ -39,7 +39,7 @@ llm = Llama(
39
  model_path=model_path,
40
  n_ctx=8192, # Can increase depending on memory
41
  n_threads=recommended_threads,
42
- n_batch=64, # adjust depending on RAM
43
  use_mlock=True, # lock model in RAM for faster access
44
  n_gpu_layers=0, # CPU only, use >0 if GPU is present
45
  chat_format="chatml", # for Hermes 2
 
10
  app = FastAPI()
11
 
12
  # === Model Config ===
13
+ REPO_ID = "TheBloke/phi-2-GGUF"
14
+ FILENAME = "phi-2.Q4_K_M.gguf"
15
  MODEL_DIR = "models"
16
  MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
17
 
 
39
  model_path=model_path,
40
  n_ctx=8192, # Can increase depending on memory
41
  n_threads=recommended_threads,
42
+ n_batch=32, # adjust depending on RAM
43
  use_mlock=True, # lock model in RAM for faster access
44
  n_gpu_layers=0, # CPU only, use >0 if GPU is present
45
  chat_format="chatml", # for Hermes 2