Spaces:

Priyanshukr-1
/

openhermes_mistral_API

Sleeping

Priyanshukr-1 committed on Jul 17

Commit

676ed5b

verified ·

1 Parent(s): af4a857

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,8 +10,8 @@ import time
 app = FastAPI()
 # === Model Config ===
-REPO_ID = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
-FILENAME = "openhermes-2-mistral-7b.Q4_K_M.gguf"
 MODEL_DIR = "models"
 MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
@@ -39,7 +39,7 @@ llm = Llama(
     model_path=model_path,
     n_ctx=8192,  # Can increase depending on memory
     n_threads=recommended_threads,
-    n_batch=64,  # adjust depending on RAM
     use_mlock=True,  # lock model in RAM for faster access
     n_gpu_layers=0,  # CPU only, use >0 if GPU is present
     chat_format="chatml",  # for Hermes 2

 app = FastAPI()
 # === Model Config ===
+REPO_ID = "TheBloke/phi-2-GGUF"
+FILENAME = "phi-2.Q4_K_M.gguf"
 MODEL_DIR = "models"
 MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
     model_path=model_path,
     n_ctx=8192,  # Can increase depending on memory
     n_threads=recommended_threads,
+    n_batch=32,  # adjust depending on RAM
     use_mlock=True,  # lock model in RAM for faster access
     n_gpu_layers=0,  # CPU only, use >0 if GPU is present
     chat_format="chatml",  # for Hermes 2