Spaces:
Sleeping
Sleeping
Commit 88e6118
1 parent: 358cd20
Add n_gpu_layers parameter to Llama initialization
Browse files
utils.py
CHANGED
@@ -35,7 +35,7 @@ else:
 
 if in_memory_llm is None and USE_HTTP_SERVER is False:
     print("Loading model into memory. If you didn't want this, set the USE_HTTP_SERVER environment variable to 'true'.")
-    in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096)
+    in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096, n_gpu_layers=20)
 
 def llm_streaming(
     prompt: str, pydantic_model_class, return_pydantic_object=False