Spaces:
Sleeping
Sleeping
Commit 88e6118
1 parent: 358cd20
Add n_gpu_layers parameter to Llama initialization
Browse files
utils.py
CHANGED
@@ -35,7 +35,7 @@ else:
 
 if in_memory_llm is None and USE_HTTP_SERVER is False:
     print("Loading model into memory. If you didn't want this, set the USE_HTTP_SERVER environment variable to 'true'.")
-    in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096)
+    in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096, n_gpu_layers=20)
 
 def llm_streaming(
     prompt: str, pydantic_model_class, return_pydantic_object=False