Spaces:
Sleeping
Sleeping
Luke Stanley
committed on
Commit
·
9475016
1
Parent(s):
976ea17
Ensure N_GPU_LAYERS is int
Browse files
utils.py
CHANGED
|
@@ -19,7 +19,7 @@ from llama_cpp import Llama, LlamaGrammar, json_schema_to_gbnf
|
|
| 19 |
URL = "http://localhost:5834/v1/chat/completions"
|
| 20 |
in_memory_llm = None
|
| 21 |
|
| 22 |
-
N_GPU_LAYERS = env.get("N_GPU_LAYERS",
|
| 23 |
CONTEXT_SIZE = int(env.get("CONTEXT_SIZE", 4096))
|
| 24 |
LLM_MODEL_PATH = env.get("LLM_MODEL_PATH", None)
|
| 25 |
USE_HTTP_SERVER = env.get("USE_HTTP_SERVER", "false").lower() == "true"
|
|
@@ -147,3 +147,27 @@ def query_ai_prompt(prompt, replacements, model_class, in_memory=True):
|
|
| 147 |
return llm_stream_sans_network(prompt, model_class)
|
| 148 |
else:
|
| 149 |
return llm_streaming(prompt, model_class)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
URL = "http://localhost:5834/v1/chat/completions"
|
| 20 |
in_memory_llm = None
|
| 21 |
|
| 22 |
+
N_GPU_LAYERS = int(env.get("N_GPU_LAYERS", 20))  # Default to 20 layers; set to -1 to offload all layers if available
|
| 23 |
CONTEXT_SIZE = int(env.get("CONTEXT_SIZE", 4096))
|
| 24 |
LLM_MODEL_PATH = env.get("LLM_MODEL_PATH", None)
|
| 25 |
USE_HTTP_SERVER = env.get("USE_HTTP_SERVER", "false").lower() == "true"
|
|
|
|
| 147 |
return llm_stream_sans_network(prompt, model_class)
|
| 148 |
else:
|
| 149 |
return llm_streaming(prompt, model_class)
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def llm_stream_sans_network_simple(
|
| 153 |
+
prompt: str, json_schema:str
|
| 154 |
+
):
|
| 155 |
+
grammar = LlamaGrammar.from_json_schema(json_schema)
|
| 156 |
+
|
| 157 |
+
stream = in_memory_llm(
|
| 158 |
+
prompt,
|
| 159 |
+
max_tokens=MAX_TOKENS,
|
| 160 |
+
temperature=TEMPERATURE,
|
| 161 |
+
grammar=grammar,
|
| 162 |
+
stream=True
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
+
output_text = ""
|
| 166 |
+
for chunk in stream:
|
| 167 |
+
result = chunk["choices"][0]
|
| 168 |
+
print(result["text"], end='', flush=True)
|
| 169 |
+
output_text = output_text + result["text"]
|
| 170 |
+
#yield result["text"]
|
| 171 |
+
|
| 172 |
+
print('\n')
|
| 173 |
+
return output_text
|