Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
5021e53
1
Parent(s):
6c63a2d
specified device
Browse files
- utils/models.py +1 -1
utils/models.py
CHANGED
@@ -193,7 +193,7 @@ def run_inference(model_name, context, question, result_queue):
|
|
193 |
# else:
|
194 |
# # Decode the generated tokens, excluding the input tokens
|
195 |
# result = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
|
196 |
-
llm = LLM(model_name, dtype=torch.bfloat16, hf_token=True, enforce_eager=True)
|
197 |
params = SamplingParams(
|
198 |
max_tokens=512,
|
199 |
)
|
|
|
193 |
# else:
|
194 |
# # Decode the generated tokens, excluding the input tokens
|
195 |
# result = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
|
196 |
+
llm = LLM(model_name, dtype=torch.bfloat16, hf_token=True, enforce_eager=True, device="cpu")
|
197 |
params = SamplingParams(
|
198 |
max_tokens=512,
|
199 |
)
|