Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
5021e53
1
Parent(s):
6c63a2d
specified device
Browse files
- utils/models.py +1 -1
utils/models.py
CHANGED
@@ -193,7 +193,7 @@ def run_inference(model_name, context, question, result_queue):
|
|
193 |
# else:
|
194 |
# # Decode the generated tokens, excluding the input tokens
|
195 |
# result = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
|
196 |
-
llm = LLM(model_name, dtype=torch.bfloat16, hf_token=True, enforce_eager=True)
|
197 |
params = SamplingParams(
|
198 |
max_tokens=512,
|
199 |
)
|
|
|
193 |
# else:
|
194 |
# # Decode the generated tokens, excluding the input tokens
|
195 |
# result = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
|
196 |
+
llm = LLM(model_name, dtype=torch.bfloat16, hf_token=True, enforce_eager=True, device="cpu")
|
197 |
params = SamplingParams(
|
198 |
max_tokens=512,
|
199 |
)
|