Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
f35135e
1
Parent(s):
e24b19e
change where trust_remote is applied
Browse files
- utils/models.py +2 -3
utils/models.py
CHANGED
@@ -101,9 +101,7 @@ def run_inference(model_name, context, question):
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     result = ""
-    model_kwargs = {
-        "trust_remote_code": True,
-    } # make sure qwen3 doesn't use thinking
+    model_kwargs = {} # make sure qwen3 doesn't use thinking
     if "qwen3" in model_name.lower():
         print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
         model_kwargs["enable_thinking"] = False
@@ -131,6 +129,7 @@ def run_inference(model_name, context, question):
         temperature=0.6,
         top_p=0.9,
         model_kwargs=model_kwargs,
+        trust_remote_code=True,
     )
 
     text_input = format_rag_prompt(question, context, accepts_sys)
|
|
Resulting file (new side of the diff), utils/models.py lines 101–107 and 129–135:

    101 
    102     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    103     result = ""
    104     model_kwargs = {}  # make sure qwen3 doesn't use thinking
    105     if "qwen3" in model_name.lower():
    106         print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
    107         model_kwargs["enable_thinking"] = False

    129         temperature=0.6,
    130         top_p=0.9,
    131         model_kwargs=model_kwargs,
    132         trust_remote_code=True,
    133     )
    134 
    135     text_input = format_rag_prompt(question, context, accepts_sys)