oliver-aizip committed on
Commit
b41b93b
·
1 Parent(s): 211ca67

switch to applying chat template

Browse files
Files changed (1) hide show
  1. utils/models.py +8 -1
utils/models.py CHANGED
@@ -112,7 +112,6 @@ def run_inference(model_name, context, question):
112
  if "qwen3" in model_name.lower():
113
  print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
114
  tokenizer_kwargs["enable_thinking"] = False
115
- generation_kwargs["enable_thinking"] = False
116
 
117
  try:
118
  if model_name in tokenizer_cache:
@@ -138,6 +137,7 @@ def run_inference(model_name, context, question):
138
  if generation_interrupt.is_set():
139
  return ""
140
 
 
141
  pipe = pipeline(
142
  "text-generation",
143
  model=model_name,
@@ -148,6 +148,13 @@ def run_inference(model_name, context, question):
148
  )
149
 
150
  text_input = format_rag_prompt(question, context, accepts_sys)
 
 
 
 
 
 
 
151
 
152
  # Check interrupt before generation
153
  if generation_interrupt.is_set():
 
112
  if "qwen3" in model_name.lower():
113
  print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
114
  tokenizer_kwargs["enable_thinking"] = False
 
115
 
116
  try:
117
  if model_name in tokenizer_cache:
 
137
  if generation_interrupt.is_set():
138
  return ""
139
 
140
+
141
  pipe = pipeline(
142
  "text-generation",
143
  model=model_name,
 
148
  )
149
 
150
  text_input = format_rag_prompt(question, context, accepts_sys)
151
+ formatted = tokenizer.apply_chat_template(
152
+ text_input,
153
+ tokenize=False,
154
+ **tokenizer_kwargs,
155
+ )
156
+
157
+
158
 
159
  # Check interrupt before generation
160
  if generation_interrupt.is_set():