Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
b41b93b
1
Parent(s):
211ca67
switch to applying chat template
Browse files — utils/models.py (+8 −1)
utils/models.py
CHANGED
@@ -112,7 +112,6 @@ def run_inference(model_name, context, question):
|
|
112 |
if "qwen3" in model_name.lower():
|
113 |
print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
|
114 |
tokenizer_kwargs["enable_thinking"] = False
|
115 |
-
generation_kwargs["enable_thinking"] = False
|
116 |
|
117 |
try:
|
118 |
if model_name in tokenizer_cache:
|
@@ -138,6 +137,7 @@ def run_inference(model_name, context, question):
|
|
138 |
if generation_interrupt.is_set():
|
139 |
return ""
|
140 |
|
|
|
141 |
pipe = pipeline(
|
142 |
"text-generation",
|
143 |
model=model_name,
|
@@ -148,6 +148,13 @@ def run_inference(model_name, context, question):
|
|
148 |
)
|
149 |
|
150 |
text_input = format_rag_prompt(question, context, accepts_sys)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
|
152 |
# Check interrupt before generation
|
153 |
if generation_interrupt.is_set():
|
|
|
112 |
if "qwen3" in model_name.lower():
|
113 |
print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
|
114 |
tokenizer_kwargs["enable_thinking"] = False
|
|
|
115 |
|
116 |
try:
|
117 |
if model_name in tokenizer_cache:
|
|
|
137 |
if generation_interrupt.is_set():
|
138 |
return ""
|
139 |
|
140 |
+
|
141 |
pipe = pipeline(
|
142 |
"text-generation",
|
143 |
model=model_name,
|
|
|
148 |
)
|
149 |
|
150 |
text_input = format_rag_prompt(question, context, accepts_sys)
|
151 |
+
formatted = tokenizer.apply_chat_template(
|
152 |
+
text_input,
|
153 |
+
tokenize=False,
|
154 |
+
**tokenizer_kwargs,
|
155 |
+
)
|
156 |
+
|
157 |
+
|
158 |
|
159 |
# Check interrupt before generation
|
160 |
if generation_interrupt.is_set():
|