Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
b41b93b
1
Parent(s):
211ca67
switch to applying chat template
Browse files
- utils/models.py +8 -1
utils/models.py
CHANGED
|
@@ -112,7 +112,6 @@ def run_inference(model_name, context, question):
|
|
| 112 |
if "qwen3" in model_name.lower():
|
| 113 |
print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
|
| 114 |
tokenizer_kwargs["enable_thinking"] = False
|
| 115 |
- generation_kwargs["enable_thinking"] = False
|
| 116 |
|
| 117 |
try:
|
| 118 |
if model_name in tokenizer_cache:
|
|
@@ -138,6 +137,7 @@ def run_inference(model_name, context, question):
|
|
| 138 |
if generation_interrupt.is_set():
|
| 139 |
return ""
|
| 140 |
|
|
|
|
| 141 |
pipe = pipeline(
|
| 142 |
"text-generation",
|
| 143 |
model=model_name,
|
|
@@ -148,6 +148,13 @@ def run_inference(model_name, context, question):
|
|
| 148 |
)
|
| 149 |
|
| 150 |
text_input = format_rag_prompt(question, context, accepts_sys)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
# Check interrupt before generation
|
| 153 |
if generation_interrupt.is_set():
|
|
|
|
| 112 |
if "qwen3" in model_name.lower():
|
| 113 |
print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
|
| 114 |
tokenizer_kwargs["enable_thinking"] = False
|
|
|
|
| 115 |
|
| 116 |
try:
|
| 117 |
if model_name in tokenizer_cache:
|
|
|
|
| 137 |
if generation_interrupt.is_set():
|
| 138 |
return ""
|
| 139 |
|
| 140 |
+
|
| 141 |
pipe = pipeline(
|
| 142 |
"text-generation",
|
| 143 |
model=model_name,
|
|
|
|
| 148 |
)
|
| 149 |
|
| 150 |
text_input = format_rag_prompt(question, context, accepts_sys)
|
| 151 |
+ formatted = tokenizer.apply_chat_template(
|
| 152 |
+     text_input,
|
| 153 |
+     tokenize=False,
|
| 154 |
+     **tokenizer_kwargs,
|
| 155 |
+ )
|
| 156 |
+
|
| 157 |
+
|
| 158 |
|
| 159 |
# Check interrupt before generation
|
| 160 |
if generation_interrupt.is_set():
|