Spaces:
Running
Running
support chat_template.json (and bump dependencies)
Browse files
- README.md +1 -1
- app.py +31 -11
- requirements.txt +3 -3
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 💬📝
|
|
4 |
colorFrom: purple
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.
|
8 |
python_version: 3.11
|
9 |
app_file: app.py
|
10 |
pinned: false
|
|
|
4 |
colorFrom: purple
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.38.0
|
8 |
python_version: 3.11
|
9 |
app_file: app.py
|
10 |
pinned: false
|
app.py
CHANGED
@@ -14,7 +14,7 @@ hfapi = HfApi()
|
|
14 |
|
15 |
class ModelFiles(StrEnum):
|
16 |
CHAT_TEMPLATE_JSON = "chat_template.json"
|
17 |
-
TOKENIZER_CHAT_TEMPLATE = "
|
18 |
TOKENIZER_CONFIG = "tokenizer_config.json"
|
19 |
TOKENIZER_INVERSE_TEMPLATE = "inverse_template.jinja"
|
20 |
|
@@ -325,21 +325,21 @@ example_values = [
|
|
325 |
"content": [
|
326 |
{
|
327 |
"type": "text",
|
328 |
-
"
|
329 |
},
|
330 |
{
|
331 |
"type": "image"
|
332 |
},
|
333 |
{
|
334 |
"type": "text",
|
335 |
-
"
|
336 |
},
|
337 |
{
|
338 |
"type": "audio"
|
339 |
},
|
340 |
{
|
341 |
"type": "text",
|
342 |
-
"
|
343 |
},
|
344 |
{
|
345 |
"type": "video"
|
@@ -379,7 +379,7 @@ class TokenizerConfig():
|
|
379 |
@chat_template.setter
|
380 |
def chat_template(self, value: str | list | None):
|
381 |
if not value:
|
382 |
-
self.chat_templates
|
383 |
elif isinstance(value, str):
|
384 |
self.chat_templates = {
|
385 |
"default": value,
|
@@ -711,6 +711,9 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
711 |
org_template_tool_use = ""
|
712 |
org_template_rag = ""
|
713 |
|
|
|
|
|
|
|
714 |
for config_file_name in (ModelFiles.CHAT_TEMPLATE_JSON, ModelFiles.TOKENIZER_CONFIG):
|
715 |
config_file = info.get(config_file_name, {})
|
716 |
org_config = config_file.get("data")
|
@@ -719,7 +722,7 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
719 |
if org_content and ("chat_template" in org_content or not org_template):
|
720 |
tokenizer_config = TokenizerConfig(org_content)
|
721 |
|
722 |
-
org_template = tokenizer_config.chat_templates.get("default") or ""
|
723 |
org_template_tool_use = tokenizer_config.chat_templates.get("tool_use") or ""
|
724 |
org_template_rag = tokenizer_config.chat_templates.get("rag") or ""
|
725 |
# org_template_inverse = tokenizer_config.inverse_template or ""
|
@@ -738,9 +741,6 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
738 |
for token in unified_diff(new_config.splitlines(keepends = True), org_config.splitlines(keepends = True), fromfile = config_file_name, tofile = config_file_name)
|
739 |
]
|
740 |
|
741 |
-
tokenizer_chat_template = info.get(ModelFiles.TOKENIZER_CHAT_TEMPLATE, {})
|
742 |
-
org_template = tokenizer_chat_template.get("data", org_template)
|
743 |
-
|
744 |
tokenizer_inverse_template = info.get(ModelFiles.TOKENIZER_INVERSE_TEMPLATE, {})
|
745 |
org_template_inverse = tokenizer_inverse_template.get("data", org_template_inverse)
|
746 |
|
@@ -1171,6 +1171,24 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
1171 |
revision = parent_commit or branch,
|
1172 |
token = oauth_token.token if oauth_token else False,
|
1173 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1174 |
except Exception as e:
|
1175 |
pass
|
1176 |
else:
|
@@ -1183,6 +1201,8 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
1183 |
"content": template_content,
|
1184 |
}
|
1185 |
info["chat_template"] = template_content.get("chat_template")
|
|
|
|
|
1186 |
|
1187 |
pr_details = None
|
1188 |
if branch and branch.startswith("refs/pr/"):
|
@@ -1216,8 +1236,8 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
1216 |
pr_submit: gr.Button(
|
1217 |
value = f"Commit to PR #{pr_details.num}" if pr_details else "Create Pull Request",
|
1218 |
),
|
1219 |
-
chat_template: gr.skip() if ModelFiles.CHAT_TEMPLATE_JSON not in info else gr.Code(
|
1220 |
-
value = TokenizerConfig(info[ModelFiles.CHAT_TEMPLATE_JSON]["content"]).chat_templates.get("default"),
|
1221 |
),
|
1222 |
# inverse_template: gr.skip() if ModelFiles.TOKENIZER_INVERSE_TEMPLATE not in info else gr.Code(
|
1223 |
# value = info[ModelFiles.TOKENIZER_INVERSE_TEMPLATE]["data"],
|
|
|
14 |
|
15 |
class ModelFiles(StrEnum):
|
16 |
CHAT_TEMPLATE_JSON = "chat_template.json"
|
17 |
+
TOKENIZER_CHAT_TEMPLATE = "chat_template.jinja"
|
18 |
TOKENIZER_CONFIG = "tokenizer_config.json"
|
19 |
TOKENIZER_INVERSE_TEMPLATE = "inverse_template.jinja"
|
20 |
|
|
|
325 |
"content": [
|
326 |
{
|
327 |
"type": "text",
|
328 |
+
"text": "Can this animal"
|
329 |
},
|
330 |
{
|
331 |
"type": "image"
|
332 |
},
|
333 |
{
|
334 |
"type": "text",
|
335 |
+
"text": "make this sound"
|
336 |
},
|
337 |
{
|
338 |
"type": "audio"
|
339 |
},
|
340 |
{
|
341 |
"type": "text",
|
342 |
+
"text": "while moving like this?"
|
343 |
},
|
344 |
{
|
345 |
"type": "video"
|
|
|
379 |
@chat_template.setter
|
380 |
def chat_template(self, value: str | list | None):
|
381 |
if not value:
|
382 |
+
self.chat_templates = {}
|
383 |
elif isinstance(value, str):
|
384 |
self.chat_templates = {
|
385 |
"default": value,
|
|
|
711 |
org_template_tool_use = ""
|
712 |
org_template_rag = ""
|
713 |
|
714 |
+
tokenizer_chat_template = info.get(ModelFiles.TOKENIZER_CHAT_TEMPLATE, {})
|
715 |
+
org_template = tokenizer_chat_template.get("data", org_template)
|
716 |
+
|
717 |
for config_file_name in (ModelFiles.CHAT_TEMPLATE_JSON, ModelFiles.TOKENIZER_CONFIG):
|
718 |
config_file = info.get(config_file_name, {})
|
719 |
org_config = config_file.get("data")
|
|
|
722 |
if org_content and ("chat_template" in org_content or not org_template):
|
723 |
tokenizer_config = TokenizerConfig(org_content)
|
724 |
|
725 |
+
org_template = org_template or tokenizer_config.chat_templates.get("default") or ""
|
726 |
org_template_tool_use = tokenizer_config.chat_templates.get("tool_use") or ""
|
727 |
org_template_rag = tokenizer_config.chat_templates.get("rag") or ""
|
728 |
# org_template_inverse = tokenizer_config.inverse_template or ""
|
|
|
741 |
for token in unified_diff(new_config.splitlines(keepends = True), org_config.splitlines(keepends = True), fromfile = config_file_name, tofile = config_file_name)
|
742 |
]
|
743 |
|
|
|
|
|
|
|
744 |
tokenizer_inverse_template = info.get(ModelFiles.TOKENIZER_INVERSE_TEMPLATE, {})
|
745 |
org_template_inverse = tokenizer_inverse_template.get("data", org_template_inverse)
|
746 |
|
|
|
1171 |
revision = parent_commit or branch,
|
1172 |
token = oauth_token.token if oauth_token else False,
|
1173 |
)
|
1174 |
+
|
1175 |
+
if ModelFiles.TOKENIZER_CHAT_TEMPLATE not in info and (hfapi.file_exists(
|
1176 |
+
repo,
|
1177 |
+
ModelFiles.TOKENIZER_CHAT_TEMPLATE,
|
1178 |
+
revision = branch,
|
1179 |
+
token = oauth_token.token if oauth_token else False,
|
1180 |
+
)):
|
1181 |
+
tokenizer_chat_template = hfapi.hf_hub_download(
|
1182 |
+
repo,
|
1183 |
+
ModelFiles.TOKENIZER_CHAT_TEMPLATE,
|
1184 |
+
revision = parent_commit or branch,
|
1185 |
+
token = oauth_token.token if oauth_token else False,
|
1186 |
+
)
|
1187 |
+
with open(tokenizer_chat_template, "r", encoding = "utf-8") as fp:
|
1188 |
+
template_data = fp.read()
|
1189 |
+
info[ModelFiles.TOKENIZER_CHAT_TEMPLATE] = {
|
1190 |
+
"data": template_data,
|
1191 |
+
}
|
1192 |
except Exception as e:
|
1193 |
pass
|
1194 |
else:
|
|
|
1201 |
"content": template_content,
|
1202 |
}
|
1203 |
info["chat_template"] = template_content.get("chat_template")
|
1204 |
+
elif ModelFiles.TOKENIZER_CHAT_TEMPLATE in info:
|
1205 |
+
info["chat_template"] = info[ModelFiles.TOKENIZER_CHAT_TEMPLATE].get("data")
|
1206 |
|
1207 |
pr_details = None
|
1208 |
if branch and branch.startswith("refs/pr/"):
|
|
|
1236 |
pr_submit: gr.Button(
|
1237 |
value = f"Commit to PR #{pr_details.num}" if pr_details else "Create Pull Request",
|
1238 |
),
|
1239 |
+
chat_template: gr.skip() if ModelFiles.CHAT_TEMPLATE_JSON not in info and ModelFiles.TOKENIZER_CHAT_TEMPLATE not in info else gr.Code(
|
1240 |
+
value = info.get(ModelFiles.TOKENIZER_CHAT_TEMPLATE, {}).get("data") or TokenizerConfig(info[ModelFiles.CHAT_TEMPLATE_JSON]["content"]).chat_templates.get("default"),
|
1241 |
),
|
1242 |
# inverse_template: gr.skip() if ModelFiles.TOKENIZER_INVERSE_TEMPLATE not in info else gr.Code(
|
1243 |
# value = info[ModelFiles.TOKENIZER_INVERSE_TEMPLATE]["data"],
|
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
|
2 |
-
huggingface_hub==0.
|
3 |
# gradio_huggingfacehub_search==0.0.8
|
4 |
-
transformers==4.
|
5 |
https://huggingface.co/spaces/CISCai/chat-template-editor/resolve/main/gradio_huggingfacehub_search-0.0.8-py3-none-any.whl
|
|
|
1 |
+
gradio[oauth]==5.38.0
|
2 |
+
huggingface_hub==0.33.4
|
3 |
# gradio_huggingfacehub_search==0.0.8
|
4 |
+
transformers==4.53.2
|
5 |
https://huggingface.co/spaces/CISCai/chat-template-editor/resolve/main/gradio_huggingfacehub_search-0.0.8-py3-none-any.whl
|