|
|
|
class PromptFormat:

    def __init__(self):
        pass

    # Assemble one round of the conversation from the user prompt, the
    # assistant response so far (if any) and an optional system prompt
    def format(self, prompt, response, system_prompt, settings):
        raise NotImplementedError

    # Token IDs and/or strings that should end generation for this format
    def stop_conditions(self, tokenizer, settings):
        raise NotImplementedError

    def is_instruct(self):
        raise NotImplementedError

    # Whether special tokens in the template should be encoded as such
    def encode_special_tokens(self):
        return True

    # Whether the context should begin with a BOS token
    def context_bos(self):
        return False

    @staticmethod
    def supports_system_prompt():
        return True
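
# A minimal sketch of the interface above, for illustration only: the class
# name and the plain "User:/Assistant:" template are invented here, and the
# class is deliberately not registered in prompt_formats below.
class PromptFormat_example(PromptFormat):

    description = "Illustrative format (not a real model template)"

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return [tokenizer.eos_token_id, "\nUser:"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip():
            text += system_prompt.strip() + "\n\n"
        text += "User: " + prompt + "\nAssistant:"
        if response:
            text += " " + response
        return text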
|
|
|
|
|
class PromptFormat_raw(PromptFormat):

    description = "Model-agnostic mode simulating a raw chatlog between two or more users"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return False

    # Raw mode has no fixed template or stop strings; the chat log is
    # assembled by the caller
    def stop_conditions(self, tokenizer, settings):
        raise NotImplementedError

    def format(self, prompt, response, system_prompt, settings):
        raise NotImplementedError

    def encode_special_tokens(self):
        return True
|
|
|
|
|
class PromptFormat_llama(PromptFormat):

    description = "Llama-chat, Llama2-chat and Mistral-instruct models"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id]

    def format(self, prompt, response, system_prompt, settings):
        text = "<s>[INST] "
        if system_prompt and system_prompt.strip() != "":
            text += "<<SYS>>\n"
            text += system_prompt
            text += "\n<</SYS>>\n\n "
        text += prompt
        text += " [/INST]"
        if response:
            text += response
            text += "</s>"
        return text
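
# For reference, one turn produced by the format above (illustrative call;
# the argument values are placeholders):
#
#   PromptFormat_llama().format("Hello!", None, "Be concise.", None)
#   -> '<s>[INST] <<SYS>>\nBe concise.\n<</SYS>>\n\n Hello! [/INST]'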
|
|
|
class PromptFormat_mistral(PromptFormat):

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id]

    def context_bos(self):
        return True
|
|
|
class PromptFormat_mistralv1(PromptFormat_mistral):
    """
    <s> [INST] user message [/INST] assistant message</s> [INST] new user message [/INST]
    """

    description = "Mistral tokenizer v1"

    def __init__(self):
        super().__init__()

    def format(self, p, r, sp, settings):
        if sp and sp.strip():
            text = f" [INST] {sp.strip()}\n\n {p.strip()} [/INST]"
        else:
            text = f" [INST] {p.strip()} [/INST]"
        if r:
            text += f" {r.strip()}</s>"
        return text
|
|
|
class PromptFormat_mistralv2v3(PromptFormat_mistral):
    """
    <s>[INST] user message[/INST] assistant message</s>[INST] new user message[/INST]
    """

    description = "Mistral tokenizer v2/v3"

    def __init__(self):
        super().__init__()

    def format(self, p, r, sp, settings):
        if sp and sp.strip():
            text = f"[INST] {sp.strip()}\n\n {p.strip()}[/INST]"
        else:
            text = f"[INST] {p.strip()}[/INST]"
        if r:
            text += f" {r.strip()}</s>"
        return text
|
|
|
class PromptFormat_mistralTekken(PromptFormat_mistral):
    """
    <s>[INST]user message[/INST]assistant message</s>[INST]new user message[/INST]
    """

    description = "Mistral tokenizer V3 (Tekken)"

    def format(self, p, r, sp, settings):
        if sp and sp.strip():
            text = f"[INST]{sp.strip()}\n\n{p.strip()}[/INST]"
        else:
            text = f"[INST]{p.strip()}[/INST]"
        if r:
            text += f"{r.strip()}</s>"
        return text
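
# The three Mistral templates above differ only in the whitespace around the
# [INST] markers, as the class docstrings show. For reference (illustrative
# call with a placeholder prompt and no system prompt):
#
#   PromptFormat_mistralv1().format("Hello!", None, "", None)
#   -> ' [INST] Hello! [/INST]'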
|
|
|
|
|
class PromptFormat_llama3(PromptFormat):

    description = "Llama-3 instruct template"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.single_id("<|eot_id|>"),
             tokenizer.single_id("<|start_header_id|>"),
             tokenizer.eos_token_id]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "<|start_header_id|>system<|end_header_id|>\n\n"
            text += system_prompt
            text += "<|eot_id|>"
        text += "<|start_header_id|>user<|end_header_id|>\n\n"
        text += prompt
        text += "<|eot_id|>"
        text += "<|start_header_id|>assistant<|end_header_id|>\n\n"
        if response:
            text += response
            text += "<|eot_id|>"
        return text

    def context_bos(self):
        return True
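
# For reference, one turn produced by the format above (illustrative call;
# BOS is prepended separately, since context_bos() is True):
#
#   PromptFormat_llama3().format("Hello!", None, "Be concise.", None)
#   -> '<|start_header_id|>system<|end_header_id|>\n\nBe concise.<|eot_id|>'
#      '<|start_header_id|>user<|end_header_id|>\n\nHello!<|eot_id|>'
#      '<|start_header_id|>assistant<|end_header_id|>\n\n'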
|
|
|
|
|
class PromptFormat_phi3(PromptFormat):

    description = "Phi-3 instruct"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.single_id("<|end|>"),
             tokenizer.single_id("<|assistant|>"),
             tokenizer.single_id("<|endoftext|>"),
             tokenizer.eos_token_id]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "<|system|>\n"
            text += system_prompt
            text += "<|end|>\n"
        text += "<|user|>\n"
        text += prompt
        text += "<|end|>\n"
        text += "<|assistant|>\n"
        if response:
            text += response
            text += "<|end|>"
        return text

    def context_bos(self):
        return True
|
|
|
class PromptFormat_phi4(PromptFormat):

    description = "Phi-4"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id,
             "<|im_end|>"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "<|im_start|>system\n"
            text += system_prompt
            text += "\n<|im_end|>\n"
        text += "<|im_start|>user\n"
        text += prompt
        text += "<|im_end|>\n"
        text += "<|im_start|>assistant\n"
        if response:
            text += response
            text += "<|im_end|>\n"
        return text

    def context_bos(self):
        return True
|
|
|
class PromptFormat_mistrallite(PromptFormat):

    description = "MistralLite format"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id]

    def format(self, prompt, response, system_prompt, settings):
        text = "<|prompter|>"
        if system_prompt and system_prompt.strip() != "":
            text += system_prompt
            text += "</s><|assistant|>Understood.</s><|prompter|>"
        text += prompt
        text += "</s><|assistant|>"
        if response:
            text += response
            text += "</s>"
        return text
|
|
class PromptFormat_chatml(PromptFormat):

    description = "ChatML format, as used by e.g. (Mistral)Orca"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id,
             "<|im_end|>"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "<|im_start|>system\n"
            text += system_prompt
            text += "\n<|im_end|>\n"
        text += "<|im_start|>user\n"
        text += prompt
        text += "<|im_end|>\n"
        text += "<|im_start|>assistant\n"
        if response:
            text += response
            text += "<|im_end|>\n"
        return text

    def context_bos(self):
        return True
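
# For reference, one turn produced by the format above (illustrative call;
# PromptFormat_phi4 builds the same layout):
#
#   PromptFormat_chatml().format("Hello!", None, "Be concise.", None)
#   -> '<|im_start|>system\nBe concise.\n<|im_end|>\n'
#      '<|im_start|>user\nHello!<|im_end|>\n'
#      '<|im_start|>assistant\n'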
|
|
|
|
|
class PromptFormat_tinyllama(PromptFormat_chatml):

    description = "ChatML format, but ignoring special/added tokens. Use for TinyLlama-chat v0.3"

    def encode_special_tokens(self):
        return False
|
|
|
|
|
class PromptFormat_phind_codellama(PromptFormat):

    description = "Vicuna/Alpaca-like format for Phind-CodeLlama"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id, "\n### "]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += "### System Prompt\n"
            text += system_prompt
            text += "\n\n"
        text += "### User Message\n"
        text += prompt
        text += "\n\n### Assistant\n"
        if response:
            text += response
            text += "\n\n"
        return text
|
|
|
|
|
class PromptFormat_deepseek_chat(PromptFormat):

    description = "Deepseek LLM chat format"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id, "\n\nAssistant:"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += system_prompt
            text += "\n\n"
        text += "User: "
        text += prompt
        text += "\n\nAssistant:"
        if response:
            text += response
            text += "\n\n"
        return text
|
|
|
|
|
class PromptFormat_deepseek_instruct(PromptFormat):

    description = "Deepseek instruct format for 'coder' models"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id, "<|EOT|>"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            # Deepseek's BOS marker is spelled with fullwidth vertical bars
            # (U+FF5C), unlike the ASCII bars in <|EOT|>
            text += "<｜begin▁of▁sentence｜>"
            text += system_prompt
            text += "\n"
        text += "### Instruction:\n"
        text += prompt
        text += "\n### Response:\n"
        if response:
            text += response
            text += "\n<|EOT|>\n"
        return text
|
|
|
|
|
class PromptFormat_openchat(PromptFormat):

    description = "OpenChat"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id,
             "<|end_of_turn|>",
             "<|endoftext|>",
             "GPT4 Correct User:"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt and system_prompt.strip() != "":
            text += system_prompt
            text += "<|end_of_turn|>"
        text += "GPT4 Correct User:"
        text += prompt
        text += "<|end_of_turn|>"
        text += "GPT4 Correct Assistant:"
        if response:
            text += response
            text += "<|end_of_turn|>"
        return text
|
|
|
|
|
class PromptFormat_gemma(PromptFormat):

    description = "Gemma"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id,
             "<end_of_turn>"]

    # Gemma's template has no system role, so the system prompt's content is
    # not emitted; it only determines whether a BOS token is prepended here
    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt is not None:
            text += "<bos>"
        text += "<start_of_turn>user\n"
        text += prompt
        text += "<end_of_turn>\n"
        text += "<start_of_turn>model\n"
        if response:
            text += response
            text += "<end_of_turn>\n"
        return text

    @staticmethod
    def supports_system_prompt():
        return False
|
|
|
|
|
class PromptFormat_cohere(PromptFormat):

    description = "Cohere"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id,
             "<|END_OF_TURN_TOKEN|>"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt is not None:
            text += "<BOS_TOKEN>"
            text += "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>"
            text += system_prompt.strip()
            text += "<|END_OF_TURN_TOKEN|>"
        text += "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>"
        text += prompt
        text += "<|END_OF_TURN_TOKEN|>"
        text += "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
        if response:
            text += response
            text += "<|END_OF_TURN_TOKEN|>"
        return text
|
|
|
|
|
class PromptFormat_granite(PromptFormat):

    description = "Granite"

    def __init__(self):
        super().__init__()

    def is_instruct(self):
        return True

    def stop_conditions(self, tokenizer, settings):
        return \
            [tokenizer.eos_token_id,
             "\n\nQuestion:"]

    def format(self, prompt, response, system_prompt, settings):
        text = ""
        if system_prompt is not None:
            text += "System:\n"
            text += system_prompt.strip()
            text += "\n\n"
        text += "Question:\n"
        text += prompt
        text += "\n\n"
        text += "Answer:\n"
        if response:
            text += response
            text += "\n\n"
        return text

    def context_bos(self):
        return True
|
|
|
|
|
prompt_formats = \
{
    "Chat-RP": PromptFormat_raw,
    "Llama-chat": PromptFormat_llama,
    "Llama3-instruct": PromptFormat_llama3,
    "ChatML": PromptFormat_chatml,
    "TinyLlama-chat": PromptFormat_tinyllama,
    "MistralLite": PromptFormat_mistrallite,
    "Phind-CodeLlama": PromptFormat_phind_codellama,
    "Deepseek-chat": PromptFormat_deepseek_chat,
    "Deepseek-instruct": PromptFormat_deepseek_instruct,
    "OpenChat": PromptFormat_openchat,
    "Gemma": PromptFormat_gemma,
    "Cohere": PromptFormat_cohere,
    "Phi3-instruct": PromptFormat_phi3,
    "Phi4": PromptFormat_phi4,
    "Granite": PromptFormat_granite,
    "Mistral V1": PromptFormat_mistralv1,
    "Mistral V2/V3": PromptFormat_mistralv2v3,
    "Mistral V3 (Tekken)": PromptFormat_mistralTekken,
}
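
# Illustrative helper, not part of the original interface: look up a format
# by its registry name and build a single turn. The function name and the
# None passed for settings are assumptions for this sketch; settings is
# unused by the format() implementations above, and the "Chat-RP" raw mode
# does not implement format().
def example_build_prompt(format_name, user_msg, system_msg = "", response = None):
    prompt_format = prompt_formats[format_name]()
    return prompt_format.format(user_msg, response, system_msg, None)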
|
|
|
def list_prompt_formats():
    prompts = [
        {
            "name": k,
            "supports_system_prompt": v.supports_system_prompt()
        }
        for k, v in prompt_formats.items()
    ]
    return prompts
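
# Small illustrative self-check when the module is run directly (this entry
# point is an addition for demonstration, not part of the original example):
if __name__ == "__main__":
    for fmt in list_prompt_formats():
        marker = "yes" if fmt["supports_system_prompt"] else "no"
        print(f"{fmt['name']:24} system prompt: {marker}")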
|
|