From 4bab03a5dd1702e8d8959b9e483bb05fbf8ca4d0 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Fri, 17 Jan 2025 14:23:59 -0500 Subject: [PATCH 1/5] chat template cleanup Signed-off-by: Jared Van Bortel --- gpt4all-chat/metadata/models3.json | 6 ++-- gpt4all-chat/src/jinja_replacements.cpp | 40 ++++++++++++------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/gpt4all-chat/metadata/models3.json b/gpt4all-chat/metadata/models3.json index 6a277ba0087e..c9ced6550542 100644 --- a/gpt4all-chat/metadata/models3.json +++ b/gpt4all-chat/metadata/models3.json @@ -12,7 +12,7 @@ "type": "qwen2", "description": "", "url": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_0.gguf", - "chatTemplate": "{{- '<|im_start|>system\\n' }}\n{% if toolList|length > 0 %}You have access to the following functions:\n{% for tool in toolList %}\nUse the function '{{tool.function}}' to: '{{tool.description}}'\n{% if tool.parameters|length > 0 %}\nparameters:\n{% for info in tool.parameters %}\n {{info.name}}:\n type: {{info.type}}\n description: {{info.description}}\n required: {{info.required}}\n{% endfor %}\n{% endif %}\n# Tool Instructions\nIf you CHOOSE to call this function ONLY reply with the following format:\n'{{tool.symbolicFormat}}'\nHere is an example. If the user says, '{{tool.examplePrompt}}', then you reply\n'{{tool.exampleCall}}'\nAfter the result you might reply with, '{{tool.exampleReply}}'\n{% endfor %}\nYou MUST include both the start and end tags when you use a function.\n\nYou are a helpful AI assistant who uses the functions to break down, analyze, perform, and verify complex reasoning tasks. You SHOULD try to verify your answers using the functions where possible.\n{% endif %}\n{{- '<|im_end|>\\n' }}\n{% for message in messages %}\n{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}\n{% endfor %}\n{% if add_generation_prompt %}\n{{ '<|im_start|>assistant\\n' }}\n{% endif %}\n", + "chatTemplate": "{{- '<|im_start|>system\\n' }}\n{% if toolList|length > 0 %}You have access to the following functions:\n{% for tool in toolList %}\nUse the function '{{tool.function}}' to: '{{tool.description}}'\n{% if tool.parameters|length > 0 %}\nparameters:\n{% for info in tool.parameters %}\n {{info.name}}:\n type: {{info.type}}\n description: {{info.description}}\n required: {{info.required}}\n{% endfor %}\n{% endif %}\n# Tool Instructions\nIf you CHOOSE to call this function ONLY reply with the following format:\n'{{tool.symbolicFormat}}'\nHere is an example. If the user says, '{{tool.examplePrompt}}', then you reply\n'{{tool.exampleCall}}'\nAfter the result you might reply with, '{{tool.exampleReply}}'\n{% endfor %}\nYou MUST include both the start and end tags when you use a function.\n\nYou are a helpful AI assistant who uses the functions to break down, analyze, perform, and verify complex reasoning tasks. You SHOULD try to verify your answers using the functions where possible.\n{% endif %}\n{{- '<|im_end|>\\n' }}\n{% for message in messages %}\n{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n' }}\n{% endfor %}\n{% if add_generation_prompt %}\n{{ '<|im_start|>assistant\\n' }}\n{% endif %}\n", "systemPrompt": "" }, { @@ -81,7 +81,7 @@ "url": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mistral-7B-DPO-GGUF/resolve/main/Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf", "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n", "systemPrompt": "", - "chatTemplate": "{%- for message in messages %}\n {{- '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}" + "chatTemplate": "{%- for message in messages %}\n {{- '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n' }}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}" }, { "order": "e", @@ -132,7 +132,7 @@ "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.gguf2.Q4_0.gguf", "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>\n", "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI.\n<|im_end|>\n", - "chatTemplate": "{%- for message in messages %}\n {{- '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}" + "chatTemplate": "{%- for message in messages %}\n {{- '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n' }}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}" }, { "order": "h", diff --git a/gpt4all-chat/src/jinja_replacements.cpp b/gpt4all-chat/src/jinja_replacements.cpp index 3c425051d737..0a02ffa5c8e3 100644 --- a/gpt4all-chat/src/jinja_replacements.cpp +++ b/gpt4all-chat/src/jinja_replacements.cpp @@ -106,6 +106,19 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {{- '<|assistant|>' }} {%- endif %} {%- endfor %})TEMPLATE", + }, + // Hermes-3-Llama-3.2-3B.Q4_0.gguf, mistral-7b-openorca.gguf2.Q4_0.gguf + { + R"TEMPLATE({% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + ' +' + message['content'] + '<|im_end|>' + ' +'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant +' }}{% endif %})TEMPLATE", + R"TEMPLATE({%- for message in messages %} + {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %})TEMPLATE", }, // Llama-3.2-1B-Instruct-Q4_0.gguf, Llama-3.2-3B-Instruct-Q4_0.gguf, SummLlama3.2-3B-Q4_0.gguf (nomic-ai/gpt4all#3309) { @@ -342,23 +355,23 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} {%- endif %})TEMPLATE", R"TEMPLATE({{- bos_token }} -{%- set date_string = strftime_now("%d %b %Y") %} +{%- set date_string = strftime_now('%d %b %Y') %} {#- This block extracts the system message, so we can slot it into the right place. #} {%- if messages[0]['role'] == 'system' %} {%- set system_message = messages[0]['content'] | trim %} {%- set loop_start = 1 %} {%- else %} - {%- set system_message = "" %} + {%- set system_message = '' %} {%- set loop_start = 0 %} {%- endif %} {#- System message #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} +{{- '<|start_header_id|>system<|end_header_id|>\n\n' }} +{{- 'Cutting Knowledge Date: December 2023\n' }} +{{- 'Today Date: ' + date_string + '\n\n' }} {{- system_message }} -{{- "<|eot_id|>" }} +{{- '<|eot_id|>' }} {%- for message in messages %} {%- if loop.index0 >= loop_start %} @@ -400,19 +413,6 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {%- endfor %} {%- if add_generation_prompt %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %})TEMPLATE", - }, - // mistral-7b-openorca.gguf2.Q4_0.gguf, Hermes-3-Llama-3.2-3B.Q4_0.gguf - { - R"TEMPLATE({% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + ' -' + message['content'] + '<|im_end|>' + ' -'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant -' }}{% endif %})TEMPLATE", - R"TEMPLATE({%- for message in messages %} - {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|im_start|>assistant\n' }} {%- endif %})TEMPLATE", }, // Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf @@ -444,7 +444,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {{- eos_token }} {%- endif %})TEMPLATE", }, - // qwen2-1_5b-instruct-q4_0.gguf, Qwen2-1.5B-Instruct.Q6_K.gguf (nomic-ai/gpt4all#3263), Qwen2-72B-Instruct.Q4_K_M.gguf + // qwen2-1_5b-instruct-q4_0.gguf (nomic-ai/gpt4all#3263), qwen2-72b-instruct-q4_0.gguf { R"TEMPLATE({% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system You are a helpful assistant.<|im_end|> From 68a358c2492187ca458740f838288b8e99f47e4e Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Fri, 17 Jan 2025 14:26:08 -0500 Subject: [PATCH 2/5] add some new chat templates to the substitution list Signed-off-by: Jared Van Bortel --- gpt4all-chat/src/jinja_replacements.cpp | 189 +++++++++++++++++++++++- 1 file changed, 187 insertions(+), 2 deletions(-) diff --git a/gpt4all-chat/src/jinja_replacements.cpp b/gpt4all-chat/src/jinja_replacements.cpp index 0a02ffa5c8e3..e9bc8b33abd6 100644 --- a/gpt4all-chat/src/jinja_replacements.cpp +++ b/gpt4all-chat/src/jinja_replacements.cpp @@ -27,7 +27,7 @@ // compatibility with older versions of GPT4All. const std::unordered_map CHAT_TEMPLATE_SUBSTITUTIONS { - // calme-2.1-phi3.5-4b.Q6_K.gguf (reported by ThilotE on Discord) + // calme-2.1-phi3.5-4b.Q6_K.gguf (reported by ThilotE on Discord), Phi-3.5-mini-instruct-Q4_0.gguf (nomic-ai/gpt4all#3345) { R"TEMPLATE({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|> ' + message['content'] + '<|end|> @@ -378,6 +378,24 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }} {%- endif %} {%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %})TEMPLATE", + }, + // Llama3-DiscoLeo-Instruct-8B-32k-v0.1-Q4_0.gguf (nomic-ai/gpt4all#3347) + { + R"TEMPLATE({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %})TEMPLATE", + R"TEMPLATE({%- for message in messages %} + {%- set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' %} + {%- if loop.index0 == 0 %} + {%- set content = bos_token + content %} + {%- endif %} + {{- content }} +{%- endfor %} {%- if add_generation_prompt %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} {%- endif %})TEMPLATE", @@ -414,6 +432,122 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {%- if add_generation_prompt %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} {%- endif %})TEMPLATE", + }, + // Mistral-Nemo-Instruct-2407-Q4_0.gguf (nomic-ai/gpt4all#3284) + { + R"TEMPLATE({%- if messages[0]["role"] == "system" %} + {%- set system_message = messages[0]["content"] %} + {%- set loop_messages = messages[1:] %} +{%- else %} + {%- set loop_messages = messages %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} +{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %} + +{#- This block checks for alternating user/assistant messages, skipping tool calling messages #} +{%- set ns = namespace() %} +{%- set ns.index = 0 %} +{%- for message in loop_messages %} + {%- if not (message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %} + {%- if (message["role"] == "user") != (ns.index % 2 == 0) %} + {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }} + {%- endif %} + {%- set ns.index = ns.index + 1 %} + {%- endif %} +{%- endfor %} + +{{- bos_token }} +{%- for message in loop_messages %} + {%- if message["role"] == "user" %} + {%- if tools is not none and (message == user_messages[-1]) %} + {{- "[AVAILABLE_TOOLS][" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- '{"type": "function", "function": {' }} + {%- for key, val in tool.items() if key != "return" %} + {%- if val is string %} + {{- '"' + key + '": "' + val + '"' }} + {%- else %} + {{- '"' + key + '": ' + val|tojson }} + {%- endif %} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- "}}" }} + {%- if not loop.last %} + {{- ", " }} + {%- else %} + {{- "]" }} + {%- endif %} + {%- endfor %} + {{- "[/AVAILABLE_TOOLS]" }} + {%- endif %} + {%- if loop.last and system_message is defined %} + {{- "[INST]" + system_message + "\n\n" + message["content"] + "[/INST]" }} + {%- else %} + {{- "[INST]" + message["content"] + "[/INST]" }} + {%- endif %} + {%- elif (message.tool_calls is defined and message.tool_calls is not none) %} + {{- "[TOOL_CALLS][" }} + {%- for tool_call in message.tool_calls %} + {%- set out = tool_call.function|tojson %} + {{- out[:-1] }} + {%- if not tool_call.id is defined or tool_call.id|length != 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }} + {%- endif %} + {{- ', "id": "' + tool_call.id + '"}' }} + {%- if not loop.last %} + {{- ", " }} + {%- else %} + {{- "]" + eos_token }} + {%- endif %} + {%- endfor %} + {%- elif message["role"] == "assistant" %} + {{- message["content"] + eos_token}} + {%- elif message["role"] == "tool_results" or message["role"] == "tool" %} + {%- if message.content is defined and message.content.content is defined %} + {%- set content = message.content.content %} + {%- else %} + {%- set content = message.content %} + {%- endif %} + {{- '[TOOL_RESULTS]{"content": ' + content|string + ", " }} + {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }} + {%- endif %} + {{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }} + {%- else %} + {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }} + {%- endif %} +{%- endfor %})TEMPLATE", + R"TEMPLATE({%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content'] %} + {%- set loop_start = 1 %} +{%- else %} + {%- set loop_start = 0 %} +{%- endif %} + +{{- bos_token }} +{%- for message in messages %} + {#- This block checks for alternating user/assistant messages, skipping tool calling messages #} + {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %} + {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }} + {%- endif %} + + {%- if message['role'] == 'user' %} + {%- if loop.last and loop_start == 1 %} + {{- '[INST]' + system_message + '\n\n' + message['content'] + '[/INST]' }} + {%- else %} + {{- '[INST]' + message['content'] + '[/INST]' }} + {%- endif %} + {%- elif message['role'] == 'assistant' %} + {{- message['content'] + eos_token }} + {%- else %} + {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }} + {%- endif %} +{%- endfor %})TEMPLATE", }, // Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf { @@ -422,10 +556,61 @@ const std::unordered_map CHAT_TEMPLATE_SUBST '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant ' }}{% endif %})TEMPLATE", R"TEMPLATE({%- for message in messages %} - {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }} + {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %})TEMPLATE", + }, + // occiglot-7b-de-en-instruct.Q4_0.gguf (nomic-ai/gpt4all#3283) + { + R"TEMPLATE({{''}}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful assistant. Please give a long and detailed answer.' %}{% endif %}{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in loop_messages %}{% if loop.index0 == 0 %}{{'<|im_start|>system +' + system_message + '<|im_end|> +'}}{% endif %}{{'<|im_start|>' + message['role'] + ' +' + message['content'] + '<|im_end|>' + ' +'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant +' }}{% endif %})TEMPLATE", + R"TEMPLATE({{- bos_token }} +{%- if messages[0]['role'] == 'system' %} + {%- set loop_start = 1 %} + {%- set system_message = messages[0]['content'] %} +{%- else %} + {%- set loop_start = 0 %} + {%- set system_message = 'You are a helpful assistant. Please give a long and detailed answer.' %} +{%- endif %} +{{- '<|im_start|>system\n' + system_message + '<|im_end|>\n' }} +{%- for message in messages %} + {%- if loop.index0 >= loop_start %} + {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }} + {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\n' }} +{%- endif %})TEMPLATE", + }, + // Phi-3.1-mini-128k-instruct-Q4_0.gguf (nomic-ai/gpt4all#3346) + { + R"TEMPLATE({% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|> +' + message['content'] + '<|end|> +'}}{% elif message['role'] == 'user' %}{{'<|user|> +' + message['content'] + '<|end|> +'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|> +' + message['content'] + '<|end|> +'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|> +' }}{% else %}{{ eos_token }}{% endif %})TEMPLATE", + R"TEMPLATE({%- for message in messages %} + {%- if message['role'] == 'system' %} + {{-'<|system|>\n' + message['content'] + '<|end|>\n'}} + {%- elif message['role'] == 'user' %} + {{- '<|user|>\n' + message['content'] + '<|end|>\n' }} + {%- elif message['role'] == 'assistant' %} + {{- '<|assistant|>\n' + message['content'] + '<|end|>\n' }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|assistant|>\n' }} +{%- else %} + {{- eos_token }} {%- endif %})TEMPLATE", }, // Phi-3-mini-4k-instruct.Q4_0.gguf From 5f08034a48ff5f083d327bc0bf5f4691c2b07edf Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Fri, 17 Jan 2025 14:32:12 -0500 Subject: [PATCH 3/5] add changelog entry Signed-off-by: Jared Van Bortel --- gpt4all-chat/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md index 5f5427669fca..cab1e1cdbdeb 100644 --- a/gpt4all-chat/CHANGELOG.md +++ b/gpt4all-chat/CHANGELOG.md @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Remove 'X is defined' checks from templates as they work incorrectly with Jinja2Cpp ([#3372](https://github.com/nomic-ai/gpt4all/pull/3372)) - Jinja2Cpp: Add 'if' requirement for 'else' parsing to fix crash ([#3373](https://github.com/nomic-ai/gpt4all/pull/3373)) - Save chats on quit, even if the window isn't closed first ([#3387](https://github.com/nomic-ai/gpt4all/pull/3387)) +- Add chat template replacements for five new models ([#3393](https://github.com/nomic-ai/gpt4all/pull/3393)) ## [3.6.1] - 2024-12-20 From 672f294c897918fd6a87422ea1fecdc3d688777f Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Fri, 17 Jan 2025 14:46:48 -0500 Subject: [PATCH 4/5] fix EM German Mistral prompt template Signed-off-by: Jared Van Bortel --- gpt4all-chat/CHANGELOG.md | 2 +- gpt4all-chat/metadata/models3.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md index cab1e1cdbdeb..d6584bb2b344 100644 --- a/gpt4all-chat/CHANGELOG.md +++ b/gpt4all-chat/CHANGELOG.md @@ -12,7 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Remove 'X is defined' checks from templates as they work incorrectly with Jinja2Cpp ([#3372](https://github.com/nomic-ai/gpt4all/pull/3372)) - Jinja2Cpp: Add 'if' requirement for 'else' parsing to fix crash ([#3373](https://github.com/nomic-ai/gpt4all/pull/3373)) - Save chats on quit, even if the window isn't closed first ([#3387](https://github.com/nomic-ai/gpt4all/pull/3387)) -- Add chat template replacements for five new models ([#3393](https://github.com/nomic-ai/gpt4all/pull/3393)) +- Add chat template replacements for five new models and fix EM German Mistral ([#3393](https://github.com/nomic-ai/gpt4all/pull/3393)) ## [3.6.1] - 2024-12-20 diff --git a/gpt4all-chat/metadata/models3.json b/gpt4all-chat/metadata/models3.json index c9ced6550542..d829fad0bd89 100644 --- a/gpt4all-chat/metadata/models3.json +++ b/gpt4all-chat/metadata/models3.json @@ -426,7 +426,7 @@ "url": "https://huggingface.co/TheBloke/em_german_mistral_v01-GGUF/resolve/main/em_german_mistral_v01.Q4_0.gguf", "promptTemplate": "USER: %1 ASSISTANT: ", "systemPrompt": "Du bist ein hilfreicher Assistent. ", - "chatTemplate": "{%- set system_message = false %}\n{%- if messages[0]['role'] == 'system' %}\n {%- set loop_start = 1 %}\n {%- set system_message = true %}\n {{- messages[0]['content'] }}\n{%- else %}\n {%- set loop_start = 0 %}\n{%- endif %}\n{%- for message in messages %}\n {%- if loop.index0 >= loop_start %}\n {%- if (not loop.first) or (system_message is not none) %}\n {{- ' ' }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {{- 'USER: ' + message['content'] }}\n {%- elif message['role'] == 'assistant' %}\n {{- 'ASSISTANT: ' + message['content'] }}\n {%- else %}\n {{- raise_exception('After the optional system message, conversation roles must be either user or assistant.') }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {%- if messages %}\n {{- ' ' }}\n {%- endif %}\n {{- 'ASSISTANT:' }}\n{%- endif %}", + "chatTemplate": "{%- if messages[0]['role'] == 'system' %}\n {%- set loop_start = 1 %}\n {{- messages[0]['content'] }}\n{%- else %}\n {%- set loop_start = 0 %}\n{%- endif %}\n{%- for message in messages %}\n {%- if loop.index0 >= loop_start %}\n {%- if not loop.first %}\n {{- ' ' }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {{- 'USER: ' + message['content'] }}\n {%- elif message['role'] == 'assistant' %}\n {{- 'ASSISTANT: ' + message['content'] }}\n {%- else %}\n {{- raise_exception('After the optional system message, conversation roles must be either user or assistant.') }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {%- if messages %}\n {{- ' ' }}\n {%- endif %}\n {{- 'ASSISTANT:' }}\n{%- endif %}", "systemMessage": "Du bist ein hilfreicher Assistent." }, { From 5ada9d6116cd2ee879c382c40ceb253f8f511e4b Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Fri, 17 Jan 2025 18:44:38 -0500 Subject: [PATCH 5/5] add comments to clarify which template is which Signed-off-by: Jared Van Bortel --- gpt4all-chat/src/jinja_replacements.cpp | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gpt4all-chat/src/jinja_replacements.cpp b/gpt4all-chat/src/jinja_replacements.cpp index e9bc8b33abd6..09b6563ef370 100644 --- a/gpt4all-chat/src/jinja_replacements.cpp +++ b/gpt4all-chat/src/jinja_replacements.cpp @@ -29,6 +29,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBSTITUTIONS { // calme-2.1-phi3.5-4b.Q6_K.gguf (reported by ThilotE on Discord), Phi-3.5-mini-instruct-Q4_0.gguf (nomic-ai/gpt4all#3345) { + // original R"TEMPLATE({% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|> ' + message['content'] + '<|end|> '}}{% elif message['role'] == 'user' %}{{'<|user|> @@ -37,6 +38,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST ' + message['content'] + '<|end|> '}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|> ' }}{% else %}{{ eos_token }}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({%- for message in messages %} {%- if message['role'] == 'system' and message['content'] %} {{- '<|system|>\n' + message['content'] + '<|end|>\n' }} @@ -54,10 +56,12 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // gemma-2-9b-it-Q4_0.gguf (nomic-ai/gpt4all#3282) { + // original R"TEMPLATE({{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' ' + message['content'] | trim + ' ' }}{% endfor %}{% if add_generation_prompt %}{{'model '}}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({{- bos_token }} {%- if messages[0]['role'] == 'system' %} {{- raise_exception('System role not supported') }} @@ -79,6 +83,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // ghost-7b-v0.9.1-Q4_0.gguf { + // original R"TEMPLATE({% for message in messages %} {% if message['role'] == 'user' %} {{ '<|user|> @@ -94,6 +99,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {{ '<|assistant|>' }} {% endif %} {% endfor %})TEMPLATE", + // replacement R"TEMPLATE({%- for message in messages %} {%- if message['role'] == 'user' %} {{- '<|user|>\n' + message['content'] + eos_token }} @@ -109,10 +115,12 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Hermes-3-Llama-3.2-3B.Q4_0.gguf, mistral-7b-openorca.gguf2.Q4_0.gguf { + // original R"TEMPLATE({% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + ' ' + message['content'] + '<|im_end|>' + ' '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant ' }}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({%- for message in messages %} {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }} {%- endfor %} @@ -122,6 +130,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Llama-3.2-1B-Instruct-Q4_0.gguf, Llama-3.2-3B-Instruct-Q4_0.gguf, SummLlama3.2-3B-Q4_0.gguf (nomic-ai/gpt4all#3309) { + // original R"TEMPLATE({{- bos_token }} {%- if custom_tools is defined %} {%- set tools = custom_tools %} @@ -215,6 +224,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {%- if add_generation_prompt %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} {%- endif %})TEMPLATE", + // replacement R"TEMPLATE({{- bos_token }} {%- set date_string = strftime_now('%d %b %Y') %} @@ -245,6 +255,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Llama-3.3-70B-Instruct-Q4_0.gguf (nomic-ai/gpt4all#3305) { + // original R"TEMPLATE({{- bos_token }} {%- if custom_tools is defined %} {%- set tools = custom_tools %} @@ -354,6 +365,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {%- if add_generation_prompt %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} {%- endif %})TEMPLATE", + // replacement R"TEMPLATE({{- bos_token }} {%- set date_string = strftime_now('%d %b %Y') %} @@ -384,11 +396,13 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Llama3-DiscoLeo-Instruct-8B-32k-v0.1-Q4_0.gguf (nomic-ai/gpt4all#3347) { + // original R"TEMPLATE({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> '+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> ' }}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({%- for message in messages %} {%- set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' %} {%- if loop.index0 == 0 %} @@ -402,11 +416,13 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Meta-Llama-3.1-8B-Instruct-128k-Q4_0.gguf { + // original R"TEMPLATE({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> '+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|> ' }})TEMPLATE", + // replacement R"TEMPLATE({%- set loop_messages = messages %} {%- for message in loop_messages %} {%- set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %} @@ -419,11 +435,13 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Meta-Llama-3-8B-Instruct.Q4_0.gguf { + // original R"TEMPLATE({% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> '+ message['content'] | trim + '<|eot_id|>' %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> ' }}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({%- set loop_messages = messages %} {%- for message in loop_messages %} {%- set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %} @@ -435,6 +453,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Mistral-Nemo-Instruct-2407-Q4_0.gguf (nomic-ai/gpt4all#3284) { + // original R"TEMPLATE({%- if messages[0]["role"] == "system" %} {%- set system_message = messages[0]["content"] %} {%- set loop_messages = messages[1:] %} @@ -522,6 +541,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }} {%- endif %} {%- endfor %})TEMPLATE", + // replacement R"TEMPLATE({%- if messages[0]['role'] == 'system' %} {%- set system_message = messages[0]['content'] %} {%- set loop_start = 1 %} @@ -551,10 +571,12 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Nous-Hermes-2-Mistral-7B-DPO.Q4_0.gguf { + // original R"TEMPLATE({% for message in messages %}{{'<|im_start|>' + message['role'] + ' ' + message['content'] + '<|im_end|>' + ' '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant ' }}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({%- for message in messages %} {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }} {%- endfor %} @@ -564,12 +586,14 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // occiglot-7b-de-en-instruct.Q4_0.gguf (nomic-ai/gpt4all#3283) { + // original R"TEMPLATE({{''}}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful assistant. Please give a long and detailed answer.' %}{% endif %}{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in loop_messages %}{% if loop.index0 == 0 %}{{'<|im_start|>system ' + system_message + '<|im_end|> '}}{% endif %}{{'<|im_start|>' + message['role'] + ' ' + message['content'] + '<|im_end|>' + ' '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant ' }}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({{- bos_token }} {%- if messages[0]['role'] == 'system' %} {%- set loop_start = 1 %} @@ -590,6 +614,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Phi-3.1-mini-128k-instruct-Q4_0.gguf (nomic-ai/gpt4all#3346) { + // original R"TEMPLATE({% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|> ' + message['content'] + '<|end|> '}}{% elif message['role'] == 'user' %}{{'<|user|> @@ -598,6 +623,7 @@ const std::unordered_map CHAT_TEMPLATE_SUBST ' + message['content'] + '<|end|> '}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|> ' }}{% else %}{{ eos_token }}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({%- for message in messages %} {%- if message['role'] == 'system' %} {{-'<|system|>\n' + message['content'] + '<|end|>\n'}} @@ -615,10 +641,12 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // Phi-3-mini-4k-instruct.Q4_0.gguf { + // original R"TEMPLATE({{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + ' ' + message['content'] + '<|end|> ' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|> ' }}{% else %}{{ eos_token }}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({{- bos_token }} {%- for message in messages %} {{- '<|' + message['role'] + '|>\n' + message['content'] + '<|end|>\n' }} @@ -631,12 +659,14 @@ const std::unordered_map CHAT_TEMPLATE_SUBST }, // qwen2-1_5b-instruct-q4_0.gguf (nomic-ai/gpt4all#3263), qwen2-72b-instruct-q4_0.gguf { + // original R"TEMPLATE({% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system You are a helpful assistant.<|im_end|> ' }}{% endif %}{{'<|im_start|>' + message['role'] + ' ' + message['content'] + '<|im_end|>' + ' '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant ' }}{% endif %})TEMPLATE", + // replacement R"TEMPLATE({%- for message in messages %} {%- if loop.first and messages[0]['role'] != 'system' %} {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}