Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix: add generic fallback to detect trailing <think> tags in Jinja templates and handle forced-open reasoning blocks

- Detect trailing <think> tags in generic chat templates, trim whitespace, and either append
  the closing tag or mark the reasoning block as forced-open based on enable_thinking
- Added a regression test covering a fallback template that opens the reasoning block in the
  prompt and verifies prompt differences, forced-open behaviour, and reasoning parsing
- Now compatible with models using the default Jinja chat template, such as
  https://huggingface.co/unsloth/GLM-Z1-32B-0414-GGUF
  • Loading branch information
ServeurpersoCom committed Oct 6, 2025
commit 9b47a58c4c2f11495329a755a1238dae61aec4d9
16 changes: 16 additions & 0 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <minja/chat-template.hpp>
#include <minja/minja.hpp>

#include <cctype>
#include <cstdio>
#include <exception>
#include <iostream>
Expand Down Expand Up @@ -2598,6 +2599,21 @@ static common_chat_params common_chat_params_init_without_tools(const common_cha
} else {
data.grammar = inputs.grammar;
}

static constexpr size_t think_tag_len = 7; // strlen("<think>")
size_t prompt_trimmed_size = data.prompt.size();
while (prompt_trimmed_size > 0 &&
std::isspace(static_cast<unsigned char>(data.prompt[prompt_trimmed_size - 1]))) {
--prompt_trimmed_size;
}
if (prompt_trimmed_size >= think_tag_len &&
data.prompt.compare(prompt_trimmed_size - think_tag_len, think_tag_len, "<think>") == 0) {
if (!inputs.enable_thinking) {
data.prompt += "</think>";
} else {
data.thinking_forced_open = true;
}
}
return data;
}

Expand Down
45 changes: 45 additions & 0 deletions tests/test-chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,51 @@ static void test_template_output_parsers() {
// /* expect_grammar_triggered= */ true,
// /* test_grammar_if_triggered= */ false);
}
{
// Generic fallback template that appends <think> when add_generation_prompt is true.
// Regression test for the generic (no recognized format) fallback path: when the
// chat template itself leaves a trailing <think> tag at the end of the generation
// prompt, the chat code must either close the tag (enable_thinking == false) or
// report the reasoning block as forced-open (enable_thinking == true).
static const char * tmpl_str = R"(
{% for message in messages %}
<|{{ message.role }}|>
{{ message.content }}
{% endfor %}
{% if add_generation_prompt %}<|assistant|>
<think>
{% endif %}
)";

auto tmpls = common_chat_templates_ptr(common_chat_templates_init(/* model= */ nullptr, tmpl_str));

common_chat_templates_inputs inputs_base;
inputs_base.messages = { message_user };
inputs_base.add_generation_prompt = true;

// With thinking disabled, the dangling <think> must be closed immediately:
// the prompt ends with "</think>" and the forced-open flag stays false.
auto inputs_no_thinking = inputs_base;
inputs_no_thinking.enable_thinking = false;
auto params_no_thinking = common_chat_templates_apply(tmpls.get(), inputs_no_thinking);
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params_no_thinking.format);
assert_equals(false, params_no_thinking.thinking_forced_open);
assert_equals(true, string_ends_with(params_no_thinking.prompt, "</think>"));

// With thinking enabled, the prompt keeps the open <think> (modulo trailing
// whitespace, hence the string_strip) and thinking_forced_open is set.
auto inputs_with_thinking = inputs_base;
inputs_with_thinking.enable_thinking = true;
auto params_with_thinking = common_chat_templates_apply(tmpls.get(), inputs_with_thinking);
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params_with_thinking.format);
assert_equals(true, params_with_thinking.thinking_forced_open);
assert_equals(true, string_ends_with(string_strip(params_with_thinking.prompt), "<think>"));

// Since the two modes produce observably different prompts, the template must be
// reported as supporting the enable_thinking toggle.
assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));

// Parsing with thinking_forced_open: everything before the first "</think>" is
// reasoning content, the remainder is the regular assistant answer.
common_chat_syntax syntax;
syntax.format = params_with_thinking.format;
syntax.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
syntax.thinking_forced_open = params_with_thinking.thinking_forced_open;

assert_msg_equals(simple_assist_msg("Final answer", "Reasoning trace"),
common_chat_parse(
"Reasoning trace</think>Final answer",
/* is_partial= */ false,
syntax));
}
{
// Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
Expand Down