Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix: add generic fallback to detect trailing <think> tags in Jinja templates and handle forced-open reasoning blocks

- Detect trailing <think> tags in generic chat templates, trim whitespace, and either append
  the closing tag or mark the reasoning block as forced-open based on enable_thinking
- Added a regression test covering a fallback template that opens the reasoning block in the
  prompt and verifies prompt differences, forced-open behaviour, and reasoning parsing
- Now compatible with models using the default Jinja chat template, such as
  https://huggingface.co/unsloth/GLM-Z1-32B-0414-GGUF
  • Loading branch information
ServeurpersoCom committed Oct 6, 2025
commit 9b47a58c4c2f11495329a755a1238dae61aec4d9
16 changes: 16 additions & 0 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <minja/chat-template.hpp>
#include <minja/minja.hpp>

#include <cctype>
#include <cstdio>
#include <exception>
#include <iostream>
Expand Down Expand Up @@ -2598,6 +2599,21 @@ static common_chat_params common_chat_params_init_without_tools(const common_cha
} else {
data.grammar = inputs.grammar;
}

static constexpr size_t think_tag_len = 7; // strlen("<think>")
size_t prompt_trimmed_size = data.prompt.size();
while (prompt_trimmed_size > 0 &&
std::isspace(static_cast<unsigned char>(data.prompt[prompt_trimmed_size - 1]))) {
--prompt_trimmed_size;
}
if (prompt_trimmed_size >= think_tag_len &&
data.prompt.compare(prompt_trimmed_size - think_tag_len, think_tag_len, "<think>") == 0) {
if (!inputs.enable_thinking) {
data.prompt += "</think>";
} else {
data.thinking_forced_open = true;
}
}
return data;
}

Expand Down
45 changes: 45 additions & 0 deletions tests/test-chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,51 @@ static void test_template_output_parsers() {
// /* expect_grammar_triggered= */ true,
// /* test_grammar_if_triggered= */ false);
}
{
// Generic fallback template that appends <think> when add_generation_prompt is true.
// Regression test for the generic (no recognized format) fallback path: when the
// chat template itself leaves a trailing <think> tag at the end of the generation
// prompt, the chat code must either close the tag (enable_thinking == false) or
// report the reasoning block as forced-open (enable_thinking == true).
static const char * tmpl_str = R"(
{% for message in messages %}
<|{{ message.role }}|>
{{ message.content }}
{% endfor %}
{% if add_generation_prompt %}<|assistant|>
<think>
{% endif %}
)";

auto tmpls = common_chat_templates_ptr(common_chat_templates_init(/* model= */ nullptr, tmpl_str));

common_chat_templates_inputs inputs_base;
inputs_base.messages = { message_user };
inputs_base.add_generation_prompt = true;

// With thinking disabled, the dangling <think> must be closed immediately:
// the prompt ends with "</think>" and the forced-open flag stays false.
auto inputs_no_thinking = inputs_base;
inputs_no_thinking.enable_thinking = false;
auto params_no_thinking = common_chat_templates_apply(tmpls.get(), inputs_no_thinking);
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params_no_thinking.format);
assert_equals(false, params_no_thinking.thinking_forced_open);
assert_equals(true, string_ends_with(params_no_thinking.prompt, "</think>"));

// With thinking enabled, the prompt keeps the open <think> (modulo trailing
// whitespace, hence the string_strip) and thinking_forced_open is set.
auto inputs_with_thinking = inputs_base;
inputs_with_thinking.enable_thinking = true;
auto params_with_thinking = common_chat_templates_apply(tmpls.get(), inputs_with_thinking);
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params_with_thinking.format);
assert_equals(true, params_with_thinking.thinking_forced_open);
assert_equals(true, string_ends_with(string_strip(params_with_thinking.prompt), "<think>"));

// Since the two modes produce observably different prompts, the template must be
// reported as supporting the enable_thinking toggle.
assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));

// Parsing with thinking_forced_open: everything before the first "</think>" is
// reasoning content, the remainder is the regular assistant answer.
common_chat_syntax syntax;
syntax.format = params_with_thinking.format;
syntax.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
syntax.thinking_forced_open = params_with_thinking.thinking_forced_open;

assert_msg_equals(simple_assist_msg("Final answer", "Reasoning trace"),
common_chat_parse(
"Reasoning trace</think>Final answer",
/* is_partial= */ false,
syntax));
}
{
// Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
Expand Down