Skip to content

Commit 92d42a3

Browse files
committed
Extract model‑specific thinking section parsing
* Added `ThinkingSectionParser` with token detection, extraction, and formatting. * Updated `ChatManager`, `ChatMessage`, and `ConversationsView` to use the new parser. * Removed the inlined token constants and parsing logic from those classes. * Adjusted UI logic to toggle between raw and formatted thinking content.
1 parent 29785d1 commit 92d42a3

File tree

6 files changed

+105
-51
lines changed

6 files changed

+105
-51
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ add_qtc_plugin(LlamaCpp
6767
llamaprojectpanel.cpp llamaprojectpanel.h
6868
llamasettings.cpp llamasettings.h
6969
llamastorage.cpp llamastorage.h
70+
llamathinkingsectionparser.cpp llamathinkingsectionparser.h
7071
llamatheme.cpp llamatheme.h
7172
llamatr.h
7273
llamatypes.h

llamachatmanager.cpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "llamachatmanager.h"
77
#include "llamasettings.h"
88
#include "llamastorage.h"
9+
#include "llamathinkingsectionparser.h"
910

1011
Q_LOGGING_CATEGORY(llamaChatNetwork, "llama.cpp.chat.network", QtWarningMsg)
1112

@@ -96,6 +97,7 @@ void ChatManager::initServerProps()
9697
m_serverProps.modalities.audio = mod.value("audio").toBool();
9798
reply->deleteLater();
9899

100+
ThinkingSectionParser::setTokensFromServerProps(m_serverProps);
99101
emit serverPropsUpdated();
100102
});
101103
}
@@ -292,12 +294,8 @@ void ChatManager::generateMessage(const QString &convId,
292294
// (root + user + assistant) after the first reply.
293295
if (msgs.size() == 3) {
294296
summarizeConversationTitle(convId, pm.id, [this, convId](const QString &title) {
295-
QString shortTitle = title;
296-
const QString endToken = "<|end|>";
297-
auto endIdx = title.indexOf(endToken);
298-
if (endIdx != -1) {
299-
shortTitle = title.mid(endIdx + endToken.size());
300-
}
297+
auto [thinking, shortTitle] = ThinkingSectionParser::parseThinkingSection(
298+
title);
301299
renameConversation(convId, shortTitle);
302300
});
303301
}
@@ -369,25 +367,31 @@ void ChatManager::followUpQuestions(const QString &convId,
369367

370368
QJsonObject choice = choices[0].toObject();
371369
QJsonObject message = choice.value("message").toObject();
372-
QString content = message.value("content").toString().trimmed();
373370

374371
// Skip the thinking part
375-
const QString endToken = "<|end|>";
376-
auto endIdx = content.lastIndexOf(endToken);
377-
if (endIdx != -1) {
378-
content = content.mid(endIdx + endToken.size());
379-
}
372+
auto [thinking, content] = ThinkingSectionParser::parseThinkingSection(
373+
message.value("content").toString().trimmed());
380374

381375
if (content.isEmpty())
382376
return;
383377

384378
// Sometimes the model continues "thinking" also in the answer
385-
if (!content.startsWith("[\"")) {
386-
auto startOfArrayIdx = content.lastIndexOf("[\"");
379+
const QString startOfArray("[\"");
380+
const QString endOfArray("\"]");
381+
382+
if (!content.startsWith(startOfArray)) {
383+
auto startOfArrayIdx = content.lastIndexOf(startOfArray);
387384
if (startOfArrayIdx != -1)
388385
content = content.mid(startOfArrayIdx);
389386
}
390387

388+
// Sometimes we have \n``` at the end
389+
if (!content.endsWith(endOfArray)) {
390+
auto endOfArrayIdx = content.lastIndexOf(endOfArray);
391+
if (endOfArrayIdx != -1)
392+
content = content.left(endOfArrayIdx + endOfArray.size());
393+
}
394+
391395
// `content` should be a JSON array of strings (plain text).
392396
QJsonParseError err;
393397
QJsonDocument arrDoc = QJsonDocument::fromJson(content.toUtf8(), &err);

llamachatmessage.cpp

Lines changed: 10 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llamachatmessage.h"
3030
#include "llamamarkdownwidget.h"
3131
#include "llamatheme.h"
32+
#include "llamathinkingsectionparser.h"
3233
#include "llamatr.h"
3334

3435
using namespace Core;
@@ -37,10 +38,6 @@ using namespace Utils;
3738

3839
namespace LlamaCpp {
3940

40-
static const QString thinkingToken("<|channel|>analysis<|message|>");
41-
static const QString endToken("<|end|>");
42-
static qsizetype notfound = -1;
43-
4441
ChatMessage::ChatMessage(const Message &msg,
4542
const QVector<qint64> &siblingLeafIds,
4643
int siblingIdx,
@@ -68,7 +65,7 @@ void ChatMessage::buildUI()
6865
connect(m_markdownLabel, &MarkdownLabel::copyToClipboard, this, &ChatMessage::onCopyToClipboard);
6966
connect(m_markdownLabel, &MarkdownLabel::saveToFile, this, &ChatMessage::onSaveToDisk);
7067

71-
if (m_msg.content.indexOf(thinkingToken) != notfound) {
68+
if (ThinkingSectionParser::hasThinkingSection(m_msg.content)) {
7269
m_thoughtToggle = new QPushButton(this);
7370
m_thoughtToggle->setText(Tr::tr("Thought Process"));
7471
m_thoughtToggle->setToolTip(Tr::tr("Click to expand / hide the thought process"));
@@ -258,39 +255,21 @@ bool ChatMessage::eventFilter(QObject *obj, QEvent *event)
258255
void ChatMessage::renderMarkdown(const QString &text)
259256
{
260257
if (m_thoughtToggle) {
261-
bool isThinking = false;
258+
auto [thinking, message] = ThinkingSectionParser::parseThinkingSection(text);
262259
if (m_thoughtToggle->isChecked()) {
263-
QString message = text;
264-
message.replace(thinkingToken, ">");
265-
auto endIdx = message.indexOf(endToken);
266-
if (endIdx != notfound) {
267-
auto newLineIdx = message.indexOf("\n");
268-
while (newLineIdx < endIdx && newLineIdx != notfound) {
269-
message.insert(newLineIdx + 1, ">");
270-
newLineIdx = message.indexOf("\n", newLineIdx + 2);
271-
}
272-
} else {
273-
isThinking = true;
274-
message.replace("\n", "\n>");
275-
}
276-
message.replace(endToken, "\n\n");
277-
m_markdownLabel->setMarkdown(message);
260+
m_markdownLabel->setMarkdown(
261+
ThinkingSectionParser::formatThinkingContent(thinking) + "\n\n" + message);
278262
} else {
279-
auto endIdx = text.indexOf(endToken);
280-
if (endIdx != notfound) {
281-
m_markdownLabel->setMarkdown(text.mid(endIdx + endToken.size()));
282-
} else {
283-
isThinking = true;
284-
m_markdownLabel->setMarkdown("");
285-
}
263+
m_markdownLabel->setMarkdown(message);
286264
}
287265

288266
static QVector<QChar> chars{u'', u'', u'', u'', u'', u'', u'', u'', u'', u''};
289267
// Dividing with 33ms results in 30fps
290268
m_thoughtToggle->setText(
291-
isThinking ? Tr::tr("Thinking %1")
292-
.arg(chars[(QDateTime::currentMSecsSinceEpoch() / 33) % chars.size()])
293-
: Tr::tr("Thought Process"));
269+
message.isEmpty()
270+
? Tr::tr("Thinking %1")
271+
.arg(chars[(QDateTime::currentMSecsSinceEpoch() / 33) % chars.size()])
272+
: Tr::tr("Thought Process"));
294273

295274
} else {
296275
m_markdownLabel->setMarkdown(text);

llamaconversationsview.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llamaconstants.h"
2727
#include "llamaconversationsmodel.h"
2828
#include "llamaconversationsview.h"
29+
#include "llamathinkingsectionparser.h"
2930
#include "llamatr.h"
3031

3132
using namespace Core;
@@ -299,12 +300,7 @@ bool ConversationsView::summarizeConversation()
299300

300301
ChatManager::instance()
301302
.summarizeConversationTitle(convId, chat.messages.last().id, [convId](const QString &title) {
302-
QString shortTitle = title;
303-
const QString endToken = "<|end|>";
304-
auto endIdx = title.indexOf(endToken);
305-
if (endIdx != -1) {
306-
shortTitle = title.mid(endIdx + endToken.size());
307-
}
303+
auto [thinking, shortTitle] = ThinkingSectionParser::parseThinkingSection(title);
308304
ChatManager::instance().renameConversation(convId, shortTitle);
309305
});
310306

llamathinkingsectionparser.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#include "llamathinkingsectionparser.h"
#include "llamatypes.h"

namespace LlamaCpp {
// Delimiter tokens marking the model-specific "thinking" section; configured
// once by setTokensFromServerProps() after the server properties arrive.
// Empty (and therefore matching nothing useful) until then.
QString ThinkingSectionParser::m_startToken;
QString ThinkingSectionParser::m_endToken;
7+
8+
void ThinkingSectionParser::setTokensFromServerProps(const LlamaCppServerProps &serverProps)
9+
{
10+
if (serverProps.model_path.contains("gpt-oss", Qt::CaseInsensitive)) {
11+
m_startToken = "<|channel|>analysis<|message|>";
12+
m_endToken = "<|end|>";
13+
} else {
14+
// Tested with DeepSeek.
15+
m_startToken = "<think>";
16+
m_endToken = "</think>";
17+
}
18+
}
19+
20+
// Splits `text` into {thinking content, remaining message}.
//  - Complete section (start and end token present): thinking is the text
//    between the tokens, message is everything after the end token (any text
//    before the start token is dropped).
//  - Unterminated section (start token only — model still streaming): the
//    whole tail is thinking, message is empty.
//  - No section: thinking is empty, message is `text` unchanged.
QPair<QString, QString> ThinkingSectionParser::parseThinkingSection(const QString &text)
{
    // qsizetype matches QString::indexOf()'s return type (Qt 6).
    const qsizetype startIdx = text.indexOf(m_startToken);
    const qsizetype endIdx = text.indexOf(m_endToken);
    const qsizetype contentStart = startIdx + m_startToken.length();

    // Use <= so an *empty* thinking section (end token immediately after the
    // start token) is still stripped from the message; this keeps the parser
    // consistent with hasThinkingSection(), which reports true in that case.
    if (startIdx != -1 && endIdx != -1 && contentStart <= endIdx) {
        QString thinkingContent = text.mid(contentStart, endIdx - contentStart);
        QString restContent = text.mid(endIdx + m_endToken.length());
        return {thinkingContent, restContent};
    }

    if (startIdx != -1 && endIdx == -1) {
        // Section not terminated yet: everything after the start token is
        // in-progress thinking; there is no message yet.
        return {text.mid(contentStart), {}};
    }

    return {{}, text};
}
37+
38+
bool ThinkingSectionParser::hasThinkingSection(const QString &text)
39+
{
40+
int startIdx = text.indexOf(m_startToken);
41+
int endIdx = text.indexOf(m_endToken);
42+
return (startIdx != -1 && endIdx != -1 && startIdx < endIdx)
43+
|| (startIdx != -1 && endIdx == -1);
44+
}
45+
46+
// Renders the thinking text as a Markdown blockquote by prefixing every
// line — including the first — with '>'.
QString ThinkingSectionParser::formatThinkingContent(const QString &thinkingContent)
{
    QString quoted(">");
    quoted += thinkingContent;
    // The leading prefix contains no newline, so replacing afterwards is
    // equivalent to replacing first and then prepending.
    quoted.replace("\n", "\n>");
    return quoted;
}
52+
} // namespace LlamaCpp

llamathinkingsectionparser.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#pragma once
2+
3+
#include <QString>
4+
5+
namespace LlamaCpp {
6+
7+
class LlamaCppServerProps;
8+
9+
class ThinkingSectionParser
10+
{
11+
private:
12+
static QString m_startToken;
13+
static QString m_endToken;
14+
15+
public:
16+
static void setTokensFromServerProps(const LlamaCppServerProps &serverProps);
17+
18+
static QPair<QString, QString> parseThinkingSection(const QString &text);
19+
static bool hasThinkingSection(const QString &text);
20+
static QString formatThinkingContent(const QString &thinkingContent);
21+
};
22+
} // namespace LlamaCpp

0 commit comments

Comments
 (0)