aboutsummaryrefslogtreecommitdiffstats
path: root/src/libs/utils/textutils.cpp
diff options
context:
space:
mode:
author Nikolai Kosjar <[email protected]> 2019-07-24 09:34:50 +0200
committer Nikolai Kosjar <[email protected]> 2019-08-27 08:26:45 +0000
commit c4889e9904f9716f6c1998e45a6d345aee98075c (patch)
tree fb399ed856bde7691f2242cdce57a74a6eb7738c /src/libs/utils/textutils.cpp
parent 75a065d3d1f22afb9fdbe6011f162c57ed58c1e0 (diff)
Clang: Extract Utils::utf8AdvanceCodePoint

Change-Id: I922c7b0f2f0e0d50f34035e9affef4504df59892
Reviewed-by: David Schulz <[email protected]>
Diffstat (limited to 'src/libs/utils/textutils.cpp')
-rw-r--r-- src/libs/utils/textutils.cpp 25
1 files changed, 25 insertions, 0 deletions
diff --git a/src/libs/utils/textutils.cpp b/src/libs/utils/textutils.cpp
index 2233d1c680c..ad6aa2242a2 100644
--- a/src/libs/utils/textutils.cpp
+++ b/src/libs/utils/textutils.cpp
@@ -190,5 +190,30 @@ QString utf16LineTextInUtf8Buffer(const QByteArray &utf8Buffer, int currentUtf8O
utf8Buffer.mid(lineStartUtf8Offset, lineEndUtf8Offset - lineStartUtf8Offset));
}
// Returns true if the most significant bit of \a byte is set. In UTF-8 such a
// byte is never a complete code point on its own: it is either the lead byte
// or a continuation byte of a multi-byte sequence.
static bool isByteOfMultiByteCodePoint(unsigned char byte)
{
    constexpr unsigned char highBitMask = 0x80;
    return (byte & highBitMask) != 0;
}
+
+bool utf8AdvanceCodePoint(const char *&current)
+{
+ if (Q_UNLIKELY(*current == '\0'))
+ return false;
+
+ // Process multi-byte UTF-8 code point (non-latin1)
+ if (Q_UNLIKELY(isByteOfMultiByteCodePoint(*current))) {
+ unsigned trailingBytesCurrentCodePoint = 1;
+ for (unsigned char c = (*current) << 2; isByteOfMultiByteCodePoint(c); c <<= 1)
+ ++trailingBytesCurrentCodePoint;
+ current += trailingBytesCurrentCodePoint + 1;
+
+ // Process single-byte UTF-8 code point (latin1)
+ } else {
+ ++current;
+ }
+
+ return true;
+}
+
} // Text
} // Utils