summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/text/qunicodetools.cpp
diff options
context:
space:
mode:
author: Marc Mutz <marc.mutz@qt.io> 2025-10-24 13:38:08 +0200
committer: Marc Mutz <marc.mutz@qt.io> 2025-10-28 16:08:26 +0100
commit: 8fc7655e5d91cbe6b8bc9910558f92ddccb7c9a9 (patch)
tree: e85b180da6aaed5c77d2d2aa76556a094c0ebdad /src/corelib/text/qunicodetools.cpp
parent: 8565f645e02195fad3f7c4f94a73e041fa52f953 (diff)
QUnicodeTools: don't look up surrogate line-break properties
We know they're SG, so don't go through the properties trie; hard-code the result instead. As a defense against changes, add checks to the generator and tst_QUnicodeTools. This is in preparation for porting getLineBreaks() to QStringIterator.

Pick-to: 6.10 6.8 6.5
Change-Id: Ib3567398ba56f7ad3ce6fbca81f6b0f40379ee7d
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
Diffstat (limited to 'src/corelib/text/qunicodetools.cpp')
-rw-r--r-- src/corelib/text/qunicodetools.cpp 18
1 files changed, 12 insertions, 6 deletions
diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp
index 862bd2cd0e9..d8f4d374322 100644
--- a/src/corelib/text/qunicodetools.cpp
+++ b/src/corelib/text/qunicodetools.cpp
@@ -767,17 +767,19 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
// even after spaces.
// × [\p{Pf}&QU] ( SP | GL | WJ | CL | QU | CP | EX | IS
// | SY | BK | CR | LF | NL | ZW | eot)
- auto nncls = QUnicodeTables::LineBreak_LF;
-
- if (i + 1 < len) {
+ const auto nncls = [&] {
+ if (i + 1 >= len)
+ return QUnicodeTables::LineBreak_LF;
char32_t c = string[i + 1];
if (QChar::isHighSurrogate(c) && i + 2 < len) {
ushort low = string[i + 2];
if (QChar::isLowSurrogate(low))
c = QChar::surrogateToUcs4(c, low);
+ else
+ return QUnicodeTables::LineBreak_SG; // all surrogates
}
- nncls = QUnicodeTables::lineBreakClass(c);
- }
+ return QUnicodeTables::lineBreakClass(c);
+ }();
constexpr QUnicodeTables::LineBreakClass lb15b[] = {
QUnicodeTables::LineBreak_SP, QUnicodeTables::LineBreak_GL,
@@ -867,13 +869,17 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
// ‘subtract .5’.
if (Q_UNLIKELY(lcls == QUnicodeTables::LineBreak_SP)) {
if (i + 1 < len) {
+ constexpr char32_t Invalid = ~U'\0';
char32_t ch = string[i + 1];
if (QChar::isHighSurrogate(ch) && i + 2 < len) {
ushort low = string[i + 2];
if (QChar::isLowSurrogate(low))
ch = QChar::surrogateToUcs4(ch, low);
+ else
+ ch = Invalid;
}
- if (QUnicodeTables::lineBreakClass(ch) == QUnicodeTables::LineBreak_NU) {
+ if (ch != Invalid // surrogates won't match (ensured by util/unicode)
+ && QUnicodeTables::lineBreakClass(ch) == QUnicodeTables::LineBreak_NU) {
attributes[pos].lineBreak = true;
goto next;
}