diff options
| author | Marc Mutz <marc.mutz@qt.io> | 2025-10-24 13:38:08 +0200 |
|---|---|---|
| committer | Marc Mutz <marc.mutz@qt.io> | 2025-10-28 16:08:26 +0100 |
| commit | 8fc7655e5d91cbe6b8bc9910558f92ddccb7c9a9 (patch) | |
| tree | e85b180da6aaed5c77d2d2aa76556a094c0ebdad /src/corelib/text/qunicodetools.cpp | |
| parent | 8565f645e02195fad3f7c4f94a73e041fa52f953 (diff) | |
QUnicodeTools: don't look up surrogate line-break properties
We know they're SG, so don't go through the properties trie; hard-code
the result instead.
As a defense against changes, add checks to the generator and
tst_QUnicodeTools.
This is in preparation for porting getLineBreaks() to QStringIterator.
Pick-to: 6.10 6.8 6.5
Change-Id: Ib3567398ba56f7ad3ce6fbca81f6b0f40379ee7d
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
Diffstat (limited to 'src/corelib/text/qunicodetools.cpp')
| -rw-r--r-- | src/corelib/text/qunicodetools.cpp | 18 |
1 file changed, 12 insertions, 6 deletions
diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp index 862bd2cd0e9..d8f4d374322 100644 --- a/src/corelib/text/qunicodetools.cpp +++ b/src/corelib/text/qunicodetools.cpp @@ -767,17 +767,19 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes // even after spaces. // × [\p{Pf}&QU] ( SP | GL | WJ | CL | QU | CP | EX | IS // | SY | BK | CR | LF | NL | ZW | eot) - auto nncls = QUnicodeTables::LineBreak_LF; - - if (i + 1 < len) { + const auto nncls = [&] { + if (i + 1 >= len) + return QUnicodeTables::LineBreak_LF; char32_t c = string[i + 1]; if (QChar::isHighSurrogate(c) && i + 2 < len) { ushort low = string[i + 2]; if (QChar::isLowSurrogate(low)) c = QChar::surrogateToUcs4(c, low); + else + return QUnicodeTables::LineBreak_SG; // all surrogates } - nncls = QUnicodeTables::lineBreakClass(c); - } + return QUnicodeTables::lineBreakClass(c); + }(); constexpr QUnicodeTables::LineBreakClass lb15b[] = { QUnicodeTables::LineBreak_SP, QUnicodeTables::LineBreak_GL, @@ -867,13 +869,17 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes // ‘subtract .5’. if (Q_UNLIKELY(lcls == QUnicodeTables::LineBreak_SP)) { if (i + 1 < len) { + constexpr char32_t Invalid = ~U'\0'; char32_t ch = string[i + 1]; if (QChar::isHighSurrogate(ch) && i + 2 < len) { ushort low = string[i + 2]; if (QChar::isLowSurrogate(low)) ch = QChar::surrogateToUcs4(ch, low); + else + ch = Invalid; } - if (QUnicodeTables::lineBreakClass(ch) == QUnicodeTables::LineBreak_NU) { + if (ch != Invalid // surrogates won't match (ensured by util/unicode) + && QUnicodeTables::lineBreakClass(ch) == QUnicodeTables::LineBreak_NU) { attributes[pos].lineBreak = true; goto next; } |
