summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Marc Mutz <marc.mutz@qt.io> 2024-10-01 12:01:24 +0200
committer: Marc Mutz <marc.mutz@qt.io> 2024-10-08 10:26:37 +0200
commit: 62108a08c12abfc1421c283cf34e75ffeded2c12 (patch)
tree: 1451bedf6148583a73851775559919c7c58a6889 /src
parent: c095f7fbf820ac944c5d3096f48dd18752a218b3 (diff)
QStringConverter/ICU: optimize NUL-termination of codec name
ICU unfortunately requires converter names to be passed as NUL-terminated C strings. This means that the names that come in via QAnyStringView have to be encoding-converted (assuming US-ASCII, i.e. Latin-1), and NUL-terminated. The old code used the convenient toString().toLatin1() methods for this. This, however, transforms L1 and U8 inputs twice: first to UTF-16, then to L1. It also always allocates memory. To fix, first change the temporary string container to std::string (which has an SSO buffer into which most common charset names will fit, avoiding memory allocation) and then skip the conversion to UTF-16, going directly from the source encoding to L1, treating UTF-8 as L1 (because US-ASCII is a common subset of both). Unfortunately, our U16-to-L1 converter doesn't allow selecting a replacement character other than '?' for out-of-range input characters, but valid charset names should not contain question marks, so here's to hoping that ICU doesn't strip them willy-nilly, causing false-positive matches. The old code had the same problem. Amends f6c11ac4f20a16d0b2113014e2dac63b95d946ae. Pick-to: 6.8 Fixes: QTBUG-126109 Change-Id: If1dd494cf4ee8e2d304a0648c22dc8806718f104 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- src/corelib/text/qstringconverter.cpp 31
1 files changed, 29 insertions, 2 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 633eb1b72d9..7d62cc865a5 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -30,10 +30,11 @@
#endif
#include <array>
-
#if __has_include(<bit>) && __cplusplus > 201703L
#include <bit>
#endif
+#include <string>
+#include <QtCore/q20utility.h>
QT_BEGIN_NAMESPACE
@@ -2152,9 +2153,35 @@ struct QStringConverterICU : QStringConverter
return conv;
}
+ static std::string nul_terminate_impl(QLatin1StringView name) // Latin-1 overload: copies the viewed chars into an owning std::string
+ { return name.isNull() ? std::string() : std::string{name.data(), size_t(name.size())}; } // null view -> empty string; otherwise copy name.size() chars starting at name.data()
+
+ static std::string nul_terminate_impl(QUtf8StringView name) // UTF-8 overload
+ { return nul_terminate_impl(QLatin1StringView{QByteArrayView{name}}); } // re-views the same bytes as Latin-1 and delegates to the Latin-1 overload; only views are constructed here, nothing is transcoded
+
+ static std::string nul_terminate_impl(QStringView name) // UTF-16 overload: converts with QLatin1::convertFromUnicode straight into a std::string
+ {
+ std::string result;
+ const auto convert = [&](char *p, size_t n) { // callback: writes the converted text at p; n is the buffer size offered by the caller
+ const auto sz = QLatin1::convertFromUnicode(p, name) - p; // pointer difference; presumably the number of chars written (assumes the call returns the end of its output)
+ Q_ASSERT(q20::cmp_less_equal(sz, n)); // the conversion must not have produced more than the n chars offered; sz is signed, n unsigned
+ return sz;
+ };
+#ifdef __cpp_lib_string_resize_and_overwrite
+ result.resize_and_overwrite(size_t(name.size()), convert); // the callback's return value becomes the string's final length
+#else
+ result.resize(size_t(name.size())); // fallback: first size the buffer to one char per UTF-16 code unit
+ result.resize(convert(result.data(), result.size())); // then set the length to what convert() reported
+#endif // __cpp_lib_string_resize_and_overwrite
+ return result;
+ }
+
+ static std::string nul_terminate(QAnyStringView name) // entry point: returns an owning std::string for name, whichever view type it holds
+ { return name.visit([](auto name) { return nul_terminate_impl(name); }); } // the generic lambda receives the concrete view; overload resolution picks the matching nul_terminate_impl
+
static const QStringConverter::Interface * // overload that accepts the converter name as a QAnyStringView
make_icu_converter(QStringConverterBase::State *state, QAnyStringView name)
- { return make_icu_converter(state, name.toString().toLatin1().constData()); } // ### optimize
+ { return make_icu_converter(state, nul_terminate(name).data()); } // forwards to another make_icu_converter overload with a char pointer; the temporary std::string lives until the end of this full-expression, so data() stays valid and NUL-terminated for the whole call
static const QStringConverter::Interface *make_icu_converter(
QStringConverterBase::State *state,