QStringConverter/ICU: optimize NUL-termination of codec name

ICU unfortunately requires converter names to be passed as NUL-terminated C strings. This means that the names that come in via QAnyStringView have to be encoding-converted (assuming US-ASCII, i.e. Latin-1), and NUL-terminated. The old code used the convenient toString().toLatin1() methods for this. This, however, transforms L1 and UTF-8 inputs twice: first to UTF-16, then to L1. It also always allocates memory. To fix, first change the temporary string container to std::string (which has an SSO buffer into which most common charset names will fit, avoiding memory allocation) and then skip the conversion to UTF-16, going directly from the source encoding to L1, treating UTF-8 as L1 (because US-ASCII is a common subset of both). Unfortunately, our UTF-16-to-L1 converter doesn't allow selecting a replacement character other than '?' for out-of-range input characters, but valid charset names should not contain question marks, so here's to hoping that ICU doesn't strip them willy-nilly, causing false-positive matches. The old code had the same problem. Amends f6c11ac4f20a16d0b2113014e2dac63b95d946ae. Pick-to: 6.8 Fixes: QTBUG-126109 Change-Id: If1dd494cf4ee8e2d304a0648c22dc8806718f104 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
author: Marc Mutz <marc.mutz@qt.io> 2024-10-01 12:01:24 +0200
committer: Marc Mutz <marc.mutz@qt.io> 2024-10-08 10:26:37 +0200
commit: 62108a08c12abfc1421c283cf34e75ffeded2c12 (patch)
tree: 1451bedf6148583a73851775559919c7c58a6889 /src
parent: c095f7fbf820ac944c5d3096f48dd18752a218b3 (diff)
1 files changed, 29 insertions, 2 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 633eb1b72d9..7d62cc865a5 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -30,10 +30,11 @@
 #endif
 
 #include <array>
-
 #if __has_include(<bit>) && __cplusplus > 201703L
 #include <bit>
 #endif
+#include <string>
+#include <QtCore/q20utility.h>
 
 QT_BEGIN_NAMESPACE
 
@@ -2152,9 +2153,35 @@ struct QStringConverterICU : QStringConverter
         return conv;
     }
 
+    static std::string nul_terminate_impl(QLatin1StringView name)
+    { return name.isNull() ? std::string() : std::string{name.data(), size_t(name.size())}; }
+
+    static std::string nul_terminate_impl(QUtf8StringView name)
+    { return nul_terminate_impl(QLatin1StringView{QByteArrayView{name}}); }
+
+    static std::string nul_terminate_impl(QStringView name)
+    {
+        std::string result;
+        const auto convert = [&](char *p, size_t n) {
+                const auto sz = QLatin1::convertFromUnicode(p, name) - p;
+                Q_ASSERT(q20::cmp_less_equal(sz, n));
+                return sz;
+            };
+#ifdef __cpp_lib_string_resize_and_overwrite
+        result.resize_and_overwrite(size_t(name.size()), convert);
+#else
+        result.resize(size_t(name.size()));
+        result.resize(convert(result.data(), result.size()));
+#endif // __cpp_lib_string_resize_and_overwrite
+        return result;
+    }
+
+    static std::string nul_terminate(QAnyStringView name)
+    { return name.visit([](auto name) { return nul_terminate_impl(name); }); }
+
     static const QStringConverter::Interface *
     make_icu_converter(QStringConverterBase::State *state, QAnyStringView name)
-    { return make_icu_converter(state, name.toString().toLatin1().constData()); } // ### optimize
+    { return make_icu_converter(state, nul_terminate(name).data()); }
 
     static const QStringConverter::Interface *make_icu_converter(
             QStringConverterBase::State *state,
author	Marc Mutz <marc.mutz@qt.io>	2024-10-01 12:01:24 +0200
committer	Marc Mutz <marc.mutz@qt.io>	2024-10-08 10:26:37 +0200
commit	62108a08c12abfc1421c283cf34e75ffeded2c12 (patch)
tree	1451bedf6148583a73851775559919c7c58a6889 /src
parent	c095f7fbf820ac944c5d3096f48dd18752a218b3 (diff)