33
44/*
55 * convert from utf8 to database encoding
6+ *
7+ * Returns a palloc'ed string: the converted text, or a copy of the input when no conversion is performed
68 */
79static inline char *
8- utf_u2e (const char * utf8_str , size_t len )
10+ utf_u2e (char * utf8_str , size_t len )
911{
1012 int enc = GetDatabaseEncoding ();
11-
12- char * ret = (char * ) pg_do_encoding_conversion ((unsigned char * ) utf8_str , len , PG_UTF8 , enc );
13+ char * ret ;
1314
1415 /*
15- * when we are a PG_UTF8 or SQL_ASCII database pg_do_encoding_conversion()
16- * will not do any conversion or verification. we need to do it manually
17- * instead.
16+ * When we are in a PG_UTF8 or SQL_ASCII database
17+ * pg_do_encoding_conversion() will not do any conversion (which is good)
18+ * or verification (not so much), so we need to run the verification step
19+ * separately.
1820 */
1921 if (enc == PG_UTF8 || enc == PG_SQL_ASCII )
20- pg_verify_mbstr_len (PG_UTF8 , utf8_str , len , false);
22+ {
23+ pg_verify_mbstr_len (enc , utf8_str , len , false);
24+ ret = utf8_str ;
25+ }
26+ else
27+ ret = (char * ) pg_do_encoding_conversion ((unsigned char * ) utf8_str ,
28+ len , PG_UTF8 , enc );
2129
2230 if (ret == utf8_str )
2331 ret = pstrdup (ret );
@@ -27,11 +35,15 @@ utf_u2e(const char *utf8_str, size_t len)
2735
2836/*
2937 * convert from database encoding to utf8
38+ *
39+ * Returns a palloc'ed string: the converted text, or a copy of the input when no conversion is performed
3040 */
3141static inline char *
3242utf_e2u (const char * str )
3343{
34- char * ret = (char * ) pg_do_encoding_conversion ((unsigned char * ) str , strlen (str ), GetDatabaseEncoding (), PG_UTF8 );
44+ char * ret =
45+ (char * ) pg_do_encoding_conversion ((unsigned char * ) str , strlen (str ),
46+ GetDatabaseEncoding (), PG_UTF8 );
3547
3648 if (ret == str )
3749 ret = pstrdup (ret );
@@ -41,6 +53,8 @@ utf_e2u(const char *str)
4153
4254/*
4355 * Convert an SV to a char * in the current database encoding
56+ *
57+ * Returns a palloc'ed copy of the original string
4458 */
4559static inline char *
4660sv2cstr (SV * sv )
@@ -51,7 +65,9 @@ sv2cstr(SV *sv)
5165
5266 /*
5367 * get a utf8 encoded char * out of perl. *note* it may not be valid utf8!
54- *
68+ */
69+
70+ /*
5571 * SvPVutf8() croaks nastily on certain things, like typeglobs and
5672 * readonly objects such as $^V. That's a perl bug - it's not supposed to
5773 * happen. To avoid crashing the backend, we make a copy of the sv before
@@ -63,18 +79,27 @@ sv2cstr(SV *sv)
6379 (SvTYPE (sv ) > SVt_PVLV && SvTYPE (sv ) != SVt_PVFM ))
6480 sv = newSVsv (sv );
6581 else
66-
82+ {
6783 /*
6884 * increase the reference count so we can just SvREFCNT_dec() it when
6985 * we are done
7086 */
7187 SvREFCNT_inc_simple_void (sv );
88+ }
7289
73- val = SvPVutf8 (sv , len );
90+ /*
91+ * Request the string from Perl, in UTF-8 encoding; but if we're in a
92+ * SQL_ASCII database, just request the byte soup without trying to make it
93+ * UTF8, because that might fail.
94+ */
95+ if (GetDatabaseEncoding () == PG_SQL_ASCII )
96+ val = SvPV (sv , len );
97+ else
98+ val = SvPVutf8 (sv , len );
7499
75100 /*
76- * we use perl's length in the event we had an embedded null byte to
77- * ensure we error out properly
101+ * Now convert to database encoding. We use perl's length in the event we
102+ * had an embedded null byte to ensure we error out properly.
78103 */
79104 res = utf_u2e (val , len );
80105
@@ -88,16 +113,20 @@ sv2cstr(SV *sv)
88113 * Create a new SV from a string assumed to be in the current database's
89114 * encoding.
90115 */
91-
92116static inline SV *
93117cstr2sv (const char * str )
94118{
95119 SV * sv ;
96- char * utf8_str = utf_e2u (str );
120+ char * utf8_str ;
121+
122+ /* no conversion when SQL_ASCII */
123+ if (GetDatabaseEncoding () == PG_SQL_ASCII )
124+ return newSVpv (str , 0 );
125+
126+ utf8_str = utf_e2u (str );
97127
98128 sv = newSVpv (utf8_str , 0 );
99129 SvUTF8_on (sv );
100-
101130 pfree (utf8_str );
102131
103132 return sv ;
0 commit comments