PostgreSQL Source Code git master
pg_locale.c
Go to the documentation of this file.
1/*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities
4 *
5 * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12/*----------
13 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 * toupper(), etc. are always in the same fixed locale.
17 *
18 * LC_MESSAGES is settable at run time and will take effect
19 * immediately.
20 *
21 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are
22 * permanently set to "C", and then we use temporary locale_t
23 * objects when we need to look up locale data based on the GUCs
24 * of the same name. Information is cached when the GUCs change.
25 * The cached information is only used by the formatting functions
26 * (to_char, etc.) and the money type. For the user, this should all be
27 * transparent.
28 *----------
29 */
30
31
32#include "postgres.h"
33
34#include <time.h>
35
36#include "access/htup_details.h"
38#include "catalog/pg_database.h"
39#include "common/hashfn.h"
40#include "common/string.h"
41#include "mb/pg_wchar.h"
42#include "miscadmin.h"
43#include "utils/builtins.h"
44#include "utils/guc_hooks.h"
45#include "utils/lsyscache.h"
46#include "utils/memutils.h"
47#include "utils/pg_locale.h"
48#include "utils/pg_locale_c.h"
49#include "utils/relcache.h"
50#include "utils/syscache.h"
51
52#ifdef WIN32
53#include <shlwapi.h>
54#endif
55
56/* Error triggered for locale-sensitive subroutines */
57#define PGLOCALE_SUPPORT_ERROR(provider) \
58 elog(ERROR, "unsupported collprovider for %s: %c", __func__, provider)
59
60/*
61 * This should be large enough that most strings will fit, but small enough
62 * that we feel comfortable putting it on the stack
63 */
64#define TEXTBUFLEN 1024
65
66#define MAX_L10N_DATA 80
67
68/* pg_locale_builtin.c */
70extern char *get_collation_actual_version_builtin(const char *collcollate);
71
72/* pg_locale_icu.c */
73#ifdef USE_ICU
74extern UCollator *pg_ucol_open(const char *loc_str);
75extern char *get_collation_actual_version_icu(const char *collcollate);
76#endif
78
79/* pg_locale_libc.c */
81extern char *get_collation_actual_version_libc(const char *collcollate);
82
83/* GUC settings */
88
90
91/*
92 * lc_time localization cache.
93 *
94 * We use only the first 7 or 12 entries of these arrays. The last array
95 * element is left as NULL for the convenience of outside code that wants
96 * to sequentially scan these arrays.
97 */
102
104
105/* indicates whether locale information cache is valid */
106static bool CurrentLocaleConvValid = false;
107static bool CurrentLCTimeValid = false;
108
109static struct pg_locale_struct c_locale = {
110 .deterministic = true,
111 .collate_is_c = true,
112 .ctype_is_c = true,
113};
114
115/* Cache for collation-related knowledge */
116
117typedef struct
118{
119 Oid collid; /* hash key: pg_collation OID */
120 pg_locale_t locale; /* locale_t struct, or 0 if not valid */
121
122 /* needed for simplehash */
124 char status;
126
127#define SH_PREFIX collation_cache
128#define SH_ELEMENT_TYPE collation_cache_entry
129#define SH_KEY_TYPE Oid
130#define SH_KEY collid
131#define SH_HASH_KEY(tb, key) murmurhash32((uint32) key)
132#define SH_EQUAL(tb, a, b) (a == b)
133#define SH_GET_HASH(tb, a) a->hash
134#define SH_SCOPE static inline
135#define SH_STORE_HASH
136#define SH_DECLARE
137#define SH_DEFINE
138#include "lib/simplehash.h"
139
141static collation_cache_hash *CollationCache = NULL;
142
143/*
144 * The collation cache is often accessed repeatedly for the same collation, so
145 * remember the last one used.
146 */
149
150#if defined(WIN32) && defined(LC_MESSAGES)
151static char *IsoLocaleName(const char *);
152#endif
153
154/*
155 * pg_perm_setlocale
156 *
157 * This wraps the libc function setlocale(), with two additions. First, when
158 * changing LC_CTYPE, update gettext's encoding for the current message
159 * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
160 * not on Windows. Second, if the operation is successful, the corresponding
161 * LC_XXX environment variable is set to match. By setting the environment
162 * variable, we ensure that any subsequent use of setlocale(..., "") will
163 * preserve the settings made through this routine. Of course, LC_ALL must
164 * also be unset to fully ensure that, but that has to be done elsewhere after
165 * all the individual LC_XXX variables have been set correctly. (Thank you
166 * Perl for making this kluge necessary.)
167 */
168char *
169pg_perm_setlocale(int category, const char *locale)
170{
171 char *result;
172 const char *envvar;
173
174#ifndef WIN32
175 result = setlocale(category, locale);
176#else
177
178 /*
179 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
180 * the given value is good and set it in the environment variables. We
181 * must ignore attempts to set to "", which means "keep using the old
182 * environment value".
183 */
184#ifdef LC_MESSAGES
185 if (category == LC_MESSAGES)
186 {
187 result = (char *) locale;
188 if (locale == NULL || locale[0] == '\0')
189 return result;
190 }
191 else
192#endif
193 result = setlocale(category, locale);
194#endif /* WIN32 */
195
196 if (result == NULL)
197 return result; /* fall out immediately on failure */
198
199 /*
200 * Use the right encoding in translated messages. Under ENABLE_NLS, let
201 * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
202 * format strings are ASCII, but database-encoding strings may enter the
203 * message via %s. This makes the overall message encoding equal to the
204 * database encoding.
205 */
206 if (category == LC_CTYPE)
207 {
208 static char save_lc_ctype[LOCALE_NAME_BUFLEN];
209
210 /* copy setlocale() return value before callee invokes it again */
211 strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
212 result = save_lc_ctype;
213
214#ifdef ENABLE_NLS
215 SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
216#else
218#endif
219 }
220
221 switch (category)
222 {
223 case LC_COLLATE:
224 envvar = "LC_COLLATE";
225 break;
226 case LC_CTYPE:
227 envvar = "LC_CTYPE";
228 break;
229#ifdef LC_MESSAGES
230 case LC_MESSAGES:
231 envvar = "LC_MESSAGES";
232#ifdef WIN32
233 result = IsoLocaleName(locale);
234 if (result == NULL)
235 result = (char *) locale;
236 elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
237#endif /* WIN32 */
238 break;
239#endif /* LC_MESSAGES */
240 case LC_MONETARY:
241 envvar = "LC_MONETARY";
242 break;
243 case LC_NUMERIC:
244 envvar = "LC_NUMERIC";
245 break;
246 case LC_TIME:
247 envvar = "LC_TIME";
248 break;
249 default:
250 elog(FATAL, "unrecognized LC category: %d", category);
251 return NULL; /* keep compiler quiet */
252 }
253
254 if (setenv(envvar, result, 1) != 0)
255 return NULL;
256
257 return result;
258}
259
260
261/*
262 * Is the locale name valid for the locale category?
263 *
264 * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
265 * canonical name is stored there. This is especially useful for figuring out
266 * what locale name "" means (ie, the server environment value). (Actually,
267 * it seems that on most implementations that's the only thing it's good for;
268 * we could wish that setlocale gave back a canonically spelled version of
269 * the locale name, but typically it doesn't.)
270 */
271bool
272check_locale(int category, const char *locale, char **canonname)
273{
274 char *save;
275 char *res;
276
277 /* Don't let Windows' non-ASCII locale names in. */
278 if (!pg_is_ascii(locale))
279 {
281 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
282 errmsg("locale name \"%s\" contains non-ASCII characters",
283 locale)));
284 return false;
285 }
286
287 if (canonname)
288 *canonname = NULL; /* in case of failure */
289
290 save = setlocale(category, NULL);
291 if (!save)
292 return false; /* won't happen, we hope */
293
294 /* save may be pointing at a modifiable scratch variable, see above. */
295 save = pstrdup(save);
296
297 /* set the locale with setlocale, to see if it accepts it. */
298 res = setlocale(category, locale);
299
300 /* save canonical name if requested. */
301 if (res && canonname)
302 *canonname = pstrdup(res);
303
304 /* restore old value. */
305 if (!setlocale(category, save))
306 elog(WARNING, "failed to restore old locale \"%s\"", save);
307 pfree(save);
308
309 /* Don't let Windows' non-ASCII locale names out. */
310 if (canonname && *canonname && !pg_is_ascii(*canonname))
311 {
313 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
314 errmsg("locale name \"%s\" contains non-ASCII characters",
315 *canonname)));
316 pfree(*canonname);
317 *canonname = NULL;
318 return false;
319 }
320
321 return (res != NULL);
322}
323
324
325/*
326 * GUC check/assign hooks
327 *
328 * For most locale categories, the assign hook doesn't actually set the locale
329 * permanently, just reset flags so that the next use will cache the
330 * appropriate values. (See explanation at the top of this file.)
331 *
332 * Note: we accept value = "" as selecting the postmaster's environment
333 * value, whatever it was (so long as the environment setting is legal).
334 * This will have been locked down by an earlier call to pg_perm_setlocale.
335 */
336bool
338{
339 return check_locale(LC_MONETARY, *newval, NULL);
340}
341
342void
343assign_locale_monetary(const char *newval, void *extra)
344{
346}
347
348bool
350{
351 return check_locale(LC_NUMERIC, *newval, NULL);
352}
353
354void
355assign_locale_numeric(const char *newval, void *extra)
356{
358}
359
360bool
362{
363 return check_locale(LC_TIME, *newval, NULL);
364}
365
366void
367assign_locale_time(const char *newval, void *extra)
368{
369 CurrentLCTimeValid = false;
370}
371
372/*
373 * We allow LC_MESSAGES to actually be set globally.
374 *
375 * Note: we normally disallow value = "" because it wouldn't have consistent
376 * semantics (it'd effectively just use the previous value). However, this
377 * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
378 * not even if the attempted setting fails due to invalid environment value.
379 * The idea there is just to accept the environment setting *if possible*
380 * during startup, until we can read the proper value from postgresql.conf.
381 */
382bool
384{
385 if (**newval == '\0')
386 {
387 if (source == PGC_S_DEFAULT)
388 return true;
389 else
390 return false;
391 }
392
393 /*
394 * LC_MESSAGES category does not exist everywhere, but accept it anyway
395 *
396 * On Windows, we can't even check the value, so accept blindly
397 */
398#if defined(LC_MESSAGES) && !defined(WIN32)
399 return check_locale(LC_MESSAGES, *newval, NULL);
400#else
401 return true;
402#endif
403}
404
/*
 * Assign hook for the LC_MESSAGES setting.
 *
 * Unlike the other locale categories, this one is really installed for the
 * whole process, via pg_perm_setlocale().
 */
void
assign_locale_messages(const char *newval, void *extra)
{
#ifdef LC_MESSAGES

	/*
	 * Not every platform has an LC_MESSAGES category; where it is absent the
	 * value is accepted and simply has no effect.  A failure to install the
	 * value is deliberately ignored.
	 */
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
416
417
/*
 * Release the malloc'd strings hanging off a struct lconv; the struct
 * itself is left alone.  This must never throw elog(ERROR).
 *
 * The set of fields handled here has to stay in step with
 * struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* Gather the owned strings up front, then release them in order. */
	char	   *owned[] = {
		s->decimal_point,
		s->thousands_sep,
		s->grouping,
		s->int_curr_symbol,
		s->currency_symbol,
		s->mon_decimal_point,
		s->mon_thousands_sep,
		s->mon_grouping,
		s->positive_sign,
		s->negative_sign,
	};
	size_t		nowned = sizeof(owned) / sizeof(owned[0]);
	size_t		k;

	/* free(NULL) is a no-op, so unset fields need no special casing */
	for (k = 0; k < nowned; k++)
		free(owned[k]);
}
436
/*
 * Report whether every string field of a struct lconv that we care about
 * has been filled in (i.e. is non-NULL).
 *
 * The fields tested here must be the same ones free_struct_lconv() frees.
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
466
467
468/*
469 * Convert the strdup'd string at *str from the specified encoding to the
470 * database encoding.
471 */
472static void
474{
475 char *pstr;
476 char *mstr;
477
478 /* convert the string to the database encoding */
479 pstr = pg_any_to_server(*str, strlen(*str), encoding);
480 if (pstr == *str)
481 return; /* no conversion happened */
482
483 /* need it malloc'd not palloc'd */
484 mstr = strdup(pstr);
485 if (mstr == NULL)
487 (errcode(ERRCODE_OUT_OF_MEMORY),
488 errmsg("out of memory")));
489
490 /* replace old string */
491 free(*str);
492 *str = mstr;
493
494 pfree(pstr);
495}
496
497
498/*
499 * Return the POSIX lconv struct (contains number/money formatting
500 * information) with locale information for all categories.
501 */
502struct lconv *
504{
505 static struct lconv CurrentLocaleConv;
506 static bool CurrentLocaleConvAllocated = false;
507 struct lconv *extlconv;
508 struct lconv tmp;
509 struct lconv worklconv = {0};
510
511 /* Did we do it already? */
513 return &CurrentLocaleConv;
514
515 /* Free any already-allocated storage */
516 if (CurrentLocaleConvAllocated)
517 {
518 free_struct_lconv(&CurrentLocaleConv);
519 CurrentLocaleConvAllocated = false;
520 }
521
522 /*
523 * Use thread-safe method of obtaining a copy of lconv from the operating
524 * system.
525 */
528 &tmp) != 0)
529 elog(ERROR,
530 "could not get lconv for LC_MONETARY = \"%s\", LC_NUMERIC = \"%s\": %m",
532
533 /* Must copy data now so we can re-encode it. */
534 extlconv = &tmp;
535 worklconv.decimal_point = strdup(extlconv->decimal_point);
536 worklconv.thousands_sep = strdup(extlconv->thousands_sep);
537 worklconv.grouping = strdup(extlconv->grouping);
538 worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
539 worklconv.currency_symbol = strdup(extlconv->currency_symbol);
540 worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
541 worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
542 worklconv.mon_grouping = strdup(extlconv->mon_grouping);
543 worklconv.positive_sign = strdup(extlconv->positive_sign);
544 worklconv.negative_sign = strdup(extlconv->negative_sign);
545 /* Copy scalar fields as well */
546 worklconv.int_frac_digits = extlconv->int_frac_digits;
547 worklconv.frac_digits = extlconv->frac_digits;
548 worklconv.p_cs_precedes = extlconv->p_cs_precedes;
549 worklconv.p_sep_by_space = extlconv->p_sep_by_space;
550 worklconv.n_cs_precedes = extlconv->n_cs_precedes;
551 worklconv.n_sep_by_space = extlconv->n_sep_by_space;
552 worklconv.p_sign_posn = extlconv->p_sign_posn;
553 worklconv.n_sign_posn = extlconv->n_sign_posn;
554
555 /* Free the contents of the object populated by pg_localeconv_r(). */
556 pg_localeconv_free(&tmp);
557
558 /* If any of the preceding strdup calls failed, complain now. */
559 if (!struct_lconv_is_valid(&worklconv))
561 (errcode(ERRCODE_OUT_OF_MEMORY),
562 errmsg("out of memory")));
563
564 PG_TRY();
565 {
566 int encoding;
567
568 /*
569 * Now we must perform encoding conversion from whatever's associated
570 * with the locales into the database encoding. If we can't identify
571 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
572 * use PG_SQL_ASCII, which will result in just validating that the
573 * strings are OK in the database encoding.
574 */
576 if (encoding < 0)
578
579 db_encoding_convert(encoding, &worklconv.decimal_point);
580 db_encoding_convert(encoding, &worklconv.thousands_sep);
581 /* grouping is not text and does not require conversion */
582
584 if (encoding < 0)
586
587 db_encoding_convert(encoding, &worklconv.int_curr_symbol);
588 db_encoding_convert(encoding, &worklconv.currency_symbol);
589 db_encoding_convert(encoding, &worklconv.mon_decimal_point);
590 db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
591 /* mon_grouping is not text and does not require conversion */
592 db_encoding_convert(encoding, &worklconv.positive_sign);
593 db_encoding_convert(encoding, &worklconv.negative_sign);
594 }
595 PG_CATCH();
596 {
597 free_struct_lconv(&worklconv);
598 PG_RE_THROW();
599 }
600 PG_END_TRY();
601
602 /*
603 * Everything is good, so save the results.
604 */
605 CurrentLocaleConv = worklconv;
606 CurrentLocaleConvAllocated = true;
608 return &CurrentLocaleConv;
609}
610
611#ifdef WIN32
612/*
613 * On Windows, strftime() returns its output in encoding CP_ACP (the default
614 * operating system codepage for the computer), which is likely different
615 * from SERVER_ENCODING. This is especially important in Japanese versions
616 * of Windows which will use SJIS encoding, which we don't support as a
617 * server encoding.
618 *
619 * So, instead of using strftime(), use wcsftime() to return the value in
620 * wide characters (internally UTF16) and then convert to UTF8, which we
621 * know how to handle directly.
622 *
623 * Note that this only affects the calls to strftime() in this file, which are
624 * used to get the locale-aware strings. Other parts of the backend use
625 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
626 */
627static size_t
628strftime_l_win32(char *dst, size_t dstlen,
629 const char *format, const struct tm *tm, locale_t locale)
630{
631 size_t len;
632 wchar_t wformat[8]; /* formats used below need 3 chars */
633 wchar_t wbuf[MAX_L10N_DATA];
634
635 /*
636 * Get a wchar_t version of the format string. We only actually use
637 * plain-ASCII formats in this file, so we can say that they're UTF8.
638 */
639 len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
640 wformat, lengthof(wformat));
641 if (len == 0)
642 elog(ERROR, "could not convert format string from UTF-8: error code %lu",
643 GetLastError());
644
645 len = _wcsftime_l(wbuf, MAX_L10N_DATA, wformat, tm, locale);
646 if (len == 0)
647 {
648 /*
649 * wcsftime failed, possibly because the result would not fit in
650 * MAX_L10N_DATA. Return 0 with the contents of dst unspecified.
651 */
652 return 0;
653 }
654
655 len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
656 NULL, NULL);
657 if (len == 0)
658 elog(ERROR, "could not convert string to UTF-8: error code %lu",
659 GetLastError());
660
661 dst[len] = '\0';
662
663 return len;
664}
665
666/* redefine strftime_l() */
667#define strftime_l(a,b,c,d,e) strftime_l_win32(a,b,c,d,e)
668#endif /* WIN32 */
669
670/*
671 * Subroutine for cache_locale_time().
672 * Convert the given string from encoding "encoding" to the database
673 * encoding, and store the result at *dst, replacing any previous value.
674 */
675static void
676cache_single_string(char **dst, const char *src, int encoding)
677{
678 char *ptr;
679 char *olddst;
680
681 /* Convert the string to the database encoding, or validate it's OK */
682 ptr = pg_any_to_server(src, strlen(src), encoding);
683
684 /* Store the string in long-lived storage, replacing any previous value */
685 olddst = *dst;
687 if (olddst)
688 pfree(olddst);
689
690 /* Might as well clean up any palloc'd conversion result, too */
691 if (ptr != src)
692 pfree(ptr);
693}
694
695/*
696 * Update the lc_time localization cache variables if needed.
697 */
698void
700{
701 char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
702 char *bufptr;
703 time_t timenow;
704 struct tm *timeinfo;
705 struct tm timeinfobuf;
706 bool strftimefail = false;
707 int encoding;
708 int i;
710
711 /* did we do this already? */
713 return;
714
715 elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
716
717 errno = ENOENT;
718#ifdef WIN32
719 locale = _create_locale(LC_ALL, locale_time);
720 if (locale == (locale_t) 0)
721 _dosmaperr(GetLastError());
722#else
723 locale = newlocale(LC_ALL_MASK, locale_time, (locale_t) 0);
724#endif
725 if (!locale)
727
728 /* We use times close to current time as data for strftime(). */
729 timenow = time(NULL);
730 timeinfo = gmtime_r(&timenow, &timeinfobuf);
731
732 /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
733 bufptr = buf;
734
735 /*
736 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
737 * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
738 * error.) An implementation might report errors (e.g. ENOMEM) by
739 * returning 0 (or, less plausibly, a negative value) and setting errno.
740 * Report errno just in case the implementation did that, but clear it in
741 * advance of the calls so we don't emit a stale, unrelated errno.
742 */
743 errno = 0;
744
745 /* localized days */
746 for (i = 0; i < 7; i++)
747 {
748 timeinfo->tm_wday = i;
749 if (strftime_l(bufptr, MAX_L10N_DATA, "%a", timeinfo, locale) <= 0)
750 strftimefail = true;
751 bufptr += MAX_L10N_DATA;
752 if (strftime_l(bufptr, MAX_L10N_DATA, "%A", timeinfo, locale) <= 0)
753 strftimefail = true;
754 bufptr += MAX_L10N_DATA;
755 }
756
757 /* localized months */
758 for (i = 0; i < 12; i++)
759 {
760 timeinfo->tm_mon = i;
761 timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
762 if (strftime_l(bufptr, MAX_L10N_DATA, "%b", timeinfo, locale) <= 0)
763 strftimefail = true;
764 bufptr += MAX_L10N_DATA;
765 if (strftime_l(bufptr, MAX_L10N_DATA, "%B", timeinfo, locale) <= 0)
766 strftimefail = true;
767 bufptr += MAX_L10N_DATA;
768 }
769
770#ifdef WIN32
771 _free_locale(locale);
772#else
773 freelocale(locale);
774#endif
775
776 /*
777 * At this point we've done our best to clean up, and can throw errors, or
778 * call functions that might throw errors, with a clean conscience.
779 */
780 if (strftimefail)
781 elog(ERROR, "strftime_l() failed");
782
783#ifndef WIN32
784
785 /*
786 * As in PGLC_localeconv(), we must convert strftime()'s output from the
787 * encoding implied by LC_TIME to the database encoding. If we can't
788 * identify the LC_TIME encoding, just perform encoding validation.
789 */
791 if (encoding < 0)
793
794#else
795
796 /*
797 * On Windows, strftime_l_win32() always returns UTF8 data, so convert
798 * from that if necessary.
799 */
801
802#endif /* WIN32 */
803
804 bufptr = buf;
805
806 /* localized days */
807 for (i = 0; i < 7; i++)
808 {
810 bufptr += MAX_L10N_DATA;
812 bufptr += MAX_L10N_DATA;
813 }
814 localized_abbrev_days[7] = NULL;
815 localized_full_days[7] = NULL;
816
817 /* localized months */
818 for (i = 0; i < 12; i++)
819 {
821 bufptr += MAX_L10N_DATA;
823 bufptr += MAX_L10N_DATA;
824 }
825 localized_abbrev_months[12] = NULL;
826 localized_full_months[12] = NULL;
827
828 CurrentLCTimeValid = true;
829}
830
831
832#if defined(WIN32) && defined(LC_MESSAGES)
833/*
834 * Convert a Windows setlocale() argument to a Unix-style one.
835 *
836 * Regardless of platform, we install message catalogs under a Unix-style
837 * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
838 * following that style will elicit localized interface strings.
839 *
840 * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
841 * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
842 * case-insensitive. setlocale() returns the fully-qualified form; for
843 * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
844 * setlocale() and _create_locale() select a "locale identifier"[1] and store
845 * it in an undocumented _locale_t field. From that LCID, we can retrieve the
846 * ISO 639 language and the ISO 3166 country. Character encoding does not
847 * matter, because the server and client encodings govern that.
848 *
849 * Windows Vista introduced the "locale name" concept[2], closely following
850 * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
851 * Studio 2012, setlocale() accepts locale names in addition to the strings it
852 * accepted historically. It does not standardize them; setlocale("Th-tH")
853 * returns "Th-tH". setlocale(category, "") still returns a traditional
854 * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
855 * content to carry locale names instead of locale identifiers.
856 *
857 * Visual Studio 2015 should still be able to do the same as Visual Studio
858 * 2012, but the declaration of locale_name is missing in _locale_t, causing
859 * this code compilation to fail, hence this falls back instead on to
860 * enumerating all system locales by using EnumSystemLocalesEx to find the
861 * required locale name. If the input argument is in Unix-style then we can
862 * get ISO Locale name directly by using GetLocaleInfoEx() with LCType as
863 * LOCALE_SNAME.
864 *
865 * This function returns a pointer to a static buffer bearing the converted
866 * name or NULL if conversion fails.
867 *
868 * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
869 * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
870 */
871
872/*
873 * Callback function for EnumSystemLocalesEx() in get_iso_localename().
874 *
875 * This function enumerates all system locales, searching for one that matches
876 * an input with the format: <Language>[_<Country>], e.g.
877 * English[_United States]
878 *
879 * The input is a three wchar_t array as an LPARAM. The first element is the
880 * locale_name we want to match, the second element is an allocated buffer
881 * where the Unix-style locale is copied if a match is found, and the third
882 * element is the search status, 1 if a match was found, 0 otherwise.
883 */
884static BOOL CALLBACK
885search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
886{
887 wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
888 wchar_t **argv;
889
890 (void) (dwFlags);
891
892 argv = (wchar_t **) lparam;
893 *argv[2] = (wchar_t) 0;
894
895 memset(test_locale, 0, sizeof(test_locale));
896
897 /* Get the name of the <Language> in English */
898 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
899 test_locale, LOCALE_NAME_MAX_LENGTH))
900 {
901 /*
902 * If the enumerated locale does not have a hyphen ("en") OR the
903 * locale_name input does not have an underscore ("English"), we only
904 * need to compare the <Language> tags.
905 */
906 if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
907 {
908 if (_wcsicmp(argv[0], test_locale) == 0)
909 {
910 wcscpy(argv[1], pStr);
911 *argv[2] = (wchar_t) 1;
912 return FALSE;
913 }
914 }
915
916 /*
917 * We have to compare a full <Language>_<Country> tag, so we append
918 * the underscore and name of the country/region in English, e.g.
919 * "English_United States".
920 */
921 else
922 {
923 size_t len;
924
925 wcscat(test_locale, L"_");
926 len = wcslen(test_locale);
927 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
928 test_locale + len,
929 LOCALE_NAME_MAX_LENGTH - len))
930 {
931 if (_wcsicmp(argv[0], test_locale) == 0)
932 {
933 wcscpy(argv[1], pStr);
934 *argv[2] = (wchar_t) 1;
935 return FALSE;
936 }
937 }
938 }
939 }
940
941 return TRUE;
942}
943
944/*
945 * This function converts a Windows locale name to an ISO formatted version
946 * for Visual Studio 2015 or greater.
947 *
948 * Returns NULL, if no valid conversion was found.
949 */
950static char *
951get_iso_localename(const char *winlocname)
952{
953 wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
954 wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
955 static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
956 char *period;
957 int len;
958 int ret_val;
959
960 /*
961 * Valid locales have the following syntax:
962 * <Language>[_<Country>[.<CodePage>]]
963 *
964 * GetLocaleInfoEx can only take locale name without code-page and for the
965 * purpose of this API the code-page doesn't matter.
966 */
967 period = strchr(winlocname, '.');
968 if (period != NULL)
969 len = period - winlocname;
970 else
971 len = pg_mbstrlen(winlocname);
972
973 memset(wc_locale_name, 0, sizeof(wc_locale_name));
974 memset(buffer, 0, sizeof(buffer));
975 MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
976 LOCALE_NAME_MAX_LENGTH);
977
978 /*
979 * If the lc_messages is already a Unix-style string, we have a direct
980 * match with LOCALE_SNAME, e.g. en-US, en_US.
981 */
982 ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
983 LOCALE_NAME_MAX_LENGTH);
984 if (!ret_val)
985 {
986 /*
987 * Search for a locale in the system that matches language and country
988 * name.
989 */
990 wchar_t *argv[3];
991
992 argv[0] = wc_locale_name;
993 argv[1] = buffer;
994 argv[2] = (wchar_t *) &ret_val;
995 EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
996 NULL);
997 }
998
999 if (ret_val)
1000 {
1001 size_t rc;
1002 char *hyphen;
1003
1004 /* Locale names use only ASCII, any conversion locale suffices. */
1005 rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1006 if (rc == -1 || rc == sizeof(iso_lc_messages))
1007 return NULL;
1008
1009 /*
1010 * Since the message catalogs sit on a case-insensitive filesystem, we
1011 * need not standardize letter case here. So long as we do not ship
1012 * message catalogs for which it would matter, we also need not
1013 * translate the script/variant portion, e.g. uz-Cyrl-UZ to
1014 * uz_UZ@cyrillic. Simply replace the hyphen with an underscore.
1015 */
1016 hyphen = strchr(iso_lc_messages, '-');
1017 if (hyphen)
1018 *hyphen = '_';
1019 return iso_lc_messages;
1020 }
1021
1022 return NULL;
1023}
1024
1025static char *
1026IsoLocaleName(const char *winlocname)
1027{
1028 static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1029
1030 if (pg_strcasecmp("c", winlocname) == 0 ||
1031 pg_strcasecmp("posix", winlocname) == 0)
1032 {
1033 strcpy(iso_lc_messages, "C");
1034 return iso_lc_messages;
1035 }
1036 else
1037 return get_iso_localename(winlocname);
1038}
1039
1040#endif /* WIN32 && LC_MESSAGES */
1041
1042/*
1043 * Create a new pg_locale_t struct for the given collation oid.
1044 */
1045static pg_locale_t
1047{
1048 HeapTuple tp;
1049 Form_pg_collation collform;
1050 pg_locale_t result;
1051 Datum datum;
1052 bool isnull;
1053
1054 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1055 if (!HeapTupleIsValid(tp))
1056 elog(ERROR, "cache lookup failed for collation %u", collid);
1057 collform = (Form_pg_collation) GETSTRUCT(tp);
1058
1059 if (collform->collprovider == COLLPROVIDER_BUILTIN)
1060 result = create_pg_locale_builtin(collid, context);
1061 else if (collform->collprovider == COLLPROVIDER_ICU)
1062 result = create_pg_locale_icu(collid, context);
1063 else if (collform->collprovider == COLLPROVIDER_LIBC)
1064 result = create_pg_locale_libc(collid, context);
1065 else
1066 /* shouldn't happen */
1067 PGLOCALE_SUPPORT_ERROR(collform->collprovider);
1068
1069 result->is_default = false;
1070
1071 Assert((result->collate_is_c && result->collate == NULL) ||
1072 (!result->collate_is_c && result->collate != NULL));
1073
1074 Assert((result->ctype_is_c && result->ctype == NULL) ||
1075 (!result->ctype_is_c && result->ctype != NULL));
1076
1077 datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1078 &isnull);
1079 if (!isnull)
1080 {
1081 char *actual_versionstr;
1082 char *collversionstr;
1083
1084 collversionstr = TextDatumGetCString(datum);
1085
1086 if (collform->collprovider == COLLPROVIDER_LIBC)
1087 datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1088 else
1089 datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1090
1091 actual_versionstr = get_collation_actual_version(collform->collprovider,
1092 TextDatumGetCString(datum));
1093 if (!actual_versionstr)
1094 {
1095 /*
1096 * This could happen when specifying a version in CREATE COLLATION
1097 * but the provider does not support versioning, or manually
1098 * creating a mess in the catalogs.
1099 */
1100 ereport(ERROR,
1101 (errmsg("collation \"%s\" has no actual version, but a version was recorded",
1102 NameStr(collform->collname))));
1103 }
1104
1105 if (strcmp(actual_versionstr, collversionstr) != 0)
1107 (errmsg("collation \"%s\" has version mismatch",
1108 NameStr(collform->collname)),
1109 errdetail("The collation in the database was created using version %s, "
1110 "but the operating system provides version %s.",
1111 collversionstr, actual_versionstr),
1112 errhint("Rebuild all objects affected by this collation and run "
1113 "ALTER COLLATION %s REFRESH VERSION, "
1114 "or build PostgreSQL with the right library version.",
1115 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1116 NameStr(collform->collname)))));
1117 }
1118
1119 ReleaseSysCache(tp);
1120
1121 return result;
1122}
1123
1124/*
1125 * Initialize default_locale with database locale settings.
1126 */
1127void
1129{
1130 HeapTuple tup;
1131 Form_pg_database dbform;
1132 pg_locale_t result;
1133
1134 Assert(default_locale == NULL);
1135
1136 /* Fetch our pg_database row normally, via syscache */
1137 tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1138 if (!HeapTupleIsValid(tup))
1139 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
1140 dbform = (Form_pg_database) GETSTRUCT(tup);
1141
1142 if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
1143 result = create_pg_locale_builtin(DEFAULT_COLLATION_OID,
1145 else if (dbform->datlocprovider == COLLPROVIDER_ICU)
1146 result = create_pg_locale_icu(DEFAULT_COLLATION_OID,
1148 else if (dbform->datlocprovider == COLLPROVIDER_LIBC)
1149 result = create_pg_locale_libc(DEFAULT_COLLATION_OID,
1151 else
1152 /* shouldn't happen */
1153 PGLOCALE_SUPPORT_ERROR(dbform->datlocprovider);
1154
1155 result->is_default = true;
1156
1157 Assert((result->collate_is_c && result->collate == NULL) ||
1158 (!result->collate_is_c && result->collate != NULL));
1159
1160 Assert((result->ctype_is_c && result->ctype == NULL) ||
1161 (!result->ctype_is_c && result->ctype != NULL));
1162
1163 ReleaseSysCache(tup);
1164
1165 default_locale = result;
1166}
1167
1168/*
1169 * Get database default locale.
1170 */
1173{
1174 return pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
1175}
1176
1177/*
1178 * Create a pg_locale_t from a collation OID. Results are cached for the
1179 * lifetime of the backend. Thus, do not free the result with freelocale().
1180 *
1181 * For simplicity, we always generate COLLATE + CTYPE even though we
1182 * might only need one of them. Since this is called only once per session,
1183 * it shouldn't cost much.
1184 */
1187{
1188 collation_cache_entry *cache_entry;
1189 bool found;
1190
1191 if (collid == DEFAULT_COLLATION_OID)
1192 return default_locale;
1193
1194 /*
1195 * Some callers expect C_COLLATION_OID to succeed even without catalog
1196 * access.
1197 */
1198 if (collid == C_COLLATION_OID)
1199 return &c_locale;
1200
1201 if (!OidIsValid(collid))
1202 elog(ERROR, "cache lookup failed for collation %u", collid);
1203
1205
1208
1209 if (CollationCache == NULL)
1210 {
1212 "collation cache",
1214 CollationCache = collation_cache_create(CollationCacheContext,
1215 16, NULL);
1216 }
1217
1218 cache_entry = collation_cache_insert(CollationCache, collid, &found);
1219 if (!found)
1220 {
1221 /*
1222 * Make sure cache entry is marked invalid, in case we fail before
1223 * setting things.
1224 */
1225 cache_entry->locale = 0;
1226 }
1227
1228 if (cache_entry->locale == 0)
1229 {
1231 }
1232
1234 last_collation_cache_locale = cache_entry->locale;
1235
1236 return cache_entry->locale;
1237}
1238
1239/*
1240 * Get provider-specific collation version string for the given collation from
1241 * the operating system/library.
1242 */
1243char *
1244get_collation_actual_version(char collprovider, const char *collcollate)
1245{
1246 char *collversion = NULL;
1247
1248 if (collprovider == COLLPROVIDER_BUILTIN)
1249 collversion = get_collation_actual_version_builtin(collcollate);
1250#ifdef USE_ICU
1251 else if (collprovider == COLLPROVIDER_ICU)
1252 collversion = get_collation_actual_version_icu(collcollate);
1253#endif
1254 else if (collprovider == COLLPROVIDER_LIBC)
1255 collversion = get_collation_actual_version_libc(collcollate);
1256
1257 return collversion;
1258}
1259
1260size_t
1261pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1263{
1264 return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
1265}
1266
1267size_t
1268pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1270{
1271 return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
1272}
1273
1274size_t
1275pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1277{
1278 return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
1279}
1280
1281size_t
1282pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
1284{
1285 if (locale->ctype->strfold)
1286 return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
1287 else
1288 return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
1289}
1290
1291/*
1292 * pg_strcoll
1293 *
1294 * Like pg_strncoll for NUL-terminated input strings.
1295 */
1296int
1297pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
1298{
1299 return locale->collate->strncoll(arg1, -1, arg2, -1, locale);
1300}
1301
1302/*
1303 * pg_strncoll
1304 *
1305 * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll_l() or wcscoll_l() as
1306 * appropriate for the given locale, platform, and database encoding. If the
1307 * locale is not specified, use the database collation.
1308 *
1309 * The input strings must be encoded in the database encoding. If an input
1310 * string is NUL-terminated, its length may be specified as -1.
1311 *
1312 * The caller is responsible for breaking ties if the collation is
1313 * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
1314 * easily account for deterministic collations.
1315 */
1316int
1317pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
1319{
1320 return locale->collate->strncoll(arg1, len1, arg2, len2, locale);
1321}
1322
1323/*
1324 * Return true if the collation provider supports pg_strxfrm() and
1325 * pg_strnxfrm(); otherwise false.
1326 *
1327 *
1328 * No similar problem is known for the ICU provider.
1329 */
1330bool
1332{
1333 /*
1334 * locale->collate->strnxfrm is still a required method, even if it may
1335 * have the wrong behavior, because the planner uses it for estimates in
1336 * some cases.
1337 */
1338 return locale->collate->strxfrm_is_safe;
1339}
1340
1341/*
1342 * pg_strxfrm
1343 *
1344 * Like pg_strnxfrm for a NUL-terminated input string.
1345 */
1346size_t
1347pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
1348{
1349 return locale->collate->strnxfrm(dest, destsize, src, -1, locale);
1350}
1351
1352/*
1353 * pg_strnxfrm
1354 *
1355 * Transforms 'src' to a nul-terminated string stored in 'dest' such that
1356 * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
1357 * untransformed strings.
1358 *
1359 * The input string must be encoded in the database encoding. If the input
1360 * string is NUL-terminated, its length may be specified as -1. If 'destsize'
1361 * is zero, 'dest' may be NULL.
1362 *
1363 * Not all providers support pg_strnxfrm() safely. The caller should check
1364 * pg_strxfrm_enabled() first, otherwise this function may return wrong
1365 * results or an error.
1366 *
1367 * Returns the number of bytes needed (or more) to store the transformed
1368 * string, excluding the terminating nul byte. If the value returned is
1369 * 'destsize' or greater, the resulting contents of 'dest' are undefined.
1370 */
1371size_t
1372pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen,
1374{
1375 return locale->collate->strnxfrm(dest, destsize, src, srclen, locale);
1376}
1377
1378/*
1379 * Return true if the collation provider supports pg_strxfrm_prefix() and
1380 * pg_strnxfrm_prefix(); otherwise false.
1381 */
1382bool
1384{
1385 return (locale->collate->strnxfrm_prefix != NULL);
1386}
1387
1388/*
1389 * pg_strxfrm_prefix
1390 *
1391 * Like pg_strnxfrm_prefix for a NUL-terminated input string.
1392 */
1393size_t
1394pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
1396{
1397 return locale->collate->strnxfrm_prefix(dest, destsize, src, -1, locale);
1398}
1399
1400/*
1401 * pg_strnxfrm_prefix
1402 *
1403 * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
1404 * memcmp() on the byte sequence is equivalent to pg_strncoll() on
1405 * untransformed strings. The result is not nul-terminated.
1406 *
1407 * The input string must be encoded in the database encoding. If the input
1408 * string is NUL-terminated, its length may be specified as -1.
1409 *
1410 * Not all providers support pg_strnxfrm_prefix() safely. The caller should
1411 * check pg_strxfrm_prefix_enabled() first, otherwise this function may return
1412 * wrong results or an error.
1413 *
1414 * If destsize is not large enough to hold the resulting byte sequence, stores
1415 * only the first destsize bytes in 'dest'. Returns the number of bytes
1416 * actually copied to 'dest'.
1417 */
1418size_t
1419pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
1420 ssize_t srclen, pg_locale_t locale)
1421{
1422 return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
1423}
1424
1425bool
1427{
1428 if (locale->ctype == NULL)
1429 return (wc <= (pg_wchar) 127 &&
1431 else
1432 return locale->ctype->wc_isdigit(wc, locale);
1433}
1434
1435bool
1437{
1438 if (locale->ctype == NULL)
1439 return (wc <= (pg_wchar) 127 &&
1441 else
1442 return locale->ctype->wc_isalpha(wc, locale);
1443}
1444
1445bool
1447{
1448 if (locale->ctype == NULL)
1449 return (wc <= (pg_wchar) 127 &&
1451 else
1452 return locale->ctype->wc_isalnum(wc, locale);
1453}
1454
1455bool
1457{
1458 if (locale->ctype == NULL)
1459 return (wc <= (pg_wchar) 127 &&
1461 else
1462 return locale->ctype->wc_isupper(wc, locale);
1463}
1464
1465bool
1467{
1468 if (locale->ctype == NULL)
1469 return (wc <= (pg_wchar) 127 &&
1471 else
1472 return locale->ctype->wc_islower(wc, locale);
1473}
1474
1475bool
1477{
1478 if (locale->ctype == NULL)
1479 return (wc <= (pg_wchar) 127 &&
1481 else
1482 return locale->ctype->wc_isgraph(wc, locale);
1483}
1484
1485bool
1487{
1488 if (locale->ctype == NULL)
1489 return (wc <= (pg_wchar) 127 &&
1491 else
1492 return locale->ctype->wc_isprint(wc, locale);
1493}
1494
1495bool
1497{
1498 if (locale->ctype == NULL)
1499 return (wc <= (pg_wchar) 127 &&
1501 else
1502 return locale->ctype->wc_ispunct(wc, locale);
1503}
1504
1505bool
1507{
1508 if (locale->ctype == NULL)
1509 return (wc <= (pg_wchar) 127 &&
1511 else
1512 return locale->ctype->wc_isspace(wc, locale);
1513}
1514
1515bool
1517{
1518 if (locale->ctype == NULL)
1519 return (wc <= (pg_wchar) 127 &&
1520 ((pg_char_properties[wc] & PG_ISDIGIT) ||
1521 ((wc >= 'A' && wc <= 'F') ||
1522 (wc >= 'a' && wc <= 'f'))));
1523 else
1524 return locale->ctype->wc_isxdigit(wc, locale);
1525}
1526
1529{
1530 if (locale->ctype == NULL)
1531 {
1532 if (wc <= (pg_wchar) 127)
1533 return pg_ascii_toupper((unsigned char) wc);
1534 return wc;
1535 }
1536 else
1537 return locale->ctype->wc_toupper(wc, locale);
1538}
1539
1542{
1543 if (locale->ctype == NULL)
1544 {
1545 if (wc <= (pg_wchar) 127)
1546 return pg_ascii_tolower((unsigned char) wc);
1547 return wc;
1548 }
1549 else
1550 return locale->ctype->wc_tolower(wc, locale);
1551}
1552
1553/*
1554 * char_is_cased()
1555 *
1556 * Fuzzy test of whether the given char is case-varying or not. The argument
1557 * is a single byte, so in a multibyte encoding, just assume any non-ASCII
1558 * char is case-varying.
1559 */
1560bool
1562{
1563 return locale->ctype->char_is_cased(ch, locale);
1564}
1565
1566/*
1567 * char_tolower_enabled()
1568 *
1569 * Does the provider support char_tolower()?
1570 */
1571bool
1573{
1574 return (locale->ctype->char_tolower != NULL);
1575}
1576
1577/*
1578 * char_tolower()
1579 *
1580 * Convert char (single-byte encoding) to lowercase.
1581 */
1582char
1584{
1585 return locale->ctype->char_tolower(ch, locale);
1586}
1587
1588/*
1589 * Return required encoding ID for the given locale, or -1 if any encoding is
1590 * valid for the locale.
1591 */
1592int
1594{
1595 if (strcmp(locale, "C") == 0)
1596 return -1;
1597 else if (strcmp(locale, "C.UTF-8") == 0)
1598 return PG_UTF8;
1599 else if (strcmp(locale, "PG_UNICODE_FAST") == 0)
1600 return PG_UTF8;
1601
1602
1603 ereport(ERROR,
1604 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1605 errmsg("invalid locale name \"%s\" for builtin provider",
1606 locale)));
1607
1608 return 0; /* keep compiler quiet */
1609}
1610
1611
1612/*
1613 * Validate the locale and encoding combination, and return the canonical form
1614 * of the locale name.
1615 */
1616const char *
1618{
1619 const char *canonical_name = NULL;
1620 int required_encoding;
1621
1622 if (strcmp(locale, "C") == 0)
1623 canonical_name = "C";
1624 else if (strcmp(locale, "C.UTF-8") == 0 || strcmp(locale, "C.UTF8") == 0)
1625 canonical_name = "C.UTF-8";
1626 else if (strcmp(locale, "PG_UNICODE_FAST") == 0)
1627 canonical_name = "PG_UNICODE_FAST";
1628
1629 if (!canonical_name)
1630 ereport(ERROR,
1631 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1632 errmsg("invalid locale name \"%s\" for builtin provider",
1633 locale)));
1634
1635 required_encoding = builtin_locale_encoding(canonical_name);
1636 if (required_encoding >= 0 && encoding != required_encoding)
1637 ereport(ERROR,
1638 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1639 errmsg("encoding \"%s\" does not match locale \"%s\"",
1641
1642 return canonical_name;
1643}
1644
1645
1646
1647/*
1648 * Return the BCP47 language tag representation of the requested locale.
1649 *
1650 * This function should be called before passing the string to ucol_open(),
1651 * because conversion to a language tag also performs "level 2
1652 * canonicalization". In addition to producing a consistent format, level 2
1653 * canonicalization is able to more accurately interpret different input
1654 * locale string formats, such as POSIX and .NET IDs.
1655 */
1656char *
1657icu_language_tag(const char *loc_str, int elevel)
1658{
1659#ifdef USE_ICU
1660 UErrorCode status;
1661 char *langtag;
1662 size_t buflen = 32; /* arbitrary starting buffer size */
1663 const bool strict = true;
1664
1665 /*
1666 * A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
1667 * RFC5646 section 4.4). Additionally, in older ICU versions,
1668 * uloc_toLanguageTag() doesn't always return the ultimate length on the
1669 * first call, necessitating a loop.
1670 */
1671 langtag = palloc(buflen);
1672 while (true)
1673 {
1674 status = U_ZERO_ERROR;
1675 uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
1676
1677 /* try again if the buffer is not large enough */
1678 if ((status == U_BUFFER_OVERFLOW_ERROR ||
1679 status == U_STRING_NOT_TERMINATED_WARNING) &&
1680 buflen < MaxAllocSize)
1681 {
1682 buflen = Min(buflen * 2, MaxAllocSize);
1683 langtag = repalloc(langtag, buflen);
1684 continue;
1685 }
1686
1687 break;
1688 }
1689
1690 if (U_FAILURE(status))
1691 {
1692 pfree(langtag);
1693
1694 if (elevel > 0)
1695 ereport(elevel,
1696 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1697 errmsg("could not convert locale name \"%s\" to language tag: %s",
1698 loc_str, u_errorName(status))));
1699 return NULL;
1700 }
1701
1702 return langtag;
1703#else /* not USE_ICU */
1704 ereport(ERROR,
1705 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1706 errmsg("ICU is not supported in this build")));
1707 return NULL; /* keep compiler quiet */
1708#endif /* not USE_ICU */
1709}
1710
1711/*
1712 * Perform best-effort check that the locale is a valid one.
1713 */
1714void
1715icu_validate_locale(const char *loc_str)
1716{
1717#ifdef USE_ICU
1718 UCollator *collator;
1719 UErrorCode status;
1720 char lang[ULOC_LANG_CAPACITY];
1721 bool found = false;
1722 int elevel = icu_validation_level;
1723
1724 /* no validation */
1725 if (elevel < 0)
1726 return;
1727
1728 /* downgrade to WARNING during pg_upgrade */
1729 if (IsBinaryUpgrade && elevel > WARNING)
1730 elevel = WARNING;
1731
1732 /* validate that we can extract the language */
1733 status = U_ZERO_ERROR;
1734 uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
1735 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
1736 {
1737 ereport(elevel,
1738 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1739 errmsg("could not get language from ICU locale \"%s\": %s",
1740 loc_str, u_errorName(status)),
1741 errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
1742 "icu_validation_level", "disabled")));
1743 return;
1744 }
1745
1746 /* check for special language name */
1747 if (strcmp(lang, "") == 0 ||
1748 strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
1749 found = true;
1750
1751 /* search for matching language within ICU */
1752 for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
1753 {
1754 const char *otherloc = uloc_getAvailable(i);
1755 char otherlang[ULOC_LANG_CAPACITY];
1756
1757 status = U_ZERO_ERROR;
1758 uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
1759 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
1760 continue;
1761
1762 if (strcmp(lang, otherlang) == 0)
1763 found = true;
1764 }
1765
1766 if (!found)
1767 ereport(elevel,
1768 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1769 errmsg("ICU locale \"%s\" has unknown language \"%s\"",
1770 loc_str, lang),
1771 errhint("To disable ICU locale validation, set the parameter \"%s\" to \"%s\".",
1772 "icu_validation_level", "disabled")));
1773
1774 /* check that it can be opened */
1775 collator = pg_ucol_open(loc_str);
1776 ucol_close(collator);
1777#else /* not USE_ICU */
1778 /* could get here if a collation was created by a build with ICU */
1779 ereport(ERROR,
1780 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1781 errmsg("ICU is not supported in this build")));
1782#endif /* not USE_ICU */
1783}
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define NameStr(name)
Definition: c.h:756
#define Min(x, y)
Definition: c.h:1008
uint32_t uint32
Definition: c.h:543
#define lengthof(array)
Definition: c.h:792
#define OidIsValid(objectId)
Definition: c.h:779
Oid collid
int errdetail(const char *fmt,...)
Definition: elog.c:1216
int errhint(const char *fmt,...)
Definition: elog.c:1330
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define PG_RE_THROW()
Definition: elog.h:405
#define DEBUG3
Definition: elog.h:28
#define FATAL
Definition: elog.h:41
#define PG_TRY(...)
Definition: elog.h:372
#define WARNING
Definition: elog.h:36
#define PG_END_TRY(...)
Definition: elog.h:397
#define ERROR
Definition: elog.h:39
#define PG_CATCH(...)
Definition: elog.h:382
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
#define MaxAllocSize
Definition: fe_memutils.h:22
bool IsBinaryUpgrade
Definition: globals.c:121
Oid MyDatabaseId
Definition: globals.c:94
#define newval
GucSource
Definition: guc.h:112
@ PGC_S_DEFAULT
Definition: guc.h:113
Assert(PointerIsAligned(start, uint64))
const char * str
#define free(a)
Definition: header.h:65
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
#define period
Definition: indent_codes.h:66
static char * locale
Definition: initdb.c:140
int i
Definition: isn.c:77
static struct pg_tm tm
Definition: localtime.c:104
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3533
unsigned int pg_wchar
Definition: mbprint.c:31
int GetDatabaseEncoding(void)
Definition: mbutils.c:1262
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:677
int pg_mbstrlen(const char *mbstr)
Definition: mbutils.c:1038
void SetMessageEncoding(int encoding)
Definition: mbutils.c:1172
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1746
char * pstrdup(const char *in)
Definition: mcxt.c:1759
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
MemoryContext TopMemoryContext
Definition: mcxt.c:166
void * palloc(Size size)
Definition: mcxt.c:1365
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static char format
FormData_pg_collation * Form_pg_collation
Definition: pg_collation.h:58
const void size_t len
FormData_pg_database * Form_pg_database
Definition: pg_database.h:96
int32 encoding
Definition: pg_database.h:41
char char_tolower(unsigned char ch, pg_locale_t locale)
Definition: pg_locale.c:1583
int icu_validation_level
Definition: pg_locale.c:89
static pg_locale_t last_collation_cache_locale
Definition: pg_locale.c:148
void cache_locale_time(void)
Definition: pg_locale.c:699
size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1372
bool pg_strxfrm_enabled(pg_locale_t locale)
Definition: pg_locale.c:1331
char * localized_full_months[12+1]
Definition: pg_locale.c:101
pg_wchar pg_towlower(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1541
bool pg_iswalnum(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1446
void icu_validate_locale(const char *loc_str)
Definition: pg_locale.c:1715
pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context)
static bool CurrentLCTimeValid
Definition: pg_locale.c:107
void assign_locale_time(const char *newval, void *extra)
Definition: pg_locale.c:367
char * get_collation_actual_version(char collprovider, const char *collcollate)
Definition: pg_locale.c:1244
pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context)
bool check_locale_time(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:361
char * locale_messages
Definition: pg_locale.c:84
bool pg_iswgraph(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1476
bool pg_iswprint(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1486
char * locale_numeric
Definition: pg_locale.c:86
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1186
bool pg_iswdigit(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1426
bool pg_iswupper(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1456
size_t pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1282
int builtin_locale_encoding(const char *locale)
Definition: pg_locale.c:1593
size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1419
static struct pg_locale_struct c_locale
Definition: pg_locale.c:109
bool pg_iswxdigit(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1516
char * pg_perm_setlocale(int category, const char *locale)
Definition: pg_locale.c:169
#define PGLOCALE_SUPPORT_ERROR(provider)
Definition: pg_locale.c:57
static pg_locale_t create_pg_locale(Oid collid, MemoryContext context)
Definition: pg_locale.c:1046
char * locale_time
Definition: pg_locale.c:87
static void cache_single_string(char **dst, const char *src, int encoding)
Definition: pg_locale.c:676
char * get_collation_actual_version_libc(const char *collcollate)
size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1261
pg_wchar pg_towupper(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1528
bool check_locale_numeric(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:349
bool char_tolower_enabled(pg_locale_t locale)
Definition: pg_locale.c:1572
pg_locale_t pg_database_locale(void)
Definition: pg_locale.c:1172
bool pg_iswspace(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1506
static void db_encoding_convert(int encoding, char **str)
Definition: pg_locale.c:473
void assign_locale_numeric(const char *newval, void *extra)
Definition: pg_locale.c:355
bool check_locale_messages(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:383
#define MAX_L10N_DATA
Definition: pg_locale.c:66
char * get_collation_actual_version_builtin(const char *collcollate)
static void free_struct_lconv(struct lconv *s)
Definition: pg_locale.c:423
static MemoryContext CollationCacheContext
Definition: pg_locale.c:140
void assign_locale_messages(const char *newval, void *extra)
Definition: pg_locale.c:406
static bool CurrentLocaleConvValid
Definition: pg_locale.c:106
struct lconv * PGLC_localeconv(void)
Definition: pg_locale.c:503
pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context)
size_t pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1268
int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
Definition: pg_locale.c:1297
bool pg_iswpunct(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1496
bool pg_strxfrm_prefix_enabled(pg_locale_t locale)
Definition: pg_locale.c:1383
char * icu_language_tag(const char *loc_str, int elevel)
Definition: pg_locale.c:1657
bool char_is_cased(char ch, pg_locale_t locale)
Definition: pg_locale.c:1561
char * localized_abbrev_months[12+1]
Definition: pg_locale.c:100
static pg_locale_t default_locale
Definition: pg_locale.c:103
static collation_cache_hash * CollationCache
Definition: pg_locale.c:141
int pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)
Definition: pg_locale.c:1317
bool pg_iswlower(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1466
static bool struct_lconv_is_valid(struct lconv *s)
Definition: pg_locale.c:442
void init_database_collation(void)
Definition: pg_locale.c:1128
char * localized_full_days[7+1]
Definition: pg_locale.c:99
size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1347
const char * builtin_validate_locale(int encoding, const char *locale)
Definition: pg_locale.c:1617
size_t pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1275
bool pg_iswalpha(pg_wchar wc, pg_locale_t locale)
Definition: pg_locale.c:1436
void assign_locale_monetary(const char *newval, void *extra)
Definition: pg_locale.c:343
bool check_locale(int category, const char *locale, char **canonname)
Definition: pg_locale.c:272
char * localized_abbrev_days[7+1]
Definition: pg_locale.c:98
size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1394
char * locale_monetary
Definition: pg_locale.c:85
bool check_locale_monetary(char **newval, void **extra, GucSource source)
Definition: pg_locale.c:337
static Oid last_collation_cache_oid
Definition: pg_locale.c:147
#define LOCALE_NAME_BUFLEN
Definition: pg_locale.h:27
#define PG_ISLOWER
Definition: pg_locale_c.h:23
#define PG_ISPRINT
Definition: pg_locale_c.h:25
#define PG_ISALPHA
Definition: pg_locale_c.h:20
#define PG_ISGRAPH
Definition: pg_locale_c.h:24
#define PG_ISPUNCT
Definition: pg_locale_c.h:26
#define PG_ISDIGIT
Definition: pg_locale_c.h:19
#define PG_ISUPPER
Definition: pg_locale_c.h:22
#define PG_ISALNUM
Definition: pg_locale_c.h:21
#define PG_ISSPACE
Definition: pg_locale_c.h:27
static const unsigned char pg_char_properties[128]
Definition: pg_locale_c.h:29
size_t wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
void report_newlocale_failure(const char *localename)
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:72
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
@ PG_UTF8
Definition: pg_wchar.h:232
#define pg_encoding_to_char
Definition: pg_wchar.h:630
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
int pg_localeconv_r(const char *lc_monetary, const char *lc_numeric, struct lconv *output)
unsigned char pg_ascii_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:146
int pg_get_encoding_from_locale(const char *ctype, bool write_message)
Definition: chklocale.c:301
unsigned char pg_ascii_toupper(unsigned char ch)
Definition: pgstrcasecmp.c:135
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
void pg_localeconv_free(struct lconv *lconv)
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
uint64_t Datum
Definition: postgres.h:70
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
static void AssertCouldGetRelation(void)
Definition: relcache.h:44
char * quote_qualified_identifier(const char *qualifier, const char *ident)
Definition: ruleutils.c:13146
bool pg_is_ascii(const char *str)
Definition: string.c:132
Definition: pg_locale.c:118
char status
Definition: pg_locale.c:124
Oid collid
Definition: pg_locale.c:119
pg_locale_t locale
Definition: pg_locale.c:120
uint32 hash
Definition: pg_locale.c:123
const struct ctype_methods * ctype
Definition: pg_locale.h:153
const struct collate_methods * collate
Definition: pg_locale.h:152
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:220
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:595
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:625
#define locale_t
Definition: win32_port.h:432
void _dosmaperr(unsigned long)
Definition: win32error.c:177
#define setenv(x, y, z)
Definition: win32_port.h:545
#define setlocale(a, b)
Definition: win32_port.h:475