@@ -386,11 +386,12 @@ initdb --locale-provider=icu --icu-locale=en
386386 linkend="icu-language-tag">Language Tag</link>.
387387
388388<programlisting>
389- CREATE COLLATION mycollation1 (PROVIDER = icu, LOCALE = 'ja-JP');
390- CREATE COLLATION mycollation2 (PROVIDER = icu, LOCALE = 'fr');
389+ CREATE COLLATION mycollation1 (provider = icu, locale = 'ja-JP');
390+ CREATE COLLATION mycollation2 (provider = icu, locale = 'fr');
391391</programlisting>
392392 </para>
393393 </sect3>
394+
394395 <sect3 id="icu-canonicalization">
395396 <title>Locale Canonicalization and Validation</title>
396397 <para>
@@ -399,14 +400,14 @@ CREATE COLLATION mycollation2 (PROVIDER = icu, LOCALE = 'fr');
399400 language tag if not already in that form. For instance,
400401
401402<screen>
402- CREATE COLLATION mycollation3 (PROVIDER = icu, LOCALE = 'en-US-u-kn-true');
403+ CREATE COLLATION mycollation3 (provider = icu, locale = 'en-US-u-kn-true');
403404NOTICE: using standard form "en-US-u-kn" for locale "en-US-u-kn-true"
404- CREATE COLLATION mycollation4 (PROVIDER = icu, LOCALE = 'de_DE.utf8');
405+ CREATE COLLATION mycollation4 (provider = icu, locale = 'de_DE.utf8');
405406NOTICE: using standard form "de-DE" for locale "de_DE.utf8"
406407</screen>
407408
408- If you see this notice, ensure that the <symbol>PROVIDER </symbol> and
409- <symbol>LOCALE </symbol> are the expected result. For consistent results
409+ If you see this notice, ensure that the <symbol>provider</symbol> and
410+ <symbol>locale</symbol> are the expected result. For consistent results
410411 when using the ICU provider, specify the canonical <link
411412 linkend="icu-language-tag">language tag</link> instead of relying on the
412413 transformation.
@@ -427,7 +428,7 @@ NOTICE: using standard form "de-DE" for locale "de_DE.utf8"
427428 the following warning:
428429
429430<screen>
430- CREATE COLLATION nonsense (PROVIDER = icu, LOCALE = 'nonsense');
431+ CREATE COLLATION nonsense (provider = icu, locale = 'nonsense');
431432WARNING: ICU locale "nonsense" has unknown language "nonsense"
432433HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
433434CREATE COLLATION
@@ -438,6 +439,7 @@ CREATE COLLATION
438439 still be created, but the behavior may not be what the user intended.
439440 </para>
440441 </sect3>
442+
441443 <sect3 id="icu-language-tag">
442444 <title>Language Tag</title>
443445 <para>
@@ -484,7 +486,7 @@ CREATE COLLATION
484486 of digits as a single number:
485487
486488<screen>
487- CREATE COLLATION mycollation5 (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'en-US-u-kn-ks-level2');
489+ CREATE COLLATION mycollation5 (provider = icu, deterministic = false, locale = 'en-US-u-kn-ks-level2');
488490SELECT 'aB' = 'Ab' COLLATE mycollation5 as result;
489491 result
490492--------
@@ -1109,16 +1111,16 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr
11091111
11101112<programlisting>
11111113-- ignore differences in accents and case
1112- CREATE COLLATION ignore_accent_case (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'und-u-ks-level1');
1114+ CREATE COLLATION ignore_accent_case (provider = icu, deterministic = false, locale = 'und-u-ks-level1');
11131115SELECT 'Å' = 'A' COLLATE ignore_accent_case; -- true
11141116SELECT 'z' = 'Z' COLLATE ignore_accent_case; -- true
11151117
11161118-- upper case letters sort before lower case.
1117- CREATE COLLATION upper_first (PROVIDER= icu, LOCALE = 'und-u-kf-upper');
1119+ CREATE COLLATION upper_first (provider = icu, locale = 'und-u-kf-upper');
11181120SELECT 'B' < 'b' COLLATE upper_first; -- true
11191121
11201122-- treat digits numerically and ignore punctuation
1121- CREATE COLLATION num_ignore_punct (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'und-u-ka-shifted-kn');
1123+ CREATE COLLATION num_ignore_punct (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-kn');
11221124SELECT 'id-45' < 'id-123' COLLATE num_ignore_punct; -- true
11231125SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
11241126</programlisting>
@@ -1136,6 +1138,13 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
11361138 linkend="icu-collation-settings-table">collation settings</link>. Higher
11371139 levels correspond to finer textual features.
11381140 </para>
1141+ <para>
1142+ <xref linkend="icu-collation-levels"/> shows which textual feature
1143+ differences are considered significant when determining equality at the
1144+ given level. The Unicode character <literal>U+2063</literal> is an
1145+ invisible separator, and as seen in the table, is ignored at all
1146+ levels of comparison less than <literal>identic</literal>.
1147+ </para>
11391148 <para>
11401149 <table id="icu-collation-levels">
11411150 <title>ICU Collation Levels</title>
@@ -1215,30 +1224,23 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
12151224 </tgroup>
12161225 </table>
12171226
1218- The above table shows which textual feature differences are
1219- considered significant when determining equality at the given level. The
1220- unicode character <literal>U+2063</literal> is an invisible separator,
1221- and as seen in the table, is ignored for at all levels of comparison less
1222- than <literal>identic</literal>.
1223- </para>
1224- <para>
12251227 At every level, even with full normalization off, basic normalization is
12261228 performed. For example, <literal>'á'</literal> may be composed of the
12271229 code points <literal>U&'\0061\0301'</literal> or the single code
12281230 point <literal>U&'\00E1'</literal>, and those sequences will be
12291231 considered equal even at the <literal>identic</literal> level. To treat
12301232 any difference in code point representation as distinct, use a collation
1231- created with <symbol>DETERMINISTIC </symbol> set to
1233+ created with <symbol>deterministic</symbol> set to
12321234 <literal>true</literal>.
12331235 </para>
12341236 <sect4 id="icu-collation-level-examples">
12351237 <title>Collation Level Examples</title>
12361238 <para>
12371239
12381240<programlisting>
1239- CREATE COLLATION level3 (PROVIDER= icu, DETERMINISTIC= false, LOCALE= 'und-u-ka-shifted-ks-level3');
1240- CREATE COLLATION level4 (PROVIDER= icu, DETERMINISTIC= false, LOCALE= 'und-u-ka-shifted-ks-level4');
1241- CREATE COLLATION identic (PROVIDER= icu, DETERMINISTIC= false, LOCALE= 'und-u-ka-shifted-ks-identic');
1241+ CREATE COLLATION level3 (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-level3');
1242+ CREATE COLLATION level4 (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-level4');
1243+ CREATE COLLATION identic (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-identic');
12421244
12431245-- invisible separator ignored at all levels except identic
12441246SELECT 'ab' = U&'a\2063b' COLLATE level4; -- true
@@ -1252,8 +1254,14 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
12521254 </para>
12531255 </sect4>
12541256 </sect3>
1257+
12551258 <sect3 id="icu-collation-settings">
12561259 <title>Collation Settings for an ICU Locale</title>
1260+ <para>
1261+ <xref linkend="icu-collation-settings-table"/> shows the available
1262+ collation settings, which can be used as part of a language tag to
1263+ customize a collation.
1264+ </para>
12571265 <para>
12581266 <table id="icu-collation-settings-table">
12591267 <title>ICU Collation Settings</title>
@@ -1272,14 +1280,11 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
12721280 </thead>
12731281 <tbody>
12741282 <row>
1275- <entry><literal>ks </literal></entry>
1276- <entry><literal>level1 </literal>, <literal>level2 </literal>, <literal>level3 </literal>, <literal>level4</literal>, <literal>identic</literal ></entry>
1277- <entry><literal>level3 </literal></entry>
1283+ <entry><literal>co</literal></entry>
1284+ <entry><literal>emoji</literal>, <literal>phonebk</literal>, <literal>standard</literal>, <replaceable>...</replaceable></entry>
1285+ <entry><literal>standard</literal></entry>
12781286 <entry>
1279- Sensitivity (or "strength") when determining equality, with
1280- <literal>level1</literal> the least sensitive to differences and
1281- <literal>identic</literal> the most sensitive to differences. See
1282- <xref linkend="icu-collation-levels"/> for details.
1287+ Collation type. See <xref linkend="icu-external-references"/> for additional options and details.
12831288 </entry>
12841289 </row>
12851290 <row>
@@ -1304,29 +1309,6 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
13041309 before <literal>'aé'</literal>.
13051310 </entry>
13061311 </row>
1307- <row>
1308- <entry><literal>kk</literal></entry>
1309- <entry><literal>true</literal>, <literal>false</literal></entry>
1310- <entry><literal>false</literal></entry>
1311- <entry>
1312- <para>
1313- Enable full normalization; may affect performance. Basic
1314- normalization is performed even when set to
1315- <literal>false</literal>. Locales for languages that require full
1316- normalization typically enable it by default.
1317- </para>
1318- <para>
1319- Full normalization is important in some cases, such as when
1320- multiple accents are applied to a single character. For example,
1321- the code point sequences <literal>U&'\0065\0323\0302'</literal>
1322- and <literal>U&'\0065\0302\0323'</literal> represent
1323- an <literal>e</literal> with circumflex and dot-below accents
1324- applied in different orders. With full normalization
1325- on, these code point sequences are treated as equal; otherwise they
1326- are unequal.
1327- </para>
1328- </entry>
1329- </row>
13301312 <row>
13311313 <entry><literal>kc</literal></entry>
13321314 <entry><literal>true</literal>, <literal>false</literal></entry>
@@ -1368,6 +1350,29 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
13681350 <literal>'id-123'</literal>.
13691351 </entry>
13701352 </row>
1353+ <row>
1354+ <entry><literal>kk</literal></entry>
1355+ <entry><literal>true</literal>, <literal>false</literal></entry>
1356+ <entry><literal>false</literal></entry>
1357+ <entry>
1358+ <para>
1359+ Enable full normalization; may affect performance. Basic
1360+ normalization is performed even when set to
1361+ <literal>false</literal>. Locales for languages that require full
1362+ normalization typically enable it by default.
1363+ </para>
1364+ <para>
1365+ Full normalization is important in some cases, such as when
1366+ multiple accents are applied to a single character. For example,
1367+ the code point sequences <literal>U&'\0065\0323\0302'</literal>
1368+ and <literal>U&'\0065\0302\0323'</literal> represent
1369+ an <literal>e</literal> with circumflex and dot-below accents
1370+ applied in different orders. With full normalization
1371+ on, these code point sequences are treated as equal; otherwise they
1372+ are unequal.
1373+ </para>
1374+ </entry>
1375+ </row>
13711376 <row>
13721377 <entry><literal>kr</literal></entry>
13731378 <entry>
@@ -1393,6 +1398,17 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
13931398 </para>
13941399 </entry>
13951400 </row>
1401+ <row>
1402+ <entry><literal>ks</literal></entry>
1403+ <entry><literal>level1</literal>, <literal>level2</literal>, <literal>level3</literal>, <literal>level4</literal>, <literal>identic</literal></entry>
1404+ <entry><literal>level3</literal></entry>
1405+ <entry>
1406+ Sensitivity (or "strength") when determining equality, with
1407+ <literal>level1</literal> the least sensitive to differences and
1408+ <literal>identic</literal> the most sensitive to differences. See
1409+ <xref linkend="icu-collation-levels"/> for details.
1410+ </entry>
1411+ </row>
13961412 <row>
13971413 <entry><literal>kv</literal></entry>
13981414 <entry>
@@ -1410,14 +1426,6 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
14101426 to <literal>level3</literal> or lower to take effect.
14111427 </entry>
14121428 </row>
1413- <row>
1414- <entry><literal>co</literal></entry>
1415- <entry><literal>emoji</literal>, <literal>phonebk</literal>, <literal>standard</literal>, <replaceable>...</replaceable></entry>
1416- <entry><literal>standard</literal></entry>
1417- <entry>
1418- Collation type. See <xref linkend="icu-external-references"/> for additional options and details.
1419- </entry>
1420- </row>
14211429 </tbody>
14221430 </tgroup>
14231431 </table>
@@ -1428,7 +1436,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
14281436 <note>
14291437 <para>
14301438 For many collation settings, you must create the collation with
1431- <option>DETERMINISTIC </option> set to <literal>false</literal> for the
1439+ <option>deterministic</option> set to <literal>false</literal> for the
14321440 setting to have the desired effect (see <xref
14331441 linkend="collation-nondeterministic"/>). Additionally, some settings
14341442 only take effect when the key <literal>ka</literal> is set to
@@ -1437,6 +1445,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
14371445 </para>
14381446 </note>
14391447 </sect3>
1448+
14401449 <sect3 id="icu-locale-examples">
14411450 <title>Examples</title>
14421451 <para>
@@ -1487,6 +1496,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
14871496 </variablelist>
14881497 </para>
14891498 </sect3>
1499+
14901500 <sect3 id="icu-external-references">
14911501 <title>External References for ICU</title>
14921502 <para>
0 commit comments