postgrespro
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
Lines changed: 35 additions & 22 deletions
diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml
Lines changed: 18 additions & 2 deletions
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
Lines changed: 2 additions & 2 deletions
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
Lines changed: 29 additions & 1 deletion
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.83 2007/04/15 10:56:25 ishii Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.84 2007/09/28 22:25:49 tgl Exp $ -->
 
 <chapter id="charset">
  <title>Localization</>
@@ -249,7 +249,7 @@ initdb --locale=sv_SE
    <title>Problems</>
 
    <para>
-    If locale support doesn't work in spite of the explanation above,
+    If locale support doesn't work according to the explanation above,
     check that the locale support in your operating system is
     correctly configured.  To check what locales are installed on your
     system, you can use the command <literal>locale -a</literal> if
@@ -301,7 +301,8 @@ initdb --locale=sv_SE
 
   <para>
    The character set support in <productname>PostgreSQL</productname>
-   allows you to store text in a variety of character sets, including
+   allows you to store text in a variety of character sets (also called
+   encodings), including
    single-byte character sets such as the ISO 8859 series and
    multiple-byte character sets such as <acronym>EUC</> (Extended Unix
    Code), UTF-8, and Mule internal code.  All supported character sets
@@ -314,6 +315,20 @@ initdb --locale=sv_SE
    databases each with a different character set.
   </para>
 
+  <para>
+   An important restriction, however, is that each database character set
+   must be compatible with the server's <envar>LC_CTYPE</> setting.
+   When <envar>LC_CTYPE</> is <literal>C</> or <literal>POSIX</>, any
+   character set is allowed, but for other settings of <envar>LC_CTYPE</>
+   there is only one character set that will work correctly.
+   Since the <envar>LC_CTYPE</> setting is frozen by <command>initdb</>, the
+   apparent flexibility to use different encodings in different databases
+   of a cluster is more theoretical than real, except when you select
+   <literal>C</> or <literal>POSIX</> locale (thus disabling any real locale
+   awareness).  It is likely that these mechanisms will be revisited in future
+   versions of <productname>PostgreSQL</productname>.
+  </para>
+
    <sect2 id="multibyte-charset-supported">
     <title>Supported Character Sets</title>
 
@@ -716,7 +731,8 @@ initdb -E EUC_JP
     </para>
 
     <para>
-     You can create a database with a different character set:
+     If you have selected <literal>C</> or <literal>POSIX</> locale,
+     you can create a database with a different character set:
 
 <screen>
 createdb -E EUC_KR korean
@@ -731,7 +747,7 @@ CREATE DATABASE korean WITH ENCODING 'EUC_KR';
 </programlisting>
 
      The encoding for a database is stored in the system catalog
-     <literal>pg_database</literal>.  You can see that by using the
+     <literal>pg_database</literal>.  You can see it by using the
      <option>-l</option> option or the <command>\l</command> command
      of <command>psql</command>.
 
@@ -756,26 +772,23 @@ $ <userinput>psql -l</userinput>
 
     <important>
      <para>
-      Although you can specify any encoding you want for a database, it is
-      unwise to choose an encoding that is not what is expected by the locale
-      you have selected.  The <literal>LC_COLLATE</literal> and
-      <literal>LC_CTYPE</literal> settings imply a particular encoding,
-      and locale-dependent operations (such as sorting) are likely to
-      misinterpret data that is in an incompatible encoding.
-     </para>
-
-     <para>
-      Since these locale settings are frozen by <command>initdb</>, the
-      apparent flexibility to use different encodings in different databases
-      of a cluster is more theoretical than real.  It is likely that these
-      mechanisms will be revisited in future versions of
-      <productname>PostgreSQL</productname>.
+      On most modern operating systems, <productname>PostgreSQL</productname>
+      can determine which character set is implied by an <envar>LC_CTYPE</>
+      setting, and it will allow only the matching database encoding to be
+      used.  On older systems it is your responsibility to ensure that you use
+      the encoding expected by the locale you have selected.  A mistake in
+      this area is likely to lead to strange misbehavior of locale-dependent
+      operations such as sorting.
      </para>
 
      <para>
-      One way to use multiple encodings safely is to set the locale to
-      <literal>C</> or <literal>POSIX</> during <command>initdb</>, thus
-      disabling any real locale awareness.
+      <productname>PostgreSQL</productname> will allow superusers to create
+      databases with <literal>SQL_ASCII</> encoding even when
+      <envar>LC_CTYPE</> is not <literal>C</> or <literal>POSIX</>.  As noted
+      above, <literal>SQL_ASCII</> does not enforce that the data stored in
+      the database has any particular encoding, and so this choice poses risks
+      of locale-dependent misbehavior.  Using this combination of settings is
+      deprecated and may someday be forbidden altogether.
      </para>
     </important>
    </sect2>
 
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/create_database.sgml,v 1.47 2007/01/31 23:26:03 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/create_database.sgml,v 1.48 2007/09/28 22:25:49 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -107,7 +107,8 @@ CREATE DATABASE <replaceable class="PARAMETER">name</replaceable>
         to use the default encoding (namely, the encoding of the
         template database). The character sets supported by the
         <productname>PostgreSQL</productname> server are described in
-        <xref linkend="multibyte-charset-supported">.
+        <xref linkend="multibyte-charset-supported">. See below for
+        additional restrictions.
        </para>
       </listitem>
      </varlistentry>
@@ -178,6 +179,21 @@ CREATE DATABASE <replaceable class="PARAMETER">name</replaceable>
    See <xref linkend="manage-ag-templatedbs"> for more information.
   </para>
 
+  <para>
+   Any character set encoding specified for the new database must be
+   compatible with the server's <envar>LC_CTYPE</> locale setting.
+   If <envar>LC_CTYPE</> is <literal>C</> (or equivalently
+   <literal>POSIX</>), then all encodings are allowed, but for other
+   locale settings there is only one encoding that will work properly,
+   and so the apparent freedom to specify an encoding is illusory if
+   you didn't initialize the database cluster in <literal>C</> locale.
+   <command>CREATE DATABASE</> will allow superusers to specify
+   <literal>SQL_ASCII</> encoding regardless of the locale setting,
+   but this choice is deprecated and may result in misbehavior of
+   character-string functions if data that is not encoding-compatible
+   with the locale is stored in the database.
+  </para>
+
   <para>
    The <literal>CONNECTION LIMIT</> option is only enforced approximately;
    if two new sessions start at about the same time when just one
 
@@ -1,5 +1,5 @@
 # -*-makefile-*-
-# $PostgreSQL: pgsql/src/Makefile.global.in,v 1.238 2007/08/20 08:53:12 petere Exp $
+# $PostgreSQL: pgsql/src/Makefile.global.in,v 1.239 2007/09/28 22:25:49 tgl Exp $
 
 #------------------------------------------------------------------------------
 # All PostgreSQL makefiles include this file and use the variables it sets,
@@ -423,7 +423,7 @@ endif
 #
 # substitute implementations of C library routines (see src/port/)
 
-LIBOBJS = @LIBOBJS@ copydir.o dirmod.o exec.o noblock.o path.o pipe.o pgsleep.o pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o
+LIBOBJS = @LIBOBJS@
 
 LIBS := -lpgport $(LIBS)
 # add location of libpgport.a to LDFLAGS
 
@@ -13,13 +13,14 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.198 2007/09/03 18:46:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.199 2007/09/28 22:25:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include <fcntl.h>
+#include <locale.h>
 #include <unistd.h>
 #include <sys/stat.h>
 
@@ -96,6 +97,7 @@ createdb(const CreatedbStmt *stmt)
 	const char *dbtemplate = NULL;
 	int			encoding = -1;
 	int			dbconnlimit = -1;
+	int			ctype_encoding;
 
 	/* Extract options from the statement node tree */
 	foreach(option, stmt->options)
@@ -254,6 +256,32 @@ createdb(const CreatedbStmt *stmt)
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("invalid server encoding %d", encoding)));
 
+	/*
+	 * Check whether encoding matches server locale settings.  We allow
+	 * mismatch in two cases:
+	 *
+	 * 1. ctype_encoding = SQL_ASCII, which means either that the locale
+	 * is C/POSIX which works with any encoding, or that we couldn't determine
+	 * the locale's encoding and have to trust the user to get it right.
+	 *
+	 * 2. selected encoding is SQL_ASCII, but only if you're a superuser.
+	 * This is risky but we have historically allowed it --- notably, the
+	 * regression tests require it.
+	 *
+	 * Note: if you change this policy, fix initdb to match.
+	 */
+	ctype_encoding = pg_get_encoding_from_locale(NULL);
+
+	if (!(ctype_encoding == encoding ||
+		  ctype_encoding == PG_SQL_ASCII ||
+		  (encoding == PG_SQL_ASCII && superuser())))
+		ereport(ERROR,
+				(errmsg("encoding %s does not match server's locale %s",
+						pg_encoding_to_char(encoding),
+						setlocale(LC_CTYPE, NULL)),
+				 errdetail("The server's LC_CTYPE setting requires encoding %s.",
+						   pg_encoding_to_char(ctype_encoding))));
+
 	/* Resolve default tablespace for new database */
 	if (dtablespacename && dtablespacename->arg)
 	{