2828#include "utils/builtins.h"
2929#include "utils/bytea.h"
3030#include "utils/lsyscache.h"
31+ #include "utils/memutils.h"
3132#include "utils/pg_locale.h"
33+ #include "utils/sortsupport.h"
3234
3335
3436/* GUC variable */
@@ -50,12 +52,32 @@ typedef struct
5052 int skiptable [256 ]; /* skip distance for given mismatched char */
5153} TextPositionState ;
5254
/*
 * Working state for the locale-aware text sort comparator: two scratch
 * buffers that receive NUL-terminated copies of the strings being compared,
 * their current allocated sizes, and (where available) the locale to
 * compare under.
 */
55+ typedef struct
56+ {
57+ char * buf1 ; /* 1st string */
58+ char * buf2 ; /* 2nd string */
59+ int buflen1 ; /* allocated size of buf1, in bytes */
60+ int buflen2 ; /* allocated size of buf2, in bytes */
61+ #ifdef HAVE_LOCALE_T
62+ pg_locale_t locale ; /* 0 means compare with plain strcoll() */
63+ #endif
64+ } TextSortSupport ;
65+
66+ /*
67+ * This should be large enough that most strings will fit, but small enough
68+ * that we feel comfortable putting it on the stack
69+ */
70+ #define TEXTBUFLEN 1024
71+
5372#define DatumGetUnknownP (X ) ((unknown *) PG_DETOAST_DATUM(X))
5473#define DatumGetUnknownPCopy (X ) ((unknown *) PG_DETOAST_DATUM_COPY(X))
5574#define PG_GETARG_UNKNOWN_P (n ) DatumGetUnknownP(PG_GETARG_DATUM(n))
5675#define PG_GETARG_UNKNOWN_P_COPY (n ) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
5776#define PG_RETURN_UNKNOWN_P (x ) PG_RETURN_POINTER(x)
5877
78+ static void btsortsupport_worker (SortSupport ssup , Oid collid ); /* installs a comparator in ssup */
79+ static int bttextfastcmp_c (Datum x , Datum y , SortSupport ssup ); /* memcmp()-based comparator */
80+ static int bttextfastcmp_locale (Datum x , Datum y , SortSupport ssup ); /* strcoll()-based comparator */
5981static int32 text_length (Datum str );
6082static text * text_catenate (text * t1 , text * t2 );
6183static text * text_substring (Datum str ,
@@ -1356,10 +1378,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
13561378 }
13571379 else
13581380 {
1359- #define STACKBUFLEN 1024
1360-
1361- char a1buf [STACKBUFLEN ];
1362- char a2buf [STACKBUFLEN ];
1381+ char a1buf [TEXTBUFLEN ];
1382+ char a2buf [TEXTBUFLEN ];
13631383 char * a1p ,
13641384 * a2p ;
13651385
@@ -1393,24 +1413,24 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
13931413 int a2len ;
13941414 int r ;
13951415
1396- if (len1 >= STACKBUFLEN / 2 )
1416+ if (len1 >= TEXTBUFLEN / 2 )
13971417 {
13981418 a1len = len1 * 2 + 2 ;
13991419 a1p = palloc (a1len );
14001420 }
14011421 else
14021422 {
1403- a1len = STACKBUFLEN ;
1423+ a1len = TEXTBUFLEN ;
14041424 a1p = a1buf ;
14051425 }
1406- if (len2 >= STACKBUFLEN / 2 )
1426+ if (len2 >= TEXTBUFLEN / 2 )
14071427 {
14081428 a2len = len2 * 2 + 2 ;
14091429 a2p = palloc (a2len );
14101430 }
14111431 else
14121432 {
1413- a2len = STACKBUFLEN ;
1433+ a2len = TEXTBUFLEN ;
14141434 a2p = a2buf ;
14151435 }
14161436
@@ -1475,11 +1495,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
14751495 }
14761496#endif /* WIN32 */
14771497
1478- if (len1 >= STACKBUFLEN )
1498+ if (len1 >= TEXTBUFLEN )
14791499 a1p = (char * ) palloc (len1 + 1 );
14801500 else
14811501 a1p = a1buf ;
1482- if (len2 >= STACKBUFLEN )
1502+ if (len2 >= TEXTBUFLEN )
14831503 a2p = (char * ) palloc (len2 + 1 );
14841504 else
14851505 a2p = a2buf ;
@@ -1683,6 +1703,186 @@ bttextcmp(PG_FUNCTION_ARGS)
16831703 PG_RETURN_INT32 (result );
16841704}
16851705
1706+ Datum
1707+ bttextsortsupport (PG_FUNCTION_ARGS )
1708+ {
1709+ SortSupport ssup = (SortSupport ) PG_GETARG_POINTER (0 );
1710+ Oid collid = ssup -> ssup_collation ;
1711+ MemoryContext oldcontext ;
1712+
1713+ oldcontext = MemoryContextSwitchTo (ssup -> ssup_cxt );
1714+
1715+ btsortsupport_worker (ssup , collid );
1716+
1717+ MemoryContextSwitchTo (oldcontext );
1718+
1719+ PG_RETURN_VOID ();
1720+ }
1721+
1722+ static void
1723+ btsortsupport_worker (SortSupport ssup , Oid collid )
1724+ {
1725+ TextSortSupport * tss ;
1726+
1727+ /*
1728+ * If LC_COLLATE = C, we can make things quite a bit faster by using
1729+ * memcmp() rather than strcoll(). To minimize the per-comparison
1730+ * overhead, we make this decision just once for the whole sort.
1731+ */
1732+ if (lc_collate_is_c (collid ))
1733+ {
1734+ ssup -> comparator = bttextfastcmp_c ;
1735+ return ;
1736+ }
1737+
1738+ /*
1739+ * WIN32 requires complex hacks when the database encoding is UTF-8 (except
1740+ * when using the "C" collation). For now, we don't optimize that case.
1741+ */
1742+ #ifdef WIN32
1743+ if (GetDatabaseEncoding () == PG_UTF8 )
1744+ return ;
1745+ #endif
1746+
1747+ /*
1748+ * We may need a collation-sensitive comparison. To make things faster,
1749+ * we'll figure out the collation based on the locale id and cache the
1750+ * result. Also, since strxfrm()/strcoll() require NUL-terminated inputs,
1751+ * prepare one or two palloc'd buffers to use as temporary workspace. In
1752+ * the ad-hoc comparison case we only use palloc'd buffers when we need
1753+ * more space than we're comfortable allocating on the stack, but here we
1754+ * can keep the buffers around for the whole sort, so it makes sense to
1755+ * allocate them once and use them unconditionally.
1756+ */
1757+ tss = palloc (sizeof (TextSortSupport ));
1758+ #ifdef HAVE_LOCALE_T
1759+ tss -> locale = 0 ;
1760+ #endif
1761+
1762+ if (collid != DEFAULT_COLLATION_OID )
1763+ {
1764+ if (!OidIsValid (collid ))
1765+ {
1766+ /*
1767+ * This typically means that the parser could not resolve a
1768+ * conflict of implicit collations, so report it that way.
1769+ */
1770+ ereport (ERROR ,
1771+ (errcode (ERRCODE_INDETERMINATE_COLLATION ),
1772+ errmsg ("could not determine which collation to use for string comparison" ),
1773+ errhint ("Use the COLLATE clause to set the collation explicitly." )));
1774+ }
1775+ #ifdef HAVE_LOCALE_T
1776+ tss -> locale = pg_newlocale_from_collation (collid );
1777+ #endif
1778+ }
1779+
1780+ tss -> buf1 = palloc (TEXTBUFLEN );
1781+ tss -> buflen1 = TEXTBUFLEN ;
1782+ tss -> buf2 = palloc (TEXTBUFLEN );
1783+ tss -> buflen2 = TEXTBUFLEN ;
1784+
1785+ ssup -> ssup_extra = tss ;
1786+ ssup -> comparator = bttextfastcmp_locale ;
1787+ }
1788+
1789+ /*
1790+ * sortsupport comparison func (for C locale case)
1791+ */
1792+ static int
1793+ bttextfastcmp_c (Datum x , Datum y , SortSupport ssup )
1794+ {
1795+ text * arg1 = DatumGetTextPP (x );
1796+ text * arg2 = DatumGetTextPP (y );
1797+ char * a1p ,
1798+ * a2p ;
1799+ int len1 ,
1800+ len2 ,
1801+ result ;
1802+
1803+ a1p = VARDATA_ANY (arg1 );
1804+ a2p = VARDATA_ANY (arg2 );
1805+
1806+ len1 = VARSIZE_ANY_EXHDR (arg1 );
1807+ len2 = VARSIZE_ANY_EXHDR (arg2 );
1808+
1809+ result = memcmp (a1p , a2p , Min (len1 , len2 ));
1810+ if ((result == 0 ) && (len1 != len2 ))
1811+ result = (len1 < len2 ) ? -1 : 1 ;
1812+
1813+ /* We can't afford to leak memory here. */
1814+ if (PointerGetDatum (arg1 ) != x )
1815+ pfree (arg1 );
1816+ if (PointerGetDatum (arg2 ) != y )
1817+ pfree (arg2 );
1818+
1819+ return result ;
1820+ }
1821+
1822+ /*
1823+ * sortsupport comparison func (for locale case)
1824+ */
1825+ static int
1826+ bttextfastcmp_locale (Datum x , Datum y , SortSupport ssup )
1827+ {
1828+ text * arg1 = DatumGetTextPP (x );
1829+ text * arg2 = DatumGetTextPP (y );
1830+ TextSortSupport * tss = (TextSortSupport * ) ssup -> ssup_extra ;
1831+
1832+ /* working state */
1833+ char * a1p ,
1834+ * a2p ;
1835+ int len1 ,
1836+ len2 ,
1837+ result ;
1838+
1839+ a1p = VARDATA_ANY (arg1 );
1840+ a2p = VARDATA_ANY (arg2 );
1841+
1842+ len1 = VARSIZE_ANY_EXHDR (arg1 );
1843+ len2 = VARSIZE_ANY_EXHDR (arg2 );
1844+
1845+ if (len1 >= tss -> buflen1 )
1846+ {
1847+ pfree (tss -> buf1 );
1848+ tss -> buflen1 = Max (len1 + 1 , Min (tss -> buflen1 * 2 , MaxAllocSize ));
1849+ tss -> buf1 = MemoryContextAlloc (ssup -> ssup_cxt , tss -> buflen1 );
1850+ }
1851+ if (len2 >= tss -> buflen2 )
1852+ {
1853+ pfree (tss -> buf2 );
1854+ tss -> buflen1 = Max (len2 + 1 , Min (tss -> buflen2 * 2 , MaxAllocSize ));
1855+ tss -> buf2 = MemoryContextAlloc (ssup -> ssup_cxt , tss -> buflen2 );
1856+ }
1857+
1858+ memcpy (tss -> buf1 , a1p , len1 );
1859+ tss -> buf1 [len1 ] = '\0' ;
1860+ memcpy (tss -> buf2 , a2p , len2 );
1861+ tss -> buf2 [len2 ] = '\0' ;
1862+
1863+ #ifdef HAVE_LOCALE_T
1864+ if (tss -> locale )
1865+ result = strcoll_l (tss -> buf1 , tss -> buf2 , tss -> locale );
1866+ else
1867+ #endif
1868+ result = strcoll (tss -> buf1 , tss -> buf2 );
1869+
1870+ /*
1871+ * In some locales strcoll() can claim that nonidentical strings are equal.
1872+ * Believing that would be bad news for a number of reasons, so we follow
1873+ * Perl's lead and sort "equal" strings according to strcmp().
1874+ */
1875+ if (result == 0 )
1876+ result = strcmp (tss -> buf1 , tss -> buf2 );
1877+
1878+ /* We can't afford to leak memory here. */
1879+ if (PointerGetDatum (arg1 ) != x )
1880+ pfree (arg1 );
1881+ if (PointerGetDatum (arg2 ) != y )
1882+ pfree (arg2 );
1883+
1884+ return result ;
1885+ }
16861886
16871887Datum
16881888text_larger (PG_FUNCTION_ARGS )
0 commit comments