@@ -62,6 +62,9 @@ typedef struct
6262 char * buf2 ; /* 2nd string, or abbreviation strxfrm() buf */
6363 int buflen1 ;
6464 int buflen2 ;
65+ int last_len1 ; /* Length of last buf1 string/strxfrm() blob */
66+ int last_len2 ; /* Length of last buf2 string/strxfrm() blob */
67+ int last_returned ; /* Last comparison result (cache) */
6568 bool collate_c ;
6669 hyperLogLogState abbr_card ; /* Abbreviated key cardinality state */
6770 hyperLogLogState full_card ; /* Full key cardinality state */
@@ -1832,9 +1835,20 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
18321835 tss -> buflen1 = TEXTBUFLEN ;
18331836 tss -> buf2 = palloc (TEXTBUFLEN );
18341837 tss -> buflen2 = TEXTBUFLEN ;
1838+ /* Start with invalid values */
1839+ tss -> last_len1 = -1 ;
1840+ tss -> last_len2 = -1 ;
18351841#ifdef HAVE_LOCALE_T
18361842 tss -> locale = locale ;
18371843#endif
1844+ /*
1845+ * To avoid somehow confusing a strxfrm() blob and an original string
1846+ * within bttextfastcmp_locale(), initialize last returned cache to a
1847+ * sentinel value. A platform-specific actual strcoll() return value
1848+ * of INT_MIN seems unlikely, but if that occurs it will not cause
1849+ * wrong answers.
1850+ */
1851+ tss -> last_returned = INT_MIN ;
18381852 tss -> collate_c = collate_c ;
18391853 ssup -> ssup_extra = tss ;
18401854
@@ -1897,6 +1911,7 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
18971911{
18981912 text * arg1 = DatumGetTextPP (x );
18991913 text * arg2 = DatumGetTextPP (y );
1914+ bool arg1_match ;
19001915 TextSortSupport * tss = (TextSortSupport * ) ssup -> ssup_extra ;
19011916
19021917 /* working state */
@@ -1915,6 +1930,11 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
19151930 /* Fast pre-check for equality, as discussed in varstr_cmp() */
19161931 if (len1 == len2 && memcmp (a1p , a2p , len1 ) == 0 )
19171932 {
1933+ /*
1934+ * No change in buf1 or buf2 contents, so avoid changing last_len1 or
1935+ * last_len2. Existing contents of buffers might still be used by next
1936+ * call.
1937+ */
19181938 result = 0 ;
19191939 goto done ;
19201940 }
@@ -1932,10 +1952,43 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
19321952 tss -> buf2 = MemoryContextAlloc (ssup -> ssup_cxt , tss -> buflen2 );
19331953 }
19341954
1935- memcpy (tss -> buf1 , a1p , len1 );
1936- tss -> buf1 [len1 ] = '\0' ;
1937- memcpy (tss -> buf2 , a2p , len2 );
1938- tss -> buf2 [len2 ] = '\0' ;
1955+ /*
1956+ * We're likely to be asked to compare the same strings repeatedly, and
1957+ * memcmp() is so much cheaper than strcoll() that it pays to try to cache
1958+ * comparisons, even though in general there is no reason to think that
1959+ * that will work out (every text datum may be unique). Caching does not
1960+ * slow things down measurably when it doesn't work out, and can speed
1961+ * things up by rather a lot when it does. In part, this is because the
1962+ * memcmp() compares data from cachelines that are needed in L1 cache even
1963+ * when the last comparison's result cannot be reused.
1964+ */
1965+ arg1_match = true;
1966+ if (len1 != tss -> last_len1 || memcmp (tss -> buf1 , a1p , len1 ) != 0 )
1967+ {
1968+ arg1_match = false;
1969+ memcpy (tss -> buf1 , a1p , len1 );
1970+ tss -> buf1 [len1 ] = '\0' ;
1971+ tss -> last_len1 = len1 ;
1972+ }
1973+
1974+ /*
1975+ * If we're comparing the same two strings as last time, we can return the
1976+ * same answer without calling strcoll() again. This is more likely than
1977+ * it seems (at least with moderate to low cardinality sets), because
1978+ * quicksort compares the same pivot against many values.
1979+ */
1980+ if (len2 != tss -> last_len2 || memcmp (tss -> buf2 , a2p , len2 ) != 0 )
1981+ {
1982+ memcpy (tss -> buf2 , a2p , len2 );
1983+ tss -> buf2 [len2 ] = '\0' ;
1984+ tss -> last_len2 = len2 ;
1985+ }
1986+ else if (arg1_match && tss -> last_returned != INT_MIN )
1987+ {
1988+ /* Use result cached following last actual strcoll() call */
1989+ result = tss -> last_returned ;
1990+ goto done ;
1991+ }
19391992
19401993#ifdef HAVE_LOCALE_T
19411994 if (tss -> locale )
@@ -1952,6 +2005,8 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
19522005 if (result == 0 )
19532006 result = strcmp (tss -> buf1 , tss -> buf2 );
19542007
2008+ /* Cache result, perhaps saving an expensive strcoll() call next time */
2009+ tss -> last_returned = result ;
19552010done :
19562011 /* We can't afford to leak memory here. */
19572012 if (PointerGetDatum (arg1 ) != x )
@@ -2034,9 +2089,19 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
20342089 tss -> buf1 = palloc (tss -> buflen1 );
20352090 }
20362091
2092+ /* Might be able to reuse strxfrm() blob from last call */
2093+ if (tss -> last_len1 == len &&
2094+ memcmp (tss -> buf1 , authoritative_data , len ) == 0 )
2095+ {
2096+ memcpy (pres , tss -> buf2 , Min (sizeof (Datum ), tss -> last_len2 ));
2097+ /* No change affecting cardinality, so no hashing required */
2098+ goto done ;
2099+ }
2100+
20372101 /* Just like strcoll(), strxfrm() expects a NUL-terminated string */
20382102 memcpy (tss -> buf1 , authoritative_data , len );
20392103 tss -> buf1 [len ] = '\0' ;
2104+ tss -> last_len1 = len ;
20402105
20412106 for (;;)
20422107 {
@@ -2048,6 +2113,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
20482113#endif
20492114 bsize = strxfrm (tss -> buf2 , tss -> buf1 , tss -> buflen2 );
20502115
2116+ tss -> last_len2 = bsize ;
20512117 if (bsize < tss -> buflen2 )
20522118 break ;
20532119
@@ -2105,6 +2171,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
21052171
21062172 addHyperLogLog (& tss -> abbr_card , hash );
21072173
2174+ done :
21082175 /*
21092176 * Byteswap on little-endian machines.
21102177 *
0 commit comments