@@ -416,17 +416,34 @@ tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
416416 return -1 ;
417417}
418418
419+ /*
420+ * qsort comparator functions
421+ */
422+
/*
 * qsort comparator for arrays of plain int, sorting in ascending order.
 *
 * va and vb each point at one int element.  Returns 0 when the two values
 * are equal, 1 when the first is larger and -1 when it is smaller.  The
 * values are compared rather than subtracted, so operands that are far
 * apart cannot overflow.
 */
static int
compare_int(const void *va, const void *vb)
{
	int			a = *((const int *) va);
	int			b = *((const int *) vb);

	if (a == b)
		return 0;
	return (a > b) ? 1 : -1;
}
429433
434+ static int
435+ compare_text_lexemes (const void * va , const void * vb )
436+ {
437+ Datum a = * ((const Datum * ) va );
438+ Datum b = * ((const Datum * ) vb );
439+ char * alex = VARDATA_ANY (a );
440+ int alex_len = VARSIZE_ANY_EXHDR (a );
441+ char * blex = VARDATA_ANY (b );
442+ int blex_len = VARSIZE_ANY_EXHDR (b );
443+
444+ return tsCompareString (alex , alex_len , blex , blex_len , false);
445+ }
446+
430447/*
431448 * Internal routine to delete lexemes from TSVector by array of offsets.
432449 *
@@ -459,7 +476,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
459476 {
460477 int kp ;
461478
462- qsort (indices_to_delete , indices_count , sizeof (int ), compareint );
479+ qsort (indices_to_delete , indices_count , sizeof (int ), compare_int );
463480 kp = 0 ;
464481 for (k = 1 ; k < indices_count ; k ++ )
465482 {
@@ -743,32 +760,50 @@ array_to_tsvector(PG_FUNCTION_ARGS)
743760 bool * nulls ;
744761 int nitems ,
745762 i ,
763+ j ,
746764 tslen ,
747765 datalen = 0 ;
748766 char * cur ;
749767
750768 deconstruct_array (v , TEXTOID , -1 , false, 'i' , & dlexemes , & nulls , & nitems );
751769
770+ /* Reject nulls (maybe we should just ignore them, instead?) */
752771 for (i = 0 ; i < nitems ; i ++ )
753772 {
754773 if (nulls [i ])
755774 ereport (ERROR ,
756775 (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
757776 errmsg ("lexeme array may not contain nulls" )));
777+ }
758778
759- datalen += VARSIZE_ANY_EXHDR (dlexemes [i ]);
779+ /* Sort and de-dup, because this is required for a valid tsvector. */
780+ if (nitems > 1 )
781+ {
782+ qsort (dlexemes , nitems , sizeof (Datum ), compare_text_lexemes );
783+ j = 0 ;
784+ for (i = 1 ; i < nitems ; i ++ )
785+ {
786+ if (compare_text_lexemes (& dlexemes [j ], & dlexemes [i ]) < 0 )
787+ dlexemes [++ j ] = dlexemes [i ];
788+ }
789+ nitems = ++ j ;
760790 }
761791
792+ /* Calculate space needed for surviving lexemes. */
793+ for (i = 0 ; i < nitems ; i ++ )
794+ datalen += VARSIZE_ANY_EXHDR (dlexemes [i ]);
762795 tslen = CALCDATASIZE (nitems , datalen );
796+
797+ /* Allocate and fill tsvector. */
763798 tsout = (TSVector ) palloc0 (tslen );
764799 SET_VARSIZE (tsout , tslen );
765800 tsout -> size = nitems ;
801+
766802 arrout = ARRPTR (tsout );
767803 cur = STRPTR (tsout );
768-
769804 for (i = 0 ; i < nitems ; i ++ )
770805 {
771- char * lex = VARDATA (dlexemes [i ]);
806+ char * lex = VARDATA_ANY (dlexemes [i ]);
772807 int lex_len = VARSIZE_ANY_EXHDR (dlexemes [i ]);
773808
774809 memcpy (cur , lex , lex_len );
0 commit comments