@@ -317,7 +317,7 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
317317
318318 if (nulls [i ])
319319 ereport (ERROR ,
320- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
320+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
321321 errmsg ("lexeme array may not contain nulls" )));
322322
323323 lex = VARDATA (dlexemes [i ]);
@@ -430,7 +430,7 @@ compareint(const void *va, const void *vb)
430430/*
431431 * Internal routine to delete lexemes from TSVector by array of offsets.
432432 *
433- * int *indices_to_delete -- array of lexeme offsets to delete
433+ * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
434434 * int indices_count -- size of that array
435435 *
436436 * Returns new TSVector without given lexemes along with their positions
@@ -445,52 +445,68 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
445445 * arrout ;
446446 char * data = STRPTR (tsv ),
447447 * dataout ;
448- int i ,
449- j ,
450- k ,
451- curoff ;
448+ int i , /* index in arrin */
449+ j , /* index in arrout */
450+ k , /* index in indices_to_delete */
451+ curoff ; /* index in dataout area */
452452
453453 /*
454- * Here we overestimates tsout size, since we don't know exact size
455- * occupied by positions and weights. We will set exact size later after a
456- * pass through TSVector .
454+ * Sort the filter array to simplify membership checks below. Also, get
455+ * rid of any duplicate entries, so that we can assume that indices_count
456+ * is exactly equal to the number of lexemes that will be removed .
457457 */
458- tsout = (TSVector ) palloc0 (VARSIZE (tsv ));
459- arrout = ARRPTR (tsout );
460- tsout -> size = tsv -> size - indices_count ;
461-
462- /* Sort our filter array to simplify membership check later. */
463458 if (indices_count > 1 )
459+ {
460+ int kp ;
461+
464462 qsort (indices_to_delete , indices_count , sizeof (int ), compareint );
463+ kp = 0 ;
464+ for (k = 1 ; k < indices_count ; k ++ )
465+ {
466+ if (indices_to_delete [k ] != indices_to_delete [kp ])
467+ indices_to_delete [++ kp ] = indices_to_delete [k ];
468+ }
469+ indices_count = ++ kp ;
470+ }
465471
466472 /*
467- * Copy tsv to tsout skipping lexemes that enlisted in indices_to_delete.
473+ * Here we overestimate tsout size, since we don't know how much space is
474+ * used by the deleted lexeme(s). We will set exact size below.
468475 */
469- curoff = 0 ;
476+ tsout = (TSVector ) palloc0 (VARSIZE (tsv ));
477+
478+ /* This count must be correct because STRPTR(tsout) relies on it. */
479+ tsout -> size = tsv -> size - indices_count ;
480+
481+ /*
482+ * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
483+ */
484+ arrout = ARRPTR (tsout );
470485 dataout = STRPTR (tsout );
486+ curoff = 0 ;
471487 for (i = j = k = 0 ; i < tsv -> size ; i ++ )
472488 {
473489 /*
474- * Here we should check whether current i is present in
475- * indices_to_delete or not. Since indices_to_delete is already sorted
476- * we can advance it index only when we have match .
490+ * If current i is present in indices_to_delete, skip this lexeme.
491+ * Since indices_to_delete is already sorted, we only need to check
492+ * the current (k'th) entry .
477493 */
478494 if (k < indices_count && i == indices_to_delete [k ])
479495 {
480496 k ++ ;
481497 continue ;
482498 }
483499
484- /* Copy lexeme, it's positions and weights */
500+ /* Copy lexeme and its positions and weights */
485501 memcpy (dataout + curoff , data + arrin [i ].pos , arrin [i ].len );
486502 arrout [j ].haspos = arrin [i ].haspos ;
487503 arrout [j ].len = arrin [i ].len ;
488504 arrout [j ].pos = curoff ;
489505 curoff += arrin [i ].len ;
490506 if (arrin [i ].haspos )
491507 {
492- int len = POSDATALEN (tsv , arrin + i ) * sizeof (WordEntryPos ) +
493- sizeof (uint16 );
508+ int len = POSDATALEN (tsv , arrin + i ) * sizeof (WordEntryPos )
509+ + sizeof (uint16 );
494510
495511 curoff = SHORTALIGN (curoff );
496512 memcpy (dataout + curoff ,
@@ -503,10 +519,9 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
503519 }
504520
505521 /*
506- * After the pass through TSVector k should equals exactly to
507- * indices_count. If it isn't then the caller provided us with indices
508- * outside of [0, tsv->size) range and estimation of tsout's size is
509- * wrong.
522+ * k should now be exactly equal to indices_count. If it isn't, then the
523+ * caller provided us with indices outside the [0, tsv->size) range and
524+ * our estimate of tsout's size is wrong.
510525 */
511526 Assert (k == indices_count );
512527
@@ -560,7 +575,7 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
560575
561576 /*
562577 * In typical use case array of lexemes to delete is relatively small. So
563- * here we optimizing things for that scenario: iterate through lexarr
578+ * here we optimize things for that scenario: iterate through lexarr
564579 * performing binary search of each lexeme from lexarr in tsvector.
565580 */
566581 skip_indices = palloc0 (nlex * sizeof (int ));
@@ -572,10 +587,10 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
572587
573588 if (nulls [i ])
574589 ereport (ERROR ,
575- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
590+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
576591 errmsg ("lexeme array may not contain nulls" )));
577592
578- lex = VARDATA (dlexemes [i ]);
593+ lex = VARDATA_ANY (dlexemes [i ]);
579594 lex_len = VARSIZE_ANY_EXHDR (dlexemes [i ]);
580595 lex_pos = tsvector_bsearch (tsin , lex , lex_len );
581596
@@ -738,7 +753,7 @@ array_to_tsvector(PG_FUNCTION_ARGS)
738753 {
739754 if (nulls [i ])
740755 ereport (ERROR ,
741- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
756+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
742757 errmsg ("lexeme array may not contain nulls" )));
743758
744759 datalen += VARSIZE_ANY_EXHDR (dlexemes [i ]);
@@ -797,7 +812,7 @@ tsvector_filter(PG_FUNCTION_ARGS)
797812
798813 if (nulls [i ])
799814 ereport (ERROR ,
800- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
815+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
801816 errmsg ("weight array may not contain nulls" )));
802817
803818 char_weight = DatumGetChar (dweights [i ]);
0 commit comments