From 0c1951c36fdc6558b334ad4c14121952a530fe00 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki@enterprisedb.com>
Date: Thu, 27 Nov 2008 21:17:39 +0000
Subject: [PATCH] Fix bug in the tsvector stats collection function, which
 caused a crash if the sample contains just one tsvector, containing only
 one lexeme.

---
 src/backend/tsearch/ts_typanalyze.c | 41 +++++++++++++++--------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 3d35f47c66..d56e6b45b3 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -290,26 +290,6 @@ compute_tsvector_stats(VacAttrStats *stats,
 		if (num_mcelem > track_len)
 			num_mcelem = track_len;
 
-		/* Grab the minimal and maximal frequencies that will get stored */
-		minfreq = sort_table[num_mcelem - 1]->frequency;
-		maxfreq = sort_table[0]->frequency;
-
-		/*
-		 * We want to store statistics sorted on the lexeme value using first
-		 * length, then byte-for-byte comparison. The reason for doing length
-		 * comparison first is that we don't care about the ordering so long
-		 * as it's consistent, and comparing lengths first gives us a chance
-		 * to avoid a strncmp() call.
-		 *
-		 * This is different from what we do with scalar statistics -- they get
-		 * sorted on frequencies. The rationale is that we usually search
-		 * through most common elements looking for a specific value, so we can
-		 * grab its frequency.  When values are presorted we can employ binary
-		 * search for that.  See ts_selfuncs.c for a real usage scenario.
-		 */
-		qsort(sort_table, num_mcelem, sizeof(TrackItem *),
-			  trackitem_compare_lexemes);
-
 		/* Generate MCELEM slot entry */
 		if (num_mcelem > 0)
 		{
@@ -317,6 +297,27 @@ compute_tsvector_stats(VacAttrStats *stats,
 			Datum			*mcelem_values;
 			float4			*mcelem_freqs;
 
+			/* Grab the minimal and maximal frequencies that will get stored */
+			minfreq = sort_table[num_mcelem - 1]->frequency;
+			maxfreq = sort_table[0]->frequency;
+
+			/*
+			 * We want to store statistics sorted on the lexeme value using
+			 * first length, then byte-for-byte comparison. The reason for
+			 * doing length comparison first is that we don't care about the
+			 * ordering so long as it's consistent, and comparing lengths first
+			 * gives us a chance to avoid a strncmp() call.
+			 *
+			 * This is different from what we do with scalar statistics -- they
+			 * get sorted on frequencies. The rationale is that we usually
+			 * search through most common elements looking for a specific
+			 * value, so we can grab its frequency.  When values are presorted
+			 * we can employ binary search for that.  See ts_selfuncs.c for a
+			 * real usage scenario.
+			 */
+			qsort(sort_table, num_mcelem, sizeof(TrackItem *),
+				  trackitem_compare_lexemes);
+
 			/* Must copy the target values into anl_context */
 			old_context = MemoryContextSwitchTo(stats->anl_context);
 
-- 
2.39.5