88 *
99 *
1010 * IDENTIFICATION
11- * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.9 2000/11/16 22:30:19 tgl Exp $
11+ * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.10 2000/12/02 19:38:34 tgl Exp $
1212 *
13-
1413 *-------------------------------------------------------------------------
1514 */
15+ #include "postgres.h"
16+
1617#include <sys/types.h>
1718#include <sys/file.h>
1819#include <sys/stat.h>
1920#include <fcntl.h>
2021#include <unistd.h>
2122
22- #include "postgres.h"
23-
2423#include "access/heapam.h"
2524#include "catalog/catname.h"
2625#include "catalog/indexing.h"
@@ -159,7 +158,8 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
159158
160159 stats = & vacattrstats [i ];
161160 stats -> attr = palloc (ATTRIBUTE_TUPLE_SIZE );
162- memmove (stats -> attr , attr [((attnums ) ? attnums [i ] : i )], ATTRIBUTE_TUPLE_SIZE );
161+ memcpy (stats -> attr , attr [((attnums ) ? attnums [i ] : i )],
162+ ATTRIBUTE_TUPLE_SIZE );
163163 stats -> best = stats -> guess1 = stats -> guess2 = 0 ;
164164 stats -> max = stats -> min = 0 ;
165165 stats -> best_len = stats -> guess1_len = stats -> guess2_len = 0 ;
@@ -220,6 +220,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
220220 /* delete existing pg_statistic rows for relation */
221221 del_stats (relid , ((attnums ) ? attr_cnt : 0 ), attnums );
222222
223+ /* scan relation to gather statistics */
223224 scan = heap_beginscan (onerel , false, SnapshotNow , 0 , NULL );
224225
225226 while (HeapTupleIsValid (tuple = heap_getnext (scan , 0 )))
@@ -237,7 +238,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
237238}
238239
239240/*
240- * attr_stats() -- compute column statistics used by the optimzer
241+ * attr_stats() -- compute column statistics used by the planner
241242 *
242243 * We compute the column min, max, null and non-null counts.
243244 * Plus we attempt to find the count of the value that occurs most
@@ -266,6 +267,7 @@ attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple
266267 for (i = 0 ; i < attr_cnt ; i ++ )
267268 {
268269 VacAttrStats * stats = & vacattrstats [i ];
270+ Datum origvalue ;
269271 Datum value ;
270272 bool isnull ;
271273 bool value_hit ;
@@ -278,16 +280,25 @@ attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple
278280 continue ;
279281#endif /* _DROP_COLUMN_HACK__ */
280282
281- value = heap_getattr (tuple ,
282- stats -> attr -> attnum , tupDesc , & isnull );
283+ origvalue = heap_getattr (tuple , stats -> attr -> attnum ,
284+ tupDesc , & isnull );
283285
284286 if (isnull )
285287 {
286288 stats -> null_cnt ++ ;
287289 continue ;
288290 }
289-
290291 stats -> nonnull_cnt ++ ;
292+
293+ /*
294+ * If the value is toasted, detoast it to avoid repeated detoastings
295+ * and resultant memory leakage inside the comparison routines.
296+ */
297+ if (!stats -> attr -> attbyval && stats -> attr -> attlen == -1 )
298+ value = PointerGetDatum (PG_DETOAST_DATUM (origvalue ));
299+ else
300+ value = origvalue ;
301+
291302 if (! stats -> initialized )
292303 {
293304 bucketcpy (stats -> attr , value , & stats -> best , & stats -> best_len );
@@ -365,22 +376,26 @@ attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple
365376 stats -> guess1_hits = 1 ;
366377 stats -> guess2_hits = 1 ;
367378 }
379+
380+ /* Clean up detoasted copy, if any */
381+ if (value != origvalue )
382+ pfree (DatumGetPointer (value ));
368383 }
369384}
370385
371386/*
372387 * bucketcpy() -- copy a new value into one of the statistics buckets
373- *
374388 */
375389static void
376390bucketcpy (Form_pg_attribute attr , Datum value , Datum * bucket , int * bucket_len )
377391{
378- if (attr -> attbyval && attr -> attlen != -1 )
392+ if (attr -> attbyval )
379393 * bucket = value ;
380394 else
381395 {
382396 int len = (attr -> attlen != -1 ? attr -> attlen : VARSIZE (value ));
383397
398+ /* Avoid unnecessary palloc() traffic... */
384399 if (len > * bucket_len )
385400 {
386401 if (* bucket_len != 0 )
@@ -396,8 +411,27 @@ bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
396411/*
397412 * update_attstats() -- update attribute statistics for one relation
398413 *
399- * Updates of pg_attribute statistics are handled by over-write,
400- * for reasons described above. pg_statistic rows are added normally.
414+ * Statistics are stored in several places: the pg_class row for the
415+ * relation has stats about the whole relation, the pg_attribute rows
416+ * for each attribute store "dispersion", and there is a pg_statistic
417+ * row for each (non-system) attribute. (Dispersion probably ought to
418+ * be moved to pg_statistic, but it's not worth doing unless there's
419+ * another reason to have to change pg_attribute.) The pg_class values
420+ * are updated by VACUUM, not here.
421+ *
422+ * We violate no-overwrite semantics here by storing new values for
423+ * the dispersion column directly into the pg_attribute tuple that's
424+ * already on the page. The reason for this is that if we updated
425+ * these tuples in the usual way, vacuuming pg_attribute itself
426+ * wouldn't work very well --- by the time we got done with a vacuum
427+ * cycle, most of the tuples in pg_attribute would've been obsoleted.
428+ * Updating pg_attribute's own statistics would be especially tricky.
429+ * Of course, this only works for fixed-size never-null columns, but
430+ * dispersion is such a column.
431+ *
432+ * pg_statistic rows are just added normally. This means that
433+ * pg_statistic will probably contain some deleted rows at the
434+ * completion of a vacuum cycle, unless it happens to get vacuumed last.
401435 *
402436 * To keep things simple, we punt for pg_statistic, and don't try
403437 * to compute or store rows for pg_statistic itself in pg_statistic.
0 commit comments