@@ -150,7 +150,9 @@ typedef struct HeapCheckContext
150150} HeapCheckContext ;
151151
152152/* Internal implementation */
153- static void check_tuple (HeapCheckContext * ctx );
153+ static void check_tuple (HeapCheckContext * ctx ,
154+ bool * xmin_commit_status_ok ,
155+ XidCommitStatus * xmin_commit_status );
154156static void check_toast_tuple (HeapTuple toasttup , HeapCheckContext * ctx ,
155157 ToastedAttribute * ta , int32 * expected_chunk_seq ,
156158 uint32 extsize );
@@ -160,7 +162,9 @@ static void check_toasted_attribute(HeapCheckContext *ctx,
160162 ToastedAttribute * ta );
161163
162164static bool check_tuple_header (HeapCheckContext * ctx );
163- static bool check_tuple_visibility (HeapCheckContext * ctx );
165+ static bool check_tuple_visibility (HeapCheckContext * ctx ,
166+ bool * xmin_commit_status_ok ,
167+ XidCommitStatus * xmin_commit_status );
164168
165169static void report_corruption (HeapCheckContext * ctx , char * msg );
166170static void report_toast_corruption (HeapCheckContext * ctx ,
@@ -399,9 +403,16 @@ verify_heapam(PG_FUNCTION_ARGS)
399403 for (ctx .blkno = first_block ; ctx .blkno <= last_block ; ctx .blkno ++ )
400404 {
401405 OffsetNumber maxoff ;
406+ OffsetNumber predecessor [MaxOffsetNumber ];
407+ OffsetNumber successor [MaxOffsetNumber ];
408+ bool lp_valid [MaxOffsetNumber ];
409+ bool xmin_commit_status_ok [MaxOffsetNumber ];
410+ XidCommitStatus xmin_commit_status [MaxOffsetNumber ];
402411
403412 CHECK_FOR_INTERRUPTS ();
404413
414+ memset (predecessor , 0 , sizeof (OffsetNumber ) * MaxOffsetNumber );
415+
405416 /* Optionally skip over all-frozen or all-visible blocks */
406417 if (skip_option != SKIP_PAGES_NONE )
407418 {
@@ -433,6 +444,12 @@ verify_heapam(PG_FUNCTION_ARGS)
433444 for (ctx .offnum = FirstOffsetNumber ; ctx .offnum <= maxoff ;
434445 ctx .offnum = OffsetNumberNext (ctx .offnum ))
435446 {
447+ BlockNumber nextblkno ;
448+ OffsetNumber nextoffnum ;
449+
450+ successor [ctx .offnum ] = InvalidOffsetNumber ;
451+ lp_valid [ctx .offnum ] = false;
452+ xmin_commit_status_ok [ctx .offnum ] = false;
436453 ctx .itemid = PageGetItemId (ctx .page , ctx .offnum );
437454
438455 /* Skip over unused/dead line pointers */
@@ -469,6 +486,14 @@ verify_heapam(PG_FUNCTION_ARGS)
469486 report_corruption (& ctx ,
470487 psprintf ("line pointer redirection to unused item at offset %u" ,
471488 (unsigned ) rdoffnum ));
489+
490+ /*
491+ * Record the fact that this line pointer has passed basic
492+ * sanity checking, and also the offset number to which it
493+ * points.
494+ */
495+ lp_valid [ctx .offnum ] = true;
496+ successor [ctx .offnum ] = rdoffnum ;
472497 continue ;
473498 }
474499
@@ -502,11 +527,237 @@ verify_heapam(PG_FUNCTION_ARGS)
502527 }
503528
504529 /* It should be safe to examine the tuple's header, at least */
530+ lp_valid [ctx .offnum ] = true;
505531 ctx .tuphdr = (HeapTupleHeader ) PageGetItem (ctx .page , ctx .itemid );
506532 ctx .natts = HeapTupleHeaderGetNatts (ctx .tuphdr );
507533
508534 /* Ok, ready to check this next tuple */
509- check_tuple (& ctx );
535+ check_tuple (& ctx ,
536+ & xmin_commit_status_ok [ctx .offnum ],
537+ & xmin_commit_status [ctx .offnum ]);
538+
539+ /*
540+ * If the CTID field of this tuple seems to point to another tuple
541+ * on the same page, record that tuple as the successor of this
542+ * one.
543+ */
544+ nextblkno = ItemPointerGetBlockNumber (& (ctx .tuphdr )-> t_ctid );
545+ nextoffnum = ItemPointerGetOffsetNumber (& (ctx .tuphdr )-> t_ctid );
546+ if (nextblkno == ctx .blkno && nextoffnum != ctx .offnum )
547+ successor [ctx .offnum ] = nextoffnum ;
548+ }
549+
550+ /*
551+ * Update chain validation. Check each line pointer that's got a valid
552+ * successor against that successor.
553+ */
554+ ctx .attnum = -1 ;
555+ for (ctx .offnum = FirstOffsetNumber ; ctx .offnum <= maxoff ;
556+ ctx .offnum = OffsetNumberNext (ctx .offnum ))
557+ {
558+ ItemId curr_lp ;
559+ ItemId next_lp ;
560+ HeapTupleHeader curr_htup ;
561+ HeapTupleHeader next_htup ;
562+ TransactionId curr_xmin ;
563+ TransactionId curr_xmax ;
564+ TransactionId next_xmin ;
565+ OffsetNumber nextoffnum = successor [ctx .offnum ];
566+
567+ /*
568+ * The current line pointer may not have a successor, either
569+ * because it's not valid or because it didn't point to anything.
570+ * In either case, we have to give up.
571+ *
572+ * If the current line pointer does point to something, it's
573+ * possible that the target line pointer isn't valid. We have to
574+ * give up in that case, too.
575+ */
576+ if (nextoffnum == InvalidOffsetNumber || !lp_valid [nextoffnum ])
577+ continue ;
578+
579+ /* We have two valid line pointers that we can examine. */
580+ curr_lp = PageGetItemId (ctx .page , ctx .offnum );
581+ next_lp = PageGetItemId (ctx .page , nextoffnum );
582+
583+ /* Handle the cases where the current line pointer is a redirect. */
584+ if (ItemIdIsRedirected (curr_lp ))
585+ {
586+ /* Can't redirect to another redirect. */
587+ if (ItemIdIsRedirected (next_lp ))
588+ {
589+ report_corruption (& ctx ,
590+ psprintf ("redirected line pointer points to another redirected line pointer at offset %u" ,
591+ (unsigned ) nextoffnum ));
592+ continue ;
593+ }
594+
595+ /* Can only redirect to a HOT tuple. */
596+ next_htup = (HeapTupleHeader ) PageGetItem (ctx .page , next_lp );
597+ if (!HeapTupleHeaderIsHeapOnly (next_htup ))
598+ {
599+ report_corruption (& ctx ,
600+ psprintf ("redirected line pointer points to a non-heap-only tuple at offset %u" ,
601+ (unsigned ) nextoffnum ));
602+ }
603+
604+ /*
605+ * Redirects are created by updates, so successor should be
606+ * the result of an update.
607+ */
608+ if ((next_htup -> t_infomask & HEAP_UPDATED ) == 0 )
609+ {
610+ report_corruption (& ctx ,
611+ psprintf ("redirected line pointer points to a non-heap-updated tuple at offset %u" ,
612+ (unsigned ) nextoffnum ));
613+ }
614+
615+ /* HOT chains should not intersect. */
616+ if (predecessor [nextoffnum ] != InvalidOffsetNumber )
617+ {
618+ report_corruption (& ctx ,
619+ psprintf ("redirect line pointer points to offset %u, but offset %u also points there" ,
620+ (unsigned ) nextoffnum , (unsigned ) predecessor [nextoffnum ]));
621+ continue ;
622+ }
623+
624+ /*
625+ * This redirect and the tuple to which it points seem to be
626+ * part of an update chain.
627+ */
628+ predecessor [nextoffnum ] = ctx .offnum ;
629+ continue ;
630+ }
631+
632+ /*
633+ * If the next line pointer is a redirect, or if it's a tuple
634+ * but the XMAX of this tuple doesn't match the XMIN of the next
635+ * tuple, then the two aren't part of the same update chain and
636+ * there is nothing more to do.
637+ */
638+ if (ItemIdIsRedirected (next_lp ))
639+ continue ;
640+ curr_htup = (HeapTupleHeader ) PageGetItem (ctx .page , curr_lp );
641+ curr_xmax = HeapTupleHeaderGetUpdateXid (curr_htup );
642+ next_htup = (HeapTupleHeader ) PageGetItem (ctx .page , next_lp );
643+ next_xmin = HeapTupleHeaderGetXmin (next_htup );
644+ if (!TransactionIdIsValid (curr_xmax ) ||
645+ !TransactionIdEquals (curr_xmax , next_xmin ))
646+ continue ;
647+
648+ /* HOT chains should not intersect. */
649+ if (predecessor [nextoffnum ] != InvalidOffsetNumber )
650+ {
651+ report_corruption (& ctx ,
652+ psprintf ("tuple points to new version at offset %u, but offset %u also points there" ,
653+ (unsigned ) nextoffnum , (unsigned ) predecessor [nextoffnum ]));
654+ continue ;
655+ }
656+
657+ /*
658+ * This tuple and the tuple to which it points seem to be part
659+ * of an update chain.
660+ */
661+ predecessor [nextoffnum ] = ctx .offnum ;
662+
663+ /*
664+ * If the current tuple is marked as HOT-updated, then the next
665+ * tuple should be marked as a heap-only tuple. Conversely, if the
666+ * current tuple isn't marked as HOT-updated, then the next tuple
667+ * shouldn't be marked as a heap-only tuple.
668+ */
669+ if (!HeapTupleHeaderIsHotUpdated (curr_htup ) &&
670+ HeapTupleHeaderIsHeapOnly (next_htup ))
671+ {
672+ report_corruption (& ctx ,
673+ psprintf ("non-heap-only update produced a heap-only tuple at offset %u" ,
674+ (unsigned ) nextoffnum ));
675+ }
676+ if (HeapTupleHeaderIsHotUpdated (curr_htup ) &&
677+ !HeapTupleHeaderIsHeapOnly (next_htup ))
678+ {
679+ report_corruption (& ctx ,
680+ psprintf ("heap-only update produced a non-heap only tuple at offset %u" ,
681+ (unsigned ) nextoffnum ));
682+ }
683+
684+ /*
685+ * If the current tuple's xmin is still in progress but the
686+ * successor tuple's xmin is committed, that's corruption.
687+ *
688+ * NB: We recheck the commit status of the current tuple's xmin
689+ * here, because it might have committed after we checked it and
690+ * before we checked the commit status of the successor tuple's
691+ * xmin. This should be safe because the xmin itself can't have
692+ * changed, only its commit status.
693+ */
694+ curr_xmin = HeapTupleHeaderGetXmin (curr_htup );
695+ if (xmin_commit_status_ok [ctx .offnum ] &&
696+ xmin_commit_status [ctx .offnum ] == XID_IN_PROGRESS &&
697+ xmin_commit_status_ok [nextoffnum ] &&
698+ xmin_commit_status [nextoffnum ] == XID_COMMITTED &&
699+ TransactionIdIsInProgress (curr_xmin ))
700+ {
701+ report_corruption (& ctx ,
702+ psprintf ("tuple with in-progress xmin %u was updated to produce a tuple at offset %u with committed xmin %u" ,
703+ (unsigned ) curr_xmin ,
704+ (unsigned ) ctx .offnum ,
705+ (unsigned ) next_xmin ));
706+ }
707+
708+ /*
709+ * If the current tuple's xmin is aborted but the successor tuple's
710+ * xmin is in-progress or committed, that's corruption.
711+ */
712+ if (xmin_commit_status_ok [ctx .offnum ] &&
713+ xmin_commit_status [ctx .offnum ] == XID_ABORTED &&
714+ xmin_commit_status_ok [nextoffnum ])
715+ {
716+ if (xmin_commit_status [nextoffnum ] == XID_IN_PROGRESS )
717+ report_corruption (& ctx ,
718+ psprintf ("tuple with aborted xmin %u was updated to produce a tuple at offset %u with in-progress xmin %u" ,
719+ (unsigned ) curr_xmin ,
720+ (unsigned ) ctx .offnum ,
721+ (unsigned ) next_xmin ));
722+ else if (xmin_commit_status [nextoffnum ] == XID_COMMITTED )
723+ report_corruption (& ctx ,
724+ psprintf ("tuple with aborted xmin %u was updated to produce a tuple at offset %u with committed xmin %u" ,
725+ (unsigned ) curr_xmin ,
726+ (unsigned ) ctx .offnum ,
727+ (unsigned ) next_xmin ));
728+ }
729+ }
730+
731+ /*
732+ * An update chain can start either with a non-heap-only tuple or with
733+ * a redirect line pointer, but not with a heap-only tuple.
734+ *
735+ * (This check is in a separate loop because we need the predecessor
736+ * array to be fully populated before we can perform it.)
737+ */
738+ for (ctx .offnum = FirstOffsetNumber ;
739+ ctx .offnum <= maxoff ;
740+ ctx .offnum = OffsetNumberNext (ctx .offnum ))
741+ {
742+ if (xmin_commit_status_ok [ctx .offnum ] &&
743+ (xmin_commit_status [ctx .offnum ] == XID_COMMITTED ||
744+ xmin_commit_status [ctx .offnum ] == XID_IN_PROGRESS ) &&
745+ predecessor [ctx .offnum ] == InvalidOffsetNumber )
746+ {
747+ ItemId curr_lp ;
748+
749+ curr_lp = PageGetItemId (ctx .page , ctx .offnum );
750+ if (!ItemIdIsRedirected (curr_lp ))
751+ {
752+ HeapTupleHeader curr_htup ;
753+
754+ curr_htup = (HeapTupleHeader )
755+ PageGetItem (ctx .page , curr_lp );
756+ if (HeapTupleHeaderIsHeapOnly (curr_htup ))
757+ report_corruption (& ctx ,
758+ psprintf ("tuple is root of chain but is marked as heap-only tuple" ));
759+ }
760+ }
510761 }
511762
512763 /* clean up */
@@ -638,6 +889,7 @@ check_tuple_header(HeapCheckContext *ctx)
638889{
639890 HeapTupleHeader tuphdr = ctx -> tuphdr ;
640891 uint16 infomask = tuphdr -> t_infomask ;
892+ TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid (tuphdr );
641893 bool result = true;
642894 unsigned expected_hoff ;
643895
@@ -663,6 +915,19 @@ check_tuple_header(HeapCheckContext *ctx)
663915 */
664916 }
665917
918+ if (!TransactionIdIsValid (curr_xmax ) &&
919+ HeapTupleHeaderIsHotUpdated (tuphdr ))
920+ {
921+ report_corruption (ctx ,
922+ psprintf ("tuple has been HOT updated, but xmax is 0" ));
923+
924+ /*
925+ * As above, even though this shouldn't happen, it's not sufficient
926+ * justification for skipping further checks, we should still be able
927+ * to perform sensibly.
928+ */
929+ }
930+
666931 if (infomask & HEAP_HASNULL )
667932 expected_hoff = MAXALIGN (SizeofHeapTupleHeader + BITMAPLEN (ctx -> natts ));
668933 else
@@ -718,9 +983,14 @@ check_tuple_header(HeapCheckContext *ctx)
718983 * Returns true if the tuple itself should be checked, false otherwise. Sets
719984 * ctx->tuple_could_be_pruned if the tuple -- and thus also any associated
720985 * TOAST tuples -- are eligible for pruning.
986+ *
987+ * Sets *xmin_commit_status_ok to true if the commit status of xmin is known
988+ * and false otherwise. If it's set to true, then also set *xmin_commit_status
989+ * to the actual commit status.
721990 */
722991static bool
723- check_tuple_visibility (HeapCheckContext * ctx )
992+ check_tuple_visibility (HeapCheckContext * ctx , bool * xmin_commit_status_ok ,
993+ XidCommitStatus * xmin_commit_status )
724994{
725995 TransactionId xmin ;
726996 TransactionId xvac ;
@@ -731,13 +1001,17 @@ check_tuple_visibility(HeapCheckContext *ctx)
7311001 HeapTupleHeader tuphdr = ctx -> tuphdr ;
7321002
7331003 ctx -> tuple_could_be_pruned = true; /* have not yet proven otherwise */
1004+ * xmin_commit_status_ok = false; /* have not yet proven otherwise */
7341005
7351006 /* If xmin is normal, it should be within valid range */
7361007 xmin = HeapTupleHeaderGetXmin (tuphdr );
7371008 switch (get_xid_status (xmin , ctx , & xmin_status ))
7381009 {
7391010 case XID_INVALID :
1011+ break ;
7401012 case XID_BOUNDS_OK :
1013+ * xmin_commit_status_ok = true;
1014+ * xmin_commit_status = xmin_status ;
7411015 break ;
7421016 case XID_IN_FUTURE :
7431017 report_corruption (ctx ,
@@ -1515,9 +1789,13 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
15151789/*
15161790 * Check the current tuple as tracked in ctx, recording any corruption found in
15171791 * ctx->tupstore.
1792+ *
1793+ * We return some information about the status of xmin to aid in validating
1794+ * update chains.
15181795 */
15191796static void
1520- check_tuple (HeapCheckContext * ctx )
1797+ check_tuple (HeapCheckContext * ctx , bool * xmin_commit_status_ok ,
1798+ XidCommitStatus * xmin_commit_status )
15211799{
15221800 /*
15231801 * Check various forms of tuple header corruption, and if the header is
@@ -1531,7 +1809,8 @@ check_tuple(HeapCheckContext *ctx)
15311809 * cannot assume our relation description matches the tuple structure, and
15321810 * therefore cannot check it.
15331811 */
1534- if (!check_tuple_visibility (ctx ))
1812+ if (!check_tuple_visibility (ctx , xmin_commit_status_ok ,
1813+ xmin_commit_status ))
15351814 return ;
15361815
15371816 /*
0 commit comments