3131#include "replication/basebackup.h"
3232#include "replication/walsender.h"
3333#include "replication/walsender_private.h"
34+ #include "storage/bufpage.h"
35+ #include "storage/checksum.h"
3436#include "storage/dsm_impl.h"
3537#include "storage/fd.h"
3638#include "storage/ipc.h"
@@ -70,6 +72,7 @@ static void parse_basebackup_options(List *options, basebackup_options *opt);
7072static void SendXlogRecPtrResult (XLogRecPtr ptr , TimeLineID tli );
7173static int compareWalFileNames (const void * a , const void * b );
7274static void throttle (size_t increment );
75+ static bool is_checksummed_file (const char * fullpath , const char * filename );
7376
7477/* Was the backup currently in-progress initiated in recovery mode? */
7578static bool backup_started_in_recovery = false;
@@ -99,6 +102,15 @@ static TimeOffset elapsed_min_unit;
99102/* The last check of the transfer rate. */
100103static TimestampTz throttled_last ;
101104
105+ /* The starting XLOG position of the base backup. */
106+ static XLogRecPtr startptr ;
107+
108+ /* Total number of checksum failures during base backup. */
109+ static int64 total_checksum_failures ;
110+
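/*
 * When true, sendFile() skips page checksum verification.  Set by the
 * "noverify_checksums" base backup option.
 * NOTE(review): no visible code resets this to false for a later command
 * in the same session -- confirm whether that is intended.
 */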
112+ static bool noverify_checksums = false;
113+
102114/*
103115 * The contents of these directories are removed or recreated during server
104116 * start so they are not included in backups. The directories themselves are
@@ -175,6 +187,18 @@ static const char *excludeFiles[] =
175187 NULL
176188};
177189
/*
 * Basenames of files that are never checksum-verified, even when they
 * live in a directory whose contents are otherwise verified.
 *
 * is_checksummed_file() compares a file's basename against each entry
 * with an exact string match.  The list is terminated by a NULL entry.
 */
static const char *noChecksumFiles[] =
{
	"pg_control",
	"pg_filenode.map",
	"pg_internal.init",
	"PG_VERSION",
	NULL
};
200+
201+
178202/*
179203 * Called when ERROR or FATAL happens in perform_base_backup() after
180204 * we have started the backup - make sure we end it!
@@ -194,7 +218,6 @@ base_backup_cleanup(int code, Datum arg)
194218static void
195219perform_base_backup (basebackup_options * opt )
196220{
197- XLogRecPtr startptr ;
198221 TimeLineID starttli ;
199222 XLogRecPtr endptr ;
200223 TimeLineID endtli ;
@@ -210,6 +233,8 @@ perform_base_backup(basebackup_options *opt)
210233 labelfile = makeStringInfo ();
211234 tblspc_map_file = makeStringInfo ();
212235
236+ total_checksum_failures = 0 ;
237+
213238 startptr = do_pg_start_backup (opt -> label , opt -> fastcheckpoint , & starttli ,
214239 labelfile , & tablespaces ,
215240 tblspc_map_file ,
@@ -568,6 +593,17 @@ perform_base_backup(basebackup_options *opt)
568593 pq_putemptymessage ('c' );
569594 }
570595 SendXlogRecPtrResult (endptr , endtli );
596+
597+ if (total_checksum_failures )
598+ {
599+ if (total_checksum_failures > 1 )
600+ ereport (WARNING ,
601+ (errmsg ("%ld total checksum verification failures" , total_checksum_failures )));
602+ ereport (ERROR ,
603+ (errcode (ERRCODE_DATA_CORRUPTED ),
604+ errmsg ("checksum verification failure during base backup" )));
605+ }
606+
571607}
572608
573609/*
@@ -597,6 +633,7 @@ parse_basebackup_options(List *options, basebackup_options *opt)
597633 bool o_wal = false;
598634 bool o_maxrate = false;
599635 bool o_tablespace_map = false;
636+ bool o_noverify_checksums = false;
600637
601638 MemSet (opt , 0 , sizeof (* opt ));
602639 foreach (lopt , options )
@@ -676,6 +713,15 @@ parse_basebackup_options(List *options, basebackup_options *opt)
676713 opt -> sendtblspcmapfile = true;
677714 o_tablespace_map = true;
678715 }
716+ else if (strcmp (defel -> defname , "noverify_checksums" ) == 0 )
717+ {
718+ if (o_noverify_checksums )
719+ ereport (ERROR ,
720+ (errcode (ERRCODE_SYNTAX_ERROR ),
721+ errmsg ("duplicate option \"%s\"" , defel -> defname )));
722+ noverify_checksums = true;
723+ o_noverify_checksums = true;
724+ }
679725 else
680726 elog (ERROR , "option \"%s\" not recognized" ,
681727 defel -> defname );
@@ -1257,6 +1303,33 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
12571303 return size ;
12581304}
12591305
1306+ /*
1307+ * Check if a file should have its checksum validated.
1308+ * We validate checksums on files in regular tablespaces
1309+ * (including global and default) only, and in those there
1310+ * are some files that are explicitly excluded.
1311+ */
1312+ static bool
1313+ is_checksummed_file (const char * fullpath , const char * filename )
1314+ {
1315+ const char * * f ;
1316+
1317+ /* Check that the file is in a tablespace */
1318+ if (strncmp (fullpath , "./global/" , 9 ) == 0 ||
1319+ strncmp (fullpath , "./base/" , 7 ) == 0 ||
1320+ strncmp (fullpath , "/" , 1 ) == 0 )
1321+ {
1322+ /* Compare file against noChecksumFiles skiplist */
1323+ for (f = noChecksumFiles ; * f ; f ++ )
1324+ if (strcmp (* f , filename ) == 0 )
1325+ return false;
1326+
1327+ return true;
1328+ }
1329+ else
1330+ return false;
1331+ }
1332+
12601333/*****
12611334 * Functions for handling tar file format
12621335 *
@@ -1277,10 +1350,20 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
12771350 bool missing_ok )
12781351{
12791352 FILE * fp ;
1353+ BlockNumber blkno = 0 ;
1354+ bool block_retry = false;
12801355 char buf [TAR_SEND_SIZE ];
1356+ uint16 checksum ;
1357+ int checksum_failures = 0 ;
12811358 size_t cnt ;
1359+ int i ;
12821360 pgoff_t len = 0 ;
1361+ char * page ;
12831362 size_t pad ;
1363+ PageHeader phdr ;
1364+ int segmentno = 0 ;
1365+ char * segmentpath ;
1366+ bool verify_checksum = false;
12841367
12851368 fp = AllocateFile (readfilename , "rb" );
12861369 if (fp == NULL )
@@ -1294,8 +1377,142 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
12941377
12951378 _tarWriteHeader (tarfilename , NULL , statbuf , false);
12961379
1380+ if (!noverify_checksums && DataChecksumsEnabled ())
1381+ {
1382+ char * filename ;
1383+
1384+ /*
1385+ * Get the filename (excluding path). As last_dir_separator()
1386+ * includes the last directory separator, we chop that off by
1387+ * incrementing the pointer.
1388+ */
1389+ filename = last_dir_separator (readfilename ) + 1 ;
1390+
1391+ if (is_checksummed_file (readfilename , filename ))
1392+ {
1393+ verify_checksum = true;
1394+
1395+ /*
1396+ * Cut off at the segment boundary (".") to get the segment number
1397+ * in order to mix it into the checksum.
1398+ */
1399+ segmentpath = strstr (filename , "." );
1400+ if (segmentpath != NULL )
1401+ {
1402+ segmentno = atoi (segmentpath + 1 );
1403+ if (segmentno == 0 )
1404+ ereport (ERROR ,
1405+ (errmsg ("invalid segment number %d in file \"%s\"" ,
1406+ segmentno , filename )));
1407+ }
1408+ }
1409+ }
1410+
12971411 while ((cnt = fread (buf , 1 , Min (sizeof (buf ), statbuf -> st_size - len ), fp )) > 0 )
12981412 {
1413+ if (verify_checksum )
1414+ {
1415+ /*
1416+ * The checksums are verified at block level, so we iterate over
1417+ * the buffer in chunks of BLCKSZ, after making sure that
1418+ * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple
1419+ * of BLCKSZ bytes.
1420+ */
1421+ Assert (TAR_SEND_SIZE % BLCKSZ == 0 );
1422+
1423+ if (cnt % BLCKSZ != 0 )
1424+ {
1425+ ereport (WARNING ,
1426+ (errmsg ("cannot verify checksum in file \"%s\", block "
1427+ "%d: read buffer size %d and page size %d "
1428+ "differ" ,
1429+ readfilename , blkno , (int ) cnt , BLCKSZ )));
1430+ verify_checksum = false;
1431+ continue ;
1432+ }
1433+ for (i = 0 ; i < cnt / BLCKSZ ; i ++ )
1434+ {
1435+ page = buf + BLCKSZ * i ;
1436+
1437+ /*
1438+ * Only check pages which have not been modified since the
1439+ * start of the base backup. Otherwise, they might have been
1440+ * written only halfway and the checksum would not be valid.
1441+ * However, replaying WAL would reinstate the correct page in
1442+ * this case.
1443+ */
1444+ if (PageGetLSN (page ) < startptr )
1445+ {
1446+ checksum = pg_checksum_page ((char * ) page , blkno + segmentno * RELSEG_SIZE );
1447+ phdr = (PageHeader ) page ;
1448+ if (phdr -> pd_checksum != checksum )
1449+ {
1450+ /*
1451+ * Retry the block on the first failure. It's
1452+ * possible that we read the first 4K page of the
1453+ * block just before postgres updated the entire block
1454+ * so it ends up looking torn to us. We only need to
1455+ * retry once because the LSN should be updated to
1456+ * something we can ignore on the next pass. If the
1457+ * error happens again then it is a true validation
1458+ * failure.
1459+ */
1460+ if (block_retry == false)
1461+ {
1462+ /* Reread the failed block */
1463+ if (fseek (fp , - (cnt - BLCKSZ * i ), SEEK_CUR ) == -1 )
1464+ {
1465+ ereport (ERROR ,
1466+ (errcode_for_file_access (),
1467+ errmsg ("could not fseek in file \"%s\": %m" ,
1468+ readfilename )));
1469+ }
1470+
1471+ if (fread (buf + BLCKSZ * i , 1 , BLCKSZ , fp ) != BLCKSZ )
1472+ {
1473+ ereport (ERROR ,
1474+ (errcode_for_file_access (),
1475+ errmsg ("could not reread block %d of file \"%s\": %m" ,
1476+ blkno , readfilename )));
1477+ }
1478+
1479+ if (fseek (fp , cnt - BLCKSZ * i - BLCKSZ , SEEK_CUR ) == -1 )
1480+ {
1481+ ereport (ERROR ,
1482+ (errcode_for_file_access (),
1483+ errmsg ("could not fseek in file \"%s\": %m" ,
1484+ readfilename )));
1485+ }
1486+
1487+ /* Set flag so we know a retry was attempted */
1488+ block_retry = true;
1489+
1490+ /* Reset loop to validate the block again */
1491+ i -- ;
1492+ continue ;
1493+ }
1494+
1495+ checksum_failures ++ ;
1496+
1497+ if (checksum_failures <= 5 )
1498+ ereport (WARNING ,
1499+ (errmsg ("checksum verification failed in "
1500+ "file \"%s\", block %d: calculated "
1501+ "%X but expected %X" ,
1502+ readfilename , blkno , checksum ,
1503+ phdr -> pd_checksum )));
1504+ if (checksum_failures == 5 )
1505+ ereport (WARNING ,
1506+ (errmsg ("further checksum verification "
1507+ "failures in file \"%s\" will not "
1508+ "be reported" , readfilename )));
1509+ }
1510+ }
1511+ block_retry = false;
1512+ blkno ++ ;
1513+ }
1514+ }
1515+
12991516 /* Send the chunk as a CopyData message */
13001517 if (pq_putmessage ('d' , buf , cnt ))
13011518 ereport (ERROR ,
@@ -1341,6 +1558,14 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
13411558
13421559 FreeFile (fp );
13431560
1561+ if (checksum_failures > 1 )
1562+ {
1563+ ereport (WARNING ,
1564+ (errmsg ("file \"%s\" has a total of %d checksum verification "
1565+ "failures" , readfilename , checksum_failures )));
1566+ }
1567+ total_checksum_failures += checksum_failures ;
1568+
13441569 return true;
13451570}
13461571
0 commit comments