75 "RELSEG_SIZE must fit in an integer");
101#define INIT_MD_FILETAG(a,xx_rlocator,xx_forknum,xx_segno) \
103 memset(&(a), 0, sizeof(FileTag)), \
104 (a).handler = SYNC_HANDLER_MD, \
105 (a).rlocator = (xx_rlocator), \
106 (a).forknum = (xx_forknum), \
107 (a).segno = (xx_segno) \
113#define EXTENSION_FAIL (1 << 0)
115#define EXTENSION_RETURN_NULL (1 << 1)
117#define EXTENSION_CREATE (1 << 2)
119#define EXTENSION_CREATE_RECOVERY (1 << 3)
121#define EXTENSION_DONT_OPEN (1 << 5)
131#define SEGMENT_CHARS OIDCHARS
132#define MD_PATH_STR_MAXLEN \
134 REL_PATH_STR_MAXLEN \
135 + sizeof((char)'.') \
252 int save_errno = errno;
262 errmsg(
"could not create file \"%s\": %m", path.
str)));
269 mdfd->mdfd_segno = 0;
342 for (forknum = 0; forknum <=
MAX_FORKNUM; forknum++)
361 if (ret < 0 && errno != ENOENT)
366 errmsg(
"could not truncate file \"%s\": %m", path)));
380 path =
relpath(rlocator, forknum);
403 if (ret >= 0 || errno != ENOENT)
405 ret = unlink(path.
str);
406 if (ret < 0 && errno != ENOENT)
411 errmsg(
"could not remove file \"%s\": %m", path.
str)));
439 if (ret >= 0 || errno != ENOENT)
444 for (segno = 1;; segno++)
464 if (unlink(segpath.
str) < 0)
470 errmsg(
"could not remove file \"%s\": %m", segpath.
str)));
488 const void *buffer,
bool skipFsync)
499#ifdef CHECK_WRITE_VS_EXTEND
511 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
512 errmsg(
"cannot extend file \"%s\" beyond %u blocks",
522 if ((nbytes =
FileWrite(v->
mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
527 errmsg(
"could not extend file \"%s\": %m",
529 errhint(
"Check free disk space.")));
533 errmsg(
"could not extend file \"%s\": wrote only %d of %d bytes at block %u",
535 nbytes, BLCKSZ, blocknum),
536 errhint(
"Check free disk space.")));
557 int remblocks = nblocks;
562#ifdef CHECK_WRITE_VS_EXTEND
573 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
574 errmsg(
"cannot extend file \"%s\" beyond %u blocks",
578 while (remblocks > 0)
584 if (segstartblock + remblocks > RELSEG_SIZE)
585 numblocks = RELSEG_SIZE - segstartblock;
587 numblocks = remblocks;
591 Assert(segstartblock < RELSEG_SIZE);
592 Assert(segstartblock + numblocks <= RELSEG_SIZE);
610 seekpos, (
pgoff_t) BLCKSZ * numblocks,
611 WAIT_EVENT_DATA_FILE_EXTEND);
616 errmsg(
"could not extend file \"%s\" with FileFallocate(): %m",
618 errhint(
"Check free disk space."));
633 seekpos, (
pgoff_t) BLCKSZ * numblocks,
634 WAIT_EVENT_DATA_FILE_EXTEND);
638 errmsg(
"could not extend file \"%s\": %m",
640 errhint(
"Check free disk space."));
648 remblocks -= numblocks;
649 curblocknum += numblocks;
685 errmsg(
"could not open file \"%s\": %m", path.
str)));
691 mdfd->mdfd_segno = 0;
705 for (
int forknum = 0; forknum <=
MAX_FORKNUM; forknum++)
722 while (nopensegs > 0)
750 int nblocks_this_segment;
761 nblocks_this_segment =
763 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
766 WAIT_EVENT_DATA_FILE_PREFETCH);
768 blocknum += nblocks_this_segment;
769 nblocks -= nblocks_this_segment;
792 for (
int i = 0;
i < nblocks; ++
i)
795 Assert((uintptr_t) buffers[
i] ==
801 iovp->iov_base = buffers[0];
802 iovp->iov_len = BLCKSZ;
806 for (
int i = 1;
i < nblocks; ++
i)
808 void *buffer = buffers[
i];
810 if (((
char *) iovp->iov_base + iovp->iov_len) == buffer)
813 iovp->iov_len += BLCKSZ;
819 iovp->iov_base = buffer;
820 iovp->iov_len = BLCKSZ;
840 return RELSEG_SIZE - segoff;
858 size_t transferred_this_segment;
859 size_t size_this_segment;
868 nblocks_this_segment =
870 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
871 nblocks_this_segment =
Min(nblocks_this_segment,
lengthof(iov));
873 if (nblocks_this_segment != nblocks)
874 elog(
ERROR,
"read crosses segment boundary");
877 size_this_segment = nblocks_this_segment * BLCKSZ;
878 transferred_this_segment = 0;
887 TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
893 WAIT_EVENT_DATA_FILE_READ);
894 TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
900 size_this_segment - transferred_this_segment);
902#ifdef SIMULATE_SHORT_READ
903 nbytes =
Min(nbytes, 4096);
909 errmsg(
"could not read blocks %u..%u in file \"%s\": %m",
911 blocknum + nblocks_this_segment - 1,
948 i < nblocks_this_segment;
950 memset(buffers[
i], 0, BLCKSZ);
956 errmsg(
"could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
958 blocknum + nblocks_this_segment - 1,
960 transferred_this_segment,
961 size_this_segment)));
965 transferred_this_segment += nbytes;
966 Assert(transferred_this_segment <= size_this_segment);
967 if (transferred_this_segment == size_this_segment)
975 nblocks -= nblocks_this_segment;
976 buffers += nblocks_this_segment;
977 blocknum += nblocks_this_segment;
1003 nblocks_this_segment =
1005 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
1007 if (nblocks_this_segment != nblocks)
1008 elog(
ERROR,
"read crossing segment boundary");
1012 Assert(nblocks <= iovcnt);
1016 Assert(iovcnt <= nblocks_this_segment);
1033 errmsg(
"could not start reading blocks %u..%u in file \"%s\": %m",
1035 blocknum + nblocks_this_segment - 1,
1060 const void **buffers,
BlockNumber nblocks,
bool skipFsync)
1063#ifdef CHECK_WRITE_VS_EXTEND
1075 size_t transferred_this_segment;
1076 size_t size_this_segment;
1085 nblocks_this_segment =
1087 RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE)));
1088 nblocks_this_segment =
Min(nblocks_this_segment,
lengthof(iov));
1090 if (nblocks_this_segment != nblocks)
1091 elog(
ERROR,
"write crosses segment boundary");
1094 size_this_segment = nblocks_this_segment * BLCKSZ;
1095 transferred_this_segment = 0;
1104 TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum,
1110 WAIT_EVENT_DATA_FILE_WRITE);
1111 TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
1117 size_this_segment - transferred_this_segment);
1119#ifdef SIMULATE_SHORT_WRITE
1120 nbytes =
Min(nbytes, 4096);
1125 bool enospc = errno == ENOSPC;
1129 errmsg(
"could not write blocks %u..%u in file \"%s\": %m",
1131 blocknum + nblocks_this_segment - 1,
1133 enospc ?
errhint(
"Check free disk space.") : 0));
1137 transferred_this_segment += nbytes;
1138 Assert(transferred_this_segment <= size_this_segment);
1139 if (transferred_this_segment == size_this_segment)
1150 nblocks -= nblocks_this_segment;
1151 buffers += nblocks_this_segment;
1152 blocknum += nblocks_this_segment;
1195 segnum_start = blocknum / RELSEG_SIZE;
1198 segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
1199 if (segnum_start != segnum_end)
1200 nflush = RELSEG_SIZE - (blocknum % ((
BlockNumber) RELSEG_SIZE));
1203 Assert(nflush <= nblocks);
1256 return (segno * ((
BlockNumber) RELSEG_SIZE)) + nblocks;
1293 if (nblocks > curnblk)
1299 (
errmsg(
"could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
1301 nblocks, curnblk)));
1303 if (nblocks == curnblk)
1311 while (curopensegs > 0)
1315 priorblocks = (curopensegs - 1) * RELSEG_SIZE;
1317 v = &reln->
md_seg_fds[forknum][curopensegs - 1];
1319 if (priorblocks > nblocks)
1328 errmsg(
"could not truncate file \"%s\": %m",
1340 else if (priorblocks + ((
BlockNumber) RELSEG_SIZE) > nblocks)
1349 BlockNumber lastsegblocks = nblocks - priorblocks;
1354 errmsg(
"could not truncate file \"%s\" to %u blocks: %m",
1379 int min_inactive_seg;
1405 if (segno > min_inactive_seg)
1430 int min_inactive_seg;
1465 errmsg(
"could not fsync file \"%s\": %m",
1469 if (segno > min_inactive_seg)
1518 (
errmsg_internal(
"could not forward fsync request because request queue is full")));
1525 errmsg(
"could not fsync file \"%s\": %m",
1584 rlocator.
dbOid = dbid;
1603 for (
i = 0;
i < ndelrels;
i++)
1619 for (
i = 0;
i < ndelrels;
i++)
1687 strcpy(fullpath.
str, path.
str);
1741 bool skipFsync,
int behavior)
1755 if (targetseg < reln->md_num_open_segs[forknum])
1782 nextsegno <= targetseg; nextsegno++)
1816 zerobuf, skipFsync);
1842 errmsg(
"could not open file \"%s\" (target block %u): previous segment is only %u blocks",
1856 errmsg(
"could not open file \"%s\" (target block %u): %m",
1877 errmsg(
"could not seek to end of file \"%s\": %m",
1904 need_to_close =
false;
1916 need_to_close =
true;
1922 result =
FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
1951 return unlink(path);
1980 if (prior_result.
result < 0)
2057 errmsg(
"could not read blocks %u..%u in file \"%s\": %m",
2070 errmsg(
"could not read blocks %u..%u in file \"%s\": read only %zu of %zu bytes",
2074 result.
result * (
size_t) BLCKSZ,
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
int pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov)
PgAioTargetData * pgaio_io_get_target_data(PgAioHandle *ioh)
void TablespaceCreateDbspace(Oid spcOid, Oid dbOid, bool isRedo)
#define InvalidBlockNumber
#define TYPEALIGN(ALIGNVAL, LEN)
int errmsg_internal(const char *fmt,...)
int errcode_for_file_access(void)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
int pg_truncate(const char *path, pgoff_t length)
int FileGetRawDesc(File file)
void FileWriteback(File file, pgoff_t offset, pgoff_t nbytes, uint32 wait_event_info)
char * FilePathName(File file)
int FileSync(File file, uint32 wait_event_info)
int FileStartReadV(PgAioHandle *ioh, File file, int iovcnt, pgoff_t offset, uint32 wait_event_info)
ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info)
int FileFallocate(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
pgoff_t FileSize(File file)
void FileClose(File file)
int data_sync_elevel(int elevel)
File PathNameOpenFile(const char *fileName, int fileFlags)
int FileTruncate(File file, pgoff_t offset, uint32 wait_event_info)
int FileZero(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
int FilePrefetch(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info)
static ssize_t FileWrite(File file, const void *buffer, size_t amount, pgoff_t offset, uint32 wait_event_info)
#define FILE_POSSIBLY_DELETED(err)
int compute_remaining_iovec(struct iovec *destination, const struct iovec *source, int iovcnt, size_t transferred)
Assert(PointerIsAligned(start, uint64))
void * MemoryContextAlloc(MemoryContext context, Size size)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
MemoryContext TopMemoryContext
void * palloc_aligned(Size size, Size alignto, int flags)
void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
static void md_readv_report(PgAioResult result, const PgAioTargetData *td, int elevel)
static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
#define EXTENSION_CREATE_RECOVERY
void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks)
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
bool mdfiletagmatches(const FileTag *ftag, const FileTag *candidate)
bool mdexists(SMgrRelation reln, ForkNumber forknum)
void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
static MdPathStr _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno)
#define EXTENSION_DONT_OPEN
BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum)
int mdunlinkfiletag(const FileTag *ftag, char *path)
static MemoryContext MdCxt
void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
int mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
static PgAioResult md_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
static int do_truncate(const char *path)
void mdclose(SMgrRelation reln, ForkNumber forknum)
void mdzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
static MdfdVec * _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags)
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
int mdsyncfiletag(const FileTag *ftag, char *path)
void mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior)
#define EXTENSION_RETURN_NULL
void mdstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
void mdregistersync(SMgrRelation reln, ForkNumber forknum)
void mdopen(SMgrRelation reln)
const PgAioHandleCallbacks aio_md_readv_cb
static int _mdfd_open_flags(void)
#define INIT_MD_FILETAG(a, xx_rlocator, xx_forknum, xx_segno)
static MdfdVec * mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
static int buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
struct MdPathStr MdPathStr
#define MD_PATH_STR_MAXLEN
static void _fdvec_resize(SMgrRelation reln, ForkNumber forknum, int nseg)
void ForgetDatabaseSyncRequests(Oid dbid)
void mdimmedsync(SMgrRelation reln, ForkNumber forknum)
StaticAssertDecl(RELSEG_SIZE > 0 &&RELSEG_SIZE<=INT_MAX, "RELSEG_SIZE must fit in an integer")
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
#define ERRCODE_DATA_CORRUPTED
instr_time pgstat_prepare_io_time(bool track_io_guc)
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
size_t strlcpy(char *dst, const char *src, size_t siz)
static int fd(const char *x, int i)
#define INVALID_PROC_NUMBER
#define RelFileLocatorBackendIsTemp(rlocator)
#define relpath(rlocator, forknum)
#define relpathbackend(rlocator, backend, forknum)
#define relpathperm(rlocator, forknum)
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
void smgrclose(SMgrRelation reln)
void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
void pgaio_io_set_target_smgr(PgAioHandle *ioh, SMgrRelationData *smgr, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skip_fsync)
char str[MD_PATH_STR_MAXLEN+1]
PgAioHandleCallbackComplete complete_shared
char str[REL_PATH_STR_MAXLEN+1]
int md_num_open_segs[MAX_FORKNUM+1]
struct _MdfdVec * md_seg_fds[MAX_FORKNUM+1]
RelFileLocatorBackend smgr_rlocator
bool RegisterSyncRequest(const FileTag *ftag, SyncRequestType type, bool retryOnError)
struct PgAioTargetData::@125 smgr
void XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)