@@ -131,13 +131,21 @@ typedef struct ReorderBufferTupleCidEnt
131131 CommandId combocid ; /* just for debugging */
132132} ReorderBufferTupleCidEnt ;
133133
134+ /* Virtual file descriptor with file offset tracking */
135+ typedef struct TXNEntryFile
136+ {
137+ File vfd ; /* -1 when the file is closed */
138+ off_t curOffset ; /* offset for next write or read. Reset to 0
139+ * when vfd is opened. */
140+ } TXNEntryFile ;
141+
134142/* k-way in-order change iteration support structures */
135143typedef struct ReorderBufferIterTXNEntry
136144{
137145 XLogRecPtr lsn ;
138146 ReorderBufferChange * change ;
139147 ReorderBufferTXN * txn ;
140- int fd ;
148+ TXNEntryFile file ;
141149 XLogSegNo segno ;
142150} ReorderBufferIterTXNEntry ;
143151
@@ -207,7 +215,8 @@ static void AssertTXNLsnOrder(ReorderBuffer *rb);
207215 * subtransactions
208216 * ---------------------------------------
209217 */
210- static ReorderBufferIterTXNState * ReorderBufferIterTXNInit (ReorderBuffer * rb , ReorderBufferTXN * txn );
218+ static void ReorderBufferIterTXNInit (ReorderBuffer * rb , ReorderBufferTXN * txn ,
219+ ReorderBufferIterTXNState * volatile * iter_state );
211220static ReorderBufferChange * ReorderBufferIterTXNNext (ReorderBuffer * rb , ReorderBufferIterTXNState * state );
212221static void ReorderBufferIterTXNFinish (ReorderBuffer * rb ,
213222 ReorderBufferIterTXNState * state );
@@ -223,7 +232,7 @@ static void ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn);
223232static void ReorderBufferSerializeChange (ReorderBuffer * rb , ReorderBufferTXN * txn ,
224233 int fd , ReorderBufferChange * change );
225234static Size ReorderBufferRestoreChanges (ReorderBuffer * rb , ReorderBufferTXN * txn ,
226- int * fd , XLogSegNo * segno );
235+ TXNEntryFile * file , XLogSegNo * segno );
227236static void ReorderBufferRestoreChange (ReorderBuffer * rb , ReorderBufferTXN * txn ,
228237 char * change );
229238static void ReorderBufferRestoreCleanup (ReorderBuffer * rb , ReorderBufferTXN * txn );
@@ -996,15 +1005,23 @@ ReorderBufferIterCompare(Datum a, Datum b, void *arg)
9961005/*
9971006 * Allocate & initialize an iterator which iterates in lsn order over a
9981007 * transaction and all its subtransactions.
1008+ *
1009+ * Note: The iterator state is returned through iter_state parameter rather
1010+ * than the function's return value. This is because the state gets cleaned up
1011+ * in a PG_CATCH block in the caller, so we want to make sure the caller gets
1012+ * back the state even if this function throws an exception.
9991013 */
1000- static ReorderBufferIterTXNState *
1001- ReorderBufferIterTXNInit (ReorderBuffer * rb , ReorderBufferTXN * txn )
1014+ static void
1015+ ReorderBufferIterTXNInit (ReorderBuffer * rb , ReorderBufferTXN * txn ,
1016+ ReorderBufferIterTXNState * volatile * iter_state )
10021017{
10031018 Size nr_txns = 0 ;
10041019 ReorderBufferIterTXNState * state ;
10051020 dlist_iter cur_txn_i ;
10061021 int32 off ;
10071022
1023+ * iter_state = NULL ;
1024+
10081025 /*
10091026 * Calculate the size of our heap: one element for every transaction that
10101027 * contains changes. (Besides the transactions already in the reorder
@@ -1039,7 +1056,7 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
10391056
10401057 for (off = 0 ; off < state -> nr_txns ; off ++ )
10411058 {
1042- state -> entries [off ].fd = -1 ;
1059+ state -> entries [off ].file . vfd = -1 ;
10431060 state -> entries [off ].segno = 0 ;
10441061 }
10451062
@@ -1048,6 +1065,9 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
10481065 ReorderBufferIterCompare ,
10491066 state );
10501067
1068+ /* Now that the state fields are initialized, it is safe to return it. */
1069+ * iter_state = state ;
1070+
10511071 /*
10521072 * Now insert items into the binary heap, in an unordered fashion. (We
10531073 * will run a heap assembly step at the end; this is more efficient.)
@@ -1064,7 +1084,7 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
10641084 {
10651085 /* serialize remaining changes */
10661086 ReorderBufferSerializeTXN (rb , txn );
1067- ReorderBufferRestoreChanges (rb , txn , & state -> entries [off ].fd ,
1087+ ReorderBufferRestoreChanges (rb , txn , & state -> entries [off ].file ,
10681088 & state -> entries [off ].segno );
10691089 }
10701090
@@ -1094,7 +1114,7 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
10941114 /* serialize remaining changes */
10951115 ReorderBufferSerializeTXN (rb , cur_txn );
10961116 ReorderBufferRestoreChanges (rb , cur_txn ,
1097- & state -> entries [off ].fd ,
1117+ & state -> entries [off ].file ,
10981118 & state -> entries [off ].segno );
10991119 }
11001120 cur_change = dlist_head_element (ReorderBufferChange , node ,
@@ -1110,8 +1130,6 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
11101130
11111131 /* assemble a valid binary heap */
11121132 binaryheap_build (state -> heap );
1113-
1114- return state ;
11151133}
11161134
11171135/*
@@ -1175,7 +1193,7 @@ ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
11751193 dlist_delete (& change -> node );
11761194 dlist_push_tail (& state -> old_change , & change -> node );
11771195
1178- if (ReorderBufferRestoreChanges (rb , entry -> txn , & entry -> fd ,
1196+ if (ReorderBufferRestoreChanges (rb , entry -> txn , & entry -> file ,
11791197 & state -> entries [off ].segno ))
11801198 {
11811199 /* successfully restored changes from disk */
@@ -1214,8 +1232,8 @@ ReorderBufferIterTXNFinish(ReorderBuffer *rb,
12141232
12151233 for (off = 0 ; off < state -> nr_txns ; off ++ )
12161234 {
1217- if (state -> entries [off ].fd != -1 )
1218- CloseTransientFile (state -> entries [off ].fd );
1235+ if (state -> entries [off ].file . vfd != -1 )
1236+ FileClose (state -> entries [off ].file . vfd );
12191237 }
12201238
12211239 /* free memory we might have "leaked" in the last *Next call */
@@ -1558,7 +1576,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
15581576
15591577 rb -> begin (rb , txn );
15601578
1561- iterstate = ReorderBufferIterTXNInit (rb , txn );
1579+ ReorderBufferIterTXNInit (rb , txn , & iterstate );
15621580 while ((change = ReorderBufferIterTXNNext (rb , iterstate )) != NULL )
15631581 {
15641582 Relation relation = NULL ;
@@ -2765,11 +2783,12 @@ ReorderBufferChangeSize(ReorderBufferChange *change)
27652783 */
27662784static Size
27672785ReorderBufferRestoreChanges (ReorderBuffer * rb , ReorderBufferTXN * txn ,
2768- int * fd , XLogSegNo * segno )
2786+ TXNEntryFile * file , XLogSegNo * segno )
27692787{
27702788 Size restored = 0 ;
27712789 XLogSegNo last_segno ;
27722790 dlist_mutable_iter cleanup_iter ;
2791+ File * fd = & file -> vfd ;
27732792
27742793 Assert (txn -> first_lsn != InvalidXLogRecPtr );
27752794 Assert (txn -> final_lsn != InvalidXLogRecPtr );
@@ -2810,7 +2829,11 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
28102829 ReorderBufferSerializedPath (path , MyReplicationSlot , txn -> xid ,
28112830 * segno );
28122831
2813- * fd = OpenTransientFile (path , O_RDONLY | PG_BINARY );
2832+ * fd = PathNameOpenFile (path , O_RDONLY | PG_BINARY );
2833+
2834+ /* No harm in resetting the offset even in case of failure */
2835+ file -> curOffset = 0 ;
2836+
28142837 if (* fd < 0 && errno == ENOENT )
28152838 {
28162839 * fd = -1 ;
@@ -2830,14 +2853,14 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
28302853 * end of this file.
28312854 */
28322855 ReorderBufferSerializeReserve (rb , sizeof (ReorderBufferDiskChange ));
2833- pgstat_report_wait_start ( WAIT_EVENT_REORDER_BUFFER_READ );
2834- readBytes = read ( * fd , rb -> outbuf , sizeof (ReorderBufferDiskChange ));
2835- pgstat_report_wait_end ( );
2856+ readBytes = FileRead ( file -> vfd , rb -> outbuf ,
2857+ sizeof (ReorderBufferDiskChange ),
2858+ file -> curOffset , WAIT_EVENT_REORDER_BUFFER_READ );
28362859
28372860 /* eof */
28382861 if (readBytes == 0 )
28392862 {
2840- CloseTransientFile (* fd );
2863+ FileClose (* fd );
28412864 * fd = -1 ;
28422865 (* segno )++ ;
28432866 continue ;
@@ -2853,16 +2876,19 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
28532876 readBytes ,
28542877 (uint32 ) sizeof (ReorderBufferDiskChange ))));
28552878
2879+ file -> curOffset += readBytes ;
2880+
28562881 ondisk = (ReorderBufferDiskChange * ) rb -> outbuf ;
28572882
28582883 ReorderBufferSerializeReserve (rb ,
28592884 sizeof (ReorderBufferDiskChange ) + ondisk -> size );
28602885 ondisk = (ReorderBufferDiskChange * ) rb -> outbuf ;
28612886
2862- pgstat_report_wait_start (WAIT_EVENT_REORDER_BUFFER_READ );
2863- readBytes = read (* fd , rb -> outbuf + sizeof (ReorderBufferDiskChange ),
2864- ondisk -> size - sizeof (ReorderBufferDiskChange ));
2865- pgstat_report_wait_end ();
2887+ readBytes = FileRead (file -> vfd ,
2888+ rb -> outbuf + sizeof (ReorderBufferDiskChange ),
2889+ ondisk -> size - sizeof (ReorderBufferDiskChange ),
2890+ file -> curOffset ,
2891+ WAIT_EVENT_REORDER_BUFFER_READ );
28662892
28672893 if (readBytes < 0 )
28682894 ereport (ERROR ,
@@ -2875,6 +2901,8 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
28752901 readBytes ,
28762902 (uint32 ) (ondisk -> size - sizeof (ReorderBufferDiskChange )))));
28772903
2904+ file -> curOffset += readBytes ;
2905+
28782906 /*
28792907 * ok, read a full change from disk, now restore it into proper
28802908 * in-memory format
0 commit comments