2727
2828#include "access/htup_details.h"
2929#include "funcapi.h"
30- #include "utils/builtins .h"
30+ #include "catalog/pg_type .h"
3131#include "miscadmin.h"
32+ #include "utils/array.h"
33+ #include "utils/builtins.h"
34+ #include "utils/rel.h"
3235
3336
3437/*
@@ -54,6 +57,42 @@ bits_to_text(bits8 *bits, int len)
5457}
5558
5659
60+ /*
61+ * text_to_bits
62+ *
63+ * Converts a c-string representation of bits into a bits8-array. This is
64+ * the reverse operation of previous routine.
65+ */
66+ static bits8 *
67+ text_to_bits (char * str , int len )
68+ {
69+ bits8 * bits ;
70+ int off = 0 ;
71+ char byte = 0 ;
72+
73+ bits = palloc (len + 1 );
74+
75+ while (off < len )
76+ {
77+ if (off % 8 == 0 )
78+ byte = 0 ;
79+
80+ if ((str [off ] == '0' ) || (str [off ] == '1' ))
81+ byte = byte | ((str [off ] - '0' ) << off % 8 );
82+ else
83+ ereport (ERROR ,
84+ (errcode (ERRCODE_DATA_CORRUPTED ),
85+ errmsg ("illegal character '%c' in t_bits string" , str [off ])));
86+
87+ if (off % 8 == 7 )
88+ bits [off / 8 ] = byte ;
89+
90+ off ++ ;
91+ }
92+
93+ return bits ;
94+ }
95+
5796/*
5897 * heap_page_items
5998 *
@@ -122,8 +161,8 @@ heap_page_items(PG_FUNCTION_ARGS)
122161 HeapTuple resultTuple ;
123162 Datum result ;
124163 ItemId id ;
125- Datum values [13 ];
126- bool nulls [13 ];
164+ Datum values [14 ];
165+ bool nulls [14 ];
127166 uint16 lp_offset ;
128167 uint16 lp_flags ;
129168 uint16 lp_len ;
@@ -153,21 +192,31 @@ heap_page_items(PG_FUNCTION_ARGS)
153192 lp_offset == MAXALIGN (lp_offset ) &&
154193 lp_offset + lp_len <= raw_page_size )
155194 {
156- HeapTupleHeader tuphdr ;
157- int bits_len ;
195+ HeapTupleHeader tuphdr ;
196+ bytea * tuple_data_bytea ;
197+ int tuple_data_len ;
158198
159199 /* Extract information from the tuple header */
160200
161201 tuphdr = (HeapTupleHeader ) PageGetItem (page , id );
162202
163203 values [4 ] = UInt32GetDatum (HeapTupleHeaderGetRawXmin (tuphdr ));
164204 values [5 ] = UInt32GetDatum (HeapTupleHeaderGetRawXmax (tuphdr ));
165- values [6 ] = UInt32GetDatum (HeapTupleHeaderGetRawCommandId (tuphdr )); /* shared with xvac */
205+ /* shared with xvac */
206+ values [6 ] = UInt32GetDatum (HeapTupleHeaderGetRawCommandId (tuphdr ));
166207 values [7 ] = PointerGetDatum (& tuphdr -> t_ctid );
167208 values [8 ] = UInt32GetDatum (tuphdr -> t_infomask2 );
168209 values [9 ] = UInt32GetDatum (tuphdr -> t_infomask );
169210 values [10 ] = UInt8GetDatum (tuphdr -> t_hoff );
170211
212+ /* Copy raw tuple data into bytea attribute */
213+ tuple_data_len = lp_len - tuphdr -> t_hoff ;
214+ tuple_data_bytea = (bytea * ) palloc (tuple_data_len + VARHDRSZ );
215+ SET_VARSIZE (tuple_data_bytea , tuple_data_len + VARHDRSZ );
216+ memcpy (VARDATA (tuple_data_bytea ), (char * ) tuphdr + tuphdr -> t_hoff ,
217+ tuple_data_len );
218+ values [13 ] = PointerGetDatum (tuple_data_bytea );
219+
171220 /*
172221 * We already checked that the item is completely within the raw
173222 * page passed to us, with the length given in the line pointer.
@@ -180,11 +229,11 @@ heap_page_items(PG_FUNCTION_ARGS)
180229 {
181230 if (tuphdr -> t_infomask & HEAP_HASNULL )
182231 {
183- bits_len = tuphdr -> t_hoff -
184- offsetof( HeapTupleHeaderData , t_bits ) ;
232+ int bits_len =
233+ (( tuphdr -> t_infomask2 & HEAP_NATTS_MASK ) / 8 + 1 ) * 8 ;
185234
186235 values [11 ] = CStringGetTextDatum (
187- bits_to_text (tuphdr -> t_bits , bits_len * 8 ));
236+ bits_to_text (tuphdr -> t_bits , bits_len ));
188237 }
189238 else
190239 nulls [11 ] = true;
@@ -208,7 +257,7 @@ heap_page_items(PG_FUNCTION_ARGS)
208257 */
209258 int i ;
210259
211- for (i = 4 ; i <= 12 ; i ++ )
260+ for (i = 4 ; i <= 13 ; i ++ )
212261 nulls [i ] = true;
213262 }
214263
@@ -223,3 +272,205 @@ heap_page_items(PG_FUNCTION_ARGS)
223272 else
224273 SRF_RETURN_DONE (fctx );
225274}
275+
276+ /*
277+ * tuple_data_split_internal
278+ *
279+ * Split raw tuple data taken directly from a page into an array of bytea
280+ * elements. This routine does a lookup on NULL values and creates array
281+ * elements accordindly. This is a reimplementation of nocachegetattr()
282+ * in heaptuple.c simplified for educational purposes.
283+ */
284+ static Datum
285+ tuple_data_split_internal (Oid relid , char * tupdata ,
286+ uint16 tupdata_len , uint16 t_infomask ,
287+ uint16 t_infomask2 , bits8 * t_bits ,
288+ bool do_detoast )
289+ {
290+ ArrayBuildState * raw_attrs ;
291+ int nattrs ;
292+ int i ;
293+ int off = 0 ;
294+ Relation rel ;
295+ TupleDesc tupdesc ;
296+
297+ /* Get tuple descriptor from relation OID */
298+ rel = relation_open (relid , NoLock );
299+ tupdesc = CreateTupleDescCopyConstr (rel -> rd_att );
300+ relation_close (rel , NoLock );
301+
302+ raw_attrs = initArrayResult (BYTEAOID , CurrentMemoryContext , false);
303+ nattrs = tupdesc -> natts ;
304+
305+ if (nattrs < (t_infomask2 & HEAP_NATTS_MASK ))
306+ ereport (ERROR ,
307+ (errcode (ERRCODE_DATA_CORRUPTED ),
308+ errmsg ("number of attributes in tuple header is greater than number of attributes in tuple descriptor" )));
309+
310+ for (i = 0 ; i < nattrs ; i ++ )
311+ {
312+ Form_pg_attribute attr ;
313+ bool is_null ;
314+ bytea * attr_data = NULL ;
315+
316+ attr = tupdesc -> attrs [i ];
317+ is_null = (t_infomask & HEAP_HASNULL ) && att_isnull (i , t_bits );
318+
319+ /*
320+ * Tuple header can specify less attributes than tuple descriptor
321+ * as ALTER TABLE ADD COLUMN without DEFAULT keyword does not
322+ * actually change tuples in pages, so attributes with numbers greater
323+ * than (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
324+ */
325+ if (i >= (t_infomask2 & HEAP_NATTS_MASK ))
326+ is_null = true;
327+
328+ if (!is_null )
329+ {
330+ int len ;
331+
332+ if (attr -> attlen == -1 )
333+ {
334+ off = att_align_pointer (off , tupdesc -> attrs [i ]-> attalign , -1 ,
335+ tupdata + off );
336+ /*
337+ * As VARSIZE_ANY throws an exception if it can't properly
338+ * detect the type of external storage in macros VARTAG_SIZE,
339+ * this check is repeated to have a nicer error handling.
340+ */
341+ if (VARATT_IS_EXTERNAL (tupdata + off ) &&
342+ !VARATT_IS_EXTERNAL_ONDISK (tupdata + off ) &&
343+ !VARATT_IS_EXTERNAL_INDIRECT (tupdata + off ))
344+ ereport (ERROR ,
345+ (errcode (ERRCODE_DATA_CORRUPTED ),
346+ errmsg ("first byte of varlena attribute is incorrect for attribute %d" , i )));
347+
348+ len = VARSIZE_ANY (tupdata + off );
349+ }
350+ else
351+ {
352+ off = att_align_nominal (off , tupdesc -> attrs [i ]-> attalign );
353+ len = attr -> attlen ;
354+ }
355+
356+ if (tupdata_len < off + len )
357+ ereport (ERROR ,
358+ (errcode (ERRCODE_DATA_CORRUPTED ),
359+ errmsg ("unexpected end of tuple data" )));
360+
361+ if (attr -> attlen == -1 && do_detoast )
362+ attr_data = DatumGetByteaPCopy (tupdata + off );
363+ else
364+ {
365+ attr_data = (bytea * ) palloc (len + VARHDRSZ );
366+ SET_VARSIZE (attr_data , len + VARHDRSZ );
367+ memcpy (VARDATA (attr_data ), tupdata + off , len );
368+ }
369+
370+ off = att_addlength_pointer (off , tupdesc -> attrs [i ]-> attlen ,
371+ tupdata + off );
372+ }
373+
374+ raw_attrs = accumArrayResult (raw_attrs , PointerGetDatum (attr_data ),
375+ is_null , BYTEAOID , CurrentMemoryContext );
376+ if (attr_data )
377+ pfree (attr_data );
378+ }
379+
380+ if (tupdata_len != off )
381+ ereport (ERROR ,
382+ (errcode (ERRCODE_DATA_CORRUPTED ),
383+ errmsg ("end of tuple reached without looking at all its data" )));
384+
385+ return makeArrayResult (raw_attrs , CurrentMemoryContext );
386+ }
387+
388+ /*
389+ * tuple_data_split
390+ *
391+ * Split raw tuple data taken directly from page into distinct elements
392+ * taking into account null values.
393+ */
394+ PG_FUNCTION_INFO_V1 (tuple_data_split );
395+
396+ Datum
397+ tuple_data_split (PG_FUNCTION_ARGS )
398+ {
399+ Oid relid ;
400+ bytea * raw_data ;
401+ uint16 t_infomask ;
402+ uint16 t_infomask2 ;
403+ char * t_bits_str ;
404+ bool do_detoast = false;
405+ bits8 * t_bits = NULL ;
406+ Datum res ;
407+
408+ relid = PG_GETARG_OID (0 );
409+ raw_data = PG_ARGISNULL (1 ) ? NULL : PG_GETARG_BYTEA_P (1 );
410+ t_infomask = PG_GETARG_INT16 (2 );
411+ t_infomask2 = PG_GETARG_INT16 (3 );
412+ t_bits_str = PG_ARGISNULL (4 ) ? NULL :
413+ text_to_cstring (PG_GETARG_TEXT_PP (4 ));
414+
415+ if (PG_NARGS () >= 6 )
416+ do_detoast = PG_GETARG_BOOL (5 );
417+
418+ if (!superuser ())
419+ ereport (ERROR ,
420+ (errcode (ERRCODE_INSUFFICIENT_PRIVILEGE ),
421+ errmsg ("must be superuser to use raw page functions" )));
422+
423+ if (!raw_data )
424+ PG_RETURN_NULL ();
425+
426+ /*
427+ * Convert t_bits string back to the bits8 array as represented in the
428+ * tuple header.
429+ */
430+ if (t_infomask & HEAP_HASNULL )
431+ {
432+ int bits_str_len ;
433+ int bits_len ;
434+
435+ bits_len = (t_infomask2 & HEAP_NATTS_MASK ) / 8 + 1 ;
436+ if (!t_bits_str )
437+ ereport (ERROR ,
438+ (errcode (ERRCODE_DATA_CORRUPTED ),
439+ errmsg ("argument of t_bits is null, but it is expected to be null and %i character long" ,
440+ bits_len * 8 )));
441+
442+ bits_str_len = strlen (t_bits_str );
443+ if ((bits_str_len % 8 ) != 0 )
444+ ereport (ERROR ,
445+ (errcode (ERRCODE_DATA_CORRUPTED ),
446+ errmsg ("length of t_bits is not a multiple of eight" )));
447+
448+ if (bits_len * 8 != bits_str_len )
449+ ereport (ERROR ,
450+ (errcode (ERRCODE_DATA_CORRUPTED ),
451+ errmsg ("unexpected length of t_bits %u, expected %i" ,
452+ bits_str_len , bits_len * 8 )));
453+
454+ /* do the conversion */
455+ t_bits = text_to_bits (t_bits_str , bits_str_len );
456+ }
457+ else
458+ {
459+ if (t_bits_str )
460+ ereport (ERROR ,
461+ (errcode (ERRCODE_DATA_CORRUPTED ),
462+ errmsg ("t_bits string is expected to be NULL, but instead it is %lu bytes length" ,
463+ strlen (t_bits_str ))));
464+ }
465+
466+ /* Split tuple data */
467+ res = tuple_data_split_internal (relid , (char * ) raw_data + VARHDRSZ ,
468+ VARSIZE (raw_data ) - VARHDRSZ ,
469+ t_infomask , t_infomask2 , t_bits ,
470+ do_detoast );
471+
472+ if (t_bits )
473+ pfree (t_bits );
474+
475+ PG_RETURN_ARRAYTYPE_P (res );
476+ }
0 commit comments