77 *
88 *
99 * IDENTIFICATION
10- * $Header: /cvsroot/pgsql/src/backend/utils/adt/encode.c,v 1.1 2001/07/12 14:05:31 momjian Exp $
10+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/encode.c,v 1.2 2001/09/14 17:46:40 momjian Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
1919
/*
 * Per-encoding table of callbacks.
 *
 * encode_len / decode_len are called before the conversion and their
 * result is used to size the output buffer (VARHDRSZ + result is what the
 * callers palloc).  The lines marked '-' are the old signatures that took
 * only the input length; the lines marked '+' also receive the input data,
 * so an encoding whose output size depends on the content can inspect it.
 *
 * encode / decode receive the input data, its length, and the output
 * buffer 'res'; the callers store their unsigned return value as the
 * number of bytes produced.
 */
2020struct pg_encoding
2121{
22- unsigned (* encode_len ) (unsigned dlen );
23- unsigned (* decode_len ) (unsigned dlen );
22+ unsigned (* encode_len ) (const uint8 * data , unsigned dlen );
23+ unsigned (* decode_len ) (const uint8 * data , unsigned dlen );
2424 unsigned (* encode ) (const uint8 * data , unsigned dlen , uint8 * res );
2525 unsigned (* decode ) (const uint8 * data , unsigned dlen , uint8 * res );
2626};
@@ -50,7 +50,7 @@ binary_encode(PG_FUNCTION_ARGS)
5050 if (enc == NULL )
5151 elog (ERROR , "No such encoding" );
5252
53- resultlen = enc -> encode_len (datalen );
53+ resultlen = enc -> encode_len (VARDATA ( data ), datalen );
5454 result = palloc (VARHDRSZ + resultlen );
5555
5656 res = enc -> encode (VARDATA (data ), datalen , VARDATA (result ));
@@ -81,7 +81,7 @@ binary_decode(PG_FUNCTION_ARGS)
8181 if (enc == NULL )
8282 elog (ERROR , "No such encoding" );
8383
84- resultlen = enc -> decode_len (datalen );
84+ resultlen = enc -> decode_len (VARDATA ( data ), datalen );
8585 result = palloc (VARHDRSZ + resultlen );
8686
8787 res = enc -> decode (VARDATA (data ), datalen , VARDATA (result ));
@@ -169,13 +169,13 @@ hex_decode(const uint8 * src, unsigned len, uint8 * dst)
169169}
170170
171171static unsigned
172- hex_enc_len (unsigned srclen )
172+ hex_enc_len (const uint8 * src , unsigned srclen )
173173{
174174 return srclen << 1 ;
175175}
176176
177177static unsigned
178- hex_dec_len (unsigned srclen )
178+ hex_dec_len (const uint8 * src , unsigned srclen )
179179{
180180 return srclen >> 1 ;
181181}
@@ -308,18 +308,188 @@ b64_decode(const uint8 * src, unsigned len, uint8 * dst)
308308
309309
310310static unsigned
311- b64_enc_len (unsigned srclen )
311+ b64_enc_len (const uint8 * src , unsigned srclen )
312312{
313313 /* 3 bytes will be converted to 4, linefeed after 76 chars */
314314 return (srclen + 2 ) * 4 / 3 + srclen / (76 * 3 / 4 );
315315}
316316
317317static unsigned
318- b64_dec_len (unsigned srclen )
318+ b64_dec_len (const uint8 * src , unsigned srclen )
319319{
320320 return (srclen * 3 ) >> 2 ;
321321}
322322
323+ /*
324+ * Escape
325+ * Minimally escape bytea to text.
326+ * De-escape text to bytea.
327+ *
328+ * Only two characters are escaped:
329+ * \0 (null) and \\ (backslash)
330+ *
331+ * De-escapes \\ and any \### octal
332+ */
333+
/*
 * Digit helpers for the octal escapes.
 *
 * There must be no whitespace between the macro name and its '(': with a
 * space the preprocessor defines an object-like macro whose replacement
 * text begins with the parenthesis, and every use then fails to expand as
 * intended.
 */

/* Numeric value of an ASCII digit character, e.g. VAL('7') == 7. */
#define VAL(CH)		((CH) - '0')

/* ASCII digit character for a numeric value 0..9, e.g. DIG(7) == '7'. */
#define DIG(N)		((N) + '0')
336+
337+ static unsigned
338+ esc_encode (const uint8 * src , unsigned srclen , uint8 * dst )
339+ {
340+ const uint8 * end = src + srclen ;
341+ uint8 * rp = dst ;
342+ int val ;
343+ int len = 0 ;
344+
345+ while (src < end )
346+ {
347+ if (* src == '\0' )
348+ {
349+ val = * src ;
350+ rp [0 ] = '\\' ;
351+ rp [1 ] = '0' ;
352+ rp [2 ] = '0' ;
353+ rp [3 ] = '0' ;
354+ rp += 4 ;
355+ len += 4 ;
356+ }
357+ else if (* src == '\\' )
358+ {
359+ val = * src ;
360+ rp [0 ] = '\\' ;
361+ rp [1 ] = '\\' ;
362+ rp += 2 ;
363+ len += 2 ;
364+ }
365+ else
366+ {
367+ * rp ++ = * src ;
368+ len ++ ;
369+ }
370+
371+ src ++ ;
372+ }
373+ * rp = '\0' ;
374+
375+ return len ;
376+ }
377+
378+ static unsigned
379+ esc_decode (const uint8 * src , unsigned srclen , uint8 * dst )
380+ {
381+ const uint8 * end = src + srclen ;
382+ uint8 * rp = dst ;
383+ int val ;
384+ int len = 0 ;
385+
386+ while (src < end )
387+ {
388+ if (src [0 ] != '\\' )
389+ {
390+ * rp ++ = * src ++ ;
391+ }
392+ else if ( (src [0 ] == '\\' ) &&
393+ (src [1 ] >= '0' && src [1 ] <= '3' ) &&
394+ (src [2 ] >= '0' && src [2 ] <= '7' ) &&
395+ (src [3 ] >= '0' && src [3 ] <= '7' ) )
396+ {
397+ val = VAL (src [1 ]);
398+ val <<= 3 ;
399+ val += VAL (src [2 ]);
400+ val <<= 3 ;
401+ * rp ++ = val + VAL (src [3 ]);
402+ src += 4 ;
403+ }
404+ else if ( (src [0 ] == '\\' ) &&
405+ (src [1 ] == '\\' ) )
406+ {
407+ * rp ++ = '\\' ;
408+ src += 2 ;
409+ }
410+ else
411+ {
412+ /*
413+ * One backslash, not followed by ### valid octal.
414+ * Should never get here, since esc_dec_len does same check.
415+ */
416+ elog (ERROR , "decode: Bad input string for type bytea" );
417+ }
418+
419+ len ++ ;
420+ }
421+ return len ;
422+ }
423+
424+ static unsigned
425+ esc_enc_len (const uint8 * src , unsigned srclen )
426+ {
427+ const uint8 * end = src + srclen ;
428+ int len = 0 ;
429+
430+ while (src < end )
431+ {
432+ if (* src == '\0' )
433+ len += 4 ;
434+ else if (* src == '\\' )
435+ len += 2 ;
436+ else
437+ len ++ ;
438+
439+ src ++ ;
440+ }
441+
442+ /*
443+ * Allow for null terminator
444+ */
445+ len ++ ;
446+
447+ return len ;
448+ }
449+
450+ static unsigned
451+ esc_dec_len (const uint8 * src , unsigned srclen )
452+ {
453+ const uint8 * end = src + srclen ;
454+ int len = 0 ;
455+
456+ while (src < end )
457+ {
458+ if (src [0 ] != '\\' )
459+ {
460+ src ++ ;
461+ }
462+ else if ( (src [0 ] == '\\' ) &&
463+ (src [1 ] >= '0' && src [1 ] <= '3' ) &&
464+ (src [2 ] >= '0' && src [2 ] <= '7' ) &&
465+ (src [3 ] >= '0' && src [3 ] <= '7' ) )
466+ {
467+ /*
468+ * backslash + valid octal
469+ */
470+ src += 4 ;
471+ }
472+ else if ( (src [0 ] == '\\' ) &&
473+ (src [1 ] == '\\' ) )
474+ {
475+ /*
476+ * two backslashes = backslash
477+ */
478+ src += 2 ;
479+ }
480+ else
481+ {
482+ /*
483+ * one backslash, not followed by ### valid octal
484+ */
485+ elog (ERROR , "decode: Bad input string for type bytea" );
486+ }
487+
488+ len ++ ;
489+ }
490+ return len ;
491+ }
492+
323493/*
324494 * Common
325495 */
@@ -330,6 +500,7 @@ static struct {
330500} enclist [] = {
331501 {"hex" , { hex_enc_len , hex_dec_len , hex_encode , hex_decode }},
332502 {"base64" , { b64_enc_len , b64_dec_len , b64_encode , b64_decode }},
503+ {"escape" , { esc_enc_len , esc_dec_len , esc_encode , esc_decode }},
333504 {NULL , { NULL , NULL , NULL , NULL } }
334505};
335506
0 commit comments