PostgreSQL Source Code git master
encode.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * encode.c
4 * Various data encoding/decoding things.
5 *
6 * Copyright (c) 2001-2025, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/encode.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include <ctype.h>
17
18#include "mb/pg_wchar.h"
19#include "port/simd.h"
20#include "utils/builtins.h"
21#include "utils/memutils.h"
22#include "varatt.h"
23
24
25/*
26 * Encoding conversion API.
27 * encode_len() and decode_len() compute the amount of space needed, while
28 * encode() and decode() perform the actual conversions. It is okay for
29 * the _len functions to return an overestimate, but not an underestimate.
30 * (Having said that, large overestimates could cause unnecessary errors,
31 * so it's better to get it right.) The conversion routines write to the
32 * buffer at *res and return the true length of their output.
33 */
/*
 * Method table for one encoding scheme.
 *
 * encode_len/decode_len return an upper bound on the output size for the
 * given input; encode/decode write the converted bytes to the buffer at res
 * and return the number of bytes actually produced.
 */
struct pg_encoding
{
    uint64      (*encode_len) (const char *data, size_t dlen);
    uint64      (*decode_len) (const char *data, size_t dlen);
    uint64      (*encode) (const char *data, size_t dlen, char *res);
    uint64      (*decode) (const char *data, size_t dlen, char *res);
};
41
42static const struct pg_encoding *pg_find_encoding(const char *name);
43
44/*
45 * SQL functions.
46 */
47
50{
53 text *result;
54 char *namebuf;
55 char *dataptr;
56 size_t datalen;
57 uint64 resultlen;
58 uint64 res;
59 const struct pg_encoding *enc;
60
61 namebuf = TextDatumGetCString(name);
62
63 enc = pg_find_encoding(namebuf);
64 if (enc == NULL)
66 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
67 errmsg("unrecognized encoding: \"%s\"", namebuf),
68 errhint("Valid encodings are \"%s\", \"%s\", \"%s\", and \"%s\".",
69 "base64", "base64url", "escape", "hex")));
70
71 dataptr = VARDATA_ANY(data);
72 datalen = VARSIZE_ANY_EXHDR(data);
73
74 resultlen = enc->encode_len(dataptr, datalen);
75
76 /*
77 * resultlen possibly overflows uint32, therefore on 32-bit machines it's
78 * unsafe to rely on palloc's internal check.
79 */
80 if (resultlen > MaxAllocSize - VARHDRSZ)
82 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
83 errmsg("result of encoding conversion is too large")));
84
85 result = palloc(VARHDRSZ + resultlen);
86
87 res = enc->encode(dataptr, datalen, VARDATA(result));
88
89 /* Make this FATAL 'cause we've trodden on memory ... */
90 if (res > resultlen)
91 elog(FATAL, "overflow - encode estimate too small");
92
93 SET_VARSIZE(result, VARHDRSZ + res);
94
95 PG_RETURN_TEXT_P(result);
96}
97
100{
103 bytea *result;
104 char *namebuf;
105 char *dataptr;
106 size_t datalen;
107 uint64 resultlen;
108 uint64 res;
109 const struct pg_encoding *enc;
110
111 namebuf = TextDatumGetCString(name);
112
113 enc = pg_find_encoding(namebuf);
114 if (enc == NULL)
116 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
117 errmsg("unrecognized encoding: \"%s\"", namebuf),
118 errhint("Valid encodings are \"%s\", \"%s\", \"%s\", and \"%s\".",
119 "base64", "base64url", "escape", "hex")));
120
121 dataptr = VARDATA_ANY(data);
122 datalen = VARSIZE_ANY_EXHDR(data);
123
124 resultlen = enc->decode_len(dataptr, datalen);
125
126 /*
127 * resultlen possibly overflows uint32, therefore on 32-bit machines it's
128 * unsafe to rely on palloc's internal check.
129 */
130 if (resultlen > MaxAllocSize - VARHDRSZ)
132 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
133 errmsg("result of decoding conversion is too large")));
134
135 result = palloc(VARHDRSZ + resultlen);
136
137 res = enc->decode(dataptr, datalen, VARDATA(result));
138
139 /* Make this FATAL 'cause we've trodden on memory ... */
140 if (res > resultlen)
141 elog(FATAL, "overflow - decode estimate too small");
142
143 SET_VARSIZE(result, VARHDRSZ + res);
144
145 PG_RETURN_BYTEA_P(result);
146}
147
148
149/*
150 * HEX
151 */
152
/*
 * The hex expansion of each possible byte value (two chars per value).
 *
 * The two lowercase hex digits for byte value v are stored at offsets 2*v
 * and 2*v + 1.  The array is sized to exactly 512 bytes, so the string
 * literal's terminating NUL is not stored.
 */
static const char hextbl[512] =
"000102030405060708090a0b0c0d0e0f"
"101112131415161718191a1b1c1d1e1f"
"202122232425262728292a2b2c2d2e2f"
"303132333435363738393a3b3c3d3e3f"
"404142434445464748494a4b4c4d4e4f"
"505152535455565758595a5b5c5d5e5f"
"606162636465666768696a6b6c6d6e6f"
"707172737475767778797a7b7c7d7e7f"
"808182838485868788898a8b8c8d8e8f"
"909192939495969798999a9b9c9d9e9f"
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
173
/*
 * Reverse lookup for hex decoding, indexed by 7-bit ASCII code.
 *
 * '0'..'9' map to 0..9, and both 'A'..'F' and 'a'..'f' map to 10..15.
 * Every other entry is -1, meaning "not a hexadecimal digit".
 */
static const int8 hexlookup[128] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,   /* '0'..'9' */
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 'A'..'F' */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 'a'..'f' */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
184
185static inline uint64
186hex_encode_scalar(const char *src, size_t len, char *dst)
187{
188 const char *end = src + len;
189
190 while (src < end)
191 {
192 unsigned char usrc = *((const unsigned char *) src);
193
194 memcpy(dst, &hextbl[2 * usrc], 2);
195 src++;
196 dst += 2;
197 }
198 return (uint64) len * 2;
199}
200
201uint64
202hex_encode(const char *src, size_t len, char *dst)
203{
204#ifdef USE_NO_SIMD
205 return hex_encode_scalar(src, len, dst);
206#else
207 const uint64 tail_idx = len & ~(sizeof(Vector8) - 1);
208 uint64 i;
209
210 /*
211 * This splits the high and low nibbles of each byte into separate
212 * vectors, adds the vectors to a mask that converts the nibbles to their
213 * equivalent ASCII bytes, and interleaves those bytes back together to
214 * form the final hex-encoded string.
215 */
216 for (i = 0; i < tail_idx; i += sizeof(Vector8))
217 {
218 Vector8 srcv;
219 Vector8 lo;
220 Vector8 hi;
221 Vector8 mask;
222
223 vector8_load(&srcv, (const uint8 *) &src[i]);
224
225 lo = vector8_and(srcv, vector8_broadcast(0x0f));
226 mask = vector8_gt(lo, vector8_broadcast(0x9));
227 mask = vector8_and(mask, vector8_broadcast('a' - '0' - 10));
228 mask = vector8_add(mask, vector8_broadcast('0'));
229 lo = vector8_add(lo, mask);
230
231 hi = vector8_and(srcv, vector8_broadcast(0xf0));
232 hi = vector8_shift_right(hi, 4);
233 mask = vector8_gt(hi, vector8_broadcast(0x9));
234 mask = vector8_and(mask, vector8_broadcast('a' - '0' - 10));
235 mask = vector8_add(mask, vector8_broadcast('0'));
236 hi = vector8_add(hi, mask);
237
238 vector8_store((uint8 *) &dst[i * 2],
239 vector8_interleave_low(hi, lo));
240 vector8_store((uint8 *) &dst[i * 2 + sizeof(Vector8)],
241 vector8_interleave_high(hi, lo));
242 }
243
244 (void) hex_encode_scalar(src + i, len - i, dst + i * 2);
245
246 return (uint64) len * 2;
247#endif
248}
249
250static inline bool
251get_hex(const char *cp, char *out)
252{
253 unsigned char c = (unsigned char) *cp;
254 int res = -1;
255
256 if (c < 127)
257 res = hexlookup[c];
258
259 *out = (char) res;
260
261 return (res >= 0);
262}
263
/*
 * Decode len bytes of hexadecimal text at src into dst.
 *
 * Thin wrapper around hex_decode_safe() that passes NULL as the error
 * context.  Returns whatever hex_decode_safe() returns (the decoded length).
 * NOTE(review): with a NULL context, invalid input is presumably reported as
 * a hard error by ereturn() rather than returned softly -- confirm in elog.h.
 */
uint64
hex_decode(const char *src, size_t len, char *dst)
{
    return hex_decode_safe(src, len, dst, NULL);
}
269
270static inline uint64
271hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext)
272{
273 const char *s,
274 *srcend;
275 char v1,
276 v2,
277 *p;
278
279 srcend = src + len;
280 s = src;
281 p = dst;
282 while (s < srcend)
283 {
284 if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
285 {
286 s++;
287 continue;
288 }
289 if (!get_hex(s, &v1))
290 ereturn(escontext, 0,
291 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
292 errmsg("invalid hexadecimal digit: \"%.*s\"",
293 pg_mblen(s), s)));
294 s++;
295 if (s >= srcend)
296 ereturn(escontext, 0,
297 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
298 errmsg("invalid hexadecimal data: odd number of digits")));
299 if (!get_hex(s, &v2))
300 ereturn(escontext, 0,
301 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
302 errmsg("invalid hexadecimal digit: \"%.*s\"",
303 pg_mblen(s), s)));
304 s++;
305 *p++ = (v1 << 4) | v2;
306 }
307
308 return p - dst;
309}
310
/*
 * This helper converts each byte to its binary-equivalent nibble by
 * subtraction and combines them to form the return bytes (separated by zero
 * bytes).  Returns false if any input bytes are outside the expected ranges of
 * ASCII values.  Otherwise, returns true.
 *
 * src holds one vector of hex digit characters; *dst receives the combined
 * result.  The caller is expected to squeeze out the zero bytes afterwards
 * (hex_decode_safe() does so with vector8_pack_16()).
 */
#ifndef USE_NO_SIMD
static inline bool
hex_decode_simd_helper(const Vector8 src, Vector8 *dst)
{
    Vector8     sub;
    /* byte pattern 00,0f,00,0f,...: selects the second digit of each pair */
    Vector8     mask_hi = vector8_interleave_low(vector8_broadcast(0), vector8_broadcast(0x0f));
    /* byte pattern 0f,00,0f,00,...: selects the first digit of each pair */
    Vector8     mask_lo = vector8_interleave_low(vector8_broadcast(0x0f), vector8_broadcast(0));
    Vector8     tmp;
    bool        ret;

    /* lanes up to '9': the amount to subtract is '0' */
    tmp = vector8_gt(vector8_broadcast('9' + 1), src);
    sub = vector8_and(tmp, vector8_broadcast('0'));

    /* lanes from 'A' upwards: subtract 'A' - 10 so that 'A' becomes 10 */
    tmp = vector8_gt(src, vector8_broadcast('A' - 1));
    tmp = vector8_and(tmp, vector8_broadcast('A' - 10));
    sub = vector8_add(sub, tmp);

    /* lanes from 'a' upwards: subtract a further 'a' - 'A' */
    tmp = vector8_gt(src, vector8_broadcast('a' - 1));
    tmp = vector8_and(tmp, vector8_broadcast('a' - 'A'));
    sub = vector8_add(sub, tmp);

    /*
     * Apply the per-lane subtraction.  Any lane that does not end up below
     * 0x10 was not a valid hex digit.  NOTE(review): vector8_issub() is
     * presumably a saturating subtract -- confirm in port/simd.h.
     */
    *dst = vector8_issub(src, sub);
    ret = !vector8_has_ge(*dst, 0x10);

    /*
     * Merge each digit pair: the first digit's nibble is shifted left by 4
     * and the second digit's nibble is shifted down by 8 bits so the two are
     * OR'ed together.  NOTE(review): this assumes the shift helpers operate
     * on lanes wider than one byte -- confirm in port/simd.h.
     */
    tmp = vector8_and(*dst, mask_hi);
    tmp = vector8_shift_right(tmp, 8);
    *dst = vector8_and(*dst, mask_lo);
    *dst = vector8_shift_left(*dst, 4);
    *dst = vector8_or(*dst, tmp);
    return ret;
}
#endif                          /* ! USE_NO_SIMD */
349
350uint64
351hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
352{
353#ifdef USE_NO_SIMD
354 return hex_decode_safe_scalar(src, len, dst, escontext);
355#else
356 const uint64 tail_idx = len & ~(sizeof(Vector8) * 2 - 1);
357 uint64 i;
358 bool success = true;
359
360 /*
361 * We must process 2 vectors at a time since the output will be half the
362 * length of the input.
363 */
364 for (i = 0; i < tail_idx; i += sizeof(Vector8) * 2)
365 {
366 Vector8 srcv;
367 Vector8 dstv1;
368 Vector8 dstv2;
369
370 vector8_load(&srcv, (const uint8 *) &src[i]);
371 success &= hex_decode_simd_helper(srcv, &dstv1);
372
373 vector8_load(&srcv, (const uint8 *) &src[i + sizeof(Vector8)]);
374 success &= hex_decode_simd_helper(srcv, &dstv2);
375
376 vector8_store((uint8 *) &dst[i / 2], vector8_pack_16(dstv1, dstv2));
377 }
378
379 /*
380 * If something didn't look right in the vector path, try again in the
381 * scalar path so that we can handle it correctly.
382 */
383 if (!success)
384 i = 0;
385
386 return i / 2 + hex_decode_safe_scalar(src + i, len - i, dst + i / 2, escontext);
387#endif
388}
389
/*
 * Size of the hex encoding of srclen bytes: two characters per input byte.
 * The input data itself is not examined.
 */
static uint64
hex_enc_len(const char *src, size_t srclen)
{
    const uint64 nbytes = (uint64) srclen;

    return nbytes * 2;
}
395
/*
 * Upper bound on the size of the result of hex-decoding srclen characters:
 * one output byte per two input characters, rounded down.  The input data
 * itself is not examined.
 */
static uint64
hex_dec_len(const char *src, size_t srclen)
{
    const uint64 nchars = (uint64) srclen;

    return nchars / 2;
}
401
402/*
403 * BASE64 and BASE64URL
404 */
405
/*
 * Encoding alphabet for standard base64: the character at index v encodes
 * the 6-bit value v.  Values 62 and 63 are '+' and '/'.
 */
static const char _base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
408
/*
 * Encoding alphabet for base64url: identical to the standard alphabet
 * except that values 62 and 63 are '-' and '_'.
 */
static const char _base64url[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
411
/*
 * Reverse lookup for base64 decoding, indexed by 7-bit ASCII code.
 *
 * 'A'..'Z' map to 0..25, 'a'..'z' to 26..51, '0'..'9' to 52..61, '+' to 62
 * and '/' to 63.  Every other entry is -1 (not a base64 symbol).  The
 * base64url characters '-' and '_' have no entries of their own here.
 */
static const int8 b64lookup[128] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, /* '+' and '/' */
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, /* '0'..'9' */
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,   /* 'A'..'O' */
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 'P'..'Z' */
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 'a'..'o' */
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, /* 'p'..'z' */
};
422
423/*
424 * pg_base64_encode_internal
425 *
426 * Helper for decoding base64 or base64url. When url is passed as true the
427 * input will be encoded using base64url. len bytes in src is encoded into
428 * dst.
429 */
430static uint64
431pg_base64_encode_internal(const char *src, size_t len, char *dst, bool url)
432{
433 char *p,
434 *lend = dst + 76;
435 const char *s,
436 *end = src + len;
437 int pos = 2;
438 uint32 buf = 0;
439 const char *alphabet = url ? _base64url : _base64;
440
441 s = src;
442 p = dst;
443
444 while (s < end)
445 {
446 buf |= (unsigned char) *s << (pos << 3);
447 pos--;
448 s++;
449
450 /* write it out */
451 if (pos < 0)
452 {
453 *p++ = alphabet[(buf >> 18) & 0x3f];
454 *p++ = alphabet[(buf >> 12) & 0x3f];
455 *p++ = alphabet[(buf >> 6) & 0x3f];
456 *p++ = alphabet[buf & 0x3f];
457
458 pos = 2;
459 buf = 0;
460
461 if (!url && p >= lend)
462 {
463 *p++ = '\n';
464 lend = p + 76;
465 }
466 }
467 }
468
469 /* Handle remaining bytes in buf */
470 if (pos != 2)
471 {
472 *p++ = alphabet[(buf >> 18) & 0x3f];
473 *p++ = alphabet[(buf >> 12) & 0x3f];
474
475 if (pos == 0)
476 {
477 *p++ = alphabet[(buf >> 6) & 0x3f];
478 if (!url)
479 *p++ = '=';
480 }
481 else if (!url)
482 {
483 *p++ = '=';
484 *p++ = '=';
485 }
486 }
487
488 return p - dst;
489}
490
/*
 * Encode len bytes at src as standard base64 into dst (url = false).
 * Returns the value returned by pg_base64_encode_internal().
 */
static uint64
pg_base64_encode(const char *src, size_t len, char *dst)
{
    return pg_base64_encode_internal(src, len, dst, false);
}
496
/*
 * Encode len bytes at src as base64url into dst (url = true).
 * Returns the value returned by pg_base64_encode_internal().
 */
static uint64
pg_base64url_encode(const char *src, size_t len, char *dst)
{
    return pg_base64_encode_internal(src, len, dst, true);
}
502
503/*
504 * pg_base64_decode_internal
505 *
506 * Helper for decoding base64 or base64url. When url is passed as true the
507 * input will be assumed to be encoded using base64url.
508 */
509static uint64
510pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url)
511{
512 const char *srcend = src + len,
513 *s = src;
514 char *p = dst;
515 char c;
516 int b = 0;
517 uint32 buf = 0;
518 int pos = 0,
519 end = 0;
520
521 while (s < srcend)
522 {
523 c = *s++;
524
525 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
526 continue;
527
528 /* convert base64url to base64 */
529 if (url)
530 {
531 if (c == '-')
532 c = '+';
533 else if (c == '_')
534 c = '/';
535 }
536
537 if (c == '=')
538 {
539 /* end sequence */
540 if (!end)
541 {
542 if (pos == 2)
543 end = 1;
544 else if (pos == 3)
545 end = 2;
546 else
547 {
548 /* translator: %s is the name of an encoding scheme */
550 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
551 errmsg("unexpected \"=\" while decoding %s sequence", url ? "base64url" : "base64")));
552 }
553 }
554 b = 0;
555 }
556 else
557 {
558 b = -1;
559 if (c > 0 && c < 127)
560 b = b64lookup[(unsigned char) c];
561 if (b < 0)
562 {
563 /* translator: %s is the name of an encoding scheme */
565 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
566 errmsg("invalid symbol \"%.*s\" found while decoding %s sequence",
567 pg_mblen(s - 1), s - 1,
568 url ? "base64url" : "base64")));
569 }
570 }
571 /* add it to buffer */
572 buf = (buf << 6) + b;
573 pos++;
574 if (pos == 4)
575 {
576 *p++ = (buf >> 16) & 255;
577 if (end == 0 || end > 1)
578 *p++ = (buf >> 8) & 255;
579 if (end == 0 || end > 2)
580 *p++ = buf & 255;
581 buf = 0;
582 pos = 0;
583 }
584 }
585
586 if (pos == 2)
587 {
588 buf <<= 12;
589 *p++ = (buf >> 16) & 0xFF;
590 }
591 else if (pos == 3)
592 {
593 buf <<= 6;
594 *p++ = (buf >> 16) & 0xFF;
595 *p++ = (buf >> 8) & 0xFF;
596 }
597 else if (pos != 0)
598 {
599 /* translator: %s is the name of an encoding scheme */
601 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
602 errmsg("invalid %s end sequence", url ? "base64url" : "base64"),
603 errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
604 }
605
606 return p - dst;
607}
608
/*
 * Decode len bytes of standard base64 text at src into dst (url = false).
 * Returns the value returned by pg_base64_decode_internal().
 */
static uint64
pg_base64_decode(const char *src, size_t len, char *dst)
{
    return pg_base64_decode_internal(src, len, dst, false);
}
614
/*
 * Decode len bytes of base64url text at src into dst (url = true).
 * Returns the value returned by pg_base64_decode_internal().
 */
static uint64
pg_base64url_decode(const char *src, size_t len, char *dst)
{
    return pg_base64_decode_internal(src, len, dst, true);
}
620
/*
 * Space needed to base64-encode srclen bytes.
 *
 * Every started group of 3 input bytes becomes 4 output characters, and one
 * linefeed is allowed for per 76 output characters, i.e. per 76 * 3 / 4 = 57
 * input bytes.  The input data itself is not examined.
 */
static uint64
pg_base64_enc_len(const char *src, size_t srclen)
{
    const uint64 nbytes = (uint64) srclen;
    const uint64 symbols = (nbytes + 2) / 3 * 4;
    const uint64 linefeeds = nbytes / (76 * 3 / 4);

    return symbols + linefeeds;
}
627
/*
 * Upper bound on the size of the result of base64-decoding srclen
 * characters: three output bytes per four input characters, rounded down.
 * The input data itself is not examined.
 */
static uint64
pg_base64_dec_len(const char *src, size_t srclen)
{
    const uint64 nchars = (uint64) srclen;

    return nchars * 3 / 4;
}
633
/*
 * Space needed to base64url-encode srclen bytes.
 *
 * base64url output carries neither padding nor linefeeds, so a trailing
 * partial group produces only 2 or 3 characters.  Rounding every started
 * group up to 4 characters is therefore a slight overestimate, which is
 * allowed for the _len functions.
 *
 * The arithmetic is done in uint64, like pg_base64_enc_len(), so that it
 * does not depend on the width of size_t.  The input data itself is not
 * examined.
 */
static uint64
pg_base64url_enc_len(const char *src, size_t srclen)
{
    return ((uint64) srclen + 2) / 3 * 4;
}
643
/*
 * Upper bound on the size of the result of base64url-decoding srclen
 * characters.
 *
 * Each 4 characters of input produce at most 3 bytes of output.  Because
 * base64url input may lack padding, the length is first rounded up to the
 * next multiple of 4.
 *
 * The arithmetic is done in uint64 so that multiplying by 3 cannot wrap
 * around where size_t is only 32 bits wide.  The input data itself is not
 * examined.
 */
static uint64
pg_base64url_dec_len(const char *src, size_t srclen)
{
    uint64      adjusted_len = (uint64) srclen;

    if (adjusted_len % 4 != 0)
        adjusted_len += 4 - (adjusted_len % 4);

    return (adjusted_len * 3) / 4;
}
659
660/*
661 * Escape
662 * Minimally escape bytea to text.
663 * De-escape text to bytea.
664 *
665 * We must escape zero bytes and high-bit-set bytes to avoid generating
666 * text that might be invalid in the current encoding, or that might
667 * change to something else if passed through an encoding conversion
668 * (leading to failing to de-escape to the original bytea value).
669 * Also of course backslash itself has to be escaped.
670 *
671 * De-escaping processes \\ and any \### octal
672 */
673
/* VAL: numeric value of the ASCII digit character CH */
#define VAL(CH)         ((CH) - '0')
/* DIG: ASCII digit character for the small integer VAL */
#define DIG(VAL)        ((VAL) + '0')
676
677static uint64
678esc_encode(const char *src, size_t srclen, char *dst)
679{
680 const char *end = src + srclen;
681 char *rp = dst;
682 uint64 len = 0;
683
684 while (src < end)
685 {
686 unsigned char c = (unsigned char) *src;
687
688 if (c == '\0' || IS_HIGHBIT_SET(c))
689 {
690 rp[0] = '\\';
691 rp[1] = DIG(c >> 6);
692 rp[2] = DIG((c >> 3) & 7);
693 rp[3] = DIG(c & 7);
694 rp += 4;
695 len += 4;
696 }
697 else if (c == '\\')
698 {
699 rp[0] = '\\';
700 rp[1] = '\\';
701 rp += 2;
702 len += 2;
703 }
704 else
705 {
706 *rp++ = c;
707 len++;
708 }
709
710 src++;
711 }
712
713 return len;
714}
715
716static uint64
717esc_decode(const char *src, size_t srclen, char *dst)
718{
719 const char *end = src + srclen;
720 char *rp = dst;
721 uint64 len = 0;
722
723 while (src < end)
724 {
725 if (src[0] != '\\')
726 *rp++ = *src++;
727 else if (src + 3 < end &&
728 (src[1] >= '0' && src[1] <= '3') &&
729 (src[2] >= '0' && src[2] <= '7') &&
730 (src[3] >= '0' && src[3] <= '7'))
731 {
732 int val;
733
734 val = VAL(src[1]);
735 val <<= 3;
736 val += VAL(src[2]);
737 val <<= 3;
738 *rp++ = val + VAL(src[3]);
739 src += 4;
740 }
741 else if (src + 1 < end &&
742 (src[1] == '\\'))
743 {
744 *rp++ = '\\';
745 src += 2;
746 }
747 else
748 {
749 /*
750 * One backslash, not followed by ### valid octal. Should never
751 * get here, since esc_dec_len does same check.
752 */
754 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
755 errmsg("invalid input syntax for type %s", "bytea")));
756 }
757
758 len++;
759 }
760
761 return len;
762}
763
764static uint64
765esc_enc_len(const char *src, size_t srclen)
766{
767 const char *end = src + srclen;
768 uint64 len = 0;
769
770 while (src < end)
771 {
772 if (*src == '\0' || IS_HIGHBIT_SET(*src))
773 len += 4;
774 else if (*src == '\\')
775 len += 2;
776 else
777 len++;
778
779 src++;
780 }
781
782 return len;
783}
784
785static uint64
786esc_dec_len(const char *src, size_t srclen)
787{
788 const char *end = src + srclen;
789 uint64 len = 0;
790
791 while (src < end)
792 {
793 if (src[0] != '\\')
794 src++;
795 else if (src + 3 < end &&
796 (src[1] >= '0' && src[1] <= '3') &&
797 (src[2] >= '0' && src[2] <= '7') &&
798 (src[3] >= '0' && src[3] <= '7'))
799 {
800 /*
801 * backslash + valid octal
802 */
803 src += 4;
804 }
805 else if (src + 1 < end &&
806 (src[1] == '\\'))
807 {
808 /*
809 * two backslashes = backslash
810 */
811 src += 2;
812 }
813 else
814 {
815 /*
816 * one backslash, not followed by ### valid octal
817 */
819 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
820 errmsg("invalid input syntax for type %s", "bytea")));
821 }
822
823 len++;
824 }
825 return len;
826}
827
828/*
829 * Common
830 */
831
832static const struct
833{
834 const char *name;
836} enclist[] =
837
838{
839 {
840 "hex",
841 {
843 }
844 },
845 {
846 "base64",
847 {
849 }
850 },
851 {
852 "base64url",
853 {
855 }
856 },
857 {
858 "escape",
859 {
861 }
862 },
863 {
864 NULL,
865 {
866 NULL, NULL, NULL, NULL
867 }
868 }
870
871static const struct pg_encoding *
873{
874 int i;
875
876 for (i = 0; enclist[i].name; i++)
877 if (pg_strcasecmp(enclist[i].name, name) == 0)
878 return &enclist[i].enc;
879
880 return NULL;
881}
#define TextDatumGetCString(d)
Definition: builtins.h:98
uint8_t uint8
Definition: c.h:541
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1159
#define VARHDRSZ
Definition: c.h:702
int8_t int8
Definition: c.h:537
uint64_t uint64
Definition: c.h:544
uint32_t uint32
Definition: c.h:543
int errhint(const char *fmt,...)
Definition: elog.c:1330
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ereturn(context, dummy_value,...)
Definition: elog.h:278
#define FATAL
Definition: elog.h:41
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
static uint64 pg_base64_decode(const char *src, size_t len, char *dst)
Definition: encode.c:610
static uint64 pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url)
Definition: encode.c:510
#define DIG(VAL)
Definition: encode.c:675
static bool get_hex(const char *cp, char *out)
Definition: encode.c:251
static uint64 hex_dec_len(const char *src, size_t srclen)
Definition: encode.c:397
static const struct pg_encoding * pg_find_encoding(const char *name)
Definition: encode.c:872
static uint64 pg_base64_encode(const char *src, size_t len, char *dst)
Definition: encode.c:492
static uint64 esc_encode(const char *src, size_t srclen, char *dst)
Definition: encode.c:678
static uint64 hex_enc_len(const char *src, size_t srclen)
Definition: encode.c:391
Datum binary_decode(PG_FUNCTION_ARGS)
Definition: encode.c:99
static const struct @24 enclist[]
static const char hextbl[512]
Definition: encode.c:156
static uint64 pg_base64url_enc_len(const char *src, size_t srclen)
Definition: encode.c:635
static uint64 pg_base64url_decode(const char *src, size_t len, char *dst)
Definition: encode.c:616
static const char _base64url[]
Definition: encode.c:409
const char * name
Definition: encode.c:834
uint64 hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
Definition: encode.c:351
static uint64 hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext)
Definition: encode.c:271
static uint64 hex_encode_scalar(const char *src, size_t len, char *dst)
Definition: encode.c:186
uint64 hex_encode(const char *src, size_t len, char *dst)
Definition: encode.c:202
struct pg_encoding enc
Definition: encode.c:835
static uint64 esc_enc_len(const char *src, size_t srclen)
Definition: encode.c:765
static uint64 pg_base64url_encode(const char *src, size_t len, char *dst)
Definition: encode.c:498
static uint64 pg_base64url_dec_len(const char *src, size_t srclen)
Definition: encode.c:645
static uint64 pg_base64_enc_len(const char *src, size_t srclen)
Definition: encode.c:622
static uint64 pg_base64_encode_internal(const char *src, size_t len, char *dst, bool url)
Definition: encode.c:431
static const char _base64[]
Definition: encode.c:406
static uint64 esc_decode(const char *src, size_t srclen, char *dst)
Definition: encode.c:717
static uint64 esc_dec_len(const char *src, size_t srclen)
Definition: encode.c:786
Datum binary_encode(PG_FUNCTION_ARGS)
Definition: encode.c:49
#define VAL(CH)
Definition: encode.c:674
uint64 hex_decode(const char *src, size_t len, char *dst)
Definition: encode.c:265
static const int8 b64lookup[128]
Definition: encode.c:412
static const int8 hexlookup[128]
Definition: encode.c:174
static uint64 pg_base64_dec_len(const char *src, size_t srclen)
Definition: encode.c:629
#define MaxAllocSize
Definition: fe_memutils.h:22
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:372
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
long val
Definition: informix.c:689
static bool success
Definition: initdb.c:187
int b
Definition: isn.c:74
int i
Definition: isn.c:77
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1024
void * palloc(Size size)
Definition: mcxt.c:1365
const void size_t len
const void * data
static char * buf
Definition: pg_test_fsync.c:72
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
uint64_t Datum
Definition: postgres.h:70
char * c
static Vector8 vector8_broadcast(const uint8 c)
Definition: simd.h:149
static void vector8_load(Vector8 *v, const uint8 *s)
Definition: simd.h:107
static Vector8 vector8_or(const Vector8 v1, const Vector8 v2)
Definition: simd.h:373
uint64 Vector8
Definition: simd.h:60
Definition: nodes.h:135
uint64(* encode_len)(const char *data, size_t dlen)
Definition: encode.c:36
uint64(* decode_len)(const char *data, size_t dlen)
Definition: encode.c:37
uint64(* decode)(const char *data, size_t dlen, char *res)
Definition: encode.c:39
uint64(* encode)(const char *data, size_t dlen, char *res)
Definition: encode.c:38
Definition: c.h:697
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition: varatt.h:472
static char * VARDATA(const void *PTR)
Definition: varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition: varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition: varatt.h:432