88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.264 2006/05/21 20:05:19 tgl Exp $
11+ * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.265 2006/05/25 18:42:17 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -243,8 +243,8 @@ static Datum CopyReadBinaryAttribute(CopyState cstate,
243243 int column_no , FmgrInfo * flinfo ,
244244 Oid typioparam , int32 typmod ,
245245 bool * isnull );
246- static void CopyAttributeOutText (CopyState cstate , char * server_string );
247- static void CopyAttributeOutCSV (CopyState cstate , char * server_string ,
246+ static void CopyAttributeOutText (CopyState cstate , char * string );
247+ static void CopyAttributeOutCSV (CopyState cstate , char * string ,
248248 bool use_quote , bool single_attr );
249249static List * CopyGetAttnums (Relation rel , List * attnamelist );
250250static char * limit_printout_length (const char * str );
@@ -2884,91 +2884,123 @@ CopyReadBinaryAttribute(CopyState cstate,
28842884/*
28852885 * Send text representation of one attribute, with conversion and escaping
28862886 */
2887+ #define DUMPSOFAR() \
2888+ do { \
2889+ if (ptr > start) \
2890+ CopySendData(cstate, start, ptr - start); \
2891+ } while (0)
2892+
28872893static void
2888- CopyAttributeOutText (CopyState cstate , char * server_string )
2894+ CopyAttributeOutText (CopyState cstate , char * string )
28892895{
2890- char * string ;
2896+ char * ptr ;
2897+ char * start ;
28912898 char c ;
28922899 char delimc = cstate -> delim [0 ];
2893- int mblen ;
28942900
28952901 if (cstate -> need_transcoding )
2896- string = pg_server_to_client (server_string , strlen (server_string ));
2902+ ptr = pg_server_to_client (string , strlen (string ));
28972903 else
2898- string = server_string ;
2904+ ptr = string ;
28992905
2900- for (; (c = * string ) != '\0' ; string += mblen )
2906+ /*
2907+ * We have to grovel through the string searching for control characters
2908+ * and instances of the delimiter character. In most cases, though, these
2909+ * are infrequent. To avoid overhead from calling CopySendData once per
2910+ * character, we dump out all characters between replaceable characters
2911+ * in a single call. The loop invariant is that the data from "start"
2912+ * to "ptr" can be sent literally, but hasn't yet been.
2913+ */
2914+ start = ptr ;
2915+ while ((c = * ptr ) != '\0' )
29012916 {
2902- mblen = 1 ;
2903-
29042917 switch (c )
29052918 {
29062919 case '\b' :
2920+ DUMPSOFAR ();
29072921 CopySendString (cstate , "\\b" );
2922+ start = ++ ptr ;
29082923 break ;
29092924 case '\f' :
2925+ DUMPSOFAR ();
29102926 CopySendString (cstate , "\\f" );
2927+ start = ++ ptr ;
29112928 break ;
29122929 case '\n' :
2930+ DUMPSOFAR ();
29132931 CopySendString (cstate , "\\n" );
2932+ start = ++ ptr ;
29142933 break ;
29152934 case '\r' :
2935+ DUMPSOFAR ();
29162936 CopySendString (cstate , "\\r" );
2937+ start = ++ ptr ;
29172938 break ;
29182939 case '\t' :
2940+ DUMPSOFAR ();
29192941 CopySendString (cstate , "\\t" );
2942+ start = ++ ptr ;
29202943 break ;
29212944 case '\v' :
2945+ DUMPSOFAR ();
29222946 CopySendString (cstate , "\\v" );
2947+ start = ++ ptr ;
29232948 break ;
29242949 case '\\' :
2950+ DUMPSOFAR ();
29252951 CopySendString (cstate , "\\\\" );
2952+ start = ++ ptr ;
29262953 break ;
29272954 default :
29282955 if (c == delimc )
2956+ {
2957+ DUMPSOFAR ();
29292958 CopySendChar (cstate , '\\' );
2959+ start = ptr ; /* we include char in next run */
2960+ }
29302961
29312962 /*
29322963 * We can skip pg_encoding_mblen() overhead when encoding is
29332964 * safe, because in valid backend encodings, extra bytes of a
29342965 * multibyte character never look like ASCII.
29352966 */
2936- if (cstate -> encoding_embeds_ascii && IS_HIGHBIT_SET (c ))
2937- mblen = pg_encoding_mblen (cstate -> client_encoding , string );
2938- CopySendData (cstate , string , mblen );
2967+ if (IS_HIGHBIT_SET (c ) && cstate -> encoding_embeds_ascii )
2968+ ptr += pg_encoding_mblen (cstate -> client_encoding , ptr );
2969+ else
2970+ ptr ++ ;
29392971 break ;
29402972 }
29412973 }
2974+
2975+ DUMPSOFAR ();
29422976}
29432977
29442978/*
2945- * Send CSV representation of one attribute, with conversion and
2946- * CSV type escaping
2979+ * Send text representation of one attribute, with conversion and
2980+ * CSV-style escaping
29472981 */
29482982static void
2949- CopyAttributeOutCSV (CopyState cstate , char * server_string ,
2983+ CopyAttributeOutCSV (CopyState cstate , char * string ,
29502984 bool use_quote , bool single_attr )
29512985{
2952- char * string ;
2986+ char * ptr ;
2987+ char * start ;
29532988 char c ;
29542989 char delimc = cstate -> delim [0 ];
29552990 char quotec = cstate -> quote [0 ];
29562991 char escapec = cstate -> escape [0 ];
2957- char * tstring ;
2958- int mblen ;
29592992
2960- /* force quoting if it matches null_print */
2961- if (!use_quote && strcmp (server_string , cstate -> null_print ) == 0 )
2993+ /* force quoting if it matches null_print (before conversion!) */
2994+ if (!use_quote && strcmp (string , cstate -> null_print ) == 0 )
29622995 use_quote = true;
29632996
29642997 if (cstate -> need_transcoding )
2965- string = pg_server_to_client (server_string , strlen (server_string ));
2998+ ptr = pg_server_to_client (string , strlen (string ));
29662999 else
2967- string = server_string ;
3000+ ptr = string ;
29683001
29693002 /*
2970- * have to run through the string twice, first time to see if it needs
2971- * quoting, second to actually send it
3003+ * Make a preliminary pass to discover if it needs quoting
29723004 */
29733005 if (!use_quote )
29743006 {
@@ -2977,41 +3009,57 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string,
29773009 * alone on a line so it is not interpreted as the end-of-data
29783010 * marker.
29793011 */
2980- if (single_attr && strcmp (string , "\\." ) == 0 )
3012+ if (single_attr && strcmp (ptr , "\\." ) == 0 )
29813013 use_quote = true;
29823014 else
29833015 {
2984- for (tstring = string ; (c = * tstring ) != '\0' ; tstring += mblen )
3016+ char * tptr = ptr ;
3017+
3018+ while ((c = * tptr ) != '\0' )
29853019 {
29863020 if (c == delimc || c == quotec || c == '\n' || c == '\r' )
29873021 {
29883022 use_quote = true;
29893023 break ;
29903024 }
2991- if (cstate -> encoding_embeds_ascii && IS_HIGHBIT_SET (c ))
2992- mblen = pg_encoding_mblen (cstate -> client_encoding , tstring );
3025+ if (IS_HIGHBIT_SET (c ) && cstate -> encoding_embeds_ascii )
3026+ tptr += pg_encoding_mblen (cstate -> client_encoding , tptr );
29933027 else
2994- mblen = 1 ;
3028+ tptr ++ ;
29953029 }
29963030 }
29973031 }
29983032
29993033 if (use_quote )
3034+ {
30003035 CopySendChar (cstate , quotec );
30013036
3002- for (; (c = * string ) != '\0' ; string += mblen )
3003- {
3004- if (use_quote && (c == quotec || c == escapec ))
3005- CopySendChar (cstate , escapec );
3006- if (cstate -> encoding_embeds_ascii && IS_HIGHBIT_SET (c ))
3007- mblen = pg_encoding_mblen (cstate -> client_encoding , string );
3008- else
3009- mblen = 1 ;
3010- CopySendData (cstate , string , mblen );
3011- }
3037+ /*
3038+ * We adopt the same optimization strategy as in CopyAttributeOutText
3039+ */
3040+ start = ptr ;
3041+ while ((c = * ptr ) != '\0' )
3042+ {
3043+ if (c == quotec || c == escapec )
3044+ {
3045+ DUMPSOFAR ();
3046+ CopySendChar (cstate , escapec );
3047+ start = ptr ; /* we include char in next run */
3048+ }
3049+ if (IS_HIGHBIT_SET (c ) && cstate -> encoding_embeds_ascii )
3050+ ptr += pg_encoding_mblen (cstate -> client_encoding , ptr );
3051+ else
3052+ ptr ++ ;
3053+ }
3054+ DUMPSOFAR ();
30123055
3013- if (use_quote )
30143056 CopySendChar (cstate , quotec );
3057+ }
3058+ else
3059+ {
3060+ /* If it doesn't need quoting, we can just dump it as-is */
3061+ CopySendString (cstate , ptr );
3062+ }
30153063}
30163064
30173065/*
0 commit comments