99 *
1010 *
1111 * IDENTIFICATION
12- * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.63 2000/01/26 05:56:43 momjian Exp $
12+ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.64 2000/02/19 04:17:25 tgl Exp $
1313 *
1414 *-------------------------------------------------------------------------
1515 */
@@ -41,15 +41,19 @@ static char *parseCh;
4141
4242/* set up my input handler --- need one flavor for flex, one for lex */
4343#if defined(FLEX_SCANNER)
44+
4445#define YY_NO_UNPUT
4546static int myinput (char * buf, int max);
4647#undef YY_INPUT
4748#define YY_INPUT (buf,result,max ) {result = myinput (buf,max);}
48- #else
49+
50+ #else /* !FLEX_SCANNER */
51+
4952#undef input
5053int input ();
5154#undef unput
5255void unput (char );
56+
5357#endif /* FLEX_SCANNER */
5458
5559extern YYSTYPE yylval;
@@ -68,27 +72,22 @@ static int literalalloc; /* current allocated buffer size */
6872static void addlit (char *ytext, int yleng);
6973
7074%}
71- /* OK, here is a short description of lex/flex rules behavior.
75+ /*
76+ * OK, here is a short description of lex/flex rules behavior.
7277 * The longest pattern which matches an input string is always chosen.
7378 * For equal-length patterns, the first occurring in the rules list is chosen.
74- * INITIAL is the starting condition, to which all non-conditional rules apply.
75- * When in an exclusive condition, only those rules defined for that condition apply.
79+ * INITIAL is the starting state, to which all non-conditional rules apply.
80+ * Exclusive states change parsing rules while the state is active. When in
81+ * an exclusive state, only those rules defined for that state apply.
7682 *
77- * Exclusive states change parsing rules while the state is active.
78- * There are exclusive states for quoted strings, extended comments,
79- * and to eliminate parsing troubles for numeric strings.
83+ * We use exclusive states for quoted strings, extended comments,
84+ * and to eliminate parsing troubles for numeric strings.
8085 * Exclusive states:
8186 * <xb> binary numeric string - thomas 1997-11-16
8287 * <xc> extended C-style comments - tgl 1997-07-12
8388 * <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
8489 * <xh> hexadecimal numeric string - thomas 1997-11-16
8590 * <xq> quoted strings - tgl 1997-07-30
86- *
87- * The "extended comment" syntax closely resembles allowable operator syntax.
88- * So, when in condition <xc>, only strings which would terminate the
89- * "extended comment" trigger any action other than "ignore".
90- * Be sure to match _any_ candidate comment, including those with appended
91- * operator-like symbols. - thomas 1997-07-14
9291 */
9392
9493%x xb
@@ -101,46 +100,58 @@ static void addlit(char *ytext, int yleng);
101100 */
102101xbstart [bB ]{quote }
103102xbstop {quote }
104- xbinside [^ ' ]*
105- xbcat {quote }{space } * \n { space } * {quote }
103+ xbinside [^ ' ]+
104+ xbcat {quote }{whitespace_with_newline } {quote }
106105
107106/* Hexadecimal number
108107 */
109108xhstart [xX ]{quote }
110109xhstop {quote }
111- xhinside [^ ' ]*
112- xhcat {quote }{space } * \n { space } * {quote }
110+ xhinside [^ ' ]+
111+ xhcat {quote }{whitespace_with_newline } {quote }
113112
114113/* Extended quote
115114 * xqdouble implements SQL92 embedded quote
116115 * xqcat allows strings to cross input lines
117116 * Note: reduction of '' and \ sequences to output text is done in scanstr(),
118- * not by rules here.
117+ * not by rules here. But we do get rid of xqcat sequences here.
119118 */
120119quote '
121120xqstart {quote }
122121xqstop {quote }
123122xqdouble {quote }{quote }
124- xqinside [^ \\ ' ]*
123+ xqinside [^ \\ ' ]+
125124xqliteral [\\ ](. | \n )
126- xqcat {quote }{space } * \n { space } * {quote }
125+ xqcat {quote }{whitespace_with_newline } {quote }
127126
128127/* Delimited quote
129128 * Allows embedded spaces and other special characters into identifiers.
130129 */
131130dquote \"
132131xdstart {dquote }
133132xdstop {dquote }
134- xdinside [^ " ]*
133+ xdinside [^ " ]+
135134
136- /* Comments
135+ /* C-style comments
137136 * Ignored by the scanner and parser.
137+ *
138+ * The "extended comment" syntax closely resembles allowable operator syntax.
139+ * The tricky part here is to get lex to recognize a string starting with
140+ * slash-star as a comment, when interpreting it as an operator would produce
141+ * a longer match --- remember lex will prefer a longer match! So, we have
142+ * to provide a special rule for xcline (a complete comment that could
143+ * otherwise look like an operator), as well as append {op_and_self}* to
144+ * xcstart so that it matches at least as much as {operator} would.
145+ * Then the tie-breaker (first matching rule of same length) wins.
146+ * There is still a problem if someone writes, e.g., slash-star-star-slash-plus.
147+ * It'll be taken as an xcstart, rather than xcline and an operator as one
148+ * could wish. I don't see any way around that given lex's behavior;
149+ * that someone will just have to write a space after the comment.
138150 */
139- xcline [\/ ][\* ]. * [\* ][\/ ]{space }* \n *
140- xcstart [\/ ][\* ]{op_and_self }*
141- xcstop {op_and_self }* [\* ][\/ ]({space }* | \n )
142- xcinside [^ * ]*
143- xcstar [^ / ]
151+ xcline \/\* {op_and_self }* \*\/
152+ xcstart \/\* {op_and_self }*
153+ xcstop \* + \/
154+ xcinside ([^ * ]+ )| (\* + [^ / ])
144155
145156digit [0 -9 ]
146157letter [\200 -\377 _A -Za -z ]
@@ -161,13 +172,44 @@ operator {op_and_self}+
161172
162173integer {digit }+
163174decimal (({digit }* \. {digit }+ )| ({digit }+ \. {digit }* ))
164- real ((({digit }* \. {digit }+ )| ({digit }+ \. {digit }* )| ({digit }+ ))([Ee ][-+ ]? {digit }+ ))
175+ real ((({digit }* \. {digit }+ )| ({digit }+ \. {digit }* )| ({digit }+ ))([Ee ][-+ ]? {digit }+ ))
165176
166177param \$ {integer }
167178
168- comment (" --" | " //" ). *
179+ /*
180+ * In order to make the world safe for Windows and Mac clients as well as
181+ * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
182+ * sequence will be seen as two successive newlines, but that doesn't cause
183+ * any problems. SQL92-style comments, which start with -- and extend to the
184+ * next newline, are treated as equivalent to a single whitespace character.
185+ *
186+ * NOTE a fine point: if there is no newline following --, we will absorb
187+ * everything to the end of the input as a comment. This is correct. Older
188+ * versions of Postgres failed to recognize -- as a comment if the input
189+ * did not end with a newline.
190+ *
191+ * XXX perhaps \f (formfeed) should be treated as a newline as well?
192+ */
169193
170194space [ \t\n\r\f ]
195+ horiz_space [ \t\f ]
196+ newline [\n\r ]
197+ non_newline [^ \n\r ]
198+
199+ comment ((" --" | " //" ){non_newline }* )
200+
201+ whitespace ({space }| {comment })
202+
203+ /*
204+ * SQL92 requires at least one newline in the whitespace separating
205+ * string literals that are to be concatenated. Silly, but who are we
206+ * to argue? Note that {whitespace_with_newline} should not have * after
207+ * it, whereas {whitespace} should generally have a * after it...
208+ */
209+
210+ horiz_whitespace ({horiz_space }| {comment })
211+ whitespace_with_newline ({horiz_whitespace }* {newline }{whitespace }* )
212+
171213other .
172214
173215/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
@@ -181,14 +223,16 @@ other .
181223 * of escaped-quote "\'".
182224 * Other embedded escaped characters are matched explicitly and the leading
183225 * backslash is dropped from the string. - thomas 1997-09-24
226+ * Note that xcline must appear before xcstart, which must appear before
227+ * operator, as explained above! Also whitespace (comment) must appear
228+ * before operator.
184229 */
185230
186231%%
187- {comment } { /* ignore */ }
232+ {whitespace } { /* ignore */ }
188233
189234{xcline } { /* ignore */ }
190235
191- <xc >{xcstar } |
192236{xcstart } { BEGIN (xc); }
193237
194238<xc >{xcstop } { BEGIN (INITIAL); }
@@ -216,6 +260,7 @@ other .
216260 }
217261<xh >{xhcat } |
218262<xb >{xbcat } {
263+ /* ignore */
219264 }
220265
221266{xhstart } {
@@ -249,6 +294,7 @@ other .
249294 addlit (yytext, yyleng);
250295 }
251296<xq >{xqcat } {
297+ /* ignore */
252298 }
253299
254300
@@ -270,18 +316,18 @@ other .
270316{self } { return yytext[0 ]; }
271317
272318{operator } {
273- if (strcmp ((char *)yytext," !=" ) == 0 )
274- yylval.str = pstrdup (" <>" ); /* compatability */
319+ if (strcmp ((char *)yytext, " !=" ) == 0 )
320+ yylval.str = pstrdup (" <>" ); /* compatibility */
275321 else
276322 yylval.str = pstrdup ((char *)yytext);
277323 return Op;
278324 }
325+
279326{param } {
280327 yylval.ival = atoi ((char *)&yytext[1 ]);
281328 return PARAM;
282329 }
283330
284-
285331{integer } {
286332 char * endptr;
287333
@@ -354,7 +400,6 @@ other .
354400 return IDENT;
355401 }
356402 }
357- {space } { /* ignore */ }
358403
359404{other } { return yytext[0 ]; }
360405
0 commit comments