99 * (UTF8 is a special case because we can use a much more efficient version
1010 * of NextChar than can be used for general multi-byte encodings.)
1111 *
12- * Before the inclusion, we need to define following macros:
12+ * Before the inclusion, we need to define the following macros:
1313 *
1414 * NextChar
1515 * MatchText - to name of function wanted
1919 * Copyright (c) 1996-2009, PostgreSQL Global Development Group
2020 *
2121 * IDENTIFICATION
22- * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.24 2009/01/01 17:23:49 momjian Exp $
22+ * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.25 2009/05/24 18:10:37 tgl Exp $
2323 *
2424 *-------------------------------------------------------------------------
2525 */
2626
2727/*
28- ** Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
29- ** Rich $alz is now <rsalz@bbn.com>.
30- ** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
31- **
32- ** This code was shamelessly stolen from the "pql" code by myself and
33- ** slightly modified :)
34- **
35- ** All references to the word "star" were replaced by "percent"
36- ** All references to the word "wild" were replaced by "like"
37- **
38- ** All the nice shell RE matching stuff was replaced by just "_" and "%"
39- **
40- ** As I don't have a copy of the SQL standard handy I wasn't sure whether
41- ** to leave in the '\' escape character handling.
42- **
43- ** Keith Parks. <keith@mtcc.demon.co.uk>
44- **
45- ** SQL92 lets you specify the escape character by saying
46- ** LIKE <pattern> ESCAPE <escape character>. We are a small operation
47- ** so we force you to use '\'. - ay 7/95
48- **
49- ** Now we have the like_escape() function that converts patterns with
50- ** any specified escape character (or none at all) to the internal
51- ** default escape character, which is still '\'. - tgl 9/2000
52- **
53- ** The code is rewritten to avoid requiring null-terminated strings,
54- ** which in turn allows us to leave out some memcpy() operations.
55- ** This code should be faster and take less memory, but no promises...
56- ** - thomas 2000-08-06
57- **
58- */
28+ * Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
29+ * Rich $alz is now <rsalz@bbn.com>.
30+ * Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
31+ *
32+ * This code was shamelessly stolen from the "pql" code by myself and
33+ * slightly modified :)
34+ *
35+ * All references to the word "star" were replaced by "percent"
36+ * All references to the word "wild" were replaced by "like"
37+ *
38+ * All the nice shell RE matching stuff was replaced by just "_" and "%"
39+ *
40+ * As I don't have a copy of the SQL standard handy I wasn't sure whether
41+ * to leave in the '\' escape character handling.
42+ *
43+ * Keith Parks. <keith@mtcc.demon.co.uk>
44+ *
45+ * SQL92 lets you specify the escape character by saying
46+ * LIKE <pattern> ESCAPE <escape character>. We are a small operation
47+ * so we force you to use '\'. - ay 7/95
48+ *
49+ * Now we have the like_escape() function that converts patterns with
50+ * any specified escape character (or none at all) to the internal
51+ * default escape character, which is still '\'. - tgl 9/2000
52+ *
53+ * The code is rewritten to avoid requiring null-terminated strings,
54+ * which in turn allows us to leave out some memcpy() operations.
55+ * This code should be faster and take less memory, but no promises...
56+ * - thomas 2000-08-06
57+ */
5958
6059
6160/*--------------------
62- * Match text and p , return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
61+ * Match text and pattern , return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
6362 *
6463 * LIKE_TRUE: they match
6564 * LIKE_FALSE: they don't match
@@ -80,19 +79,18 @@ static int
8079MatchText (char * t , int tlen , char * p , int plen )
8180{
8281 /* Fast path for match-everything pattern */
83- if (( plen == 1 ) && ( * p == '%' ) )
82+ if (plen == 1 && * p == '%' )
8483 return LIKE_TRUE ;
8584
8685 /*
8786 * In this loop, we advance by char when matching wildcards (and thus on
8887 * recursive entry to this function we are properly char-synced). On other
8988 * occasions it is safe to advance by byte, as the text and pattern will
90- * be in lockstep. This allows us to perform all comparisons between the
89+ * be in lockstep. This allows us to perform all comparisons between the
9190 * text and pattern on a byte by byte basis, even for multi-byte
9291 * encodings.
9392 */
94-
95- while ((tlen > 0 ) && (plen > 0 ))
93+ while (tlen > 0 && plen > 0 )
9694 {
9795 if (* p == '\\' )
9896 {
@@ -116,7 +114,7 @@ MatchText(char *t, int tlen, char *p, int plen)
116114
117115 /* %% is the same as % according to the SQL standard */
118116 /* Advance past all %'s */
119- while (( plen > 0 ) && ( * p == '%' ) )
117+ while (plen > 0 && * p == '%' )
120118 NextByte (p , plen );
121119 /* Trailing percent matches everything. */
122120 if (plen <= 0 )
@@ -127,22 +125,24 @@ MatchText(char *t, int tlen, char *p, int plen)
127125 * rest of the pattern.
128126 */
129127 if (* p == '_' )
130-
131128 {
132129 /* %_ is the same as _% - avoid matching _ repeatedly */
133130
134- NextChar (t , tlen );
135- NextByte (p , plen );
136-
137- if (tlen <= 0 )
138- {
139- return (plen <= 0 ) ? LIKE_TRUE : LIKE_ABORT ;
140- }
141- else if (plen <= 0 )
131+ do
142132 {
143- return LIKE_FALSE ;
144- }
145-
133+ NextChar (t , tlen );
134+ NextByte (p , plen );
135+ } while (tlen > 0 && plen > 0 && * p == '_' );
136+
137+ /*
138+ * If we are at the end of the pattern, succeed: % followed
139+ * by n _'s matches any string of at least n characters, and
140+ * we have now found there are at least n characters.
141+ */
142+ if (plen <= 0 )
143+ return LIKE_TRUE ;
144+
145+ /* Look for a place that matches the rest of the pattern */
146146 while (tlen > 0 )
147147 {
148148 int matched = MatchText (t , tlen , p , plen );
@@ -155,7 +155,6 @@ MatchText(char *t, int tlen, char *p, int plen)
155155 }
156156 else
157157 {
158-
159158 char firstpat = TCHAR (* p );
160159
161160 if (* p == '\\' )
@@ -180,7 +179,6 @@ MatchText(char *t, int tlen, char *p, int plen)
180179 }
181180
182181 NextChar (t , tlen );
183-
184182 }
185183 }
186184
@@ -192,20 +190,20 @@ MatchText(char *t, int tlen, char *p, int plen)
192190 }
193191 else if (* p == '_' )
194192 {
193+ /* _ matches any single character, and we know there is one */
195194 NextChar (t , tlen );
196195 NextByte (p , plen );
197196 continue ;
198197 }
199- else if (TCHAR (* t ) != TCHAR (* p ))
198+ else if (TCHAR (* p ) != TCHAR (* t ))
200199 {
201- /*
202- * Not the single-character wildcard and no explicit match? Then
203- * time to quit...
204- */
200+ /* non-wildcard pattern char fails to match text char */
205201 return LIKE_FALSE ;
206202 }
207203
208204 /*
205+ * Pattern and text match, so advance.
206+ *
209207 * It is safe to use NextByte instead of NextChar here, even for
210208 * multi-byte character sets, because we are not following immediately
211209 * after a wildcard character. If we are in the middle of a multibyte
@@ -222,9 +220,8 @@ MatchText(char *t, int tlen, char *p, int plen)
222220 if (tlen > 0 )
223221 return LIKE_FALSE ; /* end of pattern, but not of text */
224222
225- /* End of input string. Do we have matching pattern remaining? */
226- while ((plen > 0 ) && (* p == '%' )) /* allow multiple %'s at end of
227- * pattern */
223+ /* End of text string. Do we have matching pattern remaining? */
224+ while (plen > 0 && * p == '%' ) /* allow multiple %'s at end of pattern */
228225 NextByte (p , plen );
229226
230227 if (plen <= 0 )
@@ -354,5 +351,4 @@ do_like_escape(text *pat, text *esc)
354351
355352#ifdef MATCH_LOWER
356353#undef MATCH_LOWER
357-
358354#endif
0 commit comments