@@ -43,7 +43,7 @@ static int freev(struct vars *, int);
4343static void makesearch (struct vars * , struct nfa * );
4444static struct subre * parse (struct vars * , int , int , struct state * , struct state * );
4545static struct subre * parsebranch (struct vars * , int , int , struct state * , struct state * , int );
46- static void parseqatom (struct vars * , int , int , struct state * , struct state * , struct subre * );
46+ static struct subre * parseqatom (struct vars * , int , int , struct state * , struct state * , struct subre * );
4747static void nonword (struct vars * , int , struct state * , struct state * );
4848static void word (struct vars * , int , struct state * , struct state * );
4949static void charclass (struct vars * , enum char_classes ,
@@ -756,7 +756,7 @@ parsebranch(struct vars *v,
756756 seencontent = 1 ;
757757
758758 /* NB, recursion in parseqatom() may swallow rest of branch */
759- parseqatom (v , stopper , type , lp , right , t );
759+ t = parseqatom (v , stopper , type , lp , right , t );
760760 NOERRN ();
761761 }
762762
@@ -777,8 +777,12 @@ parsebranch(struct vars *v,
777777 * The bookkeeping near the end cooperates very closely with parsebranch();
778778 * in particular, it contains a recursion that can involve parsing the rest
779779 * of the branch, making this function's name somewhat inaccurate.
780+ *
781+ * Usually, the return value is just "top", but in some cases where we
782+ * have parsed the rest of the branch, we may deem "top" redundant and
783+ * free it, returning some child subre instead.
780784 */
781- static void
785+ static struct subre *
782786parseqatom (struct vars * v ,
783787 int stopper , /* EOS or ')' */
784788 int type , /* LACON (lookaround subRE) or PLAIN */
@@ -818,103 +822,103 @@ parseqatom(struct vars *v,
818822 if (v -> cflags & REG_NLANCH )
819823 ARCV (BEHIND , v -> nlcolor );
820824 NEXT ();
821- return ;
825+ return top ;
822826 break ;
823827 case '$' :
824828 ARCV ('$' , 1 );
825829 if (v -> cflags & REG_NLANCH )
826830 ARCV (AHEAD , v -> nlcolor );
827831 NEXT ();
828- return ;
832+ return top ;
829833 break ;
830834 case SBEGIN :
831835 ARCV ('^' , 1 ); /* BOL */
832836 ARCV ('^' , 0 ); /* or BOS */
833837 NEXT ();
834- return ;
838+ return top ;
835839 break ;
836840 case SEND :
837841 ARCV ('$' , 1 ); /* EOL */
838842 ARCV ('$' , 0 ); /* or EOS */
839843 NEXT ();
840- return ;
844+ return top ;
841845 break ;
842846 case '<' :
843847 wordchrs (v );
844848 s = newstate (v -> nfa );
845- NOERR ();
849+ NOERRN ();
846850 nonword (v , BEHIND , lp , s );
847851 word (v , AHEAD , s , rp );
848852 NEXT ();
849- return ;
853+ return top ;
850854 break ;
851855 case '>' :
852856 wordchrs (v );
853857 s = newstate (v -> nfa );
854- NOERR ();
858+ NOERRN ();
855859 word (v , BEHIND , lp , s );
856860 nonword (v , AHEAD , s , rp );
857861 NEXT ();
858- return ;
862+ return top ;
859863 break ;
860864 case WBDRY :
861865 wordchrs (v );
862866 s = newstate (v -> nfa );
863- NOERR ();
867+ NOERRN ();
864868 nonword (v , BEHIND , lp , s );
865869 word (v , AHEAD , s , rp );
866870 s = newstate (v -> nfa );
867- NOERR ();
871+ NOERRN ();
868872 word (v , BEHIND , lp , s );
869873 nonword (v , AHEAD , s , rp );
870874 NEXT ();
871- return ;
875+ return top ;
872876 break ;
873877 case NWBDRY :
874878 wordchrs (v );
875879 s = newstate (v -> nfa );
876- NOERR ();
880+ NOERRN ();
877881 word (v , BEHIND , lp , s );
878882 word (v , AHEAD , s , rp );
879883 s = newstate (v -> nfa );
880- NOERR ();
884+ NOERRN ();
881885 nonword (v , BEHIND , lp , s );
882886 nonword (v , AHEAD , s , rp );
883887 NEXT ();
884- return ;
888+ return top ;
885889 break ;
886890 case LACON : /* lookaround constraint */
887891 latype = v -> nextvalue ;
888892 NEXT ();
889893 s = newstate (v -> nfa );
890894 s2 = newstate (v -> nfa );
891- NOERR ();
895+ NOERRN ();
892896 t = parse (v , ')' , LACON , s , s2 );
893897 freesubre (v , t ); /* internal structure irrelevant */
894- NOERR ();
898+ NOERRN ();
895899 assert (SEE (')' ));
896900 NEXT ();
897901 processlacon (v , s , s2 , latype , lp , rp );
898- return ;
902+ return top ;
899903 break ;
900904 /* then errors, to get them out of the way */
901905 case '*' :
902906 case '+' :
903907 case '?' :
904908 case '{' :
905909 ERR (REG_BADRPT );
906- return ;
910+ return top ;
907911 break ;
908912 default :
909913 ERR (REG_ASSERT );
910- return ;
914+ return top ;
911915 break ;
912916 /* then plain characters, and minor variants on that theme */
913917 case ')' : /* unbalanced paren */
914918 if ((v -> cflags & REG_ADVANCED ) != REG_EXTENDED )
915919 {
916920 ERR (REG_EPAREN );
917- return ;
921+ return top ;
918922 }
919923 /* legal in EREs due to specification botch */
920924 NOTE (REG_UPBOTCH );
@@ -923,7 +927,7 @@ parseqatom(struct vars *v,
923927 case PLAIN :
924928 onechr (v , v -> nextvalue , lp , rp );
925929 okcolors (v -> nfa , v -> cm );
926- NOERR ();
930+ NOERRN ();
927931 NEXT ();
928932 break ;
929933 case '[' :
@@ -972,14 +976,14 @@ parseqatom(struct vars *v,
972976 */
973977 s = newstate (v -> nfa );
974978 s2 = newstate (v -> nfa );
975- NOERR ();
979+ NOERRN ();
976980 EMPTYARC (lp , s );
977981 EMPTYARC (s2 , rp );
978- NOERR ();
982+ NOERRN ();
979983 atom = parse (v , ')' , type , s , s2 );
980984 assert (SEE (')' ) || ISERR ());
981985 NEXT ();
982- NOERR ();
986+ NOERRN ();
983987 if (cap )
984988 {
985989 assert (v -> subs [subno ] == NULL );
@@ -994,7 +998,7 @@ parseqatom(struct vars *v,
994998 {
995999 /* generate no-op wrapper node to handle "((x))" */
9961000 t = subre (v , '(' , atom -> flags | CAP , lp , rp );
997- NOERR ();
1001+ NOERRN ();
9981002 t -> capno = subno ;
9991003 t -> child = atom ;
10001004 atom = t ;
@@ -1006,10 +1010,10 @@ parseqatom(struct vars *v,
10061010 INSIST (type != LACON , REG_ESUBREG );
10071011 INSIST (v -> nextvalue < v -> nsubs , REG_ESUBREG );
10081012 INSIST (v -> subs [v -> nextvalue ] != NULL , REG_ESUBREG );
1009- NOERR ();
1013+ NOERRN ();
10101014 assert (v -> nextvalue > 0 );
10111015 atom = subre (v , 'b' , BACKR , lp , rp );
1012- NOERR ();
1016+ NOERRN ();
10131017 subno = v -> nextvalue ;
10141018 atom -> backno = subno ;
10151019 EMPTYARC (lp , rp ); /* temporarily, so there's something */
@@ -1050,7 +1054,7 @@ parseqatom(struct vars *v,
10501054 if (m > n )
10511055 {
10521056 ERR (REG_BADBR );
1053- return ;
1057+ return top ;
10541058 }
10551059 /* {m,n} exercises preference, even if it's {m,m} */
10561060 qprefer = (v -> nextvalue ) ? LONGER : SHORTER ;
@@ -1064,7 +1068,7 @@ parseqatom(struct vars *v,
10641068 if (!SEE ('}' ))
10651069 { /* catches errors too */
10661070 ERR (REG_BADBR );
1067- return ;
1071+ return top ;
10681072 }
10691073 NEXT ();
10701074 break ;
@@ -1083,7 +1087,7 @@ parseqatom(struct vars *v,
10831087 v -> subs [subno ] = NULL ;
10841088 delsub (v -> nfa , lp , rp );
10851089 EMPTYARC (lp , rp );
1086- return ;
1090+ return top ;
10871091 }
10881092
10891093 /* if not a messy case, avoid hard part */
@@ -1096,7 +1100,7 @@ parseqatom(struct vars *v,
10961100 if (atom != NULL )
10971101 freesubre (v , atom );
10981102 top -> flags = f ;
1099- return ;
1103+ return top ;
11001104 }
11011105
11021106 /*
@@ -1110,7 +1114,7 @@ parseqatom(struct vars *v,
11101114 if (atom == NULL )
11111115 {
11121116 atom = subre (v , '=' , 0 , lp , rp );
1113- NOERR ();
1117+ NOERRN ();
11141118 }
11151119
11161120 /*----------
@@ -1131,20 +1135,20 @@ parseqatom(struct vars *v,
11311135 */
11321136 s = newstate (v -> nfa ); /* first, new endpoints for the atom */
11331137 s2 = newstate (v -> nfa );
1134- NOERR ();
1138+ NOERRN ();
11351139 moveouts (v -> nfa , lp , s );
11361140 moveins (v -> nfa , rp , s2 );
1137- NOERR ();
1141+ NOERRN ();
11381142 atom -> begin = s ;
11391143 atom -> end = s2 ;
11401144 s = newstate (v -> nfa ); /* set up starting state */
1141- NOERR ();
1145+ NOERRN ();
11421146 EMPTYARC (lp , s );
1143- NOERR ();
1147+ NOERRN ();
11441148
11451149 /* break remaining subRE into x{...} and what follows */
11461150 t = subre (v , '.' , COMBINE (qprefer , atom -> flags ), lp , rp );
1147- NOERR ();
1151+ NOERRN ();
11481152 t -> child = atom ;
11491153 atomp = & t -> child ;
11501154
@@ -1163,7 +1167,7 @@ parseqatom(struct vars *v,
11631167 */
11641168 assert (top -> op == '=' && top -> child == NULL );
11651169 top -> child = subre (v , '=' , top -> flags , top -> begin , lp );
1166- NOERR ();
1170+ NOERRN ();
11671171 top -> op = '.' ;
11681172 top -> child -> sibling = t ;
11691173 /* top->flags will get updated later */
@@ -1182,11 +1186,11 @@ parseqatom(struct vars *v,
11821186 */
11831187 dupnfa (v -> nfa , v -> subs [subno ]-> begin , v -> subs [subno ]-> end ,
11841188 atom -> begin , atom -> end );
1185- NOERR ();
1189+ NOERRN ();
11861190
11871191 /* The backref node's NFA should not enforce any constraints */
11881192 removeconstraints (v -> nfa , atom -> begin , atom -> end );
1189- NOERR ();
1193+ NOERRN ();
11901194 }
11911195
11921196 /*
@@ -1226,7 +1230,7 @@ parseqatom(struct vars *v,
12261230 repeat (v , atom -> begin , atom -> end , m , n );
12271231 f = COMBINE (qprefer , atom -> flags );
12281232 t = subre (v , '=' , f , atom -> begin , atom -> end );
1229- NOERR ();
1233+ NOERRN ();
12301234 freesubre (v , atom );
12311235 * atomp = t ;
12321236 /* rest of branch can be strung starting from t->end */
@@ -1247,9 +1251,9 @@ parseqatom(struct vars *v,
12471251 repeat (v , s , atom -> begin , m - 1 , (n == DUPINF ) ? n : n - 1 );
12481252 f = COMBINE (qprefer , atom -> flags );
12491253 t = subre (v , '.' , f , s , atom -> end ); /* prefix and atom */
1250- NOERR ();
1254+ NOERRN ();
12511255 t -> child = subre (v , '=' , PREF (f ), s , atom -> begin );
1252- NOERR ();
1256+ NOERRN ();
12531257 t -> child -> sibling = atom ;
12541258 * atomp = t ;
12551259 /* rest of branch can be strung starting from atom->end */
@@ -1259,14 +1263,14 @@ parseqatom(struct vars *v,
12591263 {
12601264 /* general case: need an iteration node */
12611265 s2 = newstate (v -> nfa );
1262- NOERR ();
1266+ NOERRN ();
12631267 moveouts (v -> nfa , atom -> end , s2 );
1264- NOERR ();
1268+ NOERRN ();
12651269 dupnfa (v -> nfa , atom -> begin , atom -> end , s , s2 );
12661270 repeat (v , s , s2 , m , n );
12671271 f = COMBINE (qprefer , atom -> flags );
12681272 t = subre (v , '*' , f , s , s2 );
1269- NOERR ();
1273+ NOERRN ();
12701274 t -> min = (short ) m ;
12711275 t -> max = (short ) n ;
12721276 t -> child = atom ;
@@ -1280,7 +1284,7 @@ parseqatom(struct vars *v,
12801284 {
12811285 /* parse all the rest of the branch, and insert in t->child->sibling */
12821286 t -> child -> sibling = parsebranch (v , stopper , type , s2 , rp , 1 );
1283- NOERR ();
1287+ NOERRN ();
12841288 assert (SEE ('|' ) || SEE (stopper ) || SEE (EOS ));
12851289
12861290 /* here's the promised update of the flags */
@@ -1299,9 +1303,7 @@ parseqatom(struct vars *v,
12991303 *
13001304 * If the messy atom was the first thing in the branch, then
13011305 * top->child is vacuous and we can get rid of one level of
1302- * concatenation. Since the caller is holding a pointer to the top
1303- * node, we can't remove that node; but we're allowed to change its
1304- * properties.
1306+ * concatenation.
13051307 */
13061308 assert (top -> child -> op == '=' );
13071309 if (top -> child -> begin == top -> child -> end )
@@ -1351,21 +1353,13 @@ parseqatom(struct vars *v,
13511353 {
13521354 assert (!MESSY (top -> child -> flags ));
13531355 t = top -> child -> sibling ;
1354- freesubre (v , top -> child );
1355- top -> op = t -> op ;
1356- top -> flags = t -> flags ;
1357- top -> latype = t -> latype ;
1358- top -> id = t -> id ;
1359- top -> capno = t -> capno ;
1360- top -> backno = t -> backno ;
1361- top -> min = t -> min ;
1362- top -> max = t -> max ;
1363- top -> child = t -> child ;
1364- top -> begin = t -> begin ;
1365- top -> end = t -> end ;
1366- freesrnode (v , t );
1356+ top -> child -> sibling = NULL ;
1357+ freesubre (v , top );
1358+ top = t ;
13671359 }
13681360 }
1361+
1362+ return top ;
13691363}
13701364
13711365/*
@@ -2109,7 +2103,9 @@ freesrnode(struct vars *v, /* might be NULL */
21092103
21102104 if (!NULLCNFA (sr -> cnfa ))
21112105 freecnfa (& sr -> cnfa );
2112- sr -> flags = 0 ;
2106+ sr -> flags = 0 ; /* in particular, not INUSE */
2107+ sr -> child = sr -> sibling = NULL ;
2108+ sr -> begin = sr -> end = NULL ;
21132109
21142110 if (v != NULL && v -> treechain != NULL )
21152111 {
0 commit comments