@@ -57,9 +57,15 @@ newnfa(struct vars *v,
5757 return NULL ;
5858 }
5959
60+ /* Make the NFA minimally valid, so freenfa() will behave sanely */
6061 nfa -> states = NULL ;
6162 nfa -> slast = NULL ;
62- nfa -> free = NULL ;
63+ nfa -> freestates = NULL ;
64+ nfa -> freearcs = NULL ;
65+ nfa -> lastsb = NULL ;
66+ nfa -> lastab = NULL ;
67+ nfa -> lastsbused = 0 ;
68+ nfa -> lastabused = 0 ;
6369 nfa -> nstates = 0 ;
6470 nfa -> cm = cm ;
6571 nfa -> v = v ;
@@ -68,9 +74,10 @@ newnfa(struct vars *v,
6874 nfa -> flags = 0 ;
6975 nfa -> minmatchall = nfa -> maxmatchall = -1 ;
7076 nfa -> parent = parent ; /* Precedes newfstate so parent is valid. */
77+
78+ /* Create required infrastructure */
7179 nfa -> post = newfstate (nfa , '@' ); /* number 0 */
7280 nfa -> pre = newfstate (nfa , '>' ); /* number 1 */
73-
7481 nfa -> init = newstate (nfa ); /* may become invalid later */
7582 nfa -> final = newstate (nfa );
7683 if (ISERR ())
@@ -99,23 +106,27 @@ newnfa(struct vars *v,
99106static void
100107freenfa (struct nfa * nfa )
101108{
102- struct state * s ;
109+ struct statebatch * sb ;
110+ struct statebatch * sbnext ;
111+ struct arcbatch * ab ;
112+ struct arcbatch * abnext ;
103113
104- while (( s = nfa -> states ) != NULL )
114+ for ( sb = nfa -> lastsb ; sb != NULL ; sb = sbnext )
105115 {
106- s -> nins = s -> nouts = 0 ; /* don't worry about arcs */
107- freestate (nfa , s );
116+ sbnext = sb -> next ;
117+ nfa -> v -> spaceused -= STATEBATCHSIZE (sb -> nstates );
118+ FREE (sb );
108119 }
109- while ((s = nfa -> free ) != NULL )
120+ nfa -> lastsb = NULL ;
121+ for (ab = nfa -> lastab ; ab != NULL ; ab = abnext )
110122 {
111- nfa -> free = s -> next ;
112- destroystate (nfa , s );
123+ abnext = ab -> next ;
124+ nfa -> v -> spaceused -= ARCBATCHSIZE (ab -> narcs );
125+ FREE (ab );
113126 }
127+ nfa -> lastab = NULL ;
114128
115- nfa -> slast = NULL ;
116129 nfa -> nstates = -1 ;
117- nfa -> pre = NULL ;
118- nfa -> post = NULL ;
119130 FREE (nfa );
120131}
121132
@@ -138,28 +149,43 @@ newstate(struct nfa *nfa)
138149 return NULL ;
139150 }
140151
141- if (nfa -> free != NULL )
152+ /* first, recycle anything that's on the freelist */
153+ if (nfa -> freestates != NULL )
154+ {
155+ s = nfa -> freestates ;
156+ nfa -> freestates = s -> next ;
157+ }
158+ /* otherwise, is there anything left in the last statebatch? */
159+ else if (nfa -> lastsb != NULL && nfa -> lastsbused < nfa -> lastsb -> nstates )
142160 {
143- s = nfa -> free ;
144- nfa -> free = s -> next ;
161+ s = & nfa -> lastsb -> s [nfa -> lastsbused ++ ];
145162 }
163+ /* otherwise, need to allocate a new statebatch */
146164 else
147165 {
166+ struct statebatch * newSb ;
167+ size_t nstates ;
168+
148169 if (nfa -> v -> spaceused >= REG_MAX_COMPILE_SPACE )
149170 {
150171 NERR (REG_ETOOBIG );
151172 return NULL ;
152173 }
153- s = (struct state * ) MALLOC (sizeof (struct state ));
154- if (s == NULL )
174+ nstates = (nfa -> lastsb != NULL ) ? nfa -> lastsb -> nstates * 2 : FIRSTSBSIZE ;
175+ if (nstates > MAXSBSIZE )
176+ nstates = MAXSBSIZE ;
177+ newSb = (struct statebatch * ) MALLOC (STATEBATCHSIZE (nstates ));
178+ if (newSb == NULL )
155179 {
156180 NERR (REG_ESPACE );
157181 return NULL ;
158182 }
159- nfa -> v -> spaceused += sizeof (struct state );
160- s -> oas .next = NULL ;
161- s -> free = NULL ;
162- s -> noas = 0 ;
183+ nfa -> v -> spaceused += STATEBATCHSIZE (nstates );
184+ newSb -> nstates = nstates ;
185+ newSb -> next = nfa -> lastsb ;
186+ nfa -> lastsb = newSb ;
187+ nfa -> lastsbused = 1 ;
188+ s = & newSb -> s [0 ];
163189 }
164190
165191 assert (nfa -> nstates >= 0 );
@@ -240,32 +266,8 @@ freestate(struct nfa *nfa,
240266 nfa -> states = s -> next ;
241267 }
242268 s -> prev = NULL ;
243- s -> next = nfa -> free ; /* don't delete it, put it on the free list */
244- nfa -> free = s ;
245- }
246-
247- /*
248- * destroystate - really get rid of an already-freed state
249- */
250- static void
251- destroystate (struct nfa * nfa ,
252- struct state * s )
253- {
254- struct arcbatch * ab ;
255- struct arcbatch * abnext ;
256-
257- assert (s -> no == FREESTATE );
258- for (ab = s -> oas .next ; ab != NULL ; ab = abnext )
259- {
260- abnext = ab -> next ;
261- FREE (ab );
262- nfa -> v -> spaceused -= sizeof (struct arcbatch );
263- }
264- s -> ins = NULL ;
265- s -> outs = NULL ;
266- s -> next = NULL ;
267- FREE (s );
268- nfa -> v -> spaceused -= sizeof (struct state );
269+ s -> next = nfa -> freestates ; /* don't delete it, put it on the free list */
270+ nfa -> freestates = s ;
269271}
270272
271273/*
@@ -334,8 +336,7 @@ createarc(struct nfa *nfa,
334336{
335337 struct arc * a ;
336338
337- /* the arc is physically allocated within its from-state */
338- a = allocarc (nfa , from );
339+ a = allocarc (nfa );
339340 if (NISERR ())
340341 return ;
341342 assert (a != NULL );
@@ -369,55 +370,52 @@ createarc(struct nfa *nfa,
369370}
370371
371372/*
372- * allocarc - allocate a new out- arc within a state
373+ * allocarc - allocate a new arc within an NFA
373374 */
374375static struct arc * /* NULL for failure */
375- allocarc (struct nfa * nfa ,
376- struct state * s )
376+ allocarc (struct nfa * nfa )
377377{
378378 struct arc * a ;
379379
380- /* shortcut */
381- if (s -> free == NULL && s -> noas < ABSIZE )
380+ /* first, recycle anything that's on the freelist */
381+ if (nfa -> freearcs != NULL )
382382 {
383- a = & s -> oas .a [s -> noas ];
384- s -> noas ++ ;
385- return a ;
383+ a = nfa -> freearcs ;
384+ nfa -> freearcs = a -> freechain ;
386385 }
387-
388- /* if none at hand, get more */
389- if (s -> free == NULL )
386+ /* otherwise, is there anything left in the last arcbatch? */
387+ else if (nfa -> lastab != NULL && nfa -> lastabused < nfa -> lastab -> narcs )
388+ {
389+ a = & nfa -> lastab -> a [nfa -> lastabused ++ ];
390+ }
391+ /* otherwise, need to allocate a new arcbatch */
392+ else
390393 {
391394 struct arcbatch * newAb ;
392- int i ;
395+ size_t narcs ;
393396
394397 if (nfa -> v -> spaceused >= REG_MAX_COMPILE_SPACE )
395398 {
396399 NERR (REG_ETOOBIG );
397400 return NULL ;
398401 }
399- newAb = (struct arcbatch * ) MALLOC (sizeof (struct arcbatch ));
402+ narcs = (nfa -> lastab != NULL ) ? nfa -> lastab -> narcs * 2 : FIRSTABSIZE ;
403+ if (narcs > MAXABSIZE )
404+ narcs = MAXABSIZE ;
405+ newAb = (struct arcbatch * ) MALLOC (ARCBATCHSIZE (narcs ));
400406 if (newAb == NULL )
401407 {
402408 NERR (REG_ESPACE );
403409 return NULL ;
404410 }
405- nfa -> v -> spaceused += sizeof (struct arcbatch );
406- newAb -> next = s -> oas .next ;
407- s -> oas .next = newAb ;
408-
409- for (i = 0 ; i < ABSIZE ; i ++ )
410- {
411- newAb -> a [i ].type = 0 ;
412- newAb -> a [i ].freechain = & newAb -> a [i + 1 ];
413- }
414- newAb -> a [ABSIZE - 1 ].freechain = NULL ;
415- s -> free = & newAb -> a [0 ];
411+ nfa -> v -> spaceused += ARCBATCHSIZE (narcs );
412+ newAb -> narcs = narcs ;
413+ newAb -> next = nfa -> lastab ;
414+ nfa -> lastab = newAb ;
415+ nfa -> lastabused = 1 ;
416+ a = & newAb -> a [0 ];
416417 }
417- assert (s -> free != NULL );
418418
419- a = s -> free ;
420- s -> free = a -> freechain ;
421419 return a ;
422420}
423421
@@ -478,25 +476,66 @@ freearc(struct nfa *nfa,
478476 }
479477 to -> nins -- ;
480478
481- /* clean up and place on from-state 's free list */
479+ /* clean up and place on NFA 's free list */
482480 victim -> type = 0 ;
483481 victim -> from = NULL ; /* precautions... */
484482 victim -> to = NULL ;
485483 victim -> inchain = NULL ;
486484 victim -> inchainRev = NULL ;
487485 victim -> outchain = NULL ;
488486 victim -> outchainRev = NULL ;
489- victim -> freechain = from -> free ;
490- from -> free = victim ;
487+ victim -> freechain = nfa -> freearcs ;
488+ nfa -> freearcs = victim ;
491489}
492490
493491/*
494- * changearctarget - flip an arc to have a different to state
492+ * changearcsource - flip an arc to have a different from state
495493 *
496494 * Caller must have verified that there is no pre-existing duplicate arc.
495+ */
496+ static void
497+ changearcsource (struct arc * a , struct state * newfrom )
498+ {
499+ struct state * oldfrom = a -> from ;
500+ struct arc * predecessor ;
501+
502+ assert (oldfrom != newfrom );
503+
504+ /* take it off old source's out-chain */
505+ assert (oldfrom != NULL );
506+ predecessor = a -> outchainRev ;
507+ if (predecessor == NULL )
508+ {
509+ assert (oldfrom -> outs == a );
510+ oldfrom -> outs = a -> outchain ;
511+ }
512+ else
513+ {
514+ assert (predecessor -> outchain == a );
515+ predecessor -> outchain = a -> outchain ;
516+ }
517+ if (a -> outchain != NULL )
518+ {
519+ assert (a -> outchain -> outchainRev == a );
520+ a -> outchain -> outchainRev = predecessor ;
521+ }
522+ oldfrom -> nouts -- ;
523+
524+ a -> from = newfrom ;
525+
526+ /* prepend it to new source's out-chain */
527+ a -> outchain = newfrom -> outs ;
528+ a -> outchainRev = NULL ;
529+ if (newfrom -> outs )
530+ newfrom -> outs -> outchainRev = a ;
531+ newfrom -> outs = a ;
532+ newfrom -> nouts ++ ;
533+ }
534+
535+ /*
536+ * changearctarget - flip an arc to have a different to state
497537 *
498- * Note that because we store arcs in their from state, we can't easily have
499- * a similar changearcsource function.
538+ * Caller must have verified that there is no pre-existing duplicate arc.
500539 */
501540static void
502541changearctarget (struct arc * a , struct state * newto )
@@ -1009,6 +1048,8 @@ mergeins(struct nfa *nfa,
10091048
10101049/*
10111050 * moveouts - move all out arcs of a state to another state
1051+ *
1052+ * See comments for moveins()
10121053 */
10131054static void
10141055moveouts (struct nfa * nfa ,
@@ -1031,9 +1072,9 @@ moveouts(struct nfa *nfa,
10311072 else
10321073 {
10331074 /*
1034- * With many arcs, use a sort-merge approach. Note that createarc ()
1035- * will put new arcs onto the front of newState's chain, so it does
1036- * not break our walk through the sorted part of the chain.
1075+ * With many arcs, use a sort-merge approach. Note changearcsource ()
1076+ * will put the arc onto the front of newState's chain, so it does not
1077+ * break our walk through the sorted part of the chain.
10371078 */
10381079 struct arc * oa ;
10391080 struct arc * na ;
@@ -1063,8 +1104,12 @@ moveouts(struct nfa *nfa,
10631104 case -1 :
10641105 /* newState does not have anything matching oa */
10651106 oa = oa -> outchain ;
1066- createarc (nfa , a -> type , a -> co , newState , a -> to );
1067- freearc (nfa , a );
1107+
1108+ /*
1109+ * Rather than doing createarc+freearc, we can just unlink
1110+ * and relink the existing arc struct.
1111+ */
1112+ changearcsource (a , newState );
10681113 break ;
10691114 case 0 :
10701115 /* match, advance in both lists */
@@ -1087,8 +1132,7 @@ moveouts(struct nfa *nfa,
10871132 struct arc * a = oa ;
10881133
10891134 oa = oa -> outchain ;
1090- createarc (nfa , a -> type , a -> co , newState , a -> to );
1091- freearc (nfa , a );
1135+ changearcsource (a , newState );
10921136 }
10931137 }
10941138
@@ -3413,7 +3457,6 @@ dumparc(struct arc *a,
34133457 FILE * f )
34143458{
34153459 struct arc * aa ;
3416- struct arcbatch * ab ;
34173460
34183461 fprintf (f , "\t" );
34193462 switch (a -> type )
@@ -3451,16 +3494,11 @@ dumparc(struct arc *a,
34513494 }
34523495 if (a -> from != s )
34533496 fprintf (f , "?%d?" , a -> from -> no );
3454- for (ab = & a -> from -> oas ; ab != NULL ; ab = ab -> next )
3455- {
3456- for (aa = & ab -> a [0 ]; aa < & ab -> a [ABSIZE ]; aa ++ )
3457- if (aa == a )
3458- break ; /* NOTE BREAK OUT */
3459- if (aa < & ab -> a [ABSIZE ]) /* propagate break */
3497+ for (aa = a -> from -> outs ; aa != NULL ; aa = aa -> outchain )
3498+ if (aa == a )
34603499 break ; /* NOTE BREAK OUT */
3461- }
3462- if (ab == NULL )
3463- fprintf (f , "?!?" ); /* not in allocated space */
3500+ if (aa == NULL )
3501+ fprintf (f , "?!?" ); /* missing from out-chain */
34643502 fprintf (f , "->" );
34653503 if (a -> to == NULL )
34663504 {
0 commit comments