@@ -318,21 +318,22 @@ typedef struct
318318 * arcs - outgoing arcs of this state (List of TrgmArc)
319319 * enterKeys - enter keys reachable from this state without reading any
320320 * predictable trigram (List of TrgmStateKey)
321- * fin - flag indicating this state is final
322- * init - flag indicating this state is initial
321+ * flags - flag bits
323322 * parent - parent state, if this state has been merged into another
324- * children - child states (states that have been merged into this one)
323+ * tentParent - planned parent state, if considering a merge
325324 * number - number of this state (used at the packaging stage)
326325 */
326+ #define TSTATE_INIT 0x01 /* flag indicating this state is initial */
327+ #define TSTATE_FIN 0x02 /* flag indicating this state is final */
328+
327329typedef struct TrgmState
328330{
329331 TrgmStateKey stateKey ; /* hashtable key: must be first field */
330332 List * arcs ;
331333 List * enterKeys ;
332- bool fin ;
333- bool init ;
334+ int flags ;
334335 struct TrgmState * parent ;
335- List * children ;
336+ struct TrgmState * tentParent ;
336337 int number ;
337338} TrgmState ;
338339
@@ -599,7 +600,7 @@ createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
599600 * get from the initial state to the final state without reading any
600601 * predictable trigram.
601602 */
602- if (trgmNFA .initState -> fin )
603+ if (trgmNFA .initState -> flags & TSTATE_FIN )
603604 return NULL ;
604605
605606 /*
@@ -925,7 +926,7 @@ transformGraph(TrgmNFA *trgmNFA)
925926 initkey .nstate = pg_reg_getinitialstate (trgmNFA -> regex );
926927
927928 initstate = getState (trgmNFA , & initkey );
928- initstate -> init = true ;
929+ initstate -> flags |= TSTATE_INIT ;
929930 trgmNFA -> initState = initstate ;
930931
931932 /*
@@ -943,7 +944,7 @@ transformGraph(TrgmNFA *trgmNFA)
943944 * actual processing.
944945 */
945946 if (trgmNFA -> overflowed )
946- state -> fin = true ;
947+ state -> flags |= TSTATE_FIN ;
947948 else
948949 processState (trgmNFA , state );
949950
@@ -968,7 +969,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
968969 * queue is empty. But we can quit if the state gets marked final.
969970 */
970971 addKey (trgmNFA , state , & state -> stateKey );
971- while (trgmNFA -> keysQueue != NIL && !state -> fin )
972+ while (trgmNFA -> keysQueue != NIL && !( state -> flags & TSTATE_FIN ) )
972973 {
973974 TrgmStateKey * key = (TrgmStateKey * ) linitial (trgmNFA -> keysQueue );
974975
@@ -980,7 +981,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
980981 * Add outgoing arcs only if state isn't final (we have no interest in
981982 * outgoing arcs if we already match)
982983 */
983- if (!state -> fin )
984+ if (!( state -> flags & TSTATE_FIN ) )
984985 addArcs (trgmNFA , state );
985986}
986987
@@ -989,7 +990,7 @@ processState(TrgmNFA *trgmNFA, TrgmState *state)
989990 * whether this should result in any further enter keys being added.
990991 * If so, add those keys to keysQueue so that processState will handle them.
991992 *
992- * If the enter key is for the NFA's final state, set state->fin = TRUE .
993+ * If the enter key is for the NFA's final state, mark state as TSTATE_FIN .
993994 * This situation means that we can reach the final state from this expanded
994995 * state without reading any predictable trigram, so we must consider this
995996 * state as an accepting one.
@@ -1059,7 +1060,7 @@ addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
10591060 /* If state is now known final, mark it and we're done */
10601061 if (key -> nstate == pg_reg_getfinalstate (trgmNFA -> regex ))
10611062 {
1062- state -> fin = true ;
1063+ state -> flags |= TSTATE_FIN ;
10631064 return ;
10641065 }
10651066
@@ -1385,10 +1386,9 @@ getState(TrgmNFA *trgmNFA, TrgmStateKey *key)
13851386 /* New state: initialize and queue it */
13861387 state -> arcs = NIL ;
13871388 state -> enterKeys = NIL ;
1388- state -> init = false;
1389- state -> fin = false;
1389+ state -> flags = 0 ;
13901390 state -> parent = NULL ;
1391- state -> children = NIL ;
1391+ state -> tentParent = NULL ;
13921392 state -> number = -1 ;
13931393
13941394 trgmNFA -> queue = lappend (trgmNFA -> queue , state );
@@ -1582,20 +1582,60 @@ selectColorTrigrams(TrgmNFA *trgmNFA)
15821582 TrgmArcInfo * arcInfo = (TrgmArcInfo * ) lfirst (cell );
15831583 TrgmState * source = arcInfo -> source ,
15841584 * target = arcInfo -> target ;
1585+ int source_flags ,
1586+ target_flags ;
15851587
15861588 /* examine parent states, if any merging has already happened */
15871589 while (source -> parent )
15881590 source = source -> parent ;
15891591 while (target -> parent )
15901592 target = target -> parent ;
15911593
1592- if ((source -> init || target -> init ) &&
1593- (source -> fin || target -> fin ))
1594+ /* we must also consider merges we are planning right now */
1595+ source_flags = source -> flags ;
1596+ while (source -> tentParent )
1597+ {
1598+ source = source -> tentParent ;
1599+ source_flags |= source -> flags ;
1600+ }
1601+ target_flags = target -> flags ;
1602+ while (target -> tentParent )
1603+ {
1604+ target = target -> tentParent ;
1605+ target_flags |= target -> flags ;
1606+ }
1607+
1608+ /* would fully-merged state have both INIT and FIN set? */
1609+ if (((source_flags | target_flags ) & (TSTATE_INIT | TSTATE_FIN )) ==
1610+ (TSTATE_INIT | TSTATE_FIN ))
15941611 {
15951612 canRemove = false;
15961613 break ;
15971614 }
1615+
1616+ /* ok so far, so remember planned merge */
1617+ if (source != target )
1618+ target -> tentParent = source ;
15981619 }
1620+
1621+ /* We must clear all the tentParent fields before continuing */
1622+ foreach (cell , trgmInfo -> arcs )
1623+ {
1624+ TrgmArcInfo * arcInfo = (TrgmArcInfo * ) lfirst (cell );
1625+ TrgmState * target = arcInfo -> target ;
1626+ TrgmState * ttarget ;
1627+
1628+ while (target -> parent )
1629+ target = target -> parent ;
1630+
1631+ while ((ttarget = target -> tentParent ) != NULL )
1632+ {
1633+ target -> tentParent = NULL ;
1634+ target = ttarget ;
1635+ }
1636+ }
1637+
1638+ /* Now, move on if we can't drop this trigram */
15991639 if (!canRemove )
16001640 continue ;
16011641
@@ -1611,7 +1651,12 @@ selectColorTrigrams(TrgmNFA *trgmNFA)
16111651 while (target -> parent )
16121652 target = target -> parent ;
16131653 if (source != target )
1654+ {
16141655 mergeStates (source , target );
1656+ /* Assert we didn't merge initial and final states */
1657+ Assert ((source -> flags & (TSTATE_INIT | TSTATE_FIN )) !=
1658+ (TSTATE_INIT | TSTATE_FIN ));
1659+ }
16151660 }
16161661
16171662 /* Mark trigram unexpanded, and update totals */
@@ -1754,27 +1799,15 @@ fillTrgm(trgm *ptrgm, trgm_mb_char s[3])
17541799static void
17551800mergeStates (TrgmState * state1 , TrgmState * state2 )
17561801{
1757- ListCell * cell ;
1758-
17591802 Assert (state1 != state2 );
17601803 Assert (!state1 -> parent );
17611804 Assert (!state2 -> parent );
17621805
1763- /* state1 absorbs state2's init/fin flags */
1764- state1 -> init |= state2 -> init ;
1765- state1 -> fin |= state2 -> fin ;
1806+ /* state1 absorbs state2's flags */
1807+ state1 -> flags |= state2 -> flags ;
17661808
1767- /* state2, and all its children, become children of state1 */
1768- foreach (cell , state2 -> children )
1769- {
1770- TrgmState * state = (TrgmState * ) lfirst (cell );
1771-
1772- state -> parent = state1 ;
1773- }
1809+ /* state2, and indirectly all its children, become children of state1 */
17741810 state2 -> parent = state1 ;
1775- state1 -> children = list_concat (state1 -> children , state2 -> children );
1776- state1 -> children = lappend (state1 -> children , state2 );
1777- state2 -> children = NIL ;
17781811}
17791812
17801813/*
@@ -1843,9 +1876,9 @@ packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext)
18431876
18441877 if (state -> number < 0 )
18451878 {
1846- if (state -> init )
1879+ if (state -> flags & TSTATE_INIT )
18471880 state -> number = 0 ;
1848- else if (state -> fin )
1881+ else if (state -> flags & TSTATE_FIN )
18491882 state -> number = 1 ;
18501883 else
18511884 {
@@ -2109,9 +2142,9 @@ printTrgmNFA(TrgmNFA *trgmNFA)
21092142 ListCell * cell ;
21102143
21112144 appendStringInfo (& buf , "s%p" , (void * ) state );
2112- if (state -> fin )
2145+ if (state -> flags & TSTATE_FIN )
21132146 appendStringInfoString (& buf , " [shape = doublecircle]" );
2114- if (state -> init )
2147+ if (state -> flags & TSTATE_INIT )
21152148 initstate = state ;
21162149 appendStringInfo (& buf , " [label = \"%d\"]" , state -> stateKey .nstate );
21172150 appendStringInfoString (& buf , ";\n" );
0 commit comments