@@ -1513,6 +1513,7 @@ pullback(struct nfa * nfa,
15131513 struct state * nexts ;
15141514 struct arc * a ;
15151515 struct arc * nexta ;
1516+ struct state * intermediates ;
15161517 int progress ;
15171518
15181519 /* find and pull until there are no more */
@@ -1522,14 +1523,25 @@ pullback(struct nfa * nfa,
15221523 for (s = nfa -> states ; s != NULL && !NISERR (); s = nexts )
15231524 {
15241525 nexts = s -> next ;
1526+ intermediates = NULL ;
15251527 for (a = s -> outs ; a != NULL && !NISERR (); a = nexta )
15261528 {
15271529 nexta = a -> outchain ;
15281530 if (a -> type == '^' || a -> type == BEHIND )
1529- if (pull (nfa , a ))
1531+ if (pull (nfa , a , & intermediates ))
15301532 progress = 1 ;
1531- assert (nexta == NULL || s -> no != FREESTATE );
15321533 }
1534+ /* clear tmp fields of intermediate states created here */
1535+ while (intermediates != NULL )
1536+ {
1537+ struct state * ns = intermediates -> tmp ;
1538+
1539+ intermediates -> tmp = NULL ;
1540+ intermediates = ns ;
1541+ }
1542+ /* if s is now useless, get rid of it */
1543+ if ((s -> nins == 0 || s -> nouts == 0 ) && !s -> flag )
1544+ dropstate (nfa , s );
15331545 }
15341546 if (progress && f != NULL )
15351547 dumpnfa (nfa , f );
@@ -1557,13 +1569,26 @@ pullback(struct nfa * nfa,
15571569
15581570/*
15591571 * pull - pull a back constraint backward past its source state
1560- * A significant property of this function is that it deletes at most
1561- * one state -- the constraint's from state -- and only if the constraint
1562- * was that state's last outarc.
1572+ *
1573+ * Returns 1 if successful (which it always is unless the source is the
1574+ * start state or we have an internal error), 0 if nothing happened.
1575+ *
1576+ * A significant property of this function is that it deletes no pre-existing
1577+ * states, and no outarcs of the constraint's from state other than the given
1578+ * constraint arc. This makes the loops in pullback() safe, at the cost that
1579+ * we may leave useless states behind. Therefore, we leave it to pullback()
1580+ * to delete such states.
1581+ *
1582+ * If the from state has multiple back-constraint outarcs, and/or multiple
1583+ * compatible constraint inarcs, we only need to create one new intermediate
1584+ * state per combination of predecessor and successor states. *intermediates
1585+ * points to a list of such intermediate states for this from state (chained
1586+ * through their tmp fields).
15631587 */
1564- static int /* 0 couldn't, 1 could */
1588+ static int
15651589pull (struct nfa * nfa ,
1566- struct arc * con )
1590+ struct arc * con ,
1591+ struct state * * intermediates )
15671592{
15681593 struct state * from = con -> from ;
15691594 struct state * to = con -> to ;
@@ -1580,7 +1605,11 @@ pull(struct nfa * nfa,
15801605 return 1 ;
15811606 }
15821607
1583- /* first, clone from state if necessary to avoid other outarcs */
1608+ /*
1609+ * First, clone from state if necessary to avoid other outarcs. This may
1610+ * seem wasteful, but it simplifies the logic, and we'll get rid of the
1611+ * clone state again at the bottom.
1612+ */
15841613 if (from -> nouts > 1 )
15851614 {
15861615 s = newstate (nfa );
@@ -1589,13 +1618,15 @@ pull(struct nfa * nfa,
15891618 copyins (nfa , from , s , 1 ); /* duplicate inarcs */
15901619 cparc (nfa , con , s , to ); /* move constraint arc */
15911620 freearc (nfa , con );
1621+ if (NISERR ())
1622+ return 0 ;
15921623 from = s ;
15931624 con = from -> outs ;
15941625 }
15951626 assert (from -> nouts == 1 );
15961627
15971628 /* propagate the constraint into the from state's inarcs */
1598- for (a = from -> ins ; a != NULL ; a = nexta )
1629+ for (a = from -> ins ; a != NULL && ! NISERR () ; a = nexta )
15991630 {
16001631 nexta = a -> inchain ;
16011632 switch (combine (con , a ))
@@ -1606,13 +1637,23 @@ pull(struct nfa * nfa,
16061637 case SATISFIED : /* no action needed */
16071638 break ;
16081639 case COMPATIBLE : /* swap the two arcs, more or less */
1609- s = newstate (nfa );
1610- if (NISERR ())
1611- return 0 ;
1612- cparc (nfa , a , s , to ); /* anticipate move */
1640+ /* need an intermediate state, but might have one already */
1641+ for (s = * intermediates ; s != NULL ; s = s -> tmp )
1642+ {
1643+ assert (s -> nins > 0 && s -> nouts > 0 );
1644+ if (s -> ins -> from == a -> from && s -> outs -> to == to )
1645+ break ;
1646+ }
1647+ if (s == NULL )
1648+ {
1649+ s = newstate (nfa );
1650+ if (NISERR ())
1651+ return 0 ;
1652+ s -> tmp = * intermediates ;
1653+ * intermediates = s ;
1654+ }
16131655 cparc (nfa , con , a -> from , s );
1614- if (NISERR ())
1615- return 0 ;
1656+ cparc (nfa , a , s , to );
16161657 freearc (nfa , a );
16171658 break ;
16181659 default :
@@ -1623,7 +1664,8 @@ pull(struct nfa * nfa,
16231664
16241665 /* remaining inarcs, if any, incorporate the constraint */
16251666 moveins (nfa , from , to );
1626- dropstate (nfa , from ); /* will free the constraint */
1667+ freearc (nfa , con );
1668+ /* from state is now useless, but we leave it to pullback() to clean up */
16271669 return 1 ;
16281670}
16291671
@@ -1638,6 +1680,7 @@ pushfwd(struct nfa * nfa,
16381680 struct state * nexts ;
16391681 struct arc * a ;
16401682 struct arc * nexta ;
1683+ struct state * intermediates ;
16411684 int progress ;
16421685
16431686 /* find and push until there are no more */
@@ -1647,14 +1690,25 @@ pushfwd(struct nfa * nfa,
16471690 for (s = nfa -> states ; s != NULL && !NISERR (); s = nexts )
16481691 {
16491692 nexts = s -> next ;
1693+ intermediates = NULL ;
16501694 for (a = s -> ins ; a != NULL && !NISERR (); a = nexta )
16511695 {
16521696 nexta = a -> inchain ;
16531697 if (a -> type == '$' || a -> type == AHEAD )
1654- if (push (nfa , a ))
1698+ if (push (nfa , a , & intermediates ))
16551699 progress = 1 ;
1656- assert (nexta == NULL || s -> no != FREESTATE );
16571700 }
1701+ /* clear tmp fields of intermediate states created here */
1702+ while (intermediates != NULL )
1703+ {
1704+ struct state * ns = intermediates -> tmp ;
1705+
1706+ intermediates -> tmp = NULL ;
1707+ intermediates = ns ;
1708+ }
1709+ /* if s is now useless, get rid of it */
1710+ if ((s -> nins == 0 || s -> nouts == 0 ) && !s -> flag )
1711+ dropstate (nfa , s );
16581712 }
16591713 if (progress && f != NULL )
16601714 dumpnfa (nfa , f );
@@ -1682,13 +1736,26 @@ pushfwd(struct nfa * nfa,
16821736
16831737/*
16841738 * push - push a forward constraint forward past its destination state
1685- * A significant property of this function is that it deletes at most
1686- * one state -- the constraint's to state -- and only if the constraint
1687- * was that state's last inarc.
1739+ *
1740+ * Returns 1 if successful (which it always is unless the destination is the
1741+ * post state or we have an internal error), 0 if nothing happened.
1742+ *
1743+ * A significant property of this function is that it deletes no pre-existing
1744+ * states, and no inarcs of the constraint's to state other than the given
1745+ * constraint arc. This makes the loops in pushfwd() safe, at the cost that
1746+ * we may leave useless states behind. Therefore, we leave it to pushfwd()
1747+ * to delete such states.
1748+ *
1749+ * If the to state has multiple forward-constraint inarcs, and/or multiple
1750+ * compatible constraint outarcs, we only need to create one new intermediate
1751+ * state per combination of predecessor and successor states. *intermediates
1752+ * points to a list of such intermediate states for this to state (chained
1753+ * through their tmp fields).
16881754 */
1689- static int /* 0 couldn't, 1 could */
1755+ static int
16901756push (struct nfa * nfa ,
1691- struct arc * con )
1757+ struct arc * con ,
1758+ struct state * * intermediates )
16921759{
16931760 struct state * from = con -> from ;
16941761 struct state * to = con -> to ;
@@ -1705,22 +1772,28 @@ push(struct nfa * nfa,
17051772 return 1 ;
17061773 }
17071774
1708- /* first, clone to state if necessary to avoid other inarcs */
1775+ /*
1776+ * First, clone to state if necessary to avoid other inarcs. This may
1777+ * seem wasteful, but it simplifies the logic, and we'll get rid of the
1778+ * clone state again at the bottom.
1779+ */
17091780 if (to -> nins > 1 )
17101781 {
17111782 s = newstate (nfa );
17121783 if (NISERR ())
17131784 return 0 ;
17141785 copyouts (nfa , to , s , 1 ); /* duplicate outarcs */
1715- cparc (nfa , con , from , s ); /* move constraint */
1786+ cparc (nfa , con , from , s ); /* move constraint arc */
17161787 freearc (nfa , con );
1788+ if (NISERR ())
1789+ return 0 ;
17171790 to = s ;
17181791 con = to -> ins ;
17191792 }
17201793 assert (to -> nins == 1 );
17211794
17221795 /* propagate the constraint into the to state's outarcs */
1723- for (a = to -> outs ; a != NULL ; a = nexta )
1796+ for (a = to -> outs ; a != NULL && ! NISERR () ; a = nexta )
17241797 {
17251798 nexta = a -> outchain ;
17261799 switch (combine (con , a ))
@@ -1731,13 +1804,23 @@ push(struct nfa * nfa,
17311804 case SATISFIED : /* no action needed */
17321805 break ;
17331806 case COMPATIBLE : /* swap the two arcs, more or less */
1734- s = newstate (nfa );
1735- if (NISERR ())
1736- return 0 ;
1737- cparc (nfa , con , s , a -> to ); /* anticipate move */
1807+ /* need an intermediate state, but might have one already */
1808+ for (s = * intermediates ; s != NULL ; s = s -> tmp )
1809+ {
1810+ assert (s -> nins > 0 && s -> nouts > 0 );
1811+ if (s -> ins -> from == from && s -> outs -> to == a -> to )
1812+ break ;
1813+ }
1814+ if (s == NULL )
1815+ {
1816+ s = newstate (nfa );
1817+ if (NISERR ())
1818+ return 0 ;
1819+ s -> tmp = * intermediates ;
1820+ * intermediates = s ;
1821+ }
1822+ cparc (nfa , con , s , a -> to );
17381823 cparc (nfa , a , from , s );
1739- if (NISERR ())
1740- return 0 ;
17411824 freearc (nfa , a );
17421825 break ;
17431826 default :
@@ -1748,7 +1831,8 @@ push(struct nfa * nfa,
17481831
17491832 /* remaining outarcs, if any, incorporate the constraint */
17501833 moveouts (nfa , to , from );
1751- dropstate (nfa , to ); /* will free the constraint */
1834+ freearc (nfa , con );
1835+ /* to state is now useless, but we leave it to pushfwd() to clean up */
17521836 return 1 ;
17531837}
17541838
0 commit comments