tacme: revert regexp change - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 73778baeb38089772a806e6c2335f7ad6adb19a9
 (DIR) parent 608a09284ecf721781b2145ea62cce82f4f44712
 (HTM) Author: Russ Cox <rsc@swtch.com>
       Date:   Fri,  7 Dec 2007 17:32:35 -0500
       
       acme: revert regexp change
       
       Diffstat:
         M src/cmd/acme/regx.c                 |     110 ++++++++++++++-----------------
       
       1 file changed, 51 insertions(+), 59 deletions(-)
       ---
 (DIR) diff --git a/src/cmd/acme/regx.c b/src/cmd/acme/regx.c
       t@@ -521,56 +521,26 @@ classmatch(int classno, int c, int negate)
        }
        
        /*
       - * Add inst to the list [l, l+NLIST), but only if it is not there already.
       - * These work lists are stored and processed in increasing
       - * order of sep->p[0], so if the inst is there already, the one 
       - * there already is a more left match and takes priority.
       - * (This works for backward searches too, because there
       - * is no explicit comparison.)
       + * Note optimization in addinst:
       + *         *l must be pending when addinst called; if *l has been looked
       + *                at already, the optimization is a bug.
         */
       -Ilist*
       -addinst1(Ilist *l, Inst *inst, Rangeset *sep)
       -{
       -        Ilist *p;
       -
       -        for(p = l; p->inst; p++)
       -                if(p->inst==inst)
       -                        return 0;
       -        
       -        if(p == l+NLIST)
       -                return l+NLIST;
       -        
       -        p->inst = inst;
       -        p->se = *sep;
       -        (p+1)->inst = 0;
       -        return p;
       -}
       -
        int
        addinst(Ilist *l, Inst *inst, Rangeset *sep)
        {
       -        Ilist *ap;
       -        
       -        ap = addinst1(l, inst, sep);
       -        if(ap == 0)
       -                return 0;
       -        if(ap == l+NLIST)
       -                return -1;
       -        
       -        /*
       -         * Added inst to list at ap.
       -         * Expand any ORs right now, so that entire 
       -         * work list ends up being sorted by increasing sep->p[0].
       -         */
       -        for(; ap->inst; ap++){
       -                if(ap->inst->type == OR){
       -                        if(addinst1(l, ap->inst->u1.left, &ap->se) == l+NLIST)
       -                                return -1;
       -                        if(addinst1(l, ap->inst->u.right, &ap->se) == l+NLIST)
       -                                return -1;
       +        Ilist *p;
       +
       +        for(p = l; p->inst; p++){
       +                if(p->inst==inst){
       +                        if((sep)->r[0].q0 < p->se.r[0].q0)
       +                                p->se= *sep;        /* this would be bug */
       +                        return 0;        /* It's already there */
                        }
                }
       -        return 0;
       +        p->inst = inst;
       +        p->se= *sep;
       +        (p+1)->inst = nil;
       +        return 1;
        }
        
        int
       t@@ -587,6 +557,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                Inst *inst;
                Ilist *tlp;
                uint p;
       +        int nnl, ntl;
                int nc, c;
                int wrapped;
                int startchar;
       t@@ -595,6 +566,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                p = startp;
                startchar = 0;
                wrapped = 0;
       +        nnl = 0;
                if(startinst->type<OPERATOR)
                        startchar = startinst->type;
                list[0][0].inst = list[1][0].inst = nil;
       t@@ -604,7 +576,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                else
                        nc = runestrlen(r);
                /* Execute machine once for each character */
       -        nl = list[0];
                for(;;p++){
                doloop:
                        if(p>=eof || p>=nc){
       t@@ -623,7 +594,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                                }
                                c = 0;
                        }else{
       -                        if(((wrapped && p>=startp) || sel.r[0].q0>0) && nl->inst==0)
       +                        if(((wrapped && p>=startp) || sel.r[0].q0>0) && nnl==0)
                                        break;
                                if(t != nil)
                                        c = textreadc(t, p);
       t@@ -631,15 +602,18 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                                        c = r[p];
                        }
                        /* fast check for first char */
       -                if(startchar && nl->inst==0 && c!=startchar)
       +                if(startchar && nnl==0 && c!=startchar)
                                continue;
                        tl = list[flag];
                        nl = list[flag^=1];
                        nl->inst = nil;
       +                ntl = nnl;
       +                nnl = 0;
                        if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){
                                /* Add first instruction to this list */
                                sempty.r[0].q0 = p;
       -                        if(addinst(tl, startinst, &sempty) < 0){
       +                        if(addinst(tl, startinst, &sempty))
       +                        if(++ntl >= NLIST){
                Overflow:
                                        warning(nil, "regexp list overflow\n");
                                        sel.r[0].q0 = -1;
       t@@ -653,7 +627,8 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                                default:        /* regular character */
                                        if(inst->type==c){
                Addinst:
       -                                        if(addinst(nl, inst->u1.next, &tlp->se) < 0)
       +                                        if(addinst(nl, inst->u1.next, &tlp->se))
       +                                        if(++nnl >= NLIST)
                                                        goto Overflow;
                                        }
                                        break;
       t@@ -691,8 +666,13 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                                                goto Addinst;
                                        break;
                                case OR:
       -                                /* already expanded; just a placeholder */
       -                                break;
       +                                /* evaluate right choice later */
       +                                if(addinst(tl, inst->u.right, &tlp->se))
       +                                if(++ntl >= NLIST)
       +                                        goto Overflow;
       +                                /* efficiency: advance and re-evaluate */
       +                                inst = inst->u1.left;
       +                                goto Switchstmt;
                                case END:        /* Match! */
                                        tlp->se.r[0].q1 = p;
                                        newmatch(&tlp->se);
       t@@ -720,11 +700,13 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                Inst *inst;
                Ilist *tlp;
                int p;
       +        int nnl, ntl;
                int c;
                int wrapped;
                int startchar;
        
                flag = 0;
       +        nnl = 0;
                wrapped = 0;
                p = startp;
                startchar = 0;
       t@@ -733,7 +715,6 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                list[0][0].inst = list[1][0].inst = nil;
                sel.r[0].q0= -1;
                /* Execute machine once for each character, including terminal NUL */
       -        nl = list[0];
                for(;;--p){
                doloop:
                        if(p <= 0){
       t@@ -753,20 +734,24 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                                }
                                c = 0;
                        }else{
       -                        if(((wrapped && p<=startp) || sel.r[0].q0>0) && nl->inst==0)
       +                        if(((wrapped && p<=startp) || sel.r[0].q0>0) && nnl==0)
                                        break;
                                c = textreadc(t, p-1);
                        }
                        /* fast check for first char */
       -                if(startchar && nl->inst==0 && c!=startchar)
       +                if(startchar && nnl==0 && c!=startchar)
                                continue;
                        tl = list[flag];
                        nl = list[flag^=1];
                        nl->inst = nil;
       +                ntl = nnl;
       +                nnl = 0;
                        if(sel.r[0].q0<0 && (!wrapped || p>startp)){
                                /* Add first instruction to this list */
       -                        sempty.r[0].q0 = p;
       -                        if(addinst(tl, bstartinst, &sempty) < 0){
       +                        /* the minus is so the optimizations in addinst work */
       +                        sempty.r[0].q0 = -p;
       +                        if(addinst(tl, bstartinst, &sempty))
       +                        if(++ntl >= NLIST){
                Overflow:
                                        warning(nil, "regexp list overflow\n");
                                        sel.r[0].q0 = -1;
       t@@ -780,7 +765,8 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                                default:        /* regular character */
                                        if(inst->type == c){
                Addinst:
       -                                        if(addinst(nl, inst->u1.next, &tlp->se) < 0)
       +                                        if(addinst(nl, inst->u1.next, &tlp->se))
       +                                        if(++nnl >= NLIST)
                                                        goto Overflow;
                                        }
                                        break;
       t@@ -818,9 +804,15 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                                                goto Addinst;
                                        break;
                                case OR:
       -                                /* already expanded; just a placeholder */
       -                                break;
       +                                /* evaluate right choice later */
       +                                if(addinst(tl, inst->u.right, &tlp->se))
       +                                if(++ntl >= NLIST)
       +                                        goto Overflow;
       +                                /* efficiency: advance and re-evaluate */
       +                                inst = inst->u1.left;
       +                                goto Switchstmt;
                                case END:        /* Match! */
       +                                tlp->se.r[0].q0 = -tlp->se.r[0].q0; /* minus sign */
                                        tlp->se.r[0].q1 = p;
                                        bnewmatch(&tlp->se);
                                        break;