tacme: regexp fix (see libregexp change) - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 6f16e7fc1b35c2e99c9dc069e6ec81a9ac07ca21
 (DIR) parent a7511dd43d9afe8025a6d7bd2fcccf8f594a6f2b
 (HTM) Author: Russ Cox <rsc@swtch.com>
       Date:   Fri,  7 Dec 2007 15:33:38 -0500
       
       acme: regexp fix (see libregexp change)
       
       Diffstat:
         M src/cmd/acme/regx.c                 |     110 +++++++++++++++++--------------
       
       1 file changed, 59 insertions(+), 51 deletions(-)
       ---
 (DIR) diff --git a/src/cmd/acme/regx.c b/src/cmd/acme/regx.c
       t@@ -521,26 +521,56 @@ classmatch(int classno, int c, int negate)
        }
        
        /*
       - * Note optimization in addinst:
       - *         *l must be pending when addinst called; if *l has been looked
       - *                at already, the optimization is a bug.
       + * Add inst to the list [l, l+NLIST), but only if it is not there already.
       + * These work lists are stored and processed in increasing
       + * order of sep->p[0], so if the inst is there already, the one 
       + * there already is a more left match and takes priority.
       + * (This works for backward searches too, because there
       + * is no explicit comparison.)
         */
       -int
       -addinst(Ilist *l, Inst *inst, Rangeset *sep)
       +Ilist*
       +addinst1(Ilist *l, Inst *inst, Rangeset *sep)
        {
                Ilist *p;
        
       -        for(p = l; p->inst; p++){
       -                if(p->inst==inst){
       -                        if((sep)->r[0].q0 < p->se.r[0].q0)
       -                                p->se= *sep;        /* this would be bug */
       -                        return 0;        /* It's already there */
       +        for(p = l; p->inst; p++)
       +                if(p->inst==inst)
       +                        return 0;
       +        
       +        if(p == l+NLIST)
       +                return l+NLIST;
       +        
       +        p->inst = inst;
       +        p->se = *sep;
       +        (p+1)->inst = 0;
       +        return p;
       +}
       +
       +int
       +addinst(Ilist *l, Inst *inst, Rangeset *sep)
       +{
       +        Ilist *ap;
       +        
       +        ap = addinst1(l, inst, sep);
       +        if(ap == 0)
       +                return 0;
       +        if(ap == l+NLIST)
       +                return -1;
       +        
       +        /*
       +         * Added inst to list at ap.
       +         * Expand any ORs right now, so that entire 
       +         * work list ends up being sorted by increasing sep->p[0].
       +         */
       +        for(; ap->inst; ap++){
       +                if(ap->inst->type == OR){
       +                        if(addinst1(l, ap->inst->u1.left, &ap->se) == l+NLIST)
       +                                return -1;
       +                        if(addinst1(l, ap->inst->u.right, &ap->se) == l+NLIST)
       +                                return -1;
                        }
                }
       -        p->inst = inst;
       -        p->se= *sep;
       -        (p+1)->inst = nil;
       -        return 1;
       +        return 0;
        }
        
        int
       t@@ -557,7 +587,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                Inst *inst;
                Ilist *tlp;
                uint p;
       -        int nnl, ntl;
                int nc, c;
                int wrapped;
                int startchar;
       t@@ -566,7 +595,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                p = startp;
                startchar = 0;
                wrapped = 0;
       -        nnl = 0;
                if(startinst->type<OPERATOR)
                        startchar = startinst->type;
                list[0][0].inst = list[1][0].inst = nil;
       t@@ -576,6 +604,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                else
                        nc = runestrlen(r);
                /* Execute machine once for each character */
       +        nl = list[0];
                for(;;p++){
                doloop:
                        if(p>=eof || p>=nc){
       t@@ -594,7 +623,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                                }
                                c = 0;
                        }else{
       -                        if(((wrapped && p>=startp) || sel.r[0].q0>0) && nnl==0)
       +                        if(((wrapped && p>=startp) || sel.r[0].q0>0) && nl->inst==0)
                                        break;
                                if(t != nil)
                                        c = textreadc(t, p);
       t@@ -602,18 +631,15 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                                        c = r[p];
                        }
                        /* fast check for first char */
       -                if(startchar && nnl==0 && c!=startchar)
       +                if(startchar && nl->inst==0 && c!=startchar)
                                continue;
                        tl = list[flag];
                        nl = list[flag^=1];
                        nl->inst = nil;
       -                ntl = nnl;
       -                nnl = 0;
                        if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){
                                /* Add first instruction to this list */
                                sempty.r[0].q0 = p;
       -                        if(addinst(tl, startinst, &sempty))
       -                        if(++ntl >= NLIST){
       +                        if(addinst(tl, startinst, &sempty) < 0){
                Overflow:
                                        warning(nil, "regexp list overflow\n");
                                        sel.r[0].q0 = -1;
       t@@ -627,8 +653,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                                default:        /* regular character */
                                        if(inst->type==c){
                Addinst:
       -                                        if(addinst(nl, inst->u1.next, &tlp->se))
       -                                        if(++nnl >= NLIST)
       +                                        if(addinst(nl, inst->u1.next, &tlp->se) < 0)
                                                        goto Overflow;
                                        }
                                        break;
       t@@ -666,13 +691,8 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
                                                goto Addinst;
                                        break;
                                case OR:
       -                                /* evaluate right choice later */
       -                                if(addinst(tl, inst->u.right, &tlp->se))
       -                                if(++ntl >= NLIST)
       -                                        goto Overflow;
       -                                /* efficiency: advance and re-evaluate */
       -                                inst = inst->u1.left;
       -                                goto Switchstmt;
       +                                /* already expanded; just a placeholder */
       +                                break;
                                case END:        /* Match! */
                                        tlp->se.r[0].q1 = p;
                                        newmatch(&tlp->se);
       t@@ -700,13 +720,11 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                Inst *inst;
                Ilist *tlp;
                int p;
       -        int nnl, ntl;
                int c;
                int wrapped;
                int startchar;
        
                flag = 0;
       -        nnl = 0;
                wrapped = 0;
                p = startp;
                startchar = 0;
       t@@ -715,6 +733,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                list[0][0].inst = list[1][0].inst = nil;
                sel.r[0].q0= -1;
                /* Execute machine once for each character, including terminal NUL */
       +        nl = list[0];
                for(;;--p){
                doloop:
                        if(p <= 0){
       t@@ -734,24 +753,20 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                                }
                                c = 0;
                        }else{
       -                        if(((wrapped && p<=startp) || sel.r[0].q0>0) && nnl==0)
       +                        if(((wrapped && p<=startp) || sel.r[0].q0>0) && nl->inst==0)
                                        break;
                                c = textreadc(t, p-1);
                        }
                        /* fast check for first char */
       -                if(startchar && nnl==0 && c!=startchar)
       +                if(startchar && nl->inst==0 && c!=startchar)
                                continue;
                        tl = list[flag];
                        nl = list[flag^=1];
                        nl->inst = nil;
       -                ntl = nnl;
       -                nnl = 0;
                        if(sel.r[0].q0<0 && (!wrapped || p>startp)){
                                /* Add first instruction to this list */
       -                        /* the minus is so the optimizations in addinst work */
       -                        sempty.r[0].q0 = -p;
       -                        if(addinst(tl, bstartinst, &sempty))
       -                        if(++ntl >= NLIST){
       +                        sempty.r[0].q0 = p;
       +                        if(addinst(tl, bstartinst, &sempty) < 0){
                Overflow:
                                        warning(nil, "regexp list overflow\n");
                                        sel.r[0].q0 = -1;
       t@@ -765,8 +780,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                                default:        /* regular character */
                                        if(inst->type == c){
                Addinst:
       -                                        if(addinst(nl, inst->u1.next, &tlp->se))
       -                                        if(++nnl >= NLIST)
       +                                        if(addinst(nl, inst->u1.next, &tlp->se) < 0)
                                                        goto Overflow;
                                        }
                                        break;
       t@@ -804,15 +818,9 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
                                                goto Addinst;
                                        break;
                                case OR:
       -                                /* evaluate right choice later */
       -                                if(addinst(tl, inst->u.right, &tlp->se))
       -                                if(++ntl >= NLIST)
       -                                        goto Overflow;
       -                                /* efficiency: advance and re-evaluate */
       -                                inst = inst->u1.left;
       -                                goto Switchstmt;
       +                                /* already expanded; just a placeholder */
       +                                break;
                                case END:        /* Match! */
       -                                tlp->se.r[0].q0 = -tlp->se.r[0].q0; /* minus sign */
                                        tlp->se.r[0].q1 = p;
                                        bnewmatch(&tlp->se);
                                        break;