rc: handle 4-byte utf-8 - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 0786c9647c0232825777d8e1c464bef72fdac738
 (DIR) parent 72f66c2d3ca556d35b818158e9de578c4bfa153e
 (HTM) Author: Russ Cox <rsc@swtch.com>
       Date:   Sun,  2 Jan 2011 13:44:15 -0500
       
       rc: handle 4-byte utf-8
       
       R=rsc
       http://codereview.appspot.com/3833043
       
       Diffstat:
         M src/cmd/rc/glob.c                   |      13 +++++++++++++
         M src/cmd/rc/lex.c                    |       5 +++++
         M src/cmd/rc/rc.h                     |       2 ++
       
       3 files changed, 20 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/src/cmd/rc/glob.c b/src/cmd/rc/glob.c
       @@ -125,6 +125,17 @@ equtf(char *p, char *q)
                                return 1;        /* broken code at end of string! */
                        return p[2]==q[2];
                }
       +        if(fourbyte(*p)){
       +                if(p[1]!=q[1])
       +                        return 0;
       +                if(p[1]=='\0')
       +                        return 1;
       +                if(p[2]!=q[2])
       +                        return 0;
       +                if(p[2]=='\0')
       +                        return 1;
       +                return p[3]==q[3];
       +        }
                return 1;
        }
        /*
       @@ -137,6 +148,7 @@ nextutf(char *p)
        {
                if(twobyte(*p)) return p[1]=='\0'?p+1:p+2;
                if(threebyte(*p)) return p[1]=='\0'?p+1:p[2]=='\0'?p+2:p+3;
       +        if(fourbyte(*p)) return p[1]=='\0'?p+1:p[2]=='\0'?p+2:p[3]=='\0'?p+3:p+4;
                return p+1;
        }
        /*
       @@ -149,6 +161,7 @@ unicode(char *p)
                int u=*p&0xff;
                if(twobyte(u)) return ((u&0x1f)<<6)|(p[1]&0x3f);
                if(threebyte(u)) return (u<<12)|((p[1]&0x3f)<<6)|(p[2]&0x3f);
       +        if(fourbyte(u)) return (u<<18)|((p[1]&0x3f)<<12)|((p[2]&0x3f)<<6)|(p[3]&0x3f);
                return u;
        }
        /*
 (DIR) diff --git a/src/cmd/rc/lex.c b/src/cmd/rc/lex.c
       @@ -173,6 +173,11 @@ addutf(char *p, int c)
                        p = addtok(p, advance());
                        return addtok(p, advance());
                }
       +        if(fourbyte(c)){        /* 4-byte escape */
       +                p = addtok(p, advance());
       +                p = addtok(p, advance());
       +                return addtok(p, advance());
       +        }
                return p;
        }
        int lastdol;        /* was the last token read '$' or '$#' or '"'? */
 (DIR) diff --git a/src/cmd/rc/rc.h b/src/cmd/rc/rc.h
       @@ -121,6 +121,8 @@ int mypid;
        #define        onebyte(c)        ((c&0x80)==0x00)
        #define        twobyte(c)        ((c&0xe0)==0xc0)
        #define        threebyte(c)        ((c&0xf0)==0xe0)
       +#define        fourbyte(c)        ((c&0xf8)==0xf0)
       +
        char **argp;
        char **args;
        int nerror;                /* number of errors encountered during compilation */