ted: handle Unicode beyond the BMP correctly in list mode. - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 95220bf88775deab4a037264d08b21bacc612d70
 (DIR) parent 3850e6e177677885074c8896ef24534894726ad5
 (HTM) Author: sean <phonologus@gmail.com>
       Date:   Thu, 21 May 2020 16:10:30 +0100
       
       ed: handle Unicode beyond the BMP correctly in list mode.
       
       List mode was constrained to the BMP. This change introduces
       the following new list mode convention, using Go string literal syntax:
       
       Non-printing ASCII characters display as \xhh.
       Non-ASCII characters in the BMP display as \uhhhh.
       Characters beyond the BMP display as \Uhhhhhhhh.
       
       Diffstat:
         M man/man1/ed.1                       |      12 ++++++++++--
         M src/cmd/ed.c                        |      41 ++++++++++++++++++++++++-------
       
       2 files changed, 42 insertions(+), 11 deletions(-)
       ---
 (DIR) diff --git a/man/man1/ed.1 b/man/man1/ed.1
       @@ -441,10 +441,18 @@ a backspace as
        .LR \eb ,
        backslashes as
        .LR \e\e ,
       -and non-printing characters as
       +and non-printing ASCII characters as
        a backslash, an
        .LR x ,
       -and four hexadecimal digits.
       +and two hexadecimal digits.
       +Non-ASCII characters in the Basic Multilingual Plane
       +are printed as a backslash, a small
       +.LR u ,
       +and four hexadecimal digits; and characters above the
       +Basic Multilingual Plane are printed as a backslash,
       +a big
       +.LR U ,
       +and eight hexadecimal digits.
        Long lines are folded,
        with the second and subsequent sub-lines indented one tab stop.
        If the last character in the line is a blank,
 (DIR) diff --git a/src/cmd/ed.c b/src/cmd/ed.c
       @@ -21,6 +21,12 @@ enum
                EOF        = -1
        };
        
       +enum
       +{
       +        LINELEN = 70,        /* max number of glyphs in a display line */
       +        BELL = 6        /* A char could require up to BELL glyphs to display */
       +};
       +
        void        (*oldhup)(int);
        void        (*oldquit)(int);
        int*        addr1;
       @@ -40,7 +46,7 @@ int        ichanged;
        int        io;
        Biobuf        iobuf;
        int        lastc;
       -char        line[70];
       +char        line[LINELEN];
        Rune*        linebp;
        Rune        linebuf[LBSIZE];
        int        listf;
       @@ -1543,7 +1549,7 @@ putchr(int ac)
                                        *lp++ = 'n';
                                }
                        } else {
       -                        if(col > (72-6-2)) {
       +                        if(col > (LINELEN-BELL)) {
                                        col = 8;
                                        *lp++ = '\\';
                                        *lp++ = '\n';
       @@ -1558,15 +1564,32 @@ putchr(int ac)
                                        if(c == '\t')
                                                c = 't';
                                        col++;
       -                        } else
       -                        if(c<' ' || c>='\177') {
       +                        } else if (c<' ' || c=='\177') {
                                        *lp++ = '\\';
                                        *lp++ = 'x';
       -                                *lp++ =  hex[c>>12];
       -                                *lp++ =  hex[c>>8&0xF];
       -                                *lp++ =  hex[c>>4&0xF];
       -                                c     =  hex[c&0xF];
       +                                *lp++ = hex[(c>>4)&0xF];
       +                                c     = hex[c&0xF];
       +                                col += 3;
       +                        } else if (c>'\177' && c<=0xFFFF) {
       +                                *lp++ = '\\';
       +                                *lp++ = 'u';
       +                                *lp++ = hex[(c>>12)&0xF];
       +                                *lp++ = hex[(c>>8)&0xF];
       +                                *lp++ = hex[(c>>4)&0xF];
       +                                c     = hex[c&0xF];
                                        col += 5;
       +                        } else if (c>0xFFFF) {
       +                                *lp++ = '\\';
       +                                *lp++ = 'U';
       +                                *lp++ = hex[(c>>28)&0xF];
       +                                *lp++ = hex[(c>>24)&0xF];
       +                                *lp++ = hex[(c>>20)&0xF];
       +                                *lp++ = hex[(c>>16)&0xF];
       +                                *lp++ = hex[(c>>12)&0xF];
       +                                *lp++ = hex[(c>>8)&0xF];
       +                                *lp++ = hex[(c>>4)&0xF];
       +                                c     = hex[c&0xF];
       +                                col += 9;
                                }
                        }
                }
       @@ -1574,7 +1597,7 @@ putchr(int ac)
                rune = c;
                lp += runetochar(lp, &rune);
        
       -        if(c == '\n' || lp >= &line[sizeof(line)-5]) {
       +        if(c == '\n' || lp >= &line[LINELEN-BELL]) {
                        linp = line;
                        write(oflag? 2: 1, line, lp-line);
                        return;