ed: handle Unicode beyond the BMP correctly in list mode. - plan9port - [fork] Plan 9 from user space (HTM) git clone git://src.adamsgaard.dk/plan9port (DIR) Log (DIR) Files (DIR) Refs (DIR) README (DIR) LICENSE --- (DIR) commit 95220bf88775deab4a037264d08b21bacc612d70 (DIR) parent 3850e6e177677885074c8896ef24534894726ad5 (HTM) Author: sean <phonologus@gmail.com> Date: Thu, 21 May 2020 16:10:30 +0100 ed: handle Unicode beyond the BMP correctly in list mode. List mode was constrained to the BMP. This change introduces the following new list mode convention, using Go string literal syntax: Non-printing ASCII characters display as \xhh. Non-ASCII characters in the BMP display as \uhhhh. Characters beyond the BMP display as \Uhhhhhhhh. Diffstat: M man/man1/ed.1 | 12 ++++++++++-- M src/cmd/ed.c | 41 ++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 11 deletions(-) --- (DIR) diff --git a/man/man1/ed.1 b/man/man1/ed.1 t@@ -441,10 +441,18 @@ a backspace as .LR \eb , backslashes as .LR \e\e , -and non-printing characters as +and non-printing ASCII characters as a backslash, an .LR x , -and four hexadecimal digits. +and two hexadecimal digits. +non-ASCII characters in the Basic Multilingual Plane +are printed as a backslash, a small +.LR u , +and four hexadecimal digits; and characters above the +Basic Multilingual Plane are printed as a backslash, +a big +.LR U , +and six hexadecimal digits. Long lines are folded, with the second and subsequent sub-lines indented one tab stop. 
If the last character in the line is a blank, (DIR) diff --git a/src/cmd/ed.c b/src/cmd/ed.c t@@ -21,6 +21,12 @@ enum EOF = -1 }; +enum +{ + LINELEN = 70, /* max number of glyphs in a display line */ + BELL = 6 /* A char could require up to BELL glyphs to display */ +}; + void (*oldhup)(int); void (*oldquit)(int); int* addr1; t@@ -40,7 +46,7 @@ int ichanged; int io; Biobuf iobuf; int lastc; -char line[70]; +char line[LINELEN]; Rune* linebp; Rune linebuf[LBSIZE]; int listf; t@@ -1543,7 +1549,7 @@ putchr(int ac) *lp++ = 'n'; } } else { - if(col > (72-6-2)) { + if(col > (LINELEN-BELL)) { col = 8; *lp++ = '\\'; *lp++ = '\n'; t@@ -1558,15 +1564,32 @@ putchr(int ac) if(c == '\t') c = 't'; col++; - } else - if(c<' ' || c>='\177') { + } else if (c<' ' || c=='\177') { *lp++ = '\\'; *lp++ = 'x'; - *lp++ = hex[c>>12]; - *lp++ = hex[c>>8&0xF]; - *lp++ = hex[c>>4&0xF]; - c = hex[c&0xF]; + *lp++ = hex[(c>>4)&0xF]; + c = hex[c&0xF]; + col += 3; + } else if (c>'\177' && c<=0xFFFF) { + *lp++ = '\\'; + *lp++ = 'u'; + *lp++ = hex[(c>>12)&0xF]; + *lp++ = hex[(c>>8)&0xF]; + *lp++ = hex[(c>>4)&0xF]; + c = hex[c&0xF]; col += 5; + } else if (c>0xFFFF) { + *lp++ = '\\'; + *lp++ = 'U'; + *lp++ = hex[(c>>28)&0xF]; + *lp++ = hex[(c>>24)&0xF]; + *lp++ = hex[(c>>20)&0xF]; + *lp++ = hex[(c>>16)&0xF]; + *lp++ = hex[(c>>12)&0xF]; + *lp++ = hex[(c>>8)&0xF]; + *lp++ = hex[(c>>4)&0xF]; + c = hex[c&0xF]; + col += 9; } } } t@@ -1574,7 +1597,7 @@ putchr(int ac) rune = c; lp += runetochar(lp, &rune); - if(c == '\n' || lp >= &line[sizeof(line)-5]) { + if(c == '\n' || lp >= &line[LINELEN-BELL]) { linp = line; write(oflag? 2: 1, line, lp-line); return;