utf8pad: fix byte-seek issue with negative width codepoints in the range >= 127 - stagit-gopher - A git gopher frontend. (mirror)
 (HTM) git clone git://bitreich.org/stagit-gopher/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/stagit-gopher/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 05a08e8ab50a8da5b2896c3f5887801d059f48dd
 (DIR) parent a9c90b585f158f98dd0997d1509e83f85dd87498
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat,  9 Jan 2021 16:19:18 +0100
       
       utf8pad: fix byte-seek issue with negative width codepoints in the range >= 127
       
       For example: for "\xef\xbf\xb7" (codepoint 0xfff7), wcwidth(wc) returns -1.
       The next byte was incorrectly seeked, but the codepoint itself was valid
       (mbtowc).
       
       Diffstat:
         M stagit-gopher-index.c               |       7 +++----
         M stagit-gopher.c                     |       7 +++----
       
       2 files changed, 6 insertions(+), 8 deletions(-)
       ---
 (DIR) diff --git a/stagit-gopher-index.c b/stagit-gopher-index.c
       @@ -38,19 +38,18 @@ utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad)
        
                slen = strlen(s);
                for (i = 0; i < slen; i += inc) {
       -                inc = 1;
       +                inc = 1; /* next byte */
                        if ((unsigned char)s[i] < 32)
                                continue;
        
                        rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4);
       +                inc = rl;
                        if (rl < 0) {
                                mbtowc(NULL, NULL, 0); /* reset state */
       -                        inc = 1; /* next byte */
       +                        inc = 1; /* invalid, seek next byte */
                                w = 1; /* replacement char is one width */
                        } else if ((w = wcwidth(wc)) == -1) {
                                continue;
       -                } else {
       -                        inc = rl;
                        }
        
                        if (col + w > len || (col + w == len && s[i + inc])) {
 (DIR) diff --git a/stagit-gopher.c b/stagit-gopher.c
       @@ -100,19 +100,18 @@ utf8pad(char *buf, size_t bufsiz, const char *s, size_t len, int pad)
        
                slen = strlen(s);
                for (i = 0; i < slen; i += inc) {
       -                inc = 1;
       +                inc = 1; /* next byte */
                        if ((unsigned char)s[i] < 32)
                                continue;
        
                        rl = mbtowc(&wc, &s[i], slen - i < 4 ? slen - i : 4);
       +                inc = rl;
                        if (rl < 0) {
                                mbtowc(NULL, NULL, 0); /* reset state */
       -                        inc = 1; /* next byte */
       +                        inc = 1; /* invalid, seek next byte */
                                w = 1; /* replacement char is one width */
                        } else if ((w = wcwidth(wc)) == -1) {
                                continue;
       -                } else {
       -                        inc = rl;
                        }
        
                        if (col + w > len || (col + w == len && s[i + inc])) {