drkhsh.at

       Support UTF-8 characters as word delimiters - st - Personal fork of st
 (HTM) git clone git://git.drkhsh.at/st.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit caa97cc781ccf29f28c3d9e6683a66eb3f70e2bd
 (DIR) parent c03548750b2527a6ddb5edfd945c5799066a6224
 (HTM) Author: Jan Christoph Ebersbach <jceb@e-jc.de>
       Date:   Fri, 22 May 2015 16:06:57 +0200
       
       Support UTF-8 characters as word delimiters
       
       To make the utf8strchr function more generally useful, it could return
       the index of the matching UTF-8 character together with a Rune instead
       of a char*.  Since utf8strchr is currently only used by ISDELIM, I
       didn't bother to add that complexity.
       
       Diffstat:
         M st.c                                |      18 +++++++++++++++++-
       
       1 file changed, 17 insertions(+), 1 deletion(-)
       ---
 (DIR) diff --git a/st.c b/st.c
       @@ -71,7 +71,7 @@ char *argv0;
        #define ISCONTROLC0(c) (BETWEEN(c, 0, 0x1f) || (c) == '\177')
        #define ISCONTROLC1(c) (BETWEEN(c, 0x80, 0x9f))
        #define ISCONTROL(c) (ISCONTROLC0(c) || ISCONTROLC1(c))
       -#define ISDELIM(u) (BETWEEN(u, 0, 127) && strchr(worddelimiters, u) != NULL)
       +#define ISDELIM(u) (utf8strchr(worddelimiters, u) != NULL)
        #define LIMIT(x, a, b)    (x) = (x) < (a) ? (a) : (x) > (b) ? (b) : (x)
        #define ATTRCMP(a, b) ((a).mode != (b).mode || (a).fg != (b).fg || (a).bg != (b).bg)
        #define IS_SET(flag) ((term.mode & (flag)) != 0)
       @@ -473,6 +473,7 @@ static size_t utf8decode(char *, Rune *, size_t);
        static Rune utf8decodebyte(char, size_t *);
        static size_t utf8encode(Rune, char *);
        static char utf8encodebyte(Rune, size_t);
       +static char *utf8strchr(char *s, Rune u);
        static size_t utf8validate(Rune *, size_t);
        
        static ssize_t xwrite(int, const char *, size_t);
       @@ -640,6 +641,21 @@ utf8encodebyte(Rune u, size_t i) {
                return utfbyte[i] | (u & ~utfmask[i]);
        }
        
       +char *
       +utf8strchr(char *s, Rune u) {
       +        Rune r;
       +        size_t i, j, len;
       +
       +        len = strlen(s);
       +        for(i = 0, j = 0; i < len; i += j) {
       +                if(!(j = utf8decode(&s[i], &r, len - i)))
       +                        break;
       +                if(r == u)
       +                        return &(s[i]);
       +        }
       +        return NULL;
       +}
       +
        size_t
        utf8validate(Rune *u, size_t i) {
                if(!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))