added library to deal with text (libtext) - iomenu - interactive terminal-based selection menu
 (HTM) git clone git://bitreich.org/iomenu git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/iomenu
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 61b9b7eeca080291752b9813705c17208e83505a
 (DIR) parent 5fa48204850bed279e339eb370d2d595e297c274
 (HTM) Author: Josuah Demangeonā  ā µ <mail@josuah.net>
       Date:   Sun,  2 Apr 2017 01:29:06 +0200
       
       added library to deal with text (libtext)
       
       Diffstat:
         A text.c                              |     255 +++++++++++++++++++++++++++++++
         A text.h                              |       6 ++++++
       
       2 files changed, 261 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/text.c b/text.c
       @@ -0,0 +1,255 @@
       +/*
       + * Functions handling UTF-8 srings:
       + *
       + * stdin  -> buffer -> stdout
       + * char[] -> long[] -> char[]
       + * UTF-8  ->  rune  -> UTF-8
       + */
       +
       +
       +#include <stdio.h>
       +#include <string.h>
       +#include <stdlib.h>
       +
       +#include "text.h"
       +
       +
       +/*
       + * Return the number of bytes in rune for the `len` next char in `s`,
       + * or 0 if `utf` is misencoded.
       + *
       + * Thanks to Connor Lane Smith for some ideas.
       + */
       +int
       +utflen(char *s, int n) {
       +        int len = 1;
       +        int contiunation_bytes =
       +                (s[0] & 0x80) == 0x00 ? 0 :  /* 0xxxxxxx */
       +                (s[0] & 0xc0) == 0x80 ? 1 :  /* 10xxxxxx */
       +                (s[0] & 0xe0) == 0xc0 ? 2 :  /* 110xxxxx */
       +                (s[0] & 0xf0) == 0xe0 ? 3 :  /* 1110xxxx */
       +                (s[0] & 0xf8) == 0xf0 ? 4 :  /* 11110xxx */
       +                (s[0] & 0xfc) == 0xf8 ? 5 :  /* 111110xx */
       +                (s[0] & 0xfe) == 0xfc ? 6 :  /* 1111110x */
       +                (s[0] & 0xff) == 0xfe ? 7 :  /* 11111110 */
       +                                        8;   /* 11111111 */
       +
       +        if (contiunation_bytes > 6 || contiunation_bytes > n)
       +                return 0;
       +
       +        /* check if continuation bytes are 10xxxxxx and increment `len` */
       +        switch (contiunation_bytes) {  /* FALLTHROUGH */
       +        case 6:        if ((s[5] & 0xc0) != 0x80) return 0; else len++;
       +        case 5:        if ((s[4] & 0xc0) != 0x80) return 0; else len++;
       +        case 4:        if ((s[3] & 0xc0) != 0x80) return 0; else len++;
       +        case 3:        if ((s[2] & 0xc0) != 0x80) return 0; else len++;
       +        case 2:        if ((s[1] & 0xc0) != 0x80) return 0; else len++;
       +        case 0:        return len;
       +        default: return 0;
       +        }
       +}
       +
       +
       +/*
       + * return the number of bytes required to display `rune`
       + */
       +int
       +runelen(long r) {
       +        if (r <= 0x0000007f) return 1;
       +        if (r <= 0x000007ff) return 2;
       +        if (r <= 0x0000ffff) return 3;
       +        if (r <= 0x001fffff) return 4;
       +        if (r <= 0x03ffffff) return 5;
       +        if (r <= 0x7fffffff) return 6;
       +        return 0;
       +}
       +
       +
       +/*
       + * return the firsts `len` bytes in the sring poined by `utf` to a rune.
       + * if the `utf` is misencoded, the first char is returned as a
       + * negative value.
       + */
       +int
       +utftorune(long *r, char *s, int n) {
       +        int len = utflen(s, n);
       +
       +        /* first byte */
       +        switch (len) {
       +        case 1: *r = s[0]; return 1;      /* 0xxxxxxx */
       +        case 2: *r = s[0] & 0x1f; break;  /* 110xxxxx */
       +        case 3: *r = s[0] & 0x0f; break;  /* 1110xxxx */
       +        case 4: *r = s[0] & 0x07; break;  /* 11110xxx */
       +        case 5: *r = s[0] & 0x03; break;  /* 111110xx */
       +        case 6: *r = s[0] & 0x01; break;  /* 1111110x */
       +        default: *r = -(unsigned char) s[0]; return 1;  /* misencoded */
       +        }
       +
       +        /* continuation bytes */
       +        for (int i = 1; i < len; i++)
       +                *r = (*r << 6) | (s[i] & 0x3f);  /* 10xxxxxx */
       +
       +        /* overlong sequences */
       +        if (runelen(*r) != len) {
       +                *r = -(unsigned char) s[0];
       +                return 1;
       +        }
       + 
       +        return len;
       +}
       +
       +
       +/*
       + * return the next rune in the `len` next `utf`, or 0 if
       + * `utf` is misencoded.
       + */
       +int
       +runetoutf(char *s, long r) {
       +        switch (runelen(r)) {
       +        case 1:
       +                s[0] = r;                         /* 0xxxxxxx */
       +                s[1] = '\0';
       +                return 1;
       +        case 2:
       +                s[0] = 0xc0 | (0x3f & (r >> 6));  /* 110xxxxx */
       +                s[1] = 0x80 | (0x3f & (r));       /* 10xxxxxx */
       +                s[2] = '\0';
       +                return 2;
       +        case 3:
       +                s[0] = 0xe0 | (0x3f & (r >> 12)); /* 1110xxxx */
       +                s[1] = 0x80 | (0x3f & (r >> 6));  /* 10xxxxxx */
       +                s[2] = 0x80 | (0x3f & (r));       /* 10xxxxxx */
       +                s[3] = '\0';
       +                return 3;
       +        case 4:
       +                s[0] = 0xf0 | (0x3f & (r >> 6));  /* 11110xxx */
       +                s[1] = 0x80 | (0x3f & (r >> 6));  /* 10xxxxxx */
       +                s[2] = 0x80 | (0x3f & (r >> 6));  /* 10xxxxxx */
       +                s[3] = 0x80 | (0x3f & (r));       /* 10xxxxxx */
       +                s[4] = '\0';
       +                return 4;
       +        case 5:
       +                s[0] = 0xf8 | (0x3f & (r >> 24));  /* 111110xx */
       +                s[1] = 0x80 | (0x3f & (r >> 18));  /* 10xxxxxx */
       +                s[2] = 0x80 | (0x3f & (r >> 12));  /* 10xxxxxx */
       +                s[3] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       +                s[4] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       +                s[5] = '\0';
       +                return 5;
       +        case 6:
       +                s[0] = 0xfc | (0x3f & (r >> 30));  /* 1111110x */
       +                s[1] = 0x80 | (0x3f & (r >> 24));  /* 10xxxxxx */
       +                s[2] = 0x80 | (0x3f & (r >> 18));  /* 10xxxxxx */
       +                s[3] = 0x80 | (0x3f & (r >> 12));  /* 10xxxxxx */
       +                s[4] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       +                s[5] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       +                s[6] = '\0';
       +                return 6;
       +        }
       +
       +        return 0;
       +}
       +
       +
       +/*
       + * Fill `s` with a printable representation of `r` and return the
       + * width of the character
       + */
       +int
       +runetoprint(char *s, long r, int col)
       +{
       +        /* ASCII control characters and invalid characters */
       +        if (r == '\t') {
       +                int i;
       +                for (i = 0; i < (col + 1) % 8 - 1; i++)
       +                        s[i] = ' ';
       +                s[i] = '\0';
       +
       +        } else if (r < ' ' || r == 0x7f) {
       +                sprintf(s, "[%02x]", (char) r);
       +
       +        /* non-breaking space */
       +        } else if (r == 0xa0) {
       +                sprintf(s, "[ ]");
       +
       +        /* soft hyphen */
       +        } else if (r == 0xad) {
       +                sprintf(s, "[-]");
       +
       +        /* valid UTF-8 but not printable Unicode code points */
       +        } else if (
       +                /* unicode control */
       +                (0x80 <= r && r < 0xa0)          ||
       +
       +                /* outside range */
       +                (r > 0x10ffff)                   ||
       +
       +                /* noncharacters */
       +                (r % 0x010000 == 0x00fffe)       ||
       +                (r % 0x010000 == 0x00ffff)       ||
       +                (0x00fdd0 <= r && r <= 0x00fdef) ||
       +
       +                /* private use */
       +                (0x00e000 <= r && r <= 0x00f8ff) ||
       +                (0x0f0000 <= r && r <= 0x0ffffd) ||
       +                (0x100000 <= r && r <= 0x10fffd) ||
       +
       +                /* surrogates */
       +                (0x00d800 <= r && r <= 0x00dfff)
       +        ) {
       +                sprintf(s, "[%04x]", (unsigned int) r);
       +
       +        /* valid unicode characters */
       +        } else {
       +                runetoutf(s, r);
       +                return 1;
       +        }
       +
       +        return 0;
       +}
       +
       +
       +/*
       + * Read a newly allocated string `s` from `file` up to the first '\n'
       + * character or the end of the file.
       + */
       +int
       +getutf(char **s, FILE *file)
       +{
       +        int i; int c;
       +
       +        *s = malloc(BUFSIZ);
       +
       +        for (i = 0; (c = fgetc(file)) != EOF && (c != '\n'); i++) {
       +                (*s)[i] = c;
       +
       +                if ((size_t) i + 16 >= sizeof(s))
       +                        *s = realloc(*s, sizeof(s) + BUFSIZ);
       +        }
       +
       +        return i;
       +}
       +
       +
       +int
       +main()
       +{
       +        char s[7];
       +        long r;
       +
       +        for (int i = 0; i < 9000; i++) {
       +                runetoutf(s, i);
       +                utftorune(&r, s, 7);
       +                runetoutf(s, r);
       +                utftorune(&r, s, 7);
       +                runetoprint(s, r, 0);
       +
       +                printf("%5X: ", r);
       +                printf("'%s'\t", s);
       +
       +                if (i % 8 == 0)
       +                        puts("");
       +        }
       +
       +        return 0;
       +}
 (DIR) diff --git a/text.h b/text.h
       @@ -0,0 +1,6 @@
       +typedef int Rune;
       +
       +int utflen(char *, int);
       +int runelen(Rune);
       +int utftorune(Rune *, char *, int);
       +int runetoutf(char *, Rune);