added character class function - iomenu - interactive terminal-based selection menu
 (HTM) git clone git://bitreich.org/iomenu git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/iomenu
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit fa78594463743c4e84f2ad6dbcf599326f170539
 (DIR) parent 353e9eb3f5f4df50ed5802ee14c593c29eba706e
 (HTM) Author: Josuah Demangeonā  ā µ <mail@josuah.net>
       Date:   Sun,  2 Apr 2017 15:51:21 +0200
       
       added character class function
       
       Diffstat:
         D text.c                              |     259 -------------------------------
         D text.h                              |      11 -----------
         A utf.c                               |     263 +++++++++++++++++++++++++++++++
         A utf.h                               |      14 ++++++++++++++
       
       4 files changed, 277 insertions(+), 270 deletions(-)
       ---
 (DIR) diff --git a/text.c b/text.c
       @@ -1,259 +0,0 @@
       -/*
       - * Functions handling UTF-8 srings:
       - *
       - * stdin  -> buffer -> stdout
       - * UTF-8  ->  rune  -> UTF-8
       - * char[] -> long[] -> char[]
       - */
       -
       -
       -#include <stdio.h>
       -#include <string.h>
       -#include <stdlib.h>
       -
       -#include "text.h"
       -
       -
       -/*
       - * Return the number of bytes in rune for the `n` next char in `s`,
       - * or 0 if ti is misencoded.
       - *
       - * Thanks to Connor Lane Smith for the idea of using 0x??.
       - */
       -int
       -utflen(char *s, int n)
       -{
       -        int len = 1;
       -        int continuation_bytes =
       -                (s[0] & 0x80) == 0x00 ? 0 :  /* 0xxxxxxx */
       -                (s[0] & 0xc0) == 0x80 ? 1 :  /* 10xxxxxx */
       -                (s[0] & 0xe0) == 0xc0 ? 2 :  /* 110xxxxx */
       -                (s[0] & 0xf0) == 0xe0 ? 3 :  /* 1110xxxx */
       -                (s[0] & 0xf8) == 0xf0 ? 4 :  /* 11110xxx */
       -                (s[0] & 0xfc) == 0xf8 ? 5 :  /* 111110xx */
       -                (s[0] & 0xfe) == 0xfc ? 6 :  /* 1111110x */
       -                (s[0] & 0xff) == 0xfe ? 7 :  /* 11111110 */
       -                                        8;   /* 11111111 */
       -
       -        if (continuation_bytes > 6 || continuation_bytes > n)
       -                return 0;
       -
       -        /* check if continuation bytes are 10xxxxxx and increment `len` */
       -        switch (continuation_bytes) {  /* FALLTHROUGH */
       -        case 7:        if ((s[6] & 0xc0) != 0x80) return 0; else len++;
       -        case 6:        if ((s[5] & 0xc0) != 0x80) return 0; else len++;
       -        case 5:        if ((s[4] & 0xc0) != 0x80) return 0; else len++;
       -        case 4:        if ((s[3] & 0xc0) != 0x80) return 0; else len++;
       -        case 3:        if ((s[2] & 0xc0) != 0x80) return 0; else len++;
       -        case 2:        if ((s[1] & 0xc0) != 0x80) return 0; else len++;
       -        case 0:        return len;
       -        default: return 0;
       -        }
       -}
       -
       -
       -/*
       - * Return the number of bytes required to display `rune`
       - */
       -int
       -runelen(long r)
       -{
       -        if (r <= 0x0000007f) return 1;
       -        if (r <= 0x000007ff) return 2;
       -        if (r <= 0x0000ffff) return 3;
       -        if (r <= 0x001fffff) return 4;
       -        if (r <= 0x03ffffff) return 5;
       -        if (r <= 0x7fffffff) return 6;
       -        return 0;
       -}
       -
       -
       -/*
       - * Sets `r` to a rune corresponding to the firsts `n` bytes of `s`
       - * and return the number of bytes read.
       - * if `s` is misencoded, the rune is stored as a negative value.
       - */
       -int
       -utftorune(long *r, char *s, int n)
       -{
       -        int len = utflen(s, n);
       -
       -        /* first byte */
       -        switch (len) {
       -        case 1: *r = s[0]; return 1;      /* 0xxxxxxx */
       -        case 2: *r = s[0] & 0x1f; break;  /* 110xxxxx */
       -        case 3: *r = s[0] & 0x0f; break;  /* 1110xxxx */
       -        case 4: *r = s[0] & 0x07; break;  /* 11110xxx */
       -        case 5: *r = s[0] & 0x03; break;  /* 111110xx */
       -        case 6: *r = s[0] & 0x01; break;  /* 1111110x */
       -        default: *r = -(unsigned char) s[0]; return 1;  /* misencoded */
       -        }
       -
       -        /* continuation bytes */
       -        for (int i = 1; i < len; i++)
       -                *r = (*r << 6) | (s[i] & 0x3f);  /* 10xxxxxx */
       -
       -        /* overlong sequences */
       -        if (runelen(*r) != len) {
       -                *r = -(unsigned char) s[0];
       -                return 1;
       -        }
       - 
       -        return len;
       -}
       -
       -
       -/*
       - * Encode the rune `r` in utf-8 in `s`, null-terminated, and return
       - * the number of bytes written, 0 if `r` is invalid.
       - */
       -int
       -runetoutf(char *s, long r)
       -{
       -        switch (runelen(r)) {
       -        case 1:
       -                s[0] = r;                          /* 0xxxxxxx */
       -                s[1] = '\0';
       -                return 1;
       -        case 2:
       -                s[0] = 0xc0 | (0x1f & (r >> 6));   /* 110xxxxx */
       -                s[1] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       -                s[2] = '\0';
       -                return 2;
       -        case 3:
       -                s[0] = 0xe0 | (0x0f & (r >> 12));  /* 1110xxxx */
       -                s[1] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       -                s[2] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       -                s[3] = '\0';
       -                return 3;
       -        case 4:
       -                s[0] = 0xf0 | (0x07 & (r >> 18));  /* 11110xxx */
       -                s[1] = 0x80 | (0x3f & (r >> 12));  /* 10xxxxxx */
       -                s[2] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       -                s[3] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       -                s[4] = '\0';
       -                return 4;
       -        case 5:
       -                s[0] = 0xf8 | (0x03 & (r >> 24));  /* 111110xx */
       -                s[1] = 0x80 | (0x3f & (r >> 18));  /* 10xxxxxx */
       -                s[2] = 0x80 | (0x3f & (r >> 12));  /* 10xxxxxx */
       -                s[3] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       -                s[4] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       -                s[5] = '\0';
       -                return 5;
       -        case 6:
       -                s[0] = 0xfc | (0x01 & (r >> 30));  /* 1111110x */
       -                s[1] = 0x80 | (0x3f & (r >> 24));  /* 10xxxxxx */
       -                s[2] = 0x80 | (0x3f & (r >> 18));  /* 10xxxxxx */
       -                s[3] = 0x80 | (0x3f & (r >> 12));  /* 10xxxxxx */
       -                s[4] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       -                s[5] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       -                s[6] = '\0';
       -                return 6;
       -        }
       -
       -        return 0;
       -}
       -
       -
       -/*
       - * Read a newly allocated string from `f` up to the first '\n'
       - * character or the end of the fifle.  It is stored as a rune array,
       - * and `r` is set to point to it.
       - */
       -int
       -getutf(long **r, FILE *f)
       -{
       -        int slen, rlen = 0, c, size = BUFSIZ;
       -        char *s;
       -
       -        if (!(s = malloc(size))) return -1;
       -        for (slen = 0; (c = fgetc(f)) != EOF && (c != '\n'); slen++) {
       -                s[slen] = c;
       -
       -                if (slen >= size)
       -                        if (!(s = realloc(s, ++size))) return -1;
       -        }
       -
       -        if (!(*r = malloc(size * sizeof (long)))) return -1;
       -        for (int i = 0; i < slen; rlen++)
       -                i += utftorune(*r + rlen, s + i, slen - i);
       -
       -        free(s);
       -        return rlen;
       -}
       -
       -
       -/*
       - * Fill `s` with a printable representation of `r` and return the
       - * width of the character.  The tab characters are converted to
       - * spaces as if it was at the column `col`.
       - */
       -int
       -runetoprint(char *s, long r, int col)
       -{
       -        /* invalid */
       -        if (r < 0) {
       -                sprintf(s, "[%02x]", (unsigned char) -r);
       -
       -        } else if (r == '\t') {
       -                int i;
       -                for (i = 0; i < (col + 1) % 8 - 1; i++)
       -                        s[i] = ' ';
       -                s[i] = '\0'; s[0] = '|';
       -
       -        /* ascii control */
       -        } else if (r == 0x7f || r < ' ') {
       -                sprintf(s, "[%02lx]", r);
       -
       -        /* utf-8 but not printable */
       -        } else if (
       -                /* unicode control */
       -                (0x80 <= r && r < 0xa0)          ||
       -
       -                /* outside range */
       -                (r > 0x10ffff)                   ||
       -
       -                /* noncharacters */
       -                (r % 0x010000 == 0x00fffe)       ||
       -                (r % 0x010000 == 0x00ffff)       ||
       -                (0x00fdd0 <= r && r <= 0x00fdef) ||
       -
       -                /* private use */
       -                (0x00e000 <= r && r <= 0x00f8ff) ||
       -                (0x0f0000 <= r && r <= 0x0ffffd) ||
       -                (0x100000 <= r && r <= 0x10fffd) ||
       -
       -                /* surrogates */
       -                (0x00d800 <= r && r <= 0x00dfff)
       -        ) {
       -                sprintf(s, "[%04x]", (unsigned int) r);
       -
       -        /* valid unicode characters */
       -        } else {
       -                runetoutf(s, r);
       -                return 1;
       -        }
       -
       -        return 0;
       -}
       -
       -
       -int
       -main()
       -{
       -        char s[BUFSIZ];
       -        long *r;
       -
       -        for (int len; (len = getutf(&r, stdin)) >= 0 && !feof(stdin); free(r)) {
       -                for (int i = 0; i < len; i++) {
       -                        runetoprint(s, r[i], 0);
       -                        fputs(s, stdout);
       -                }
       -
       -                putchar('\n');
       -        }
       -        free(r);
       -
       -        return 0;
       -}
 (DIR) diff --git a/text.h b/text.h
       @@ -1,11 +0,0 @@
       -/* rune / utf length */
       -int utflen(char *, int);
       -int runelen(long);
       -
       -/* decode / encode */
       -int utftorune(long *, char *, int);
       -int runetoutf(char *, long);
       -
       -/* stdin / stdout */
       -int getutf(long **, FILE *);
       -int runetoprint(char *, long, int);
 (DIR) diff --git a/utf.c b/utf.c
       @@ -0,0 +1,263 @@
       +/*
       + * Functions handling UTF-8 srings:
       + *
       + * stdin  -> buffer -> stdout
       + * UTF-8  ->  rune  -> UTF-8
       + * char[] -> long[] -> char[]
       + */
       +
       +
       +#include <stdio.h>
       +#include <string.h>
       +#include <stdlib.h>
       +
       +#include "utf.h"
       +
       +
       +/*
       + * Return the number of bytes in rune for the `n` next char in `s`,
       + * or 0 if ti is misencoded.
       + *
       + * Thanks to Connor Lane Smith for the idea of using 0x??.
       + */
       +int
       +utflen(char *s, int n)
       +{
       +        int len = 1;
       +        int continuation_bytes =
       +                (s[0] & 0x80) == 0x00 ? 0 :  /* 0xxxxxxx */
       +                (s[0] & 0xc0) == 0x80 ? 1 :  /* 10xxxxxx */
       +                (s[0] & 0xe0) == 0xc0 ? 2 :  /* 110xxxxx */
       +                (s[0] & 0xf0) == 0xe0 ? 3 :  /* 1110xxxx */
       +                (s[0] & 0xf8) == 0xf0 ? 4 :  /* 11110xxx */
       +                (s[0] & 0xfc) == 0xf8 ? 5 :  /* 111110xx */
       +                (s[0] & 0xfe) == 0xfc ? 6 :  /* 1111110x */
       +                (s[0] & 0xff) == 0xfe ? 7 :  /* 11111110 */
       +                                        8;   /* 11111111 */
       +
       +        if (continuation_bytes > 6 || continuation_bytes > n)
       +                return 0;
       +
       +        /* check if continuation bytes are 10xxxxxx and increment `len` */
       +        switch (continuation_bytes) {  /* FALLTHROUGH */
       +        case 7:        if ((s[6] & 0xc0) != 0x80) return 0; else len++;
       +        case 6:        if ((s[5] & 0xc0) != 0x80) return 0; else len++;
       +        case 5:        if ((s[4] & 0xc0) != 0x80) return 0; else len++;
       +        case 4:        if ((s[3] & 0xc0) != 0x80) return 0; else len++;
       +        case 3:        if ((s[2] & 0xc0) != 0x80) return 0; else len++;
       +        case 2:        if ((s[1] & 0xc0) != 0x80) return 0; else len++;
       +        case 0:        return len;
       +        default: return 0;
       +        }
       +}
       +
       +
       +/*
       + * Return the number of bytes required to display `rune`
       + */
       +int
       +runelen(long r)
       +{
       +        if (r <= 0x0000007f) return 1;
       +        if (r <= 0x000007ff) return 2;
       +        if (r <= 0x0000ffff) return 3;
       +        if (r <= 0x001fffff) return 4;
       +        if (r <= 0x03ffffff) return 5;
       +        if (r <= 0x7fffffff) return 6;
       +        return 0;
       +}
       +
       +
       +/*
       + * Sets `r` to a rune corresponding to the firsts `n` bytes of `s`
       + * and return the number of bytes read.
       + * if `s` is misencoded, the rune is stored as a negative value.
       + */
       +int
       +utftorune(long *r, char *s, int n)
       +{
       +        int len = utflen(s, n);
       +
       +        /* first byte */
       +        switch (len) {
       +        case 1: *r = s[0]; return 1;      /* 0xxxxxxx */
       +        case 2: *r = s[0] & 0x1f; break;  /* 110xxxxx */
       +        case 3: *r = s[0] & 0x0f; break;  /* 1110xxxx */
       +        case 4: *r = s[0] & 0x07; break;  /* 11110xxx */
       +        case 5: *r = s[0] & 0x03; break;  /* 111110xx */
       +        case 6: *r = s[0] & 0x01; break;  /* 1111110x */
       +        default: *r = -(unsigned char) s[0]; return 1;  /* misencoded */
       +        }
       +
       +        /* continuation bytes */
       +        for (int i = 1; i < len; i++)
       +                *r = (*r << 6) | (s[i] & 0x3f);  /* 10xxxxxx */
       +
       +        /* overlong sequences */
       +        if (runelen(*r) != len) {
       +                *r = -(unsigned char) s[0];
       +                return 1;
       +        }
       +
       +        return len;
       +}
       +
       +
       +/*
       + * Encode the rune `r` in utf-8 in `s`, null-terminated, and return
       + * the number of bytes written, 0 if `r` is invalid.
       + */
       +int
       +runetoutf(char *s, long r)
       +{
       +        switch (runelen(r)) {
       +        case 1:
       +                s[0] = r;                          /* 0xxxxxxx */
       +                s[1] = '\0';
       +                return 1;
       +        case 2:
       +                s[0] = 0xc0 | (0x1f & (r >> 6));   /* 110xxxxx */
       +                s[1] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       +                s[2] = '\0';
       +                return 2;
       +        case 3:
       +                s[0] = 0xe0 | (0x0f & (r >> 12));  /* 1110xxxx */
       +                s[1] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       +                s[2] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       +                s[3] = '\0';
       +                return 3;
       +        case 4:
       +                s[0] = 0xf0 | (0x07 & (r >> 18));  /* 11110xxx */
       +                s[1] = 0x80 | (0x3f & (r >> 12));  /* 10xxxxxx */
       +                s[2] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       +                s[3] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       +                s[4] = '\0';
       +                return 4;
       +        case 5:
       +                s[0] = 0xf8 | (0x03 & (r >> 24));  /* 111110xx */
       +                s[1] = 0x80 | (0x3f & (r >> 18));  /* 10xxxxxx */
       +                s[2] = 0x80 | (0x3f & (r >> 12));  /* 10xxxxxx */
       +                s[3] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       +                s[4] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       +                s[5] = '\0';
       +                return 5;
       +        case 6:
       +                s[0] = 0xfc | (0x01 & (r >> 30));  /* 1111110x */
       +                s[1] = 0x80 | (0x3f & (r >> 24));  /* 10xxxxxx */
       +                s[2] = 0x80 | (0x3f & (r >> 18));  /* 10xxxxxx */
       +                s[3] = 0x80 | (0x3f & (r >> 12));  /* 10xxxxxx */
       +                s[4] = 0x80 | (0x3f & (r >> 6));   /* 10xxxxxx */
       +                s[5] = 0x80 | (0x3f & (r));        /* 10xxxxxx */
       +                s[6] = '\0';
       +                return 6;
       +        }
       +
       +        return 0;
       +}
       +
       +
       +/*
       + * Read a newly allocated string from `f` up to the first '\n'
       + * character or the end of the fifle.  It is stored as a rune array,
       + * and `r` is set to point to it.
       + */
       +int
       +getutf(long **r, FILE *f)
       +{
       +        int slen, rlen = 0, c, size = BUFSIZ;
       +        char *s;
       +
       +        if (!(s = malloc(size))) return -1;
       +        for (slen = 0; (c = fgetc(f)) != EOF && (c != '\n'); slen++) {
       +                s[slen] = c;
       +
       +                if (slen >= size)
       +                        if (!(s = realloc(s, ++size))) return -1;
       +        }
       +
       +        if (!(*r = malloc(size * sizeof (long)))) return -1;
       +        for (int i = 0; i < slen; rlen++)
       +                i += utftorune(*r + rlen, s + i, slen - i);
       +
       +        free(s);
       +        return rlen;
       +}
       +
       +
       +/*
       + * Returns 1 if the rune is a printable character and 0 if not.
       + */
       +int
       +isprintrune(long r)
       +{
       +        return !(
       +                (r == 0x7f || r < ' ')           ||  /* ascii control */
       +
       +                (0x80 <= r && r < 0xa0)          ||  /* unicode control */
       +
       +                (r > 0x10ffff)                   ||  /* outside range */
       +
       +                (r % 0x010000 == 0x00fffe)       ||  /* noncharacters */
       +                (r % 0x010000 == 0x00ffff)       ||
       +                (0x00fdd0 <= r && r <= 0x00fdef) ||
       +
       +                (0x00e000 <= r && r <= 0x00f8ff) ||  /* private use */
       +                (0x0f0000 <= r && r <= 0x0ffffd) ||
       +                (0x100000 <= r && r <= 0x10fffd) ||
       +
       +                (0x00d800 <= r && r <= 0x00dfff)     /* surrogates */
       +        );
       +}
       +
       +
       +/*
       + * Fill `s` with a printable representation of `r` and return the
       + * width of the character.  The tab characters are converted to
       + * spaces as if it was at the column `col`.
       + */
       +int
       +runetoprint(char *s, long r, int col)
       +{
       +        if (r < 0) {
       +                return sprintf(s, "[%02x]", (unsigned char) -r);
       +
       +        } else if (r == 0x7f || r < ' ') {
       +                return sprintf(s, "[%02lx]", r);
       +
       +        } else if (!isprintrune(r)) {
       +                return sprintf(s, "[%04lx]", r);
       +
       +        } else if (r == '\t') {
       +                int i;
       +                for (i = 1; (col + i) % 8 != 0; i++)
       +                        s[i] = ' ';
       +                s[0] = ' '; s[i] = '\0';
       +                return i;
       +
       +        } else {
       +                runetoutf(s, r);
       +                return 1;
       +        }
       +
       +        return 0;
       +}
       +
       +
       +int
       +main()
       +{
       +        char s[BUFSIZ];
       +        long *r;
       +
       +        for (int len; (len = getutf(&r, stdin)) >= 0 && !feof(stdin); free(r)) {
       +                for (int i = 0; i < len; i++) {
       +                        runetoprint(s, r[i], 0);
       +                        fputs(s, stdout);
       +                }
       +
       +                putchar('\n');
       +        }
       +        free(r);
       +
       +        return 0;
       +}
 (DIR) diff --git a/utf.h b/utf.h
       @@ -0,0 +1,14 @@
       +/* rune / utf length */
       +int utflen(char *, int);
       +int runelen(long);
       +
       +/* decode / encode */
       +int utftorune(long *, char *, int);
       +int runetoutf(char *, long);
       +
       +/* rune class */
       +int isprintrune(long);
       +
       +/* stdin / stdout */
       +int getutf(long **, FILE *);
       +int runetoprint(char *, long, int);