Move buzhash code to chunker.c - dedup - deduplicating backup program
 (HTM) git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit b225ea67c4500992507a9e417e3e103777c2864e
 (DIR) parent db2431b5cc0b4c4d51ac809ff846145069d6e2cb
 (HTM) Author: sin <sin@2f30.org>
       Date:   Mon, 25 Feb 2019 15:23:01 +0000
       
       Move buzhash code to chunker.c
       
       This allows the compiler to inline the calls.  This saves about 10-15%
       of time during deduplication.
       
       Diffstat:
         M Makefile                            |       4 ++--
         M chunker.c                           |      65 +++++++++++++++++++++++++++++++
         M dedup.h                             |       4 ----
         D hash.c                              |      67 -------------------------------
       
       4 files changed, 67 insertions(+), 73 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -2,8 +2,8 @@ VERSION = 0.5
        PREFIX = /usr/local
        MANPREFIX = $(PREFIX)/man
        BIN = dedup
       -SRC = $(BIN).c cache.c chunker.c hash.c pack.c unpack.c utils.c
       -OBJ = $(BIN).o cache.o chunker.o hash.o pack.o unpack.o utils.o
       +SRC = $(BIN).c cache.c chunker.c pack.c unpack.c utils.c
       +OBJ = $(BIN).o cache.o chunker.o pack.o unpack.o utils.o
        DISTFILES = \
                $(SRC) \
                LICENSE \
 (DIR) diff --git a/chunker.c b/chunker.c
       @@ -7,6 +7,8 @@
        
        #include "dedup.h"
        
       +#define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
       +
        struct chunker {
                uint8_t *buf;
                size_t cap;
       @@ -16,6 +18,69 @@ struct chunker {
                int fd;
        };
        
       +/*
       + * Static table for use in buzhash algorithm.
       + * 256 * 32 bits randomly generated unique integers
       + *
       + * To get better pseudo-random results, there is exactly the same number
       + * of 0 and 1 spread amongst these integers. It means that there is
       + * exactly 50% chance that a XOR operation would flip all the bits in
       + * the hash.
       + */
       +static uint32_t buz[] = {
       +        0xbc9fa594,0x30a8f827,0xced627a7,0xdb46a745,0xcfa4a9e8,0x77cccb59,0xddb66276,0x3adc532f,
       +        0xfe8b67d3,0x8155b59e,0x0c893666,0x1d757009,0x17394ee4,0x85d94c07,0xcacd52da,0x076c6f79,
       +        0xead0a798,0x6c7ccb4a,0x2639a1b8,0x3aa5ae32,0x3e6218d2,0xb290d980,0xa5149521,0x4b426119,
       +        0xd3230fc7,0x677c1cc4,0x2b64603c,0x01fe92a8,0xbe358296,0xa7e7fac7,0xf509bf41,0x04b017ad,
       +        0xf900344c,0x8e14e202,0xb2a6e9b4,0x3db3c311,0x960286a8,0xf6bf0468,0xed54ec94,0xf358070c,
       +        0x6a4795dd,0x3f7b925c,0x5e13a060,0xfaecbafe,0x03c8bb55,0x8a56ba88,0x633e3b49,0xe036bbbe,
       +        0x1ed3dbb5,0x76e8ad74,0x79d346ab,0x44b4ccc4,0x71eb22d3,0xa1aa3f24,0x50e05b81,0xa3b450d3,
       +        0x7f5caffb,0xa1990650,0x54c44800,0xda134b65,0x72362eea,0xbd12b8e6,0xf7c99fdc,0x020d48c7,
       +        0x9d9c3d46,0x32b75615,0xe61923cf,0xadc09d8f,0xab11376b,0xd66fe4cd,0xb3b086b6,0xb8345b9f,
       +        0x59029667,0xae0e937c,0xcbd4d4ba,0x720bb3fb,0x5f7d2ca3,0xec24ba15,0x6b40109b,0xf0a54587,
       +        0x3acf9420,0x466e981d,0xc66dc124,0x150ef7b4,0xc3ce718e,0x136774f5,0x46684ab4,0xb4b490f0,
       +        0x26508a8b,0xf12febc8,0x4b99171b,0xfc373c84,0x339b5677,0x41703ff3,0x7cadbbd7,0x15ea24e2,
       +        0x7a2f9783,0xed6a383a,0x649eb072,0x79970941,0x2abd28ad,0x4375e00c,0x9df084f7,0x6fdeec6c,
       +        0x6619ac6d,0x7d256f4d,0x9b8e658a,0x3d7627e9,0xd5a98d45,0x15f84223,0x9b6acef5,0xf876be67,
       +        0xe3ae7089,0x84e2b64a,0x6818a969,0x86e9ba4e,0xa24a5b57,0x61570cf1,0xa5f8fc91,0x879d8383,
       +        0x91b13866,0x75e87961,0x16db8138,0x5a2ff6b8,0x8f664e9b,0x894e1496,0x88235c5b,0xcdb3b580,
       +        0xa2e80109,0xb0f88a82,0xd12cd340,0x93fbc37d,0xf4d1eb82,0xce42f309,0x16ffd2c2,0xb4dfef2b,
       +        0xb8b1a33e,0x4708a5e6,0xba66dd88,0xa9ec0da6,0x6f8ee2c9,0xad8b9993,0x1d6a25a8,0x1f3d08ce,
       +        0x149c04e7,0x5cd1fa51,0xb84c89c7,0xeced6f8c,0xe328b30f,0x084fa836,0x6d1bb1b7,0x94c78ea5,
       +        0x14973034,0xf1a1bcef,0x48b798d2,0xded9ca9e,0x5fd965d0,0x92544eb1,0x5e80f189,0xcbbf5e15,
       +        0x4d8121f0,0x5dd3b92f,0xd9ea98fb,0x2dbf5644,0x0fbcb9b7,0x20a1db53,0x7c3fcc98,0x36744fbd,
       +        0xced08954,0x8e7c5efe,0x3c5f6733,0x657477be,0x3630a02d,0x38bcbda0,0xb7702575,0x4a7f4bce,
       +        0x0e7660fe,0x4dcb91b5,0x4fd7ffd3,0x041821c1,0xa846a181,0xc8048e9e,0xd4b05072,0x986e0509,
       +        0xa00aaeeb,0x02e3526a,0x2fac4843,0xfa98e805,0x923ecd8d,0x395d9546,0x8674c3cd,0xae5a8a71,
       +        0x966dfe45,0x5c9ceba5,0x0830a1cf,0xa1750981,0x8f604480,0x28ea0c9a,0x0da12413,0x98b0b3c5,
       +        0xa21d473a,0x96ce4308,0xe9a1001b,0x8bbacb44,0x18bad3f4,0xe3121acb,0x46a9b45f,0x92cd9704,
       +        0xc1a7c619,0x3281e361,0x462e8c79,0x9e572f93,0x7239e5f0,0x67d8e6ba,0x13747ce3,0xf01ee64a,
       +        0xe7d0ae12,0xeea04088,0xe5b36767,0x17558eae,0x678ffbe6,0xe0bbc866,0x0c24adec,0xa9cbb869,
       +        0x3fd44ee1,0x9ca4ca06,0x04c0ef00,0x04589a21,0x9cf9c819,0x976f6ca1,0x8a30e66a,0x004d6f7e,
       +        0x384c8851,0x5bc97eb8,0xc6c49339,0x5aa386c7,0x74bdf8af,0x9b713750,0x4112f8c2,0x2895dae1,
       +        0xf576d905,0x9de98bce,0xb2b26bcd,0xd46707a0,0x147fbb46,0xa52c6e50,0xe43128fc,0x374ad964,
       +        0x8dfd4d53,0xc4d0c087,0x31dfb5ca,0xa44589b5,0x6b637e2e,0x663f6b45,0xd2d8baa0,0x1dac7e4c
       +};
       +
       +/* Buzhash: https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial */
       +static inline uint32_t
       +buzh_init(uint8_t *buf, size_t winsize)
       +{
       +        uint32_t fp;
       +        size_t i;
       +
       +        for (i = 1, fp = 0; i < winsize; i++, buf++)
       +                fp ^= ROTL(buz[*buf], (winsize - i) % 32);
       +
       +        return fp ^ buz[*buf];
       +}
       +
       +static inline uint32_t
       +buzh_update(uint32_t fp, uint8_t in, uint8_t out, size_t winsize)
       +{
       +        return ROTL(fp, 1) ^ ROTL(buz[out], winsize % 32) ^ buz[in];
       +}
       +
        static size_t
        calc_discr(size_t avg)
        {
 (DIR) diff --git a/dedup.h b/dedup.h
       @@ -62,10 +62,6 @@ ssize_t fill_chunker(struct chunker *chunker);
        uint8_t *get_chunk(struct chunker *chunker, size_t *chunk_size);
        void drain_chunker(struct chunker *chunker);
        
       -/* hash.c */
       -uint32_t buzh_init(uint8_t *buf, size_t size);
       -uint32_t buzh_update(uint32_t fp, uint8_t in, uint8_t out, size_t size);
       -
        /* pack.c */
        int pack(unsigned char *dst, char *fmt, ...);
        
 (DIR) diff --git a/hash.c b/hash.c
       @@ -1,67 +0,0 @@
       -#include <stddef.h>
       -#include <stdint.h>
       -
       -#define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
       -
       -/*
       - * Static table for use in buzhash algorithm.
       - * 256 * 32 bits randomly generated unique integers
       - *
       - * To get better pseudo-random results, there is exactly the same number
       - * of 0 and 1 spread amongst these integers. It means that there is
       - * exactly 50% chance that a XOR operation would flip all the bits in
       - * the hash.
       - */
       -static uint32_t buz[] = {
       -        0xbc9fa594,0x30a8f827,0xced627a7,0xdb46a745,0xcfa4a9e8,0x77cccb59,0xddb66276,0x3adc532f,
       -        0xfe8b67d3,0x8155b59e,0x0c893666,0x1d757009,0x17394ee4,0x85d94c07,0xcacd52da,0x076c6f79,
       -        0xead0a798,0x6c7ccb4a,0x2639a1b8,0x3aa5ae32,0x3e6218d2,0xb290d980,0xa5149521,0x4b426119,
       -        0xd3230fc7,0x677c1cc4,0x2b64603c,0x01fe92a8,0xbe358296,0xa7e7fac7,0xf509bf41,0x04b017ad,
       -        0xf900344c,0x8e14e202,0xb2a6e9b4,0x3db3c311,0x960286a8,0xf6bf0468,0xed54ec94,0xf358070c,
       -        0x6a4795dd,0x3f7b925c,0x5e13a060,0xfaecbafe,0x03c8bb55,0x8a56ba88,0x633e3b49,0xe036bbbe,
       -        0x1ed3dbb5,0x76e8ad74,0x79d346ab,0x44b4ccc4,0x71eb22d3,0xa1aa3f24,0x50e05b81,0xa3b450d3,
       -        0x7f5caffb,0xa1990650,0x54c44800,0xda134b65,0x72362eea,0xbd12b8e6,0xf7c99fdc,0x020d48c7,
       -        0x9d9c3d46,0x32b75615,0xe61923cf,0xadc09d8f,0xab11376b,0xd66fe4cd,0xb3b086b6,0xb8345b9f,
       -        0x59029667,0xae0e937c,0xcbd4d4ba,0x720bb3fb,0x5f7d2ca3,0xec24ba15,0x6b40109b,0xf0a54587,
       -        0x3acf9420,0x466e981d,0xc66dc124,0x150ef7b4,0xc3ce718e,0x136774f5,0x46684ab4,0xb4b490f0,
       -        0x26508a8b,0xf12febc8,0x4b99171b,0xfc373c84,0x339b5677,0x41703ff3,0x7cadbbd7,0x15ea24e2,
       -        0x7a2f9783,0xed6a383a,0x649eb072,0x79970941,0x2abd28ad,0x4375e00c,0x9df084f7,0x6fdeec6c,
       -        0x6619ac6d,0x7d256f4d,0x9b8e658a,0x3d7627e9,0xd5a98d45,0x15f84223,0x9b6acef5,0xf876be67,
       -        0xe3ae7089,0x84e2b64a,0x6818a969,0x86e9ba4e,0xa24a5b57,0x61570cf1,0xa5f8fc91,0x879d8383,
       -        0x91b13866,0x75e87961,0x16db8138,0x5a2ff6b8,0x8f664e9b,0x894e1496,0x88235c5b,0xcdb3b580,
       -        0xa2e80109,0xb0f88a82,0xd12cd340,0x93fbc37d,0xf4d1eb82,0xce42f309,0x16ffd2c2,0xb4dfef2b,
       -        0xb8b1a33e,0x4708a5e6,0xba66dd88,0xa9ec0da6,0x6f8ee2c9,0xad8b9993,0x1d6a25a8,0x1f3d08ce,
       -        0x149c04e7,0x5cd1fa51,0xb84c89c7,0xeced6f8c,0xe328b30f,0x084fa836,0x6d1bb1b7,0x94c78ea5,
       -        0x14973034,0xf1a1bcef,0x48b798d2,0xded9ca9e,0x5fd965d0,0x92544eb1,0x5e80f189,0xcbbf5e15,
       -        0x4d8121f0,0x5dd3b92f,0xd9ea98fb,0x2dbf5644,0x0fbcb9b7,0x20a1db53,0x7c3fcc98,0x36744fbd,
       -        0xced08954,0x8e7c5efe,0x3c5f6733,0x657477be,0x3630a02d,0x38bcbda0,0xb7702575,0x4a7f4bce,
       -        0x0e7660fe,0x4dcb91b5,0x4fd7ffd3,0x041821c1,0xa846a181,0xc8048e9e,0xd4b05072,0x986e0509,
       -        0xa00aaeeb,0x02e3526a,0x2fac4843,0xfa98e805,0x923ecd8d,0x395d9546,0x8674c3cd,0xae5a8a71,
       -        0x966dfe45,0x5c9ceba5,0x0830a1cf,0xa1750981,0x8f604480,0x28ea0c9a,0x0da12413,0x98b0b3c5,
       -        0xa21d473a,0x96ce4308,0xe9a1001b,0x8bbacb44,0x18bad3f4,0xe3121acb,0x46a9b45f,0x92cd9704,
       -        0xc1a7c619,0x3281e361,0x462e8c79,0x9e572f93,0x7239e5f0,0x67d8e6ba,0x13747ce3,0xf01ee64a,
       -        0xe7d0ae12,0xeea04088,0xe5b36767,0x17558eae,0x678ffbe6,0xe0bbc866,0x0c24adec,0xa9cbb869,
       -        0x3fd44ee1,0x9ca4ca06,0x04c0ef00,0x04589a21,0x9cf9c819,0x976f6ca1,0x8a30e66a,0x004d6f7e,
       -        0x384c8851,0x5bc97eb8,0xc6c49339,0x5aa386c7,0x74bdf8af,0x9b713750,0x4112f8c2,0x2895dae1,
       -        0xf576d905,0x9de98bce,0xb2b26bcd,0xd46707a0,0x147fbb46,0xa52c6e50,0xe43128fc,0x374ad964,
       -        0x8dfd4d53,0xc4d0c087,0x31dfb5ca,0xa44589b5,0x6b637e2e,0x663f6b45,0xd2d8baa0,0x1dac7e4c
       -};
       -
       -/* Buzhash: https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial */
       -uint32_t
       -buzh_init(uint8_t *buf, size_t winsize)
       -{
       -        uint32_t fp;
       -        size_t i;
       -
       -        for (i = 1, fp = 0; i < winsize; i++, buf++)
       -                fp ^= ROTL(buz[*buf], (winsize - i) % 32);
       -
       -        return fp ^ buz[*buf];
       -}
       -
       -uint32_t
       -buzh_update(uint32_t fp, uint8_t in, uint8_t out, size_t winsize)
       -{
       -        return ROTL(fp, 1) ^ ROTL(buz[out], winsize % 32) ^ buz[in];
       -}