When matching a pattern check if bottom bits of hash are 0 - dedup - deduplicating backup program
 (HTM) git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit d60ace395a74a5efe067ee9cd5d85446c7facf43
 (DIR) parent d8bfc3a69ce4c3c35dfa8c0d5cef3ce10e424300
 (HTM) Author: sin <sin@2f30.org>
       Date:   Tue, 26 Feb 2019 09:48:57 +0000
       
       When matching a pattern check if bottom bits of hash are 0
       
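       This approach is more efficient and easier to understand: a single
       mask with BLKSIZE_AVG - 1 replaces the modulo against a computed
       discriminator. It relies on BLKSIZE_AVG being a power of two, so
       that the mask selects exactly the low bits of the hash.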
       
       Diffstat:
         M chunker.c                           |      10 +---------
         M config.h                            |       1 +
       
       2 files changed, 2 insertions(+), 9 deletions(-)
       ---
 (DIR) diff --git a/chunker.c b/chunker.c
       @@ -14,7 +14,6 @@ struct chunker {
                size_t cap;
                size_t rpos;
                size_t wpos;
       -        size_t discr;
                int fd;
        };
        
       @@ -88,7 +87,7 @@ match_pattern(struct chunker *chunker, size_t chunk_size, uint32_t fp)
                        return 1;
                if (chunk_size < BLKSIZE_MIN)
                        return 0;
       -        return (fp % chunker->discr) == chunker->discr - 1;
       +        return (fp & HASHMASK_BITS) == 0;
        }
        
        static size_t
       @@ -123,12 +122,6 @@ get_chunk_size(struct chunker *chunker)
                return chunk_size;
        }
        
       -static size_t
       -calc_discr(size_t avg)
       -{
       -        return avg / (-1.42888852e-7 * avg + 1.33237515);
       -}
       -
        struct chunker *
        alloc_chunker(size_t cap, int fd)
        {
       @@ -145,7 +138,6 @@ alloc_chunker(size_t cap, int fd)
                chunker->rpos = 0;
                chunker->wpos = 0;
                chunker->fd = fd;
       -        chunker->discr = calc_discr(BLKSIZE_AVG);
        
                return chunker;
        }
 (DIR) diff --git a/config.h b/config.h
       @@ -1,4 +1,5 @@
        #define BLKSIZE_AVG ((size_t)524288)
        #define BLKSIZE_MIN ((BLKSIZE_AVG) / 4)
        #define BLKSIZE_MAX ((BLKSIZE_AVG) * 4)
       +#define HASHMASK_BITS (BLKSIZE_AVG - 1)
        #define WINSIZE 32