Tweak params - dedup - deduplicating backup program
 (HTM) git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 23e23d22beed84fe844c6d76f453667d9a6f95c6
 (DIR) parent e8031b23797e666b43f96906372d97e2da4f4d0a
 (HTM) Author: sin <sin@2f30.org>
       Date:   Fri, 22 Feb 2019 13:15:25 +0000
       
       Tweak params
       
       The parameters were taken from casync.
       
       Diffstat:
         M chunker.c                           |      35 ++++++++++++++++++++++++-------
         M config.h                            |       5 +++--
         M dedup.c                             |      18 +++++++++---------
       
       3 files changed, 39 insertions(+), 19 deletions(-)
       ---
 (DIR) diff --git a/chunker.c b/chunker.c
       @@ -16,15 +16,33 @@ struct chunker {
        };
        
        static size_t
       +calc_discr(size_t avg)
       +{
       +        return avg / (-1.42888852e-7 * avg + 1.33237515);
       +}
       +
       +static int
       +match_pattern(size_t chunk_size, uint32_t fp)
       +{
       +        size_t discr = calc_discr(BLKSIZE_AVG);
       +
       +        if (chunk_size >= BLKSIZE_MAX)
       +                return 1;
       +        if (chunk_size < BLKSIZE_MIN)
       +                return 0;
       +        return (fp % discr) == discr - 1;
       +}
       +
       +static size_t
        get_chunk_size(struct chunker *chunker)
        {
                uint8_t *bp;
                uint32_t fp;
       -        size_t i;
       +        size_t i, chunk_size;
        
       -        /* buzhash should be at least WINSIZE */
       -        if (chunker->wpos - chunker->rpos < WINSIZE)
       -                return chunker->wpos - chunker->rpos;
       +        chunk_size = chunker->wpos - chunker->rpos;
       +        if (chunk_size < WINSIZE)
       +                return chunk_size;
        
                bp = chunker->buf;
        
       @@ -39,13 +57,14 @@ get_chunk_size(struct chunker *chunker)
                 */
                fp = buzh_init(bp, WINSIZE);
                for (i = chunker->rpos; i < chunker->wpos - WINSIZE; i++) {
       +                chunk_size = i + WINSIZE;
                        if (i > 0)
       -                        fp = buzh_update(fp, bp[i - 1], bp[WINSIZE + i - 1],
       +                        fp = buzh_update(fp, bp[i - 1], bp[chunk_size - 1],
                                                 WINSIZE);
       -                if ((fp & HASHMSK) == 0)
       -                        return i + WINSIZE;
       +                if (match_pattern(chunk_size, fp) == 1)
       +                        return chunk_size;
                }
       -        return chunker->wpos - chunker->rpos;
       +        return chunk_size;
        }
        
        struct chunker *
 (DIR) diff --git a/config.h b/config.h
       @@ -1,3 +1,4 @@
       -#define BLKSIZE 131072
       +#define BLKSIZE_AVG ((size_t)131072)
       +#define BLKSIZE_MIN ((BLKSIZE_AVG) / 4)
       +#define BLKSIZE_MAX ((BLKSIZE_AVG) * 4)
        #define WINSIZE 32
       -#define HASHMSK ((1ul << 15) - 1)
 (DIR) diff --git a/dedup.c b/dedup.c
       @@ -325,9 +325,9 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
                struct blk_desc blk_desc;
                size_t n;
        
       -        comp_buf = alloc_buf(comp_size(BLKSIZE));
       +        comp_buf = alloc_buf(comp_size(BLKSIZE_MAX));
        
       -        n = comp(chunkp, comp_buf, chunk_size, comp_size(BLKSIZE));
       +        n = comp(chunkp, comp_buf, chunk_size, comp_size(BLKSIZE_MAX));
                hash_blk(comp_buf, n, md);
        
                snaphdr.st.orig_size += chunk_size;
       @@ -374,7 +374,7 @@ dedup(int fd, char *msg)
                ssize_t n;
        
                snap = alloc_snap();
       -        chunker = alloc_chunker(BLKSIZE, fd);
       +        chunker = alloc_chunker(BLKSIZE_MAX, fd);
        
                SHA256_Init(&ctx);
                while ((n = fill_chunker(chunker)) > 0) {
       @@ -417,14 +417,14 @@ extract(struct snapshot *snap, void *arg)
                if (memcmp(snap->md, args->md, sizeof(snap->md)) != 0)
                        return WALK_CONTINUE;
        
       -        buf[0] = alloc_buf(BLKSIZE);
       -        buf[1] = alloc_buf(comp_size(BLKSIZE));
       +        buf[0] = alloc_buf(BLKSIZE_MAX);
       +        buf[1] = alloc_buf(comp_size(BLKSIZE_MAX));
                for (i = 0; i < snap->nr_blk_descs; i++) {
                        size_t blksize;
        
                        read_blk(buf[1], &snap->blk_desc[i]);
                        blksize = decomp(buf[1], buf[0], snap->blk_desc[i].size,
       -                                 BLKSIZE);
       +                                 BLKSIZE_MAX);
                        xwrite(args->fd, buf[0], blksize);
                }
                free_buf(buf[1]);
       @@ -440,7 +440,7 @@ check(struct snapshot *snap, void *arg)
                SHA256_CTX ctx;
                uint64_t i;
        
       -        buf = alloc_buf(comp_size(BLKSIZE));
       +        buf = alloc_buf(comp_size(BLKSIZE_MAX));
                /*
                 * Calculate hash for each block and compare
                 * against snapshot entry block descriptor
       @@ -491,7 +491,7 @@ rebuild_cache(struct snapshot *snap, void *arg)
                SHA256_CTX ctx;
                uint64_t i;
        
       -        buf = alloc_buf(comp_size(BLKSIZE));
       +        buf = alloc_buf(comp_size(BLKSIZE_MAX));
                for (i = 0; i < snap->nr_blk_descs; i++) {
                        struct cache_entry *ent;
        
       @@ -590,7 +590,7 @@ init(void)
                                     VER_MAJ, VER_MIN, maj, min);
                } else {
                        snaphdr.flags = (VER_MAJ << 8) | VER_MIN;
       -                snaphdr.st.min_blk_size = comp_size(BLKSIZE);
       +                snaphdr.st.min_blk_size = comp_size(BLKSIZE_MAX);
                        xwrite(ifd, &snaphdr, sizeof(snaphdr));
                }