Rework chunker interface - dedup - deduplicating backup program
 (HTM) git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 8ed79b4311c4715717198c0e04dd06efb29be265
 (DIR) parent 499f62b1b9e3e6db6f9f77defdb0c2bb15c25778
 (HTM) Author: sin <sin@2f30.org>
       Date:   Thu, 21 Feb 2019 13:48:22 +0000
       
       Rework chunker interface
       
       Eventually, the chunker will be implemented using a ring buffer.
       
       Diffstat:
         M chunker.c                           |      56 +++++++++++++++++--------------
         M dedup.c                             |       2 +-
         M dedup.h                             |       6 +++---
       
       3 files changed, 35 insertions(+), 29 deletions(-)
       ---
 (DIR) diff --git a/chunker.c b/chunker.c
       @@ -1,3 +1,4 @@
       +#include <assert.h>
        #include <err.h>
        #include <stdint.h>
        #include <stdio.h>
       @@ -9,8 +10,9 @@
        
        struct chunker {
                uint8_t *buf;
       -        size_t size;
       -        size_t pos;
       +        size_t cap;
       +        size_t rpos;
       +        size_t wpos;
                int fd;
        };
        
       @@ -22,8 +24,8 @@ get_chunk_size(struct chunker *chunker)
                uint32_t fp;
        
                /* buzhash should be at least WINSIZE */
       -        if (chunker->pos < WINSIZE)
       -                return chunker->pos;
       +        if (chunker->wpos - chunker->rpos < WINSIZE)
       +                return chunker->wpos - chunker->rpos;
        
                bp = chunker->buf;
        
       @@ -37,18 +39,18 @@ get_chunk_size(struct chunker *chunker)
                 * WINSIZE the smallest possible block size.
                 */
                fp = buzh_init(bp, WINSIZE);
       -        for (i = 0; i < chunker->pos - WINSIZE; i++) {
       +        for (i = chunker->rpos; i < chunker->wpos - WINSIZE; i++) {
                        if (i > 0)
                                fp = buzh_update(fp, bp[i - 1], bp[WINSIZE + i - 1],
                                                 WINSIZE);
                        if ((fp & HASHMSK) == 0)
                                return i + WINSIZE;
                }
       -        return chunker->pos;
       +        return chunker->wpos - chunker->rpos;
        }
        
        struct chunker *
       -alloc_chunker(size_t size, int fd)
       +alloc_chunker(size_t cap, int fd)
        {
                struct chunker *chunker;
        
       @@ -56,11 +58,12 @@ alloc_chunker(size_t size, int fd)
                if (chunker == NULL)
                        err(1, "malloc");
        
       -        chunker->buf = malloc(size);
       +        chunker->buf = malloc(cap);
                if (chunker->buf == NULL)
                        err(1, "malloc");
       -        chunker->size = size;
       -        chunker->pos = 0;
       +        chunker->cap = cap;
       +        chunker->rpos = 0;
       +        chunker->wpos = 0;
                chunker->fd = fd;
        
                return chunker;
       @@ -79,28 +82,31 @@ fill_chunker(struct chunker *chunker)
                uint8_t *bp;
                ssize_t n;
        
       -        bp = &chunker->buf[chunker->pos];
       -        n = read(chunker->fd, bp, chunker->size - chunker->pos);
       +        bp = &chunker->buf[chunker->wpos];
       +        n = read(chunker->fd, bp, chunker->cap - chunker->wpos);
                if (n < 0)
                        err(1, "read");
       -        chunker->pos += n;
       -        return chunker->pos;
       +        chunker->wpos += n;
       +        return chunker->wpos;
       +}
       +
       +uint8_t *
       +get_chunk(struct chunker *chunker, size_t *chunk_size)
       +{
       +        assert(chunker->rpos <= chunker->wpos);
       +        *chunk_size = get_chunk_size(chunker);
       +        chunker->rpos += *chunk_size;
       +        return chunker->buf;
        }
        
        void
       -drain_chunker(struct chunker *chunker, size_t chunk_size)
       +drain_chunker(struct chunker *chunker)
        {
                uint8_t *src, *dst;
        
       -        src = &chunker->buf[chunk_size];
       +        src = &chunker->buf[chunker->rpos];
                dst = chunker->buf;
       -        memmove(dst, src, chunker->pos - chunk_size);
       -        chunker->pos -= chunk_size;
       -}
       -
       -uint8_t *
       -get_chunk(struct chunker *chunker, size_t *size)
       -{
       -        *size = get_chunk_size(chunker);
       -        return chunker->buf;
       +        memmove(dst, src, chunker->wpos - chunker->rpos);
       +        chunker->wpos -= chunker->rpos;
       +        chunker->rpos = 0;
        }
 (DIR) diff --git a/dedup.c b/dedup.c
       @@ -445,7 +445,7 @@ dedup(int fd, char *msg)
                        SHA256_Update(&ctx, chunkp, chunk_size);
                        snap = grow_snap(snap, snap->nr_blk_descs + 1);
                        dedup_chunk(snap, chunkp, chunk_size);
       -                drain_chunker(chunker, chunk_size);
       +                drain_chunker(chunker);
                }
                SHA256_Final(snap->md, &ctx);
        
 (DIR) diff --git a/dedup.h b/dedup.h
       @@ -3,11 +3,11 @@
        struct chunker;
        
        /* chunker.c */
       -struct chunker *alloc_chunker(size_t size, int fd);
       +struct chunker *alloc_chunker(size_t cap, int fd);
        void free_chunker(struct chunker *chunker);
        ssize_t fill_chunker(struct chunker *chunker);
       -void drain_chunker(struct chunker *chunker, size_t n);
       -uint8_t *get_chunk(struct chunker *chunker, size_t *size);
       +uint8_t *get_chunk(struct chunker *chunker, size_t *chunk_size);
       +void drain_chunker(struct chunker *chunker);
        
        /* hash.c */
        uint32_t buzh_init(uint8_t *buf, size_t size);