Implement dup-check(1) - dedup - deduplicating backup program
 (HTM) git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 3205dbd75458fc84e08ca98ee1e1fe17b19f2693
 (DIR) parent 2d0701e96dd5242eefe456dca44a5c2b8ba67eb5
 (HTM) Author: sin <sin@2f30.org>
       Date:   Thu, 25 Apr 2019 20:54:30 +0100
       
       Implement dup-check(1)
       
       Diffstat:
         M Makefile                            |      12 ++++++++----
         M TODO                                |       1 -
         M bcompress.c                         |      11 +++++++++++
         M block.c                             |      12 ++++++++++++
         M block.h                             |       2 ++
         M bstorage.c                          |      49 +++++++++++++++++++++++++++++++
         M dotest                              |       4 ++++
         A dup-check.1                         |      25 +++++++++++++++++++++++++
         A dup-check.c                         |      81 ++++++++++++++++++++++++++++++
       
       9 files changed, 192 insertions(+), 5 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -1,7 +1,7 @@
        include config.mk
        
       -BIN = dup-init dup-pack dup-unpack
       -MAN = dup-init.1 dup-pack.1 dup-unpack.1
       +BIN = dup-check dup-init dup-pack dup-unpack
       +MAN = dup-check.1 dup-init.1 dup-pack.1 dup-unpack.1
        
        HDR = \
                arg.h \
       @@ -24,6 +24,7 @@ COMMOBJ = \
                snap.o \
                unpack.o \
        
       +DCHECKOBJ = $(COMMOBJ) dup-check.o
        DINITOBJ = $(COMMOBJ) dup-init.o
        DPACKOBJ = $(COMMOBJ) dup-pack.o
        DUNPACKOBJ = $(COMMOBJ) dup-unpack.o
       @@ -32,10 +33,10 @@ LDLIBS = -lsnappy
        
        all: $(BIN)
        
       -$(DINITOBJ) $(DPACKOBJ) $(DUNPACKOBJ): $(HDR)
       +$(DCHECKOBJ) $(DINITOBJ) $(DPACKOBJ) $(DUNPACKOBJ): $(HDR)
        
        clean:
       -        rm -f $(DINITOBJ) $(DPACKOBJ) $(DUNPACKOBJ) $(BIN)
       +        rm -f $(DCHECKOBJ) $(DINITOBJ) $(DPACKOBJ) $(DUNPACKOBJ) $(BIN)
                rm -rf dedup-$(VERSION) dedup-$(VERSION).tar.gz
        
        install: all
       @@ -58,6 +59,9 @@ dist: clean
        .c.o:
                $(CC) $(CPPFLAGS) $(CFLAGS) -c $<
        
       +dup-check: $(DCHECKOBJ)
       +        $(CC) -o $@ $(DCHECKOBJ) $(LDFLAGS) $(LDLIBS)
       +
        dup-init: $(DINITOBJ)
                $(CC) -o $@ $(DINITOBJ) $(LDFLAGS) $(LDLIBS)
        
 (DIR) diff --git a/TODO b/TODO
       @@ -1,5 +1,4 @@
        Use a ring buffer in the chunker (avoid memmove() call)
        Create a library archive out of the blake2b files and link with it
        pledge/unveil support
       -Implement dup-check(1)
        Use flock() to avoid corruption
 (DIR) diff --git a/bcompress.c b/bcompress.c
       @@ -25,6 +25,7 @@ static int bccreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
        static int bcopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar);
        static int bcput(struct bctx *bctx, void *buf, size_t n, unsigned char *md);
        static int bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n);
       +static int bccheck(struct bctx *bctx, unsigned char *md);
        static int bcsync(struct bctx *bctx);
        static int bcclose(struct bctx *bctx);
        
       @@ -33,6 +34,7 @@ static struct bops bops = {
                .open = bcopen,
                .put = bcput,
                .get = bcget,
       +        .check = bccheck,
                .sync = bcsync,
                .close = bcclose,
        };
       @@ -236,6 +238,15 @@ bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n)
        }
        
        static int
       +bccheck(struct bctx *bctx, unsigned char *md)
       +{
       +        struct bops *bops = bstorageops();
       +
       +        return bops->check(bctx, md);   /* pass through to the storage ops' check */
       +
       +}
       +
       +static int
        bcsync(struct bctx *bctx)
        {
                struct bops *bops = bstorageops();
 (DIR) diff --git a/block.c b/block.c
       @@ -78,6 +78,18 @@ bget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n)
        }
        
        int
       +bcheck(struct bctx *bctx, unsigned char *md)
       +{
       +        struct bops *bops;
       +
       +        if (bctx == NULL || md == NULL) /* reject missing arguments */
       +                return -1;
       +
       +        bops = bcompressops();
       +        return bops->check(bctx, md);   /* forward to the compression ops' check */
       +}
       +
       +int
        bsync(struct bctx *bctx)
        {
                struct bops *bops;
 (DIR) diff --git a/block.h b/block.h
       @@ -17,6 +17,7 @@ struct bops {
                int (*open)(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar);
                int (*put)(struct bctx *bctx, void *buf, size_t n, unsigned char *md);
                int (*get)(struct bctx *bctx, unsigned char *md, void *buf, size_t *n);
       +        int (*check)(struct bctx *bctx, unsigned char *md);
                int (*sync)(struct bctx *bctx);
                int (*close)(struct bctx *bctx);
        };
       @@ -26,6 +27,7 @@ extern int bcreat(char *path, int mode, struct bparam *bpar, struct bctx **bctx)
        extern int bopen(char *path, int flags, int mode, struct bparam *bpar, struct bctx **bctx);
        extern int bput(struct bctx *bctx, void *buf, size_t n, unsigned char *md);
        extern int bget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n);
       +extern int bcheck(struct bctx *bctx, unsigned char *md);
        extern int bsync(struct bctx *bctx);
        extern int bclose(struct bctx *bctx);
        struct bparam *bparamdef(void);
 (DIR) diff --git a/bstorage.c b/bstorage.c
       @@ -51,6 +51,7 @@ static int bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
        static int bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar);
        static int bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md);
        static int bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n);
       +static int bscheck(struct bctx *bctx, unsigned char *md);
        static int bssync(struct bctx *bctx);
        static int bsclose(struct bctx *bctx);
        
       @@ -59,6 +60,7 @@ static struct bops bops = {
                .open = bsopen,
                .put = bsput,
                .get = bsget,
       +        .check = bscheck,
                .sync = bssync,
                .close = bsclose,
        };
       @@ -515,6 +517,53 @@ bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n)
                return 0;
        }
        
       +/*
       + * Check block md: look it up in the cache, read bd->size bytes at
       + * bd->offset, rehash them and compare.  Returns 0 on match, else -1.
       + */
       +static int
       +bscheck(struct bctx *bctx, unsigned char *md)
       +{
       +        struct sctx *sctx;
       +        struct bd key, *bd;
       +        void *buf;
       +
       +        sctx = bctx->sctx;
       +
       +        /* Look up the block in the cache, keyed by its hash */
       +        memcpy(key.md, md, MDSIZE);
       +        bd = RB_FIND(bdcache, &sctx->bdcache, &key);
       +        if (bd == NULL)
       +                return -1;      /* no cache entry for this hash */
       +
       +        buf = malloc(bd->size); /* holds the stored bytes; freed on every path below */
       +        if (buf == NULL)
       +                return -1;
       +
       +        if (lseek(sctx->fd, bd->offset, SEEK_SET) < 0) {
       +                free(buf);
       +                return -1;
       +        }
       +
       +        if (xread(sctx->fd, buf, bd->size) != bd->size) {
       +                free(buf);
       +                return -1;      /* did not get the whole block */
       +        }
       +
       +        if (bhash(buf, bd->size, key.md) < 0) { /* key.md receives the new hash */
       +                free(buf);
       +                return -1;
       +        }
       +
       +        if (memcmp(key.md, md, MDSIZE) != 0) {
       +                free(buf);
       +                return -1;      /* stored bytes no longer hash to md */
       +        }
       +
       +        free(buf);
       +        return 0;
       +}
       +
        /* Sync block header to storage */
        static int
        bssync(struct bctx *bctx)
 (DIR) diff --git a/dotest b/dotest
       @@ -9,6 +9,8 @@ test0()
                ./dup-init -Z none "$repo"
                ./dup-pack -r "$repo" snap0 < "$data"
                ./dup-pack -r "$repo" snap1 < "$data"
       +        ./dup-check -r "$repo" snap0
       +        ./dup-check -r "$repo" snap1
                du -sh "$repo"
                sum0=`sha1sum "$data" | awk '{print $1}'`
                sum1=`./dup-unpack -r "$repo" snap0 | sha1sum | awk '{print $1}'`
       @@ -26,6 +28,8 @@ test1()
                ./dup-init -Z snappy "$repo"
                ./dup-pack -r "$repo" snap0 < "$data"
                ./dup-pack -r "$repo" snap1 < "$data"
       +        ./dup-check -r "$repo" snap0
       +        ./dup-check -r "$repo" snap1
                du -sh "$repo"
                sum0=`sha1sum "$data" | awk '{print $1}'`
                sum1=`./dup-unpack -r "$repo" snap0 | sha1sum | awk '{print $1}'`
 (DIR) diff --git a/dup-check.1 b/dup-check.1
       @@ -0,0 +1,25 @@
       +.Dd April 25, 2019
       +.Dt DUP-CHECK 1
       +.Os
       +.Sh NAME
       +.Nm dup-check
       +.Nd Check snapshot consistency
       +.Sh SYNOPSIS
       +.Nm dup-check
       +.Op Fl v
       +.Op Fl r Ar repo
       +.Ar name
       +.Sh DESCRIPTION
       +.Nm
       +checks that every block referenced by a snapshot is stored in the repository and still matches its hash.
       +.Sh OPTIONS
       +.Bl -tag -width "-r repo"
       +.It Fl r Ar repo
       +Repository directory.
       +By default the current working directory is used.
       +.It Fl v
       +Enable verbose mode.
       +.El
       +.Sh AUTHORS
       +.An Dimitris Papastamos Aq Mt sin@2f30.org ,
       +.An z3bra Aq Mt contactatz3bradotorg .
 (DIR) diff --git a/dup-check.c b/dup-check.c
       @@ -0,0 +1,81 @@
       +#include <sys/types.h>
       +#include <sys/stat.h>
       +
       +#include <err.h>
       +#include <fcntl.h>
       +#include <limits.h>
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <unistd.h>
       +
       +#include "arg.h"
       +#include "block.h"
       +#include "config.h"
       +#include "snap.h"
       +
       +int verbose;
       +char *argv0;
       +/* Run bcheck() on each hash sget() yields; 0 if all pass, -1 on any error. */
       +static int
       +check(struct sctx *sctx, struct bctx *bctx)
       +{
       +        unsigned char md[MDSIZE];
       +        int sn;
       +
       +        while ((sn = sget(sctx, md)) == MDSIZE) {
       +                if (bcheck(bctx, md) < 0)
       +                        return -1;      /* stop at the first failing block */
       +        }
       +        if (sn < 0)     /* sget() itself reported failure */
       +                return -1;
       +        return 0;       /* any other non-negative sn ends the walk as success */
       +}
       +/* Print the command synopsis to stderr and exit with status 1. */
       +static void
       +usage(void)
       +{
       +        fprintf(stderr, "usage: %s [-v] [-r repo] name\n", argv0);
       +        exit(1);
       +}
       +/* Open <repo>/archive/<name> and <repo>/storage, run check(), exit 1 on failure. */
       +int
       +main(int argc, char *argv[])
       +{
       +        char path[PATH_MAX];
       +        struct sctx *sctx;
       +        struct bctx *bctx;
       +        struct bparam bparam;   /* handed to bopen() uninitialized; TODO confirm output-only */
       +        char *repo = ".";       /* default repository: current directory */
       +
       +        ARGBEGIN {
       +        case 'r':
       +                repo = EARGF(usage());
       +                break;
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +        } ARGEND
       +
       +        if (argc != 1)          /* expect exactly one operand: the snapshot name */
       +                usage();
       +        /* NOTE(review): snprintf() results unchecked; an overlong path truncates */
       +        snprintf(path, sizeof(path), "%s/archive/%s", repo, argv[0]);
       +        if (sopen(path, O_RDONLY, 0600, &sctx) < 0)
       +                errx(1, "sopen: %s: failed", path);
       +
       +        snprintf(path, sizeof(path), "%s/storage", repo);
       +        if (bopen(path, O_RDONLY, 0600, &bparam, &bctx) <0)
       +                errx(1, "bopen: %s: failed", path);
       +
       +        if (check(sctx, bctx) < 0)
       +                errx(1, "dedup: failed");
       +
       +        if (bclose(bctx) < 0)
       +                errx(1, "bclose: failed");
       +        if (sclose(sctx) < 0)
       +                errx(1, "sclose: failed");
       +        
       +        return 0;
       +}