Rework stats - dedup - deduplicating backup program
 (HTM) git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 99c3b317f9e0558d624235439a4162306f8e549d
 (DIR) parent b1fd149a54efd7fe0d3bf4dcd5b3c48467d4434f
 (HTM) Author: sin <sin@2f30.org>
       Date:   Tue, 19 Feb 2019 10:17:52 +0000
       
       Rework stats
       
       Diffstat:
         M dedup.c                             |      71 ++++++++++++++++---------------
       
       1 file changed, 37 insertions(+), 34 deletions(-)
       ---
 (DIR) diff --git a/dedup.c b/dedup.c
       @@ -25,7 +25,7 @@
        #define MDSIZE SHA256_DIGEST_LENGTH
        
        /* file format version */
       -#define VER_MIN 0
       +#define VER_MIN 1
        #define VER_MAJ 0
        
        #define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
       @@ -39,8 +39,12 @@ struct stats {
                uint64_t orig_size;
                uint64_t comp_size;
                uint64_t dedup_size;
       +        uint64_t min_blk_size;
       +        uint64_t max_blk_size;
       +        uint64_t nblks;
                uint64_t cache_hits;
                uint64_t cache_misses;
       +        uint64_t reserved[4];
        };
        
        /* index file header */
       @@ -48,6 +52,7 @@ struct enthdr {
                uint64_t flags;
                uint64_t nents;
                uint64_t store_size;
       +        uint64_t reserved[4];
                struct stats st;
        };
        
       @@ -208,7 +213,7 @@ decomp(uint8_t *in, uint8_t *out, size_t insize, size_t outsize)
        }
        
        void
       -print_md(const uint8_t *md, size_t size)
       +print_md(uint8_t *md, size_t size)
        {
                size_t i;
        
       @@ -217,6 +222,27 @@ print_md(const uint8_t *md, size_t size)
        }
        
        void
       +print_stats(struct stats *st)
       +{
       +        fprintf(stderr, "original size: %llu bytes\n",
       +                (unsigned long long)st->orig_size);
       +        fprintf(stderr, "compressed size: %llu bytes\n",
       +                (unsigned long long)st->comp_size);
       +        fprintf(stderr, "deduplicated size: %llu bytes\n",
       +                (unsigned long long)st->dedup_size);
       +        fprintf(stderr, "min/avg/max block size: %llu/%llu/%llu\n",
       +                (unsigned long long)st->min_blk_size,
       +                (unsigned long long)st->dedup_size / st->nblks,
       +                (unsigned long long)st->max_blk_size);
       +        fprintf(stderr, "number of blocks: %llu\n",
       +                (unsigned long long)st->nblks);
       +        fprintf(stderr, "total cache hits: %llu\n",
       +                (unsigned long long)st->cache_hits);
       +        fprintf(stderr, "total cache misses: %llu\n",
       +                (unsigned long long)st->cache_misses);
       +}
       +
       +void
        str2bin(char *s, uint8_t *d)
        {
                size_t i, size = strlen(s) / 2;
       @@ -488,7 +514,13 @@ dedup(int fd, char *msg)
                                cache_dirty = 1;
        
                                enthdr.st.dedup_size += bdescr.size;
       +                        enthdr.st.nblks++;
                                enthdr.st.cache_misses++;
       +
       +                        if (bdescr.size > enthdr.st.max_blk_size)
       +                                enthdr.st.max_blk_size = bdescr.size;
       +                        if (bdescr.size < enthdr.st.min_blk_size)
       +                                enthdr.st.min_blk_size = bdescr.size;
                        } else {
                                ent->bdescr[ent->nblks++] = bdescr;
                                enthdr.st.cache_hits++;
       @@ -651,11 +683,6 @@ void
        init_cache(void)
        {
                uint64_t nents, i;
       -        uint64_t min, max, avg;
       -
       -        min = comp_size(BLKSIZE);
       -        max = 0;
       -        avg = 0;
        
                nents = cache_nents();
                xlseek(cfd, 0, SEEK_SET);
       @@ -666,20 +693,6 @@ init_cache(void)
                        if (xread(cfd, &cent->bdescr, sizeof(cent->bdescr)) == 0)
                                errx(1, "read: unexpected EOF");
                        add_cent(cent);
       -
       -                if (cent->bdescr.size > max)
       -                        max = cent->bdescr.size;
       -                if (cent->bdescr.size < min)
       -                        min = cent->bdescr.size;
       -                avg += cent->bdescr.size;
       -        }
       -        avg /= nents;
       -
       -        if (verbose) {
       -                fprintf(stderr, "min/avg/max block size: %llu/%llu/%llu\n",
       -                        (unsigned long long)min,
       -                        (unsigned long long)avg,
       -                        (unsigned long long)max);
                }
        }
        
       @@ -720,19 +733,7 @@ init(void)
                } else {
                        enthdr.flags = (VER_MAJ << 8) | VER_MIN;
                        xwrite(ifd, &enthdr, sizeof(enthdr));
       -        }
       -
       -        if (verbose) {
       -                fprintf(stderr, "original size: %llu bytes\n",
       -                        (unsigned long long)enthdr.st.orig_size);
       -                fprintf(stderr, "compressed size: %llu bytes\n",
       -                        (unsigned long long)enthdr.st.comp_size);
       -                fprintf(stderr, "deduplicated size: %llu bytes\n",
       -                        (unsigned long long)enthdr.st.dedup_size);
       -                fprintf(stderr, "total cache hits: %llu\n",
       -                        (unsigned long long)enthdr.st.cache_hits);
       -                fprintf(stderr, "total cache misses: %llu\n",
       -                        (unsigned long long)enthdr.st.cache_misses);
       +                enthdr.st.min_blk_size = comp_size(BLKSIZE);
                }
        
                if (cache_nents() != 0)
       @@ -744,6 +745,8 @@ init(void)
        void
        term(void)
        {
       +        if (verbose)
       +                print_stats(&enthdr.st);
                flush_cache();
                free_cache();