tthe one true diff. at least it can handle "diff file1 file2 dir". - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 5993a8f2756bc455101a8c9ce95347d5050e7883
 (DIR) parent cfabc3ed1638efc186ebd26bdaa3dfb5663dff17
 (HTM) Author: rsc <devnull@localhost>
       Date:   Sun, 23 Nov 2003 18:03:02 +0000
       
       tthe one true diff.
       at least it can handle "diff file1 file2 dir".
       
       Diffstat:
         A src/cmd/diff/diff.h                 |      25 +++++++++++++++++++++++++
         A src/cmd/diff/diffdir.c              |     108 +++++++++++++++++++++++++++++++
         A src/cmd/diff/diffio.c               |     291 +++++++++++++++++++++++++++++++
         A src/cmd/diff/diffreg.c              |     419 +++++++++++++++++++++++++++++++
         A src/cmd/diff/main.c                 |     262 +++++++++++++++++++++++++++++++
         A src/cmd/diff/mkfile                 |      15 +++++++++++++++
       
       6 files changed, 1120 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/src/cmd/diff/diff.h b/src/cmd/diff/diff.h
       t@@ -0,0 +1,25 @@
       +char mode;                        /* '\0', 'e', 'f', 'h' */
       +char bflag;                        /* ignore multiple and trailing blanks */
       +char rflag;                        /* recurse down directory trees */
       +char mflag;                        /* pseudo flag: doing multiple files, one dir */
       +int anychange;
       +extern Biobuf        stdout;
       +extern int        binary;
       +
       +#define MALLOC(t, n)                ((t *)emalloc((n)*sizeof(t)))
       +#define REALLOC(p, t, n)        ((t *)erealloc((void *)(p), (n)*sizeof(t)))
       +#define FREE(p)                        free((void *)(p))
       +
       +#define MAXPATHLEN        1024
       +
       +int mkpathname(char *, char *, char *);
       +void *emalloc(unsigned);
       +void *erealloc(void *, unsigned);
       +void diff(char *, char *, int);
       +void diffdir(char *, char *, int);
       +void diffreg(char *, char *);
       +Biobuf *prepare(int, char *);
       +void panic(int, char *, ...);
       +void check(Biobuf *, Biobuf *);
       +void change(int, int, int, int);
       +
 (DIR) diff --git a/src/cmd/diff/diffdir.c b/src/cmd/diff/diffdir.c
       t@@ -0,0 +1,108 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include "diff.h"
       +
       +static int
       +itemcmp(void *v1, void *v2)
       +{
       +        char **d1 = v1, **d2 = v2;
       +
       +        return strcmp(*d1, *d2);
       +}
       +
       +static char **
       +scandir(char *name)
       +{
       +        char **cp;
       +        Dir *db;
       +        int nitems;
       +        int fd, n;
       +
       +        if ((fd = open(name, OREAD)) < 0)
       +                panic(2, "can't open %s\n", name);
       +        cp = 0;
       +        nitems = 0;
       +        if((n = dirreadall(fd, &db)) > 0){
       +                while (n--) {
       +                        cp = REALLOC(cp, char *, (nitems+1));
       +                        cp[nitems] = MALLOC(char, strlen((db+n)->name)+1);
       +                        strcpy(cp[nitems], (db+n)->name);
       +                        nitems++;
       +                }
       +                free(db);
       +        }
       +        cp = REALLOC(cp, char*, (nitems+1));
       +        cp[nitems] = 0;
       +        close(fd);
       +        qsort((char *)cp, nitems, sizeof(char*), itemcmp);
       +        return cp;
       +}
       +
       +static int
       +isdotordotdot(char *p)
       +{
       +        if (*p == '.') {
       +                if (!p[1])
       +                        return 1;
       +                if (p[1] == '.' && !p[2])
       +                        return 1;
       +        }
       +        return 0;
       +}
       +
       +void
       +diffdir(char *f, char *t, int level)
       +{
       +        char  **df, **dt, **dirf, **dirt;
       +        char *from, *to;
       +        int res;
       +        char fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
       +
       +        df = scandir(f);
       +        dt = scandir(t);
       +        dirf = df;
       +        dirt = dt;
       +        while (*df || *dt) {
       +                from = *df;
       +                to = *dt;
       +                if (from && isdotordotdot(from)) {
       +                        df++;
       +                        continue;
       +                }
       +                if (to && isdotordotdot(to)) {
       +                        dt++;
       +                        continue;
       +                }
       +                if (!from)
       +                        res = 1;
       +                else if (!to)
       +                        res = -1;
       +                else
       +                        res = strcmp(from, to);
       +                if (res < 0) {
       +                        if (mode == 0 || mode == 'n')
       +                                Bprint(&stdout, "Only in %s: %s\n", f, from);
       +                        df++;
       +                        continue;
       +                }
       +                if (res > 0) {
       +                        if (mode == 0 || mode == 'n')
       +                                Bprint(&stdout, "Only in %s: %s\n", t, to);
       +                        dt++;
       +                        continue;
       +                }
       +                if (mkpathname(fb, f, from))
       +                        continue;
       +                if (mkpathname(tb, t, to))
       +                        continue;
       +                diff(fb, tb, level+1);
       +                df++; dt++;
       +        }
       +        for (df = dirf; *df; df++)
       +                FREE(*df);
       +        for (dt = dirt; *dt; dt++)
       +                FREE(*dt);
       +        FREE(dirf);
       +        FREE(dirt);
       +}
 (DIR) diff --git a/src/cmd/diff/diffio.c b/src/cmd/diff/diffio.c
       t@@ -0,0 +1,291 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include <ctype.h>
       +#include "diff.h"
       +
       +struct line {
       +        int        serial;
       +        int        value;
       +};
       +extern struct line *file[2];
       +extern int len[2];
       +extern long *ixold, *ixnew;
       +extern int *J;
       +
       +static Biobuf *input[2];
       +static char *file1, *file2;
       +static int firstchange;
       +
       +#define MAXLINELEN        4096
       +#define MIN(x, y)        ((x) < (y) ? (x): (y))
       +
       +static int
       +readline(Biobuf *bp, char *buf)
       +{
       +        int c;
       +        char *p, *e;
       +
       +        p = buf;
       +        e = p + MAXLINELEN-1;
       +        do {
       +                c = Bgetc(bp);
       +                if (c < 0) {
       +                        if (p == buf)
       +                                return -1;
       +                        break;
       +                }
       +                if (c == '\n')
       +                        break;
       +                *p++ = c;
       +        } while (p < e);
       +        *p = 0;
       +        if (c != '\n' && c >= 0) {
       +                do c = Bgetc(bp);
       +                while (c >= 0 && c != '\n');
       +        }
       +        return p - buf;
       +}
       +
       +#define HALFLONG 16
       +#define low(x)        (x&((1L<<HALFLONG)-1))
       +#define high(x)        (x>>HALFLONG)
       +
       +/*
       + * hashing has the effect of
       + * arranging line in 7-bit bytes and then
       + * summing 1-s complement in 16-bit hunks 
       + */
       +static int
       +readhash(Biobuf *bp, char *buf)
       +{
       +        long sum;
       +        unsigned shift;
       +        char *p;
       +        int len, space;
       +
       +        sum = 1;
       +        shift = 0;
       +        if ((len = readline(bp, buf)) == -1)
       +                return 0;
       +        p = buf;
       +        switch(bflag)        /* various types of white space handling */
       +        {
       +        case 0:
       +                while (len--) {
       +                        sum += (long)*p++ << (shift &= (HALFLONG-1));
       +                        shift += 7;
       +                }
       +                break;
       +        case 1:
       +                /*
       +                 * coalesce multiple white-space
       +                 */
       +                for (space = 0; len--; p++) {
       +                        if (isspace(*p)) {
       +                                space++;
       +                                continue;
       +                        }
       +                        if (space) {
       +                                shift += 7;
       +                                space = 0;
       +                        }
       +                        sum += (long)*p << (shift &= (HALFLONG-1));
       +                        shift += 7;
       +                }
       +                break;
       +        default:
       +                /*
       +                 * strip all white-space
       +                 */
       +                while (len--) {
       +                        if (isspace(*p)) {
       +                                p++;
       +                                continue;
       +                        }
       +                        sum += (long)*p++ << (shift &= (HALFLONG-1));
       +                        shift += 7;
       +                }
       +                break;
       +        }
       +        sum = low(sum) + high(sum);
       +        return ((short)low(sum) + (short)high(sum));
       +}
       +
       +Biobuf *
       +prepare(int i, char *arg)
       +{
       +        struct line *p;
       +        int j, h;
       +        Biobuf *bp;
       +        char *cp, buf[MAXLINELEN];
       +        int nbytes;
       +        Rune r;
       +
       +        bp = Bopen(arg, OREAD);
       +        if (!bp) {
       +                panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
       +                return 0;
       +        }
       +        if (binary)
       +                return bp;
       +        nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
       +        if (nbytes > 0) {
       +                cp = buf;
       +                while (cp < buf+nbytes-UTFmax) {
       +                        /*
       +                         * heuristic for a binary file in the
       +                         * brave new UNICODE world
       +                         */
       +                        cp += chartorune(&r, cp);
       +                        if (r == 0 || (r > 0x7f && r <= 0xa0)) {
       +                                binary++;
       +                                return bp;
       +                        }
       +                }
       +                Bseek(bp, 0, 0);
       +        }
       +        p = MALLOC(struct line, 3);
       +        for (j = 0; h = readhash(bp, buf); p[j].value = h)
       +                p = REALLOC(p, struct line, (++j+3));
       +        len[i] = j;
       +        file[i] = p;
       +        input[i] = bp;                        /*fix*/
       +        if (i == 0) {                        /*fix*/
       +                file1 = arg;
       +                firstchange = 0;
       +        }
       +        else
       +                file2 = arg;
       +        return bp;
       +}
       +
       +static int
       +squishspace(char *buf)
       +{
       +        char *p, *q;
       +        int space;
       +
       +        for (space = 0, q = p = buf; *q; q++) {
       +                if (isspace(*q)) {
       +                        space++;
       +                        continue;
       +                }
       +                if (space && bflag == 1) {
       +                        *p++ = ' ';
       +                        space = 0;
       +                }
       +                *p++ = *q;
       +        }
       +        *p = 0;
       +        return p - buf;
       +}
       +
       +/*
       + * need to fix up for unexpected EOF's
       + */
       +void
       +check(Biobuf *bf, Biobuf *bt)
       +{
       +        int f, t, flen, tlen;
       +        char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
       +
       +        ixold[0] = ixnew[0] = 0;
       +        for (f = t = 1; f < len[0]; f++) {
       +                flen = readline(bf, fbuf);
       +                ixold[f] = ixold[f-1] + flen + 1;                /* ftell(bf) */
       +                if (J[f] == 0)
       +                        continue;
       +                do {
       +                        tlen = readline(bt, tbuf);
       +                        ixnew[t] = ixnew[t-1] + tlen + 1;        /* ftell(bt) */
       +                } while (t++ < J[f]);
       +                if (bflag) {
       +                        flen = squishspace(fbuf);
       +                        tlen = squishspace(tbuf);
       +                }
       +                if (flen != tlen || strcmp(fbuf, tbuf))
       +                        J[f] = 0;
       +        }
       +        while (t < len[1]) {
       +                tlen = readline(bt, tbuf);
       +                ixnew[t] = ixnew[t-1] + tlen + 1;        /* fseek(bt) */
       +                t++;
       +        }
       +}
       +
       +static void
       +range(int a, int b, char *separator)
       +{
       +        Bprint(&stdout, "%d", a > b ? b: a);
       +        if (a < b)
       +                Bprint(&stdout, "%s%d", separator, b);
       +}
       +
       +static void
       +fetch(long *f, int a, int b, Biobuf *bp, char *s)
       +{
       +        char buf[MAXLINELEN];
       +
       +        Bseek(bp, f[a-1], 0);
       +        while (a++ <= b) {
       +                readline(bp, buf);
       +                Bprint(&stdout, "%s%s\n", s, buf);
       +        }
       +}
       +
       +void
       +change(int a, int b, int c, int d)
       +{
       +        char verb;
       +        char buf[4];
       +
       +        if (a > b && c > d)
       +                return;
       +        anychange = 1;
       +        if (mflag && firstchange == 0) {
       +                if(mode) {
       +                        buf[0] = '-';
       +                        buf[1] = mode;
       +                        buf[2] = ' ';
       +                        buf[3] = '\0';
       +                } else {
       +                        buf[0] = '\0';
       +                }
       +                Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
       +                firstchange = 1;
       +        }
       +        verb = a > b ? 'a': c > d ? 'd': 'c';
       +        switch(mode) {
       +        case 'e':
       +                range(a, b, ",");
       +                Bputc(&stdout, verb);
       +                break;
       +        case 0:
       +                range(a, b, ",");
       +                Bputc(&stdout, verb);
       +                range(c, d, ",");
       +                break;
       +        case 'n':
       +                Bprint(&stdout, "%s:", file1);
       +                range(a, b, ",");
       +                Bprint(&stdout, " %c ", verb);
       +                Bprint(&stdout, "%s:", file2);
       +                range(c, d, ",");
       +                break;
       +        case 'f':
       +                Bputc(&stdout, verb);
       +                range(a, b, " ");
       +                break;
       +        }
       +        Bputc(&stdout, '\n');
       +        if (mode == 0 || mode == 'n') {
       +                fetch(ixold, a, b, input[0], "< ");
       +                if (a <= b && c <= d)
       +                        Bprint(&stdout, "---\n");
       +        }
       +        fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
       +        if (mode != 0 && mode != 'n' && c <= d)
       +                Bprint(&stdout, ".\n");
       +}
       +
 (DIR) diff --git a/src/cmd/diff/diffreg.c b/src/cmd/diff/diffreg.c
       t@@ -0,0 +1,419 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include "diff.h"
       +
       +/*        diff - differential file comparison
       +*
       +*        Uses an algorithm due to Harold Stone, which finds
       +*        a pair of longest identical subsequences in the two
       +*        files.
       +*
       +*        The major goal is to generate the match vector J.
       +*        J[i] is the index of the line in file1 corresponding
       +*        to line i file0. J[i] = 0 if there is no
       +*        such line in file1.
       +*
       +*        Lines are hashed so as to work in core. All potential
       +*        matches are located by sorting the lines of each file
       +*        on the hash (called value). In particular, this
       +*        collects the equivalence classes in file1 together.
       +*        Subroutine equiv replaces the value of each line in
       +*        file0 by the index of the first element of its 
       +*        matching equivalence in (the reordered) file1.
       +*        To save space equiv squeezes file1 into a single
       +*        array member in which the equivalence classes
       +*        are simply concatenated, except that their first
       +*        members are flagged by changing sign.
       +*
       +*        Next the indices that point into member are unsorted into
       +*        array class according to the original order of file0.
       +*
       +*        The cleverness lies in routine stone. This marches
       +*        through the lines of file0, developing a vector klist
       +*        of "k-candidates". At step i a k-candidate is a matched
       +*        pair of lines x,y (x in file0 y in file1) such that
       +*        there is a common subsequence of lenght k
       +*        between the first i lines of file0 and the first y 
       +*        lines of file1, but there is no such subsequence for
       +*        any smaller y. x is the earliest possible mate to y
       +*        that occurs in such a subsequence.
       +*
       +*        Whenever any of the members of the equivalence class of
       +*        lines in file1 matable to a line in file0 has serial number 
       +*        less than the y of some k-candidate, that k-candidate 
       +*        with the smallest such y is replaced. The new 
       +*        k-candidate is chained (via pred) to the current
       +*        k-1 candidate so that the actual subsequence can
       +*        be recovered. When a member has serial number greater
       +*        that the y of all k-candidates, the klist is extended.
       +*        At the end, the longest subsequence is pulled out
       +*        and placed in the array J by unravel.
       +*
       +*        With J in hand, the matches there recorded are
       +*        check'ed against reality to assure that no spurious
       +*        matches have crept in due to hashing. If they have,
       +*        they are broken, and "jackpot " is recorded--a harmless
       +*        matter except that a true match for a spuriously
       +*        mated line may now be unnecessarily reported as a change.
       +*
       +*        Much of the complexity of the program comes simply
       +*        from trying to minimize core utilization and
       +*        maximize the range of doable problems by dynamically
       +*        allocating what is needed and reusing what is not.
       +*        The core requirements for problems larger than somewhat
       +*        are (in words) 2*length(file0) + length(file1) +
       +*        3*(number of k-candidates installed),  typically about
       +*        6n words for files of length n. 
       +*/
       +/* TIDY THIS UP */
       +struct cand {
       +        int x;
       +        int y;
       +        int pred;
       +} cand;
       +struct line {
       +        int serial;
       +        int value;
       +} *file[2], line;
       +int len[2];
       +int binary;
       +struct line *sfile[2];        /*shortened by pruning common prefix and suffix*/
       +int slen[2];
       +int pref, suff;        /*length of prefix and suffix*/
       +int *class;        /*will be overlaid on file[0]*/
       +int *member;        /*will be overlaid on file[1]*/
       +int *klist;                /*will be overlaid on file[0] after class*/
       +struct cand *clist;        /* merely a free storage pot for candidates */
       +int clen;
       +int *J;                /*will be overlaid on class*/
       +long *ixold;        /*will be overlaid on klist*/
       +long *ixnew;        /*will be overlaid on file[1]*/
       +/* END OF SOME TIDYING */
       +
       +static void        
       +sort(struct line *a, int n)        /*shellsort CACM #201*/
       +{
       +        int m;
       +        struct line *ai, *aim, *j, *k;
       +        struct line w;
       +        int i;
       +
       +        m = 0;
       +        for (i = 1; i <= n; i *= 2)
       +                m = 2*i - 1;
       +        for (m /= 2; m != 0; m /= 2) {
       +                k = a+(n-m);
       +                for (j = a+1; j <= k; j++) {
       +                        ai = j;
       +                        aim = ai+m;
       +                        do {
       +                                if (aim->value > ai->value ||
       +                                   aim->value == ai->value &&
       +                                   aim->serial > ai->serial)
       +                                        break;
       +                                w = *ai;
       +                                *ai = *aim;
       +                                *aim = w;
       +
       +                                aim = ai;
       +                                ai -= m;
       +                        } while (ai > a && aim >= ai);
       +                }
       +        }
       +}
       +
       +static void
       +unsort(struct line *f, int l, int *b)
       +{
       +        int *a;
       +        int i;
       +
       +        a = MALLOC(int, (l+1));
       +        for(i=1;i<=l;i++)
       +                a[f[i].serial] = f[i].value;
       +        for(i=1;i<=l;i++)
       +                b[i] = a[i];
       +        FREE(a);
       +}
       +
       +static void
       +prune(void)
       +{
       +        int i,j;
       +
       +        for(pref=0;pref<len[0]&&pref<len[1]&&
       +                file[0][pref+1].value==file[1][pref+1].value;
       +                pref++ ) ;
       +        for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
       +                file[0][len[0]-suff].value==file[1][len[1]-suff].value;
       +                suff++) ;
       +        for(j=0;j<2;j++) {
       +                sfile[j] = file[j]+pref;
       +                slen[j] = len[j]-pref-suff;
       +                for(i=0;i<=slen[j];i++)
       +                        sfile[j][i].serial = i;
       +        }
       +}
       +
       +static void
       +equiv(struct line *a, int n, struct line *b, int m, int *c)
       +{
       +        int i, j;
       +
       +        i = j = 1;
       +        while(i<=n && j<=m) {
       +                if(a[i].value < b[j].value)
       +                        a[i++].value = 0;
       +                else if(a[i].value == b[j].value)
       +                        a[i++].value = j;
       +                else
       +                        j++;
       +        }
       +        while(i <= n)
       +                a[i++].value = 0;
       +        b[m+1].value = 0;
       +        j = 0;
       +        while(++j <= m) {
       +                c[j] = -b[j].serial;
       +                while(b[j+1].value == b[j].value) {
       +                        j++;
       +                        c[j] = b[j].serial;
       +                }
       +        }
       +        c[j] = -1;
       +}
       +
       +static int
       +newcand(int x, int  y, int pred)
       +{
       +        struct cand *q;
       +
       +        clist = REALLOC(clist, struct cand, (clen+1));
       +        q = clist + clen;
       +        q->x = x;
       +        q->y = y;
       +        q->pred = pred;
       +        return clen++;
       +}
       +
       +static int
       +search(int *c, int k, int y)
       +{
       +        int i, j, l;
       +        int t;
       +
       +        if(clist[c[k]].y < y)        /*quick look for typical case*/
       +                return k+1;
       +        i = 0;
       +        j = k+1;
       +        while((l=(i+j)/2) > i) {
       +                t = clist[c[l]].y;
       +                if(t > y)
       +                        j = l;
       +                else if(t < y)
       +                        i = l;
       +                else
       +                        return l;
       +        }
       +        return l+1;
       +}
       +
       +static int
       +stone(int *a, int n, int *b, int *c)
       +{
       +        int i, k,y;
       +        int j, l;
       +        int oldc, tc;
       +        int oldl;
       +
       +        k = 0;
       +        c[0] = newcand(0,0,0);
       +        for(i=1; i<=n; i++) {
       +                j = a[i];
       +                if(j==0)
       +                        continue;
       +                y = -b[j];
       +                oldl = 0;
       +                oldc = c[0];
       +                do {
       +                        if(y <= clist[oldc].y)
       +                                continue;
       +                        l = search(c, k, y);
       +                        if(l!=oldl+1)
       +                                oldc = c[l-1];
       +                        if(l<=k) {
       +                                if(clist[c[l]].y <= y)
       +                                        continue;
       +                                tc = c[l];
       +                                c[l] = newcand(i,y,oldc);
       +                                oldc = tc;
       +                                oldl = l;
       +                        } else {
       +                                c[l] = newcand(i,y,oldc);
       +                                k++;
       +                                break;
       +                        }
       +                } while((y=b[++j]) > 0);
       +        }
       +        return k;
       +}
       +
       +static void
       +unravel(int p)
       +{
       +        int i;
       +        struct cand *q;
       +
       +        for(i=0; i<=len[0]; i++) {
       +                if (i <= pref)
       +                        J[i] = i;
       +                else if (i > len[0]-suff)
       +                        J[i] = i+len[1]-len[0];
       +                else
       +                        J[i] = 0;
       +        }
       +        for(q=clist+p;q->y!=0;q=clist+q->pred)
       +                J[q->x+pref] = q->y+pref;
       +}
       +
       +static void
       +output(void)
       +{
       +        int m, i0, i1, j0, j1;
       +
       +        m = len[0];
       +        J[0] = 0;
       +        J[m+1] = len[1]+1;
       +        if (mode != 'e') {
       +                for (i0 = 1; i0 <= m; i0 = i1+1) {
       +                        while (i0 <= m && J[i0] == J[i0-1]+1)
       +                                i0++;
       +                        j0 = J[i0-1]+1;
       +                        i1 = i0-1;
       +                        while (i1 < m && J[i1+1] == 0)
       +                                i1++;
       +                        j1 = J[i1+1]-1;
       +                        J[i1] = j1;
       +                        change(i0, i1, j0, j1);
       +                }
       +        }
       +        else {
       +                for (i0 = m; i0 >= 1; i0 = i1-1) {
       +                        while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0])
       +                                i0--;
       +                        j0 = J[i0+1]-1;
       +                        i1 = i0+1;
       +                        while (i1 > 1 && J[i1-1] == 0)
       +                                i1--;
       +                        j1 = J[i1-1]+1;
       +                        J[i1] = j1;
       +                        change(i1 , i0, j1, j0);
       +                }
       +        }
       +        if (m == 0)
       +                change(1, 0, 1, len[1]);
       +}
       +
       +#define BUF 4096
       +static int
       +cmp(Biobuf* b1, Biobuf* b2)
       +{
       +        int n;
       +        uchar buf1[BUF], buf2[BUF];
       +        int f1, f2;
       +        vlong nc = 1;
       +        uchar *b1s, *b1e, *b2s, *b2e;
       +
       +        f1 = Bfildes(b1);
       +        f2 = Bfildes(b2);
       +        seek(f1, 0, 0);
       +        seek(f2, 0, 0);
       +        b1s = b1e = buf1;
       +        b2s = b2e = buf2;
       +        for(;;){
       +                if(b1s >= b1e){
       +                        if(b1s >= &buf1[BUF])
       +                                b1s = buf1;
       +                        n = read(f1, b1s,  &buf1[BUF] - b1s);
       +                        b1e = b1s + n;
       +                }
       +                if(b2s >= b2e){
       +                        if(b2s >= &buf2[BUF])
       +                                b2s = buf2;
       +                        n = read(f2, b2s,  &buf2[BUF] - b2s);
       +                        b2e = b2s + n;
       +                }
       +                n = b2e - b2s;
       +                if(n > b1e - b1s)
       +                        n = b1e - b1s;
       +                if(n <= 0)
       +                        break;
       +                if(memcmp((void *)b1s, (void *)b2s, n) != 0){
       +                        return 1;
       +                }                
       +                nc += n;
       +                b1s += n;
       +                b2s += n;
       +        }
       +        if(b1e - b1s == b2e - b2s)
       +                return 0;
       +        return 1;        
       +}
       +
       +void
       +diffreg(char *f, char *t)
       +{
       +        Biobuf *b0, *b1;
       +        int k;
       +
       +        binary = 0;
       +        b0 = prepare(0, f);
       +        if (!b0)
       +                return;
       +        b1 = prepare(1, t);
       +        if (!b1) {
       +                FREE(file[0]);
       +                Bterm(b0);
       +                return;
       +        }
       +        if (binary){
       +                // could use b0 and b1 but this is simpler.
       +                if (cmp(b0, b1))
       +                        print("binary files %s %s differ\n", f, t);
       +                Bterm(b0);
       +                Bterm(b1);
       +                return;
       +        }
       +        clen = 0;
       +        prune();
       +        sort(sfile[0], slen[0]);
       +        sort(sfile[1], slen[1]);
       +
       +        member = (int *)file[1];
       +        equiv(sfile[0], slen[0], sfile[1], slen[1], member);
       +        member = REALLOC(member, int, slen[1]+2);
       +
       +        class = (int *)file[0];
       +        unsort(sfile[0], slen[0], class);
       +        class = REALLOC(class, int, slen[0]+2);
       +
       +        klist = MALLOC(int, slen[0]+2);
       +        clist = MALLOC(struct cand, 1);
       +        k = stone(class, slen[0], member, klist);
       +        FREE(member);
       +        FREE(class);
       +
       +        J = MALLOC(int, len[0]+2);
       +        unravel(klist[k]);
       +        FREE(clist);
       +        FREE(klist);
       +
       +        ixold = MALLOC(long, len[0]+2);
       +        ixnew = MALLOC(long, len[1]+2);
       +        Bseek(b0, 0, 0); Bseek(b1, 0, 0);
       +        check(b0, b1);
       +        output();
       +        FREE(J); FREE(ixold); FREE(ixnew);
       +        Bterm(b0); Bterm(b1);                        /* ++++ */
       +}
 (DIR) diff --git a/src/cmd/diff/main.c b/src/cmd/diff/main.c
       t@@ -0,0 +1,262 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include "diff.h"
       +
       +#define        DIRECTORY(s)                ((s)->qid.type&QTDIR)
       +#define        REGULAR_FILE(s)                ((s)->type == 'M' && !DIRECTORY(s))
       +
       +Biobuf        stdout;
       +
       +static char *tmp[] = {"/tmp/diff1", "/tmp/diff2"};
       +static int whichtmp;
       +static char *progname;
       +static char usage[] = "diff [ -efmnbwr ] file1 ... file2\n";
       +
       +static void
       +rmtmpfiles(void)
       +{
       +        while (whichtmp > 0) {
       +                whichtmp--;
       +                remove(tmp[whichtmp]);
       +        }
       +}
       +
       +void        
       +done(int status)
       +{
       +        rmtmpfiles();
       +        switch(status)
       +        {
       +        case 0:
       +                exits("");
       +        case 1:
       +                exits("some");
       +        default:
       +                exits("error");
       +        }
       +        /*NOTREACHED*/
       +}
       +
       +void
       +panic(int status, char *fmt, ...)
       +{
       +        va_list arg;
       +
       +        Bflush(&stdout);
       +
       +        fprint(2, "%s: ", progname);
       +        va_start(arg, fmt);
       +        vfprint(2, fmt, arg);
       +        va_end(arg);
       +        if (status)
       +                done(status);
       +                /*NOTREACHED*/
       +}
       +
       +static int
       +catch(void *a, char *msg)
       +{
       +        USED(a);
       +        panic(2, msg);
       +        return 1;
       +}
       +
       +int
       +mkpathname(char *pathname, char *path, char *name)
       +{
       +        if (strlen(path) + strlen(name) > MAXPATHLEN) {
       +                panic(0, "pathname %s/%s too long\n", path, name);
       +                return 1;
       +        }
       +        sprint(pathname, "%s/%s", path, name);
       +        return 0;
       +}
       +        
       +static char *
       +mktmpfile(int input, Dir **sb)
       +{
       +        int fd, i;
       +        char *p;
       +        char buf[8192];
       +
       +        atnotify(catch, 1);
       +        p = tmp[whichtmp++];
       +        fd = create(p, OWRITE, 0600);
       +        if (fd < 0) {
       +                panic(mflag ? 0: 2, "cannot create %s: %r\n", p);
       +                return 0;
       +        }
       +        while ((i = read(input, buf, sizeof(buf))) > 0) {
       +                if ((i = write(fd, buf, i)) < 0)
       +                        break;
       +        }
       +        *sb = dirfstat(fd);
       +        close(fd);
       +        if (i < 0) {
       +                panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p);
       +                return 0;
       +        }
       +        return p;
       +}
       +
       +static char *
       +statfile(char *file, Dir **sb)
       +{
       +        Dir *dir;
       +        int input;
       +
       +        dir = dirstat(file);
       +        if(dir == nil) {
       +                if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) {
       +                        panic(mflag ? 0: 2, "cannot stat %s: %r\n", file);
       +                        return 0;
       +                }
       +                free(dir);
       +                return mktmpfile(0, sb);
       +        }
       +        else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) {
       +                free(dir);
       +                if ((input = open(file, OREAD)) == -1) {
       +                        panic(mflag ? 0: 2, "cannot open %s: %r\n", file);
       +                        return 0;
       +                }
       +                file = mktmpfile(input, sb);
       +                close(input);
       +        }
       +        else
       +                *sb = dir;
       +        return file;
       +}
       +
       +void
       +diff(char *f, char *t, int level)
       +{
       +        char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
       +        Dir *fsb, *tsb;
       +
       +        if ((fp = statfile(f, &fsb)) == 0)
       +                return;
       +        if ((tp = statfile(t, &tsb)) == 0){
       +                free(fsb);
       +                return;
       +        }
       +        if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
       +                if (rflag || level == 0)
       +                        diffdir(fp, tp, level);
       +                else
       +                        Bprint(&stdout, "Common subdirectories: %s and %s\n",
       +                                fp, tp);
       +        }
       +        else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
       +                diffreg(fp, tp);
       +        else {
       +                if (REGULAR_FILE(fsb)) {
       +                        if ((p = utfrrune(f, '/')) == 0)
       +                                p = f;
       +                        else
       +                                p++;
       +                        if (mkpathname(tb, tp, p) == 0)
       +                                diffreg(fp, tb);
       +                }
       +                else {
       +                        if ((p = utfrrune(t, '/')) == 0)
       +                                p = t;
       +                        else
       +                                p++;
       +                        if (mkpathname(fb, fp, p) == 0)
       +                                diffreg(fb, tp);
       +                }
       +        }
       +        free(fsb);
       +        free(tsb);
       +}
       +
       +void
       +main(int argc, char *argv[])
       +{
       +        char *p;
       +        int i;
       +        Dir *fsb, *tsb;
       +
       +        Binit(&stdout, 1, OWRITE);
       +        progname = *argv;
       +        while (--argc && (*++argv)[0] == '-' && (*argv)[1]) {
       +                for (p = *argv+1; *p; p++) {
       +                        switch (*p) {
       +
       +                        case 'e':
       +                        case 'f':
       +                        case 'n':
       +                                mode = *p;
       +                                break;
       +
       +                        case 'w':
       +                                bflag = 2;
       +                                break;
       +
       +                        case 'b':
       +                                bflag = 1;
       +                                break;
       +
       +                        case 'r':
       +                                rflag = 1;
       +                                break;
       +
       +                        case 'm':
       +                                mflag = 1;        
       +                                break;
       +
       +                        case 'h':
       +                        default:
       +                                progname = "Usage";
       +                                panic(2, usage);
       +                        }
       +                }
       +        }
       +        if (argc < 2)
       +                panic(2, usage, progname);
       +        if ((tsb = dirstat(argv[argc-1])) == nil)
       +                panic(2, "can't stat %s\n", argv[argc-1]);
       +        if (argc > 2) {
       +                if (!DIRECTORY(tsb))
       +                        panic(2, usage, progname);
       +                mflag = 1;
       +        }
       +        else {
       +                if ((fsb = dirstat(argv[0])) == nil)
       +                        panic(2, "can't stat %s\n", argv[0]);
       +                if (DIRECTORY(fsb) && DIRECTORY(tsb))
       +                        mflag = 1;
       +                free(fsb);
       +        }
       +        free(tsb);
       +        for (i = 0; i < argc-1; i++) {
       +                diff(argv[i], argv[argc-1], 0);
       +                rmtmpfiles();
       +        }
       +        done(anychange);
       +        /*NOTREACHED*/
       +}
       +
       +static char noroom[] = "out of memory - try diff -h\n";
       +
       +void *
       +emalloc(unsigned n)
       +{
       +        register void *p;
       +
       +        if ((p = malloc(n)) == 0)
       +                panic(2, noroom);
       +        return p;
       +}
       +
       +void *
       +erealloc(void *p, unsigned n)
       +{
       +        register void *rp;
       +
       +        if ((rp = realloc(p, n)) == 0)
       +                panic(2, noroom);
       +        return rp;
       +}
 (DIR) diff --git a/src/cmd/diff/mkfile b/src/cmd/diff/mkfile
       t@@ -0,0 +1,15 @@
       +PLAN9=../../..
       +<$PLAN9/src/mkhdr
       +
       +TARG=diff
       +OFILES=\
       +        diffdir.$O\
       +        diffio.$O\
       +        diffreg.$O\
       +        main.$O\
       +
       +HFILES=diff.h
       +
       +<$PLAN9/src/mkone
       +
       +LDFLAGS=$LDFLAGS -lbio -l9 -lfmt -lutf