attempting merge - vx32 - Local 9vx git repository for patches.
 (HTM) git clone git://r-36.net/vx32
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
 (DIR) commit b0d887c52d091ae3c62316cfc54ff178f64ab850
 (DIR) parent a700164cb1d15543535ee17fbff2f91fe4b4f595
 (HTM) Author: John (EBo) David <ebo@users.sourceforge.net>
       Date:   Mon, 21 Jun 2010 03:17:15 -0500
       
       attempting merge
       
       --HG--
       branch : yy-int-branch
       
       Diffstat:
         M .hgignore                           |       2 ++
         M CONTRIBUTORS                        |       6 ++++++
         A doc/9vx.1                           |     127 +++++++++++++++++++++++++++++++
         A src/9vx/9vx-tap                     |      27 +++++++++++++++++++++++++++
         M src/9vx/LICENSE                     |       2 ++
         M src/9vx/Makefrag                    |      62 ++++++++++++++++++++++++++++---
         A src/9vx/a/aoe.h                     |      84 +++++++++++++++++++++++++++++++
         M src/9vx/a/chan.c                    |       2 +-
         A src/9vx/a/devaoe.c                  |    2575 ++++++++++++++++++++++++++++++
         M src/9vx/a/devcons.c                 |       1 +
         A src/9vx/a/devether.c                |     542 +++++++++++++++++++++++++++++++
         M src/9vx/a/devsd.c                   |      16 +++++++++++++++-
         A src/9vx/a/dosfs.h                   |      62 +++++++++++++++++++++++++++++++
         A src/9vx/a/etherif.h                 |      39 +++++++++++++++++++++++++++++++
         M src/9vx/a/fns.ed                    |      50 +++++++++++++++++++++++++++++++
         M src/9vx/a/fns.h                     |      53 ++++++++++++++++++++++++++++---
         A src/9vx/a/fs.h                      |      38 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip.ed                     |    2297 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/arp.c                  |     684 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/chandial.c             |     124 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/devip.c                |    1439 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/eipconvtest.c          |     152 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/esp.c                  |     951 ++++++++++++++++++++++++++++++
         A src/9vx/a/ip/ethermedium.c          |     766 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/gre.c                  |     283 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/icmp.c                 |     490 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/icmp6.c                |     946 ++++++++++++++++++++++++++++++
         A src/9vx/a/ip/igmp.c                 |     294 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/il.c                   |    1408 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/inferno.c              |      46 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/ip.c                   |     776 ++++++++++++++++++++++++++++++
         A src/9vx/a/ip/ip.h                   |     677 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/ipaux.c                |     368 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/ipifc.c                |    1654 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/ipmux.c                |     842 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/iproute.c              |     854 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/ipv6.c                 |     718 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/ipv6.h                 |     185 ++++++++++++++++++++++++++++++
         A src/9vx/a/ip/loopbackmedium.c       |     120 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/netdevmedium.c         |     153 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/netlog.c               |     261 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/nullmedium.c           |      39 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/pktmedium.c            |      78 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/ptclbsum.c             |      72 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/rudp.c                 |    1055 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/tcp.c                  |    3209 ++++++++++++++++++++++++++++++
         A src/9vx/a/ip/tripmedium.c           |     398 +++++++++++++++++++++++++++++++
         A src/9vx/a/ip/udp.c                  |     619 +++++++++++++++++++++++++++++++
         A src/9vx/a/kfs.h                     |      57 +++++++++++++++++++++++++++++++
         A src/9vx/a/netif.c                   |     761 ++++++++++++++++++++++++++++++
         M src/9vx/a/netif.h                   |       8 ++++----
         A src/9vx/a/part.c                    |     341 +++++++++++++++++++++++++++++++
         M src/9vx/a/pgrp.c                    |       2 +-
         M src/9vx/a/portfns.h                 |      20 ++++++++++----------
         M src/9vx/a/qlock.c                   |      18 ++++++++++--------
         M src/9vx/a/sd.h                      |       5 +++++
         A src/9vx/a/sdaoe.c                   |     652 +++++++++++++++++++++++++++++++
         M src/9vx/bootcode.9                  |       0 
         M src/9vx/devip.c                     |       2 +-
         M src/9vx/devtab.c                    |       9 ++++++---
         A src/9vx/etherpcap.c                 |     189 +++++++++++++++++++++++++++++++
         A src/9vx/ethertap.c                  |     185 ++++++++++++++++++++++++++++++
         A src/9vx/fossil.9                    |       0 
         M src/9vx/main.c                      |     243 +++++++++++++++++++++++++++++--
         M src/9vx/mmu.c                       |      28 ++++++++++++++++++++--------
         M src/9vx/sched.c                     |       8 ++++----
         M src/9vx/sdloop.c                    |      59 ++++++++++++++++++++++++++-----
         M src/9vx/u.h                         |       1 +
         A src/9vx/venti.9                     |       0 
         A src/9vx/vether.c                    |     122 +++++++++++++++++++++++++++++++
         A src/9vx/vether.h                    |      15 +++++++++++++++
         M src/libvx32/Makefrag                |       4 ++++
         M src/libvx32/freebsd.c               |     162 ++++++++++++++++++++++++++++---
         M src/libvx32/run64.S                 |       4 ++++
       
       74 files changed, 28454 insertions(+), 87 deletions(-)
       ---
 (DIR) diff --git a/.hgignore b/.hgignore
       @@ -25,6 +25,8 @@ src/vxa/bz2/*ebz2
        src/vxlinux/vxlinux
        src/9vx/9vx
        src/9vx/bootcode.S
       +src/9vx/fossil.S
       +src/9vx/venti.S
        src/9vx/data2s
        src/9vx/a/errstr.h
        src/9vx/kerndate.h
 (DIR) diff --git a/CONTRIBUTORS b/CONTRIBUTORS
       @@ -3,3 +3,9 @@ The following people have contributed source code to vx32.
        Bryan Ford <baford@pdos.csail.mit.edu>
        Michael Teichgräber <mt4swm@googlemail.com>
        Russ Cox <rsc@swtch.com>
       +Jesus Galan Lopez <yiyu.jgl@gmail.com>
       +Tuly Gray
       +Devon H. O'Dell
       +Ron Minnich
       +Erik Quanstrom
       +Brian L. Stuart
 (DIR) diff --git a/doc/9vx.1 b/doc/9vx.1
       @@ -0,0 +1,127 @@
       +.TH 9VX 1
       +.SH NAME
       +9vx, 9vx-tap \- Plan9 port to the virtual execution environment vx32
       +.SH SYNOPSIS
       +.B 9vx
       +[
       +.I option ...
       +]
       +[
       +.I -p 9vx.ini
       +]
       +[
       +.I -r root
       +]
       +[
       +.I -u user
       +]
       +.PP
       +.B 9vx-tap
       +[
       +.I option ...
       +]
       +[
       +.I -p 9vx.ini
       +]
       +[
       +.I -r root
       +]
       +[
       +.I -u user
       +]
       +.SH DESCRIPTION
       +Plan 9 VX (or
       +.I 9vx
       +for short) is a port of the Plan 9 operating system to run on top of commodity operating systems, allowing the use of both Plan 9 and the host system simultaneously. To run user programs,
       +.I 9vx
       +creates an appropriate address space in a window within its own address space and invokes vx32 to simulate user mode execution. Some hardware devices are replaced by virtual versions, depending on the options given to
       +.I 9vx.
       +.I 9vx-tap
       +is a shell script that sets up a tap device with tunctl(1), launches
       +.I 9vx,
       +and removes the tap device when finished.
       +.PP
       +Options can be passed to
       +.I 9vx
       +as command line arguments or in a configuration file with the
       +.I -p
       +option (see below). If no
       +.I root
       +argument is present, the current directory or
       +.I /usr/local/9vx
       +is used.
       +Alternatively, a file system can be specified in the 9vx.ini file.
       +If a
       +.I user
       +is not specified, the current user in the host operating system will be used.
       +Other options are:
       +.nr xx \w'\fL-m\f2name\ \ '
       +.TP \n(xxu
       +.BI -b
       +Run /boot/boot instead of bootscript
       +.TP
       +.BI -f
       +Do not fork at init
       +.TP
       +.BI -g
       +Do not start the gui
       +.TP
       +.BI -i
       +Run rc instead of init
       +.TP
       +.BI -t
       +Use tty for input/output
       +.TP
       +.BI -n " [ tap ] [ device ]"
       +Create virtual ethernet devices. The
       +.I tap
       +option tells that
       +.I device
       +is a tap device. Else, the virtual device will use pcap(3) to intercept packets going to
       +.I device,
       +and will therefore need root privileges. If a host
       +.I device
       +is not specified, pcap will use the first one available, and tap will use the
       +.I tap0
       +device. More than one virtual ethernet device can be used. In absence of virtual devices, the network stack of the host system will be used.
       +.TP
       +.BI -m " macaddress"
       +Use the hardware address
       +.I macaddress
       +for the last given virtual network device.
       +.SS 9vx.ini configuration files
       +Configuration parameters can also be given to
       +.I 9vx
       +in the configuration file specified with the
       +.I -p
       +command line option.
       +The file name
       +.L -
       +means the standard input.
       +The file
       +.I 9vx.ini
       +has to contain a list of
       +.I parameter=value
       +pairs in a similar fashion to plan9.ini(8). Available options are
       +.I bootboot,
       +.I nofork,
       +.I nogui,
       +.I initrc,
       +.I usetty,
       +.I net,
       +.I macaddr,
       +.I localroot
       +and
       +.I user.
       +Other options will be passed to the boot process as environment variables.
       +.SH BUGS
       +The menu system of plan9.ini(8) is not supported in
       +.I 9vx.ini
       +files.
       +.P
       +.I 9vx
       +is not as stable as native Plan 9 systems.
       +.SH "SEE ALSO"
       +.br
       +Bryan Ford and Russ Cox,
       +``Vx32: Lightweight User-level Sandboxing on the x86''
 (DIR) diff --git a/src/9vx/9vx-tap b/src/9vx/9vx-tap
       @@ -0,0 +1,27 @@
       +#!/bin/sh
       +
       +USERID=`whoami`
       +
       +# Create the tap device with tunctl
       +IFACE=`sudo tunctl -b -u $USERID`
       +# or openvpn
       +#IFACE=tap0
       +#sudo openvpn --mktun --dev $IFACE --user $USERID
       +
       +# Bring the tap device up
       +sudo /sbin/ifconfig $IFACE 0.0.0.0 up
       +
       +# Add it to the bridge
       +sudo /usr/sbin/brctl addif br0 $IFACE
       +
       +# Launch 9vx (use -f to not fork); "$@" hands every extra argument
       +# through unchanged, even when it contains blanks
       +9vx -f -n tap $IFACE "$@"
       +
       +# Bring the tap device down and disconnect from br0
       +sudo /sbin/ifconfig $IFACE down
       +sudo /usr/sbin/brctl delif br0 $IFACE
       +
       +# Remove the tap device with tunctl
       +# (plain sh has no "&>": it would run tunctl in the background instead
       +# of silencing it, so redirect stdout and stderr explicitly)
       +sudo tunctl -d $IFACE > /dev/null 2>&1
       +# or openvpn
       +#sudo openvpn --rmtun --dev $IFACE
 (DIR) diff --git a/src/9vx/LICENSE b/src/9vx/LICENSE
       @@ -4,6 +4,8 @@ Plan 9 from Bell Labs distribution, which carries this license.
        The local changes are Copyright (c) 2006-2008 Russ Cox and
        are distributed as contributions under the terms of this license.
        
       +Other contributors are listed on the AUTHORS file.
       +
        
        ===================================================================
        
 (DIR) diff --git a/src/9vx/Makefrag b/src/9vx/Makefrag
       @@ -29,13 +29,12 @@ PLAN9_OBJS = \
                        devaudio.o \
                        devaudio-$(PLAN9AUDIO).o \
                        devfs-posix.o \
       -                devip.o \
       -                devip-posix.o \
                        devmntloop.o \
                        devmouse.o \
                        devram.o \
                        devtab.o \
                        factotum.o \
       +                fossil.o \
                        kprocdev.o \
                        label.o \
                        main.o \
       @@ -47,6 +46,7 @@ PLAN9_OBJS = \
                        time.o \
                        trap.o \
                        tty.o \
       +                venti.o \
                        vx32.o \
                )
        
       @@ -58,7 +58,6 @@ PLAN9_A_OBJS = \
                $(addprefix 9vx/a/, \
                        allocb.o \
                        auth.o \
       -                bo.o \
                        chan.o \
                        classmask.o \
                        cleanname.o \
       @@ -91,6 +90,7 @@ PLAN9_A_OBJS = \
                        page.o \
                        parse.o \
                        parseip.o \
       +                part.o \
                        pgrp.o \
                        print.o \
                        proc.o \
       @@ -111,6 +111,48 @@ PLAN9_A_OBJS = \
                        utf.o \
                )
        
       +PLAN9_IP_OBJS = \
       +        $(addprefix 9vx/,\
       +                devip.o \
       +                devip-posix.o \
       +                etherpcap.o \
       +                ethertap.o \
       +                vether.o \
       +        ) \
       +        $(addprefix 9vx/a/,\
       +                devaoe.o \
       +                devether.o \
       +                netif.o \
       +                sdaoe.o \
       +        ) \
       +        $(addprefix 9vx/a/ip/,\
       +                arp.o \
       +                chandial.o \
       +                devip.o \
       +                esp.o \
       +                ethermedium.o \
       +                gre.o \
       +                icmp.o \
       +                icmp6.o \
       +                il.o \
       +                inferno.o \
       +                ip.o \
       +                ipaux.o \
       +                ipifc.o \
       +                ipmux.o \
       +                iproute.o \
       +                ipv6.o \
       +                loopbackmedium.o \
       +                netdevmedium.o \
       +                netlog.o \
       +                nullmedium.o \
       +                pktmedium.o \
       +                ptclbsum.o \
       +                tcp.o \
       +                udp.o \
       +        )
       +PLAN9_IP_LIBS = -lpcap
       +
        PLAN9_nogui_OBJS = \
                $(addprefix 9vx/,\
                        nogui.o \
       @@ -142,6 +184,7 @@ PLAN9_GUI_LIBS = $(PLAN9_$(PLAN9GUI)_LIBS)
        PLAN9_DEPS = \
                $(PLAN9_OBJS) \
                $(PLAN9_A_OBJS) \
       +        $(PLAN9_IP_OBJS) \
                $(PLAN9_GUI_OBJS) \
                9vx/libsec/libsec.a \
                9vx/libmemlayer/libmemlayer.a \
       @@ -150,7 +193,7 @@ PLAN9_DEPS = \
                libvx32/libvx32.a \
        
        9vx/9vx: $(PLAN9_DEPS)
       -        $(HOST_CC) -o $@ $(PLAN9_DEPS) $(PLAN9_GUI_LIBS) -lpthread
       +        $(HOST_CC) -o $@ $(PLAN9_DEPS) $(PLAN9_GUI_LIBS) $(PLAN9_IP_LIBS) -lpthread
        
        9vx/a/%.o: 9vx/a/%.c
                $(HOST_CC) $(HOST_CFLAGS) -I. -I9vx -I9vx/a -Wall -Wno-missing-braces -c -o $@ $<
       @@ -176,6 +219,12 @@ PLAN9_DEPS = \
        9vx/factotum.S: 9vx/data2s 9vx/factotum.9
                ./9vx/data2s factotum < 9vx/factotum.9 >$@_ && mv $@_ $@
        
       +9vx/fossil.S: 9vx/data2s 9vx/fossil.9
       +        ./9vx/data2s fossil < 9vx/fossil.9 >$@_ && mv $@_ $@
       +
       +9vx/venti.S: 9vx/data2s 9vx/venti.9
       +        ./9vx/data2s venti < 9vx/venti.9 > $@_ && mv $@_ $@
       +
        9vx/a/errstr.h: 9vx/a/error.h
                sed 's/extern //; s!;.*/\* ! = "!; s! \*\/!";!' 9vx/a/error.h >9vx/a/errstr.h
        
       @@ -199,7 +248,10 @@ CLEAN_FILES += \
                9vx/a/errstr.h \
                9vx/9vx \
                9vx/data2s \
       -        9vx/bootcode.S
       +        9vx/bootcode.S \
       +        9vx/factotum.S \
       +        9vx/fossil.S \
       +        9vx/venti.S
        
        include 9vx/libdraw/Makefrag
        include 9vx/libmemlayer/Makefrag
 (DIR) diff --git a/src/9vx/a/aoe.h b/src/9vx/a/aoe.h
       @@ -0,0 +1,84 @@
       +enum {
       +        ACata,
       +        ACconfig,
       +};
       +
       +enum {
       +        AQCread,
       +        AQCtest,
       +        AQCprefix,
       +        AQCset,
       +        AQCfset,
       +};
       +
       +enum {
       +        AEcmd        = 1,
       +        AEarg,
       +        AEdev,
       +        AEcfg,
       +        AEver,
       +};
       +
       +enum {
       +        Aoetype        = 0x88a2,
       +        Aoesectsz = 512,
       +        Szaoeata        = 24+12,
       +        Szaoeqc        = 24+8,
       +        Aoever        = 1,
       +
       +        AFerr        = 1<<2,
       +        AFrsp        = 1<<3,
       +
       +        AAFwrite= 1,
       +        AAFext        = 1<<6,
       +};
       +
       +typedef struct {
       +        uchar        dst[Eaddrlen];
       +        uchar        src[Eaddrlen];
       +        uchar        type[2];
       +        uchar        verflag;
       +        uchar        error;
       +        uchar        major[2];
       +        uchar        minor;
       +        uchar        cmd;
       +        uchar        tag[4];
       +} Aoehdr;
       +
       +typedef struct {
       +        uchar        dst[Eaddrlen];
       +        uchar        src[Eaddrlen];
       +        uchar        type[2];
       +        uchar        verflag;
       +        uchar        error;
       +        uchar        major[2];
       +        uchar        minor;
       +        uchar        cmd;
       +        uchar        tag[4];
       +        uchar        aflag;
       +        uchar        errfeat;
       +        uchar        scnt;
       +        uchar        cmdstat;
       +        uchar        lba[6];
       +        uchar        res[2];
       +} Aoeata;
       +
       +typedef struct {
       +        uchar        dst[Eaddrlen];
       +        uchar        src[Eaddrlen];
       +        uchar        type[2];
       +        uchar        verflag;
       +        uchar        error;
       +        uchar        major[2];
       +        uchar        minor;
       +        uchar        cmd;
       +        uchar        tag[4];
       +        uchar        bufcnt[2];
       +        uchar        fwver[2];
       +        uchar        scnt;
       +        uchar        verccmd;
       +        uchar        cslen[2];
       +} Aoeqc;
       +
       +extern char Echange[];
       +extern char Enotup[];
 (DIR) diff --git a/src/9vx/a/chan.c b/src/9vx/a/chan.c
       @@ -28,7 +28,7 @@ struct Elemlist
        {
                char        *aname;        /* original name */
                char        *name;        /* copy of name, so '/' can be overwritten */
       -        int        nelems;
       +        uint        nelems;
                char        **elems;
                int        *off;
                int        mustbedir;
 (DIR) diff --git a/src/9vx/a/devaoe.c b/src/9vx/a/devaoe.c
       @@ -0,0 +1,2575 @@
       +/*
       + *        © 2005-8 coraid
       + *        aoe storage initiator
       + */
       +
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "io.h"
       +#include "ureg.h"
       +#include "error.h"
       +#include "netif.h"
       +#include "etherif.h"
       +#include "ip/ip.h"
       +#include "aoe.h"
       +
       +#define        WAKEUP(x)        wakeup(&((x)->rend))
       +#define SLEEP(a,b,c)        sleep(&(a->rend), b, c)
       +
       +//#pragma        varargck argpos        eventlog        1
       +
       +#define dprint(...)        if(debug) eventlog(__VA_ARGS__); else USED(debug);
       +#define uprint(...)        snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
       +
       +enum {
       +        Maxunits        = 0xff,
       +        Maxframes        = 128,
       +        Maxmtu                = 100000,
       +        Ndevlink        = 6,
       +        Nea                = 6,
       +        Nnetlink        = 6,
       +};
       +
       +#define TYPE(q)                ((ulong)(q).path & 0xf)
       +#define UNIT(q)                (((ulong)(q).path>>4) & 0xff)
       +#define L(q)                (((ulong)(q).path>>12) & 0xf)
       +#define QID(u, t)         ((u)<<4 | (t))
       +#define Q3(l, u, t)        ((l)<<8 | QID(u, t))
       +#define UP(d)                ((d)->flag & Dup)
       +
       +#define        Ticks                msec()
       +#define        Ms2tk(t)        (((t)*HZ)/1000)
       +#define        Tk2ms(t)        (((t)*1000)/HZ)
       +
       +enum {
       +        Qzero,
       +        Qtopdir                = 1,
       +        Qtopbase,
       +        Qtopctl                = Qtopbase,
       +        Qtoplog,
       +        Qtopend,
       +
       +        Qunitdir,
       +        Qunitbase,
       +        Qctl                = Qunitbase,
       +        Qdata,
       +        Qconfig,
       +        Qident,
       +
       +        Qdevlinkdir,
       +        Qdevlinkbase,
       +        Qdevlink        = Qdevlinkbase,
       +        Qdevlinkend,
       +
       +        Qtopfiles        = Qtopend-Qtopbase,
       +        Qdevlinkfiles        = Qdevlinkend-Qdevlinkbase,
       +
       +        Eventlen         = 256,
       +        Nevents         = 64,
       +
       +        Fread                = 0,
       +        Fwrite,
       +        Tfree                = -1,
       +        Tmgmt,
       +
       +        /* round trip bounds, timeouts, in ticks */
       +        Rtmax                = Ms2tk(320),
       +        Rtmin                = Ms2tk(20),
       +        Srbtimeout        = 45*HZ,
       +
       +        Dbcnt                = 1024,
       +
       +        Crd                = 0x20,
       +        Crdext                = 0x24,
       +        Cwr                = 0x30,
       +        Cwrext                = 0x34,
       +        Cid                = 0xec,
       +};
       +
       +enum {
       +        Read,
       +        Write,
       +};
       +
       +/*
       + * unified set of flags
       + * a Netlink + Aoedev must both be jumbo capable
       + * to send jumbograms to that interface.
       + */
       +enum {
       +        /* sync with ahci.h */
       +        Dllba         = 1<<0,
       +        Dsmart        = 1<<1,
       +        Dpower        = 1<<2,
       +        Dnop        = 1<<3,
       +        Datapi        = 1<<4,
       +        Datapi16= 1<<5,
       +
       +        /* aoe specific */
       +        Dup        = 1<<6,
       +        Djumbo        = 1<<7,
       +};
       +
       +static char *flagname[] = {
       +        "llba",
       +        "smart",
       +        "power",
       +        "nop",
       +        "atapi",
       +        "atapi16",
       +
       +        "up",
       +        "jumbo",
       +};
       +
       +typedef struct {
       +        uchar        flag;
       +        uchar        lostjumbo;
       +        int        datamtu;
       +
       +        Chan        *cc;
       +        Chan        *dc;
       +        Chan        *mtu;                /* open early to prevent bind issues. */
       +        char        path[Maxpath];
       +        uchar        ea[Eaddrlen];
       +} Netlink;
       +
       +typedef struct {
       +        Netlink        *nl;
       +        int        nea;
       +        ulong        eaidx;
       +        uchar        eatab[Nea][Eaddrlen];
       +        int        datamtu;
       +        ulong        npkt;
       +        ulong        resent;
       +        uchar        flag;
       +
       +        ulong        rttavg;
       +        ulong        mintimer;
       +} Devlink;
       +
       +typedef struct Srb Srb;
       +struct Srb {
       +        Rendez        rend;
       +        Srb        *next;
       +        ulong        ticksent;
       +        ulong        len;
       +        vlong        sector;
       +        short        write;
       +        short        nout;
       +        char        *error;
       +        void        *dp;
       +        void        *data;
       +};
       +
       +typedef struct {
       +        int        tag;
       +        ulong        bcnt;
       +        ulong        dlen;
       +        vlong        lba;
       +        ulong        ticksent;
       +        int        nhdr;
       +        uchar        hdr[ETHERMINTU];
       +        void        *dp;
       +        Devlink        *dl;
       +        Netlink        *nl;
       +        int        eaidx;
       +        Srb        *srb;
       +} Frame;
       +
       +typedef struct Aoedev Aoedev;
       +struct Aoedev {
       +        QLock        qlock;
       +        Aoedev        *next;
       +
       +        ulong        vers;
       +
       +        int        ndl;
       +        ulong        dlidx;
       +        Devlink        *dl;
       +        Devlink        dltab[Ndevlink];
       +
       +        ushort        fwver;
       +        uchar        flag;
       +        int        nopen;
       +        int        major;
       +        int        minor;
       +        int        unit;
       +        int        lasttag;
       +        int        nframes;
       +        Frame        *frames;
       +        vlong        bsize;
       +        vlong        realbsize;
       +
       +        uint        maxbcnt;
       +        uint        maxmtu;
       +        ulong        lostjumbo;
       +        ushort        nout;
       +        ushort        maxout;
       +        ulong        lastwadj;
       +        Srb        *head;
       +        Srb        *tail;
       +        Srb        *inprocess;
       +
       +        char        serial[20+1];
       +        char        firmware[8+1];
       +        char        model[40+1];
       +        int        nconfig;
       +        uchar        config[1024];
       +        uchar        ident[512];
       +};
       +
       +//#pragma        varargck type        "æ"        Aoedev*
       +
       +static struct {
       +        Lock        lk;
       +        QLock        qlock;
       +        Rendez        rend;
       +        char        buf[Eventlen*Nevents];
       +        char        *rp;
       +        char        *wp;
       +} events;
       +
       +static struct {
       +        RWlock        rwlock;
       +        int        nd;
       +        Aoedev        *d;
       +} devs;
       +
       +static struct {
       +        Lock        lk;
       +        int        reader[Nnetlink];        /* reader is running. */
       +        Rendez        rendez[Nnetlink];        /* confirm exit. */
       +        Netlink        nl[Nnetlink];
       +} netlinks;
       +
       +extern        Dev         aoedevtab;
       +static        Ref         units;
       +static        Ref        drivevers;
       +static        int        debug;
       +static        int        autodiscover        = 1;
       +static        int        rediscover;
       +        char         Enotup[]         = "aoe device is down";
       +        char        Echange[]        = "media or partition has changed";
       +
       +/*
       + * Allocate an Srb followed by sz bytes of payload.  dp and data both
       + * point at that trailing payload and ticksent is stamped with the
       + * current tick count.  Raises Enomem rather than dereferencing nil
       + * when the allocation fails.
       + * NOTE(review): the other fields are not set here; presumably the
       + * kernel malloc hands back zeroed memory -- confirm.
       + */
       +static Srb*
       +srballoc(ulong sz)
       +{
       +        Srb *srb;
       +
       +        srb = malloc(sizeof *srb+sz);
       +        if(srb == nil)
       +                error(Enomem);
       +        srb->dp = srb->data = srb+1;
       +        srb->ticksent = Ticks;
       +        return srb;
       +}
       +
       +/*
       + * Allocate an Srb whose payload is the caller-supplied buffer db
       + * (nothing is copied; dp and data simply point at db).  The second
       + * argument is unused.  Raises Enomem rather than dereferencing nil
       + * when the allocation fails.
       + */
       +static Srb*
       +srbkalloc(void *db, ulong dummy)
       +{
       +        Srb *srb;
       +
       +        srb = malloc(sizeof *srb);
       +        if(srb == nil)
       +                error(Enomem);
       +        srb->dp = srb->data = db;
       +        srb->ticksent = Ticks;
       +        return srb;
       +}
       +
       +#define srbfree(srb) free(srb)
       +
       +/*
       + * Fail a request: record the error string s in srb, count one fewer
       + * outstanding frame against it and wake whoever sleeps on its rendez.
       + */
       +static void
       +srberror(Srb *srb, char *s)
       +{
       +        srb->nout--;
       +        srb->error = s;
       +        wakeup(&srb->rend);
       +}
       +
       +/*
       + * Release frame f and, if a request was attached to it, fail that
       + * request with s and drop the device's outstanding count.  A frame
       + * that is already free is left untouched.
       + */
       +static void
       +frameerror(Aoedev *d, Frame *f, char *s)
       +{
       +        Srb *pending;
       +
       +        if(f->tag == Tfree)
       +                return;
       +        pending = f->srb;
       +        f->tag = Tfree;                /* don't get fooled by way-slow responses */
       +        f->srb = nil;
       +        if(pending != nil){
       +                srberror(pending, s);
       +                d->nout--;
       +        }
       +}
       +
       +/*
       + * Format the device's "major.minor" name into the current process's
       + * genbuf and return that buffer; the result is overwritten by the
       + * next user of up->genbuf.
       + */
       +static char*
       +unitname(Aoedev *d)
       +{
       +        char *name;
       +
       +        name = up->genbuf;
       +        snprint(name, sizeof up->genbuf, "%d.%d", d->major, d->minor);
       +        return name;
       +}
       +
       +/*
       + * Deliver the oldest pending event-log record to a.  Returns the
       + * number of bytes copied, or 0 when no record is pending.  At most n
       + * bytes are copied; the record is consumed even if it was longer.
       + */
       +static long
       +eventlogread(void *a, long n)
       +{
       +        int len;
       +        char *p, *buf;
       +
       +        buf = smalloc(Eventlen);
       +        QLOCK(&events);
       +        LOCK(&events);
       +        p = events.rp;
       +        /*
       +         * The first byte of a slot holds the record length written by
       +         * eventlog().  Read it unsigned: through a signed plain char a
       +         * record longer than 127 bytes would come back negative and
       +         * turn into a negative size for the memmove below.
       +         */
       +        len = *(uchar*)p;
       +        if(len == 0){
       +                n = 0;
       +                UNLOCK(&events);
       +        } else {
       +                if(n > len)
       +                        n = len;
       +                /* can't move directly into pageable space with events lock held */
       +                memmove(buf, p+1, n);
       +                *p = 0;
       +                events.rp = p += Eventlen;
       +                if(p >= events.buf + sizeof events.buf)
       +                        events.rp = events.buf;
       +                UNLOCK(&events);
       +
       +                /* the concern here is page faults in memmove below */
       +                if(waserror()){
       +                        free(buf);
       +                        QUNLOCK(&events);
       +                        nexterror();
       +                }
       +                memmove(a, buf, n);
       +                poperror();
       +        }
       +        free(buf);
       +        QUNLOCK(&events);
       +        return n;
       +}
       +
       +/*
       + * Format a message into the next slot of the event ring and wake any
       + * reader.  Each slot is Eventlen bytes: one length byte followed by
       + * the text.  If the slot being written still held an unread record,
       + * the read pointer is dragged forward so it keeps pointing at the
       + * oldest surviving record.  Returns the value vsnprint returned.
       + */
       +static int
       +eventlog(char *fmt, ...)
       +{
       +        int dragrp, n;
       +        char *p;
       +        va_list arg;
       +
       +        LOCK(&events);
       +        p = events.wp;
       +        dragrp = *p++;                /* non-zero: slot still held an unread record */
       +        va_start(arg, fmt);
       +        n = vsnprint(p, Eventlen-1, fmt, arg);
       +        va_end(arg);                /* every va_start needs a matching va_end */
       +        *--p = n;
       +        p = events.wp += Eventlen;
       +        if(p >= events.buf + sizeof events.buf)
       +                p = events.wp = events.buf;
       +        if(dragrp)
       +                events.rp = p;
       +        UNLOCK(&events);
       +        WAKEUP(&events);
       +        return n;
       +}
       +
       +/*
       + * Return the number of unread slots in the event ring.
       + *
       + * The ring is empty when the slot at events.rp has a zero length
       + * byte.  Otherwise the distance from rp to wp is measured in bytes,
       + * wrapping at sizeof events.buf, and converted to slots by dividing
       + * by Eventlen.  When wp == rp and the slot is non-empty the ring is
       + * full, so the whole buffer is counted.
       + *
       + * NOTE(review): the wrap-around case used to subtract the byte
       + * distance from Nevents; Nevents looks like a slot count rather
       + * than a byte size -- confirm against its definition.
       + */
       +static int
       +eventcount(void)
       +{
       +        int n;
       +
       +        LOCK(&events);
       +        if(*events.rp == 0)
       +                n = 0;
       +        else if(events.wp <= events.rp)
       +                n = (int)sizeof events.buf - (events.rp - events.wp);
       +        else
       +                n = events.wp - events.rp;
       +        UNLOCK(&events);
       +        return n/Eventlen;
       +}
       +
       +/*
       + * Return the number of ticks elapsed since the tick value stored in
       + * the low 16 bits of tag, computed modulo 1<<16.
       + */
       +static int
       +tsince(int tag)
       +{
       +        int now, then, delta;
       +
       +        now = Ticks & 0xffff;
       +        then = tag & 0xffff;
       +        delta = now - then;
       +        if(delta >= 0)
       +                return delta;
       +        /* the 16-bit tick counter wrapped since the tag was made */
       +        return delta + (1<<16);
       +}
       +
       +/*
       + * Build a new tag for device d: the incremented d->lasttag in the
       + * upper bits and the low 16 bits of Ticks below.  Values equal to
       + * the reserved tags Tfree and Tmgmt are skipped.
       + */
       +static int
       +newtag(Aoedev *d)
       +{
       +        int t;
       +
       +        for(;;){
       +                t = ++d->lasttag << 16;
       +                t |= Ticks & 0xffff;
       +                if(t != Tfree && t != Tmgmt)
       +                        return t;
       +        }
       +}
       +
       +/*
       + * Mark device d as down: clear Dup, fail every frame with Enotup and
       + * release it, drop the in-process request, and log the reason err.
       + */
       +static void
       +downdev(Aoedev *d, char *err)
       +{
       +        Frame *fr, *end;
       +
       +        d->flag &= ~Dup;
       +        fr = d->frames;
       +        end = fr + d->nframes;
       +        while(fr < end){
       +                frameerror(d, fr, Enotup);
       +                fr->tag = Tfree;
       +                fr->srb = nil;
       +                fr++;
       +        }
       +        d->inprocess = nil;
       +        eventlog("%æ: removed; %s\n", d, err);
       +}
       +
       +/*
       + * Allocate a Block holding frame f's header followed by its data
       + * (when dlen is non-zero).  The block length is at least ETHERMINTU.
       + */
       +static Block*
       +allocfb(Frame *f)
       +{
       +        int total;
       +        Block *bp;
       +
       +        total = f->nhdr + f->dlen;
       +        if(total < ETHERMINTU)
       +                total = ETHERMINTU;        /* pad short frames up to the minimum */
       +
       +        bp = allocb(total);
       +        memmove(bp->wp, f->hdr, f->nhdr);
       +        if(f->dlen != 0)
       +                memmove(bp->wp + f->nhdr, f->dp, f->dlen);
       +        bp->wp += total;
       +        return bp;
       +}
       +
       +/*
       + * Store the low 48 bits of lba into a->lba, least significant
       + * byte first.
       + */
       +static void
       +putlba(Aoeata *a, vlong lba)
       +{
       +        int i;
       +        uchar *c;
       +
       +        c = a->lba;
       +        for(i = 0; i < 6; i++)
       +                c[i] = lba >> (8*i);
       +}
       +
       +/*
       + * Return the next devlink of d that has Dup set, scanning round-robin
       + * from d->dlidx and trying each of the d->ndl links at most once.
       + * Returns 0 if none is up.
       + */
       +static Devlink*
       +pickdevlink(Aoedev *d)
       +{
       +        ulong tries, slot;
       +        Devlink *cand;
       +
       +        for(tries = 0; tries < d->ndl; tries++){
       +                slot = d->dlidx++ % d->ndl;
       +                cand = d->dl + slot;
       +                if(!cand)
       +                        continue;
       +                if(cand->flag & Dup)
       +                        return cand;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * Return the index of the next address of devlink l to use, cycling
       + * through l->nea entries, or -1 if l is nil or has no addresses.
       + */
       +static int
       +pickea(Devlink *l)
       +{
       +        if(l == 0 || l->nea == 0)
       +                return -1;
       +        return l->eaidx++ % l->nea;
       +}
       +
       +/*
       + * Fill in the AoE header h for frame f on device d and give the frame
       + * a fresh tag.  Records the chosen devlink, netlink, address index and
       + * send time in f.
       + *
       + * Returns the new tag, or -1 when the frame's srb has been pending
       + * longer than Srbtimeout (the frame is failed with Etimedout) or when
       + * no devlink/address can be picked (the device is taken down).
       + */
       +static int
       +hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd)
       +{
       +        int i;
       +        Devlink *l;
       +
       +        /* only frames that already carry an srb can time out */
       +        if(f->srb)
       +        if((long)(Ticks-f->srb->ticksent) > Srbtimeout){
       +                eventlog("%æ: srb timeout\n", d);
       +                frameerror(d, f, Etimedout);
       +                return -1;
       +        }
       +        l = pickdevlink(d);
       +        /* pickea yields -1 both for a nil link and for a link with no addresses */
       +        i = pickea(l);
       +        if(i == -1){
       +                downdev(d, "resend fails; no netlink/ea");
       +                return -1;
       +        }
       +        memmove(h->dst, l->eatab[i], Eaddrlen);
       +        memmove(h->src, l->nl->ea, sizeof h->src);
       +        hnputs(h->type, Aoetype);
       +        h->verflag = Aoever << 4;
       +        h->error = 0;
       +        hnputs(h->major, d->major);
       +        h->minor = d->minor;
       +        h->cmd = cmd;
       +
       +        /* the tag is written to the header and remembered in the frame */
       +        hnputl(h->tag, f->tag = newtag(d));
       +        f->dl = l;
       +        f->nl = l->nl;
       +        f->eaidx = i;
       +        f->ticksent = Ticks;
       +
       +        return f->tag;
       +}
       +
       +/*
       + * Retransmit frame f of device d: rebuild its header with a new tag,
       + * clamp the byte count to the device's current maxbcnt, bump the
       + * devlink counters and write a freshly allocated Block to the
       + * netlink's device.
       + *
       + * Returns 0 on success, -1 if hset fails or the write raises an error.
       + */
       +static int
       +resend(Aoedev *d, Frame *f)
       +{
       +        ulong n;
       +        Aoeata *a;
       +
       +        a = (Aoeata*)f->hdr;
       +        if(hset(d, f, (Aoehdr*)a, a->cmd) == -1)
       +                return -1;
       +        n = f->bcnt;
       +        if(n > d->maxbcnt){
       +                n = d->maxbcnt;                /* mtu mismatch (jumbo fail?) */
       +                if(f->dlen > n)
       +                        f->dlen = n;
       +        }
       +        a->scnt = n / Aoesectsz;
       +        f->dl->resent++;
       +        f->dl->npkt++;
       +        /* an error from allocfb or bwrite is swallowed and reported as -1 */
       +        if(waserror())
       +                /* should remove the netlink */
       +                return -1;
       +        devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
       +        poperror();
       +        return 0;
       +}
       +
       +/*
       + * Send an AoE config query (cmd ACconfig) for major.minor to the
       + * all-ones Ethernet address on every netlink slot whose cc is set.
       + * Each query is a zeroed frame of ETHERMINTU bytes.
       + */
       +static void
       +discover(int major, int minor)
       +{
       +        Aoehdr *h;
       +        Block *b;
       +        Netlink *nl, *e;
       +
       +        nl = netlinks.nl;
       +        e = nl + nelem(netlinks.nl);
       +        for(; nl < e; nl++){
       +                if(nl->cc == nil)
       +                        continue;
       +                b = allocb(ETHERMINTU);
       +                /* free the block if anything fails while the header is built */
       +                if(waserror()){
       +                        freeb(b);
       +                        nexterror();
       +                }
       +                b->wp = b->rp + ETHERMINTU;
       +                memset(b->rp, 0, ETHERMINTU);
       +                h = (Aoehdr*)b->rp;
       +                memset(h->dst, 0xff, sizeof h->dst);
       +                memmove(h->src, nl->ea, sizeof h->src);
       +                hnputs(h->type, Aoetype);
       +                h->verflag = Aoever << 4;
       +                hnputs(h->major, major);
       +                h->minor = minor;
       +                h->cmd = ACconfig;
       +                poperror();
       +                /* NOTE(review): presumably bwrite takes ownership of b -- confirm */
       +                devtab[nl->dc->type]->bwrite(nl->dc, b, 0);
       +        }
       +}
       +
       +/*
       + * Kernel process body: check all frames on every device and resend
       + * any frame that has been outstanding for 200% of its devlink's
       + * average round trip time (rttavg << 1).
       + *
       + * One sweep is made roughly every Nms milliseconds.  After Nbcms/Nms
       + * sweeps a discover(0xffff, 0xff) is sent if rediscover is set.
       + * Devices whose qlock cannot be taken immediately, or that are not
       + * up, are skipped for this sweep.  Never returns; dummy is unused.
       + */
       +static void
       +aoesweepproc(void *dummy)
       +{
       +        ulong i, tx, timeout, nbc;
       +        long left;
       +        vlong starttick;
       +        enum { Nms = 100, Nbcms = 30*1000, };
       +        uchar *ea;
       +        Aoeata *a;
       +        Aoedev *d;
       +        Devlink *l;
       +        Frame *f, *e;
       +
       +        nbc = Nbcms/Nms;
       +loop:
       +        if(nbc-- == 0){
       +                if(rediscover && !waserror()){
       +                        discover(0xffff, 0xff);
       +                        poperror();
       +                }
       +                nbc = Nbcms/Nms;
       +        }
       +        starttick = Ticks;
       +        RLOCK(&devs);
       +        for(d = devs.d; d; d = d->next){
       +                if(!CANQLOCK(d))
       +                        continue;
       +                if(!UP(d)){
       +                        QUNLOCK(d);
       +                        continue;
       +                }
       +                tx = 0;
       +                f = d->frames;
       +                e = f + d->nframes;
       +                for (; f < e; f++){
       +                        if(f->tag == Tfree)
       +                                continue;
       +                        l = f->dl;
       +                        timeout = l->rttavg << 1;
       +                        i = tsince(f->tag);
       +                        if(i < timeout)
       +                                continue;
       +                        /* a timeout with the window full shrinks the window */
       +                        if(d->nout == d->maxout){
       +                                if(d->maxout > 1)
       +                                        d->maxout--;
       +                                d->lastwadj = Ticks;
       +                        }
       +                        a = (Aoeata*)f->hdr;
       +                        /* too many lost large frames: fall back to Dbcnt-sized transfers */
       +                        if(a->scnt > Dbcnt / Aoesectsz &&
       +                           ++f->nl->lostjumbo > (d->nframes << 1)){
       +                                ea = f->dl->eatab[f->eaidx];
       +                                eventlog("%æ: jumbo failure on %s:%E; lba%lld\n",
       +                                        d, f->nl->path, ea, f->lba);
       +                                d->maxbcnt = Dbcnt;
       +                                d->flag &= ~Djumbo;
       +                        }
       +                        resend(d, f);
       +                        /* double the rtt estimate once per device per sweep, capped at Rtmax */
       +                        if(tx++ == 0){
       +                                if((l->rttavg <<= 1) > Rtmax)
       +                                        l->rttavg = Rtmax;
       +                                eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg));
       +                        }
       +                }
       +                /* window full and no adjustment for 10 seconds: grow it again */
       +                if(d->nout == d->maxout && d->maxout < d->nframes &&
       +                   TK2MS(Ticks-d->lastwadj) > 10*1000){
       +                        d->maxout++;
       +                        d->lastwadj = Ticks;
       +                }
       +                QUNLOCK(d);
       +        }
       +        RUNLOCK(&devs);
       +        /*
       +         * The remaining time must be signed: if the sweep took longer
       +         * than Nms, an unsigned difference would wrap to a huge value
       +         * and the process would sleep for an enormous interval.
       +         */
       +        left = Nms - TK2MS(Ticks - starttick);
       +        if(left > 0)
       +                tsleep(&up->sleep, return0, 0, left);
       +        goto loop;
       +}
       +
       +/*
       + * Print-format handler: takes an Aoedev* argument and emits its name
       + * as "aoe<major>.<minor>".
       + */
       +static int
       +fmtaoe(Fmt *f)
       +{
       +        Aoedev *dev;
       +        char name[16];
       +
       +        dev = va_arg(f->args, Aoedev*);
       +        snprint(name, sizeof name, "aoe%d.%d", dev->major, dev->minor);
       +        return fmtstrcpy(f, name);
       +}
       +
       +static void netbind(char *path);
       +
       +/*
       + * Bind the interfaces named in the "aoeif" configuration entry.
       + *
       + * As written the configuration lookup is commented out and replaced
       + * by if(1), so this function always returns immediately; the loop
       + * below is unreachable and n and f are never set.
       + */
       +static void
       +aoecfg(void)
       +{
       +        int n, i;
       +        char *p, *f[32], buf[24];
       +
       +        if(1)
       +//        if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1)
       +                return;
       +        /* goo! */
       +        for(i = 0; i < n; i++){
       +                p = f[i];
       +                /* accept "etherN" or "#lN" and turn either into "#lN/etherN" */
       +                if(strncmp(p, "ether", 5) == 0)
       +                        snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
       +                else if(strncmp(p, "#l", 2) == 0)
       +                        snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
       +                else
       +                        continue;
       +                /* a failed bind is ignored so the remaining names are still tried */
       +                if(!waserror()){
       +                        netbind(buf);
       +                        poperror();
       +                }
       +        }
       +}
       +
       +/*
       + * One-time driver setup: install the 'æ' print format, reset the
       + * event ring pointers, start the sweep process and run aoecfg.
       + * Returns without doing anything if the guard lock is already held
       + * or setup has already been done.
       + */
       +static void
       +aoeinit(void)
       +{
       +        static int init;
       +        static QLock l;
       +
       +        if(!canqlock(&l))
       +                return;
       +        if(init != 0){
       +                qunlock(&l);
       +                return;
       +        }
       +        fmtinstall(L'æ', fmtaoe);
       +        events.rp = events.wp = events.buf;
       +        kproc("aoesweep", aoesweepproc, nil);
       +        aoecfg();
       +        init = 1;
       +        qunlock(&l);
       +}
       +
       +/*
       + * Attach to the device.  Only an empty spec is accepted; anything
       + * else raises Enonexist.  The returned channel refers to the root
       + * directory (Qzero).
       + */
       +static Chan*
       +aoeattach(char *spec)
       +{
       +        Chan *root;
       +
       +        if(*spec != '\0')
       +                error(Enonexist);
       +        aoeinit();
       +        root = devattach(L'æ', spec);
       +        mkqid(&root->qid, Qzero, 0, QTDIR);
       +        return root;
       +}
       +
       +/*
       + * Return the device at position unit (counting from 0) in the devs
       + * list, or nil if the list is shorter than that.
       + */
       +static Aoedev*
       +unitseq(ulong unit)
       +{
       +        int pos;
       +        Aoedev *d;
       +
       +        RLOCK(&devs);
       +        d = devs.d;
       +        for(pos = 0; d != nil && pos != unit; pos++)
       +                d = d->next;
       +        RUNLOCK(&devs);
       +        return d;
       +}
       +
       +/*
       + * Return the device whose unit number equals unit.  Raises
       + * "unit lookup failure" if no such device is on the devs list.
       + */
       +static Aoedev*
       +unit2dev(ulong unit)
       +{
       +        Aoedev *d;
       +
       +        RLOCK(&devs);
       +        d = devs.d;
       +        while(d != nil && d->unit != unit)
       +                d = d->next;
       +        RUNLOCK(&devs);
       +        if(d == nil)
       +                error("unit lookup failure");
       +        return d;
       +}
       +
       +/*
       + * Fill in dp for the file of kind type inside the unit directory
       + * that c refers to.  Returns 1 on success, -1 for an unknown type.
       + * Sizes are reported only while the device is up.
       + */
       +static int
       +unitgen(Chan *c, ulong type, Dir *dp)
       +{
       +        int perm, qtype;
       +        ulong vers;
       +        vlong size;
       +        char *name;
       +        Aoedev *d;
       +        Qid q;
       +
       +        d = unit2dev(UNIT(c->qid));
       +        vers = d->vers;
       +        qtype = QTFILE;
       +        perm = 0644;
       +        size = 0;
       +
       +        switch(type){
       +        case Qctl:
       +                name = "ctl";
       +                break;
       +        case Qdata:
       +                name = "data";
       +                perm = 0640;
       +                if(UP(d))
       +                        size = d->bsize;
       +                break;
       +        case Qconfig:
       +                name = "config";
       +                if(UP(d))
       +                        size = d->nconfig;
       +                break;
       +        case Qident:
       +                name = "ident";
       +                if(UP(d))
       +                        size = sizeof d->ident;
       +                break;
       +        case Qdevlinkdir:
       +                name = "devlink";
       +                qtype = QTDIR;
       +                perm = 0555;
       +                break;
       +        default:
       +                return -1;
       +        }
       +        mkqid(&q, QID(UNIT(c->qid), type), vers, qtype);
       +        devdir(c, q, name, size, eve, perm, dp);
       +        return 1;
       +}
       +
       +/*
       + * Fill in d for one of the top-level files ("ctl" or "log").
       + * Returns 1 on success, -1 for an unknown type.  The size of
       + * "log" is the value of eventcount().
       + */
       +static int
       +topgen(Chan *c, ulong type, Dir *d)
       +{
       +        int perm;
       +        vlong size;
       +        char *name;
       +        Qid q;
       +
       +        size = 0;
       +        perm = 0444;
       +        switch(type){
       +        case Qtopctl:
       +                name = "ctl";
       +                perm = 0644;
       +                break;
       +        case Qtoplog:
       +                name = "log";
       +                size = eventcount();
       +                break;
       +        default:
       +                return -1;
       +        }
       +        mkqid(&q, type, 0, QTFILE);
       +        devdir(c, q, name, size, eve, perm, d);
       +        return 1;
       +}
       +
       +/*
       + * Directory generator for the device tree.  Fills in dp for entry s
       + * of the directory (or for the file itself) identified by c->qid.
       + * Returns 1 when dp was filled in and -1 when there is no such entry.
       + * d0, d1 and d2 are unused.
       + */
       +static int
       +aoegen(Chan *c, char *d0, Dirtab *d1, int d2, int s, Dir *dp)
       +{
       +        int i;
       +        Aoedev *d;
       +        Qid q;
       +
       +        /* root directory: its only entry is "aoe" */
       +        if(c->qid.path == 0){
       +                switch(s){
       +                case DEVDOTDOT:
       +                        /* mkqid sets path, vers and type, so no Qid field is left uninitialized */
       +                        mkqid(&q, 0, 0, QTDIR);
       +                        devdir(c, q, "#æ", 0, eve, 0555, dp);
       +                        break;
       +                case 0:
       +                        mkqid(&q, Qtopdir, 0, QTDIR);
       +                        devdir(c, q, "aoe", 0, eve, 0555, dp);
       +                        break;
       +                default:
       +                        return -1;
       +                }
       +                return 1;
       +        }
       +
       +        switch(TYPE(c->qid)){
       +        default:
       +                return -1;
       +        case Qtopdir:
       +                if(s == DEVDOTDOT){
       +                        mkqid(&q, Qzero, 0, QTDIR);
       +                        devdir(c, q, "aoe", 0, eve, 0555, dp);
       +                        return 1;
       +                }
       +                /* the fixed top-level files come first, then one directory per unit */
       +                if(s < Qtopfiles)
       +                        return topgen(c, Qtopbase + s, dp);
       +                s -= Qtopfiles;
       +                if((d = unitseq(s)) == 0)
       +                        return -1;
       +                mkqid(&q, QID(d->unit, Qunitdir), 0, QTDIR);
       +                devdir(c, q, unitname(d), 0, eve, 0555, dp);
       +                return 1;
       +        case Qtopctl:
       +        case Qtoplog:
       +                return topgen(c, TYPE(c->qid), dp);
       +        case Qunitdir:
       +                if(s == DEVDOTDOT){
       +                        mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
       +                        uprint("%uld", UNIT(c->qid));
       +                        devdir(c, q, up->genbuf, 0, eve, 0555, dp);
       +                        return 1;
       +                }
       +                return unitgen(c, Qunitbase+s, dp);
       +        case Qctl:
       +        case Qdata:
       +        case Qconfig:
       +        case Qident:
       +                return unitgen(c, TYPE(c->qid), dp);
       +        case Qdevlinkdir:
       +                i = UNIT(c->qid);
       +                if(s == DEVDOTDOT){
       +                        mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
       +                        devdir(c, q, "devlink", 0, eve, 0555, dp);
       +                        return 1;
       +                }
       +                if(i >= units.ref)
       +                        return -1;
       +                d = unit2dev(i);
       +                if(s >= d->ndl)
       +                        return -1;
       +                /* devlink entries are named by their index */
       +                uprint("%d", s);
       +                mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
       +                devdir(c, q, up->genbuf, 0, eve, 0755, dp);
       +                return 1;
       +        case Qdevlink:
       +                uprint("%d", s);
       +                mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
       +                devdir(c, q, up->genbuf, 0, eve, 0755, dp);
       +                return 1;
       +        }
       +}
       +
       +/* Walk name[0..nname) from c by delegating to devwalk with aoegen as the generator. */
       +static Walkqid*
       +aoewalk(Chan *c, Chan *nc, char **name, int nname)
       +{
       +        return devwalk(c, nc, name, nname, nil, 0, aoegen);
       +}
       +
       +/* Stat c into db (n bytes) by delegating to devstat with aoegen as the generator. */
       +static int
       +aoestat(Chan *c, uchar *db, int n)
       +{
       +        return devstat(c, db, n, nil, 0, aoegen);
       +}
       +
       +/*
       + * Open c.  Everything except a unit's data file goes straight to
       + * devopen.  For a data file the device must be up (else Enotup is
       + * raised) and its nopen count is incremented under the device lock.
       + */
       +static Chan*
       +aoeopen(Chan *c, int omode)
       +{
       +        Aoedev *d;
       +
       +        if(TYPE(c->qid) != Qdata)
       +                return devopen(c, omode, 0, 0, aoegen);
       +
       +        d = unit2dev(UNIT(c->qid));
       +        QLOCK(d);
       +        /* release the device lock if the up check or devopen raises an error */
       +        if(waserror()){
       +                QUNLOCK(d);
       +                nexterror();
       +        }
       +        if(!UP(d))
       +                error(Enotup);
       +        c = devopen(c, omode, 0, 0, aoegen);
       +        d->nopen++;
       +        poperror();
       +        QUNLOCK(d);
       +        return c;
       +}
       +
       +/*
       + * Close c.  Only an opened data file has state to release: the
       + * device's nopen count is decremented, and when it reaches zero a
       + * discover is sent for that device's major.minor.
       + */
       +static void
       +aoeclose(Chan *c)
       +{
       +        Aoedev *d;
       +
       +        if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
       +                return;
       +
       +        d = unit2dev(UNIT(c->qid));
       +        QLOCK(d);
       +        /* an error raised by discover is swallowed so the lock is always released */
       +        if(--d->nopen == 0 && !waserror()){
       +                discover(d->major, d->minor);
       +                poperror();
       +        }
       +        QUNLOCK(d);
       +}
       +
       +/*
       + * Build and send one ATA read/write frame f for the request in
       + * d->inprocess.  At most d->maxbcnt bytes of the request are
       + * covered; the srb's dp, len and sector are advanced past them and
       + * d->inprocess is cleared once the srb has no bytes left.
       + *
       + * Returns without sending if hset fails.  If allocating or writing
       + * the block raises an error, the frame is marked free, d->inprocess
       + * is cleared and the error is re-raised.
       + */
       +static void
       +atarw(Aoedev *d, Frame *f)
       +{
       +        ulong bcnt;
       +        char extbit, writebit;
       +        Aoeata *ah;
       +        Srb *srb;
       +
       +        /* bits or'ed into the 0x20 base command; cleared below when not applicable */
       +        extbit = 0x4;
       +        writebit = 0x10;
       +
       +        srb = d->inprocess;
       +        bcnt = d->maxbcnt;
       +        if(bcnt > srb->len)
       +                bcnt = srb->len;
       +        f->nhdr = Szaoeata;
       +        memset(f->hdr, 0, f->nhdr);
       +        ah = (Aoeata*)f->hdr;
       +        if(hset(d, f, (Aoehdr*)ah, ACata) == -1)
       +                return;
       +        f->dp = srb->dp;
       +        f->bcnt = bcnt;
       +        f->lba = srb->sector;
       +        f->srb = srb;
       +
       +        ah->scnt = bcnt / Aoesectsz;
       +        putlba(ah, f->lba);
       +        if(d->flag & Dllba)
       +                ah->aflag |= AAFext;
       +        else {
       +                extbit = 0;
       +                ah->lba[3] &= 0x0f;
       +                ah->lba[3] |= 0xe0;        /* LBA bit+obsolete 0xa0 */
       +        }
       +        /* only writes carry data in the outgoing frame */
       +        if(srb->write){
       +                ah->aflag |= AAFwrite;
       +                f->dlen = bcnt;
       +        }else{
       +                writebit = 0;
       +                f->dlen = 0;
       +        }
       +        /*
       +         * Yields 0x20, 0x24, 0x30 or 0x34.
       +         * NOTE(review): these look like the ATA read/write sectors
       +         * opcodes and their EXT variants -- confirm.
       +         */
       +        ah->cmdstat = 0x20 | writebit | extbit;
       +
       +        /* mark tracking fields and load out */
       +        srb->nout++;
       +        srb->dp = (uchar*)srb->dp + bcnt;
       +        srb->len -= bcnt;
       +        srb->sector += bcnt / Aoesectsz;
       +        if(srb->len == 0)
       +                d->inprocess = nil;
       +        d->nout++;
       +        f->dl->npkt++;
       +        if(waserror()){
       +                f->tag = Tfree;
       +                d->inprocess = nil;
       +                nexterror();
       +        }
       +        devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
       +        poperror();
       +}
       +
       +/*
       + * Return a message for the error reported in AoE header h, or 0 if
       + * the header's AFerr flag is not set.  Error codes outside the table
       + * map to the "unknown" message.
       + */
       +static char*
       +aoeerror(Aoehdr *h)
       +{
       +        int n;
       +        static char *errs[] = {
       +                "aoe protocol error: unknown",
       +                "aoe protocol error: bad command code",
       +                "aoe protocol error: bad argument param",
       +                "aoe protocol error: device unavailable",
       +                "aoe protocol error: config string present",
       +                "aoe protocol error: unsupported version",
       +        };
       +
       +        if((h->verflag & AFerr) == 0)
       +                return 0;
       +        n = h->error;
       +        /* valid indices are 0..nelem(errs)-1; n == nelem(errs) is already out of range */
       +        if(n < 0 || n >= nelem(errs))
       +                n = 0;
       +        return errs[n];
       +}
       +
       +/*
       + * Fold a round trip sample into devlink l's running average.
       + *
       + * A negative rtt is negated, clamped to [Rtmin, Rtmax] and also moves
       + * l->mintimer halfway towards it.  A non-negative rtt is raised to at
       + * least l->mintimer, or else capped at Rtmax.
       + */
       +static void
       +rtupdate(Devlink *l, int rtt)
       +{
       +        int sample, delta;
       +
       +        if(rtt >= 0){
       +                sample = rtt;
       +                if(sample < l->mintimer)
       +                        sample = l->mintimer;
       +                else if(sample > Rtmax)
       +                        sample = Rtmax;
       +        } else {
       +                sample = -rtt;
       +                if(sample < Rtmin)
       +                        sample = Rtmin;
       +                else if(sample > Rtmax)
       +                        sample = Rtmax;
       +                l->mintimer += (sample - l->mintimer) >> 1;
       +        }
       +
       +        /* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
       +        delta = sample;
       +        delta -= l->rttavg;
       +        l->rttavg += delta >> 2;
       +}
       +
       +/*
       + * Sleep condition for an Srb passed as v: true once it has an error,
       + * or has no frames outstanding and no bytes left to issue.
       + */
       +static int
       +srbready(void *v)
       +{
       +        Srb *srb;
       +
       +        srb = v;
       +        if(srb->error)
       +                return 1;
       +        return srb->nout == 0 && srb->len == 0;
       +}
       +
       +/*
       + * Return the first frame of d whose tag equals tag, or nil if there
       + * is none.
       + */
       +static Frame*
       +getframe(Aoedev *d, int tag)
       +{
       +        Frame *cur, *end;
       +
       +        cur = d->frames;
       +        end = cur + d->nframes;
       +        while(cur < end){
       +                if(cur->tag == tag)
       +                        return cur;
       +                cur++;
       +        }
       +        return nil;
       +}
       +
       +/*
       + * Return an unused frame of d (tag Tfree), or nil if the device
       + * already has maxout frames outstanding or no frame is free.
       + */
       +static Frame*
       +freeframe(Aoedev *d)
       +{
       +        if(d->nout >= d->maxout)
       +                return nil;
       +        return getframe(d, Tfree);
       +}
       +
       +/*
       + * Issue frames for d while a free frame is available.  When no
       + * request is in process the next one is taken from the head of the
       + * device queue; the function returns once the queue is empty.
       + */
       +static void
       +work(Aoedev *d)
       +{
       +        Frame *f;
       +
       +        for(;;){
       +                f = freeframe(d);
       +                if(f == nil)
       +                        break;
       +                if(d->inprocess == nil){
       +                        if(d->head == nil)
       +                                return;
       +                        /* dequeue the next request */
       +                        d->inprocess = d->head;
       +                        d->head = d->head->next;
       +                        if(d->head == nil)
       +                                d->tail = nil;
       +                }
       +                atarw(d, f);
       +        }
       +}
       +
       +/*
       + * Queue srb at the tail of d's request list, start as much work as
       + * the device allows, then sleep until srbready(srb) holds.
       + */
       +static void
       +strategy(Aoedev *d, Srb *srb)
       +{
       +        QLOCK(d);
       +        /* release the device lock if work() raises an error */
       +        if(waserror()){
       +                QUNLOCK(d);
       +                nexterror();
       +        }
       +        srb->next = nil;
       +        if(d->tail)
       +                d->tail->next = srb;
       +        d->tail = srb;
       +        if(d->head == nil)
       +                d->head = srb;
       +        work(d);
       +        poperror();
       +        QUNLOCK(d);
       +
       +        /*
       +         * Any error raised out of SLEEP lands back on this waserror,
       +         * which re-arms and sleeps again.
       +         * NOTE(review): appears intended to keep waiting for the srb
       +         * even if the sleep is interrupted -- confirm.
       +         */
       +        while(waserror())
       +                ;
       +        SLEEP(srb, srbready, srb);
       +        poperror();
       +}
       +
       +#define iskaddr(a)        (!up || (uintptr)(a) > up->pmmu.uzero+USTKTOP)
       +
       +/*
       + * Read (write == 0) or write len bytes between db and device d at
       + * byte offset off, in pieces of at most Srbsz bytes, each handed to
       + * strategy() and waited for.
       + *
       + * off and len must be multiples of Aoesectsz, otherwise an error is
       + * raised.  A request starting at or beyond d->bsize returns 0; one
       + * crossing the end is truncated.  Raises Eio if the device goes down
       + * part way, and the srb's own error if a piece fails.
       + * Returns the (possibly truncated) len.
       + */
       +static long
       +rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
       +{
       +        long n, nlen, copy;
       +        enum { Srbsz = 1<<19, };
       +        Srb *srb;
       +
       +        if((off|len) & (Aoesectsz-1))
       +                error("offset and length must be sector multiple.\n");
       +        if(off >= d->bsize)
       +                return 0;
       +        if(off + len > d->bsize)
       +                len = d->bsize - off;
       +        copy = 0;
       +        if(iskaddr(db)){
       +                /* the format now has exactly one %p for its one argument */
       +panic("iskaddr %p\n", db);
       +                srb = srbkalloc(db, len);
       +                copy = 1;
       +        }else
       +                srb = srballoc(Srbsz <= len? Srbsz: len);
       +        if(waserror()){
       +                srbfree(srb);
       +                nexterror();
       +        }
       +        srb->write = write;
       +        for(nlen = len; nlen; nlen -= n){
       +                if(!UP(d))
       +                        error(Eio);
       +                srb->sector = off / Aoesectsz;
       +                srb->dp = srb->data;
       +                n = nlen;
       +                if(n > Srbsz)
       +                        n = Srbsz;
       +                srb->len = n;
       +                /* stage outgoing data in the srb buffer unless it already aliases db */
       +                if(write && !copy)
       +                        memmove(srb->data, db, n);
       +                strategy(d, srb);
       +                if(srb->error)
       +                        error(srb->error);
       +                if(!write && !copy)
       +                        memmove(db, srb->data, n);
       +                db += n;
       +                off += n;
       +        }
       +        poperror();
       +        srbfree(srb);
       +        return len;
       +}
       +
       +/*
       + * Copy up to n bytes starting at offset off of the size-byte region
       + * src into dst.  Returns the number of bytes copied, which is 0 when
       + * off is at or past the end.
       + */
       +static long
       +readmem(ulong off, void *dst, long n, void *src, long size)
       +{
       +        uchar *from;
       +
       +        if(off >= size)
       +                return 0;
       +        from = (uchar*)src + off;
       +        if(off + n > size)
       +                n = size - off;        /* trim the request to what is left */
       +        memmove(dst, from, n);
       +        return n;
       +}
       +
       +/*
       + * Append to s (bounded by e) the flagname entry, followed by a space,
       + * for every bit set in f, then a newline.  Returns the new end of
       + * the string.
       + */
       +static char*
       +pflag(char *s, char *e, uchar f)
       +{
       +        uchar bit;
       +
       +        for(bit = 0; bit < nelem(flagname); bit++){
       +                if((f & 1 << bit) == 0)
       +                        continue;
       +                s = seprint(s, e, "%s ", flagname[bit]);
       +        }
       +        return seprint(s, e, "\n");
       +}
       +
       +/*
       + * Format the status of device d (state, counters, model strings and
       + * flags) into a temporary buffer and copy the part selected by off
       + * and len into db with readstr.  Returns readstr's result.
       + * Raises Enomem if the temporary buffer cannot be allocated.
       + */
       +static int
       +pstat(Aoedev *d, char *db, int len, int off)
       +{
       +        int i;
       +        char *state, *s, *p, *e;
       +
       +        s = p = malloc(1024);
       +        if(s == nil)
       +                error(Enomem);        /* do not format through a nil pointer */
       +        e = p + 1024;
       +
       +        state = "down";
       +        if(UP(d))
       +                state = "up";
       +
       +        p = seprint(p, e,
       +                "state: %s\n"        "nopen: %d\n"        "nout: %d\n"
       +                "nmaxout: %d\n"        "nframes: %d\n"        "maxbcnt: %d [maxmtu %d]\n"
       +                "fw: %.4ux\n"
       +                "model: %s\n"        "serial: %s\n"        "firmware: %s\n",
       +                state,                d->nopen,        d->nout,
       +                d->maxout,         d->nframes,        d->maxbcnt, d->maxmtu,
       +                d->fwver,
       +                d->model,         d->serial,         d->firmware);
       +        p = seprint(p, e, "flag: ");
       +        p = pflag(p, e, d->flag);
       +
       +        if(p - s < len)
       +                len = p - s;
       +        i = readstr(off, db, len, s);
       +        free(s);
       +        return i;
       +}
       +
       +/*
       + * Read from one of a unit's files (ctl, data, config, ident).
       + * Raises Echange if the device's version no longer matches the
       + * version in c's qid, Ebadarg for any other file type, and Enotup
       + * when config or ident is read while the device is down.
       + */
       +static long
       +unitread(Chan *c, void *db, long len, vlong off)
       +{
       +        Aoedev *d;
       +
       +        d = unit2dev(UNIT(c->qid));
       +        if(d->vers != c->qid.vers)
       +                error(Echange);
       +        switch(TYPE(c->qid)){
       +        default:
       +                /* NOTE(review): no break here; relies on error() not returning -- confirm */
       +                error(Ebadarg);
       +        case Qctl:
       +                return pstat(d, db, len, off);
       +        case Qdata:
       +                return rw(d, Read, db, len, off);
       +        case Qconfig:
       +                if(!UP(d))
       +                        error(Enotup);
       +                return readmem(off, db, len, d->config, d->nconfig);
       +        case Qident:
       +                if(!UP(d))
       +                        error(Enotup);
       +                return readmem(off, db, len, d->ident, sizeof d->ident);
       +        }
       +}
       +
       +/*
       + * Format the status of the devlink selected by c's qid (addresses,
       + * counters, flags, timers and its netlink's details) and copy the
       + * part selected by off and len into db with readstr.
       + * Returns 0 if the devlink index is out of range, otherwise
       + * readstr's result.  Raises Enomem if the temporary buffer cannot
       + * be allocated.
       + */
       +static int
       +devlinkread(Chan *c, void *db, int len, int off)
       +{
       +        int i;
       +        char *s, *p, *e;
       +        Aoedev *d;
       +        Devlink *l;
       +
       +        d = unit2dev(UNIT(c->qid));
       +        i = L(c->qid);
       +        if(i >= d->ndl)
       +                return 0;
       +        l = d->dl + i;
       +
       +        s = p = malloc(1024);
       +        if(s == nil)
       +                error(Enomem);        /* do not format through a nil pointer */
       +        e = s + 1024;
       +
       +        p = seprint(p, e, "addr: ");
       +        for(i = 0; i < l->nea; i++)
       +                p = seprint(p, e, "%E ", l->eatab[i]);
       +        p = seprint(p, e, "\n");
       +        p = seprint(p, e, "npkt: %uld\n", l->npkt);
       +        p = seprint(p, e, "resent: %uld\n", l->resent);
       +        p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag);
       +        p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg));
       +        p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer));
       +
       +        p = seprint(p, e, "nl path: %s\n", l->nl->path);
       +        p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
       +        /* the "nl" lines describe the netlink, so print its flags, not the devlink's */
       +        p = seprint(p, e, "nl flag: "); p = pflag(p, e, l->nl->flag);
       +        p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
       +        p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
       +
       +        if(p - s < len)
       +                len = p - s;
       +        i = readstr(off, db, len, s);
       +        free(s);
       +        return i;
       +}
       +
       +/*
       + * Format the driver-wide settings (debug, autodiscover, rediscover)
       + * and the details of every netlink slot whose cc is set, then copy
       + * the part selected by off and len into db with readstr.
       + * Returns readstr's result.  Raises Enomem if the temporary buffer
       + * cannot be allocated.  d0 is unused.
       + */
       +static long
       +topctlread(Chan *d0, void *db, int len, int off)
       +{
       +        int i;
       +        char *s, *p, *e;
       +        Netlink *n;
       +
       +        s = p = malloc(1024);
       +        if(s == nil)
       +                error(Enomem);        /* do not format through a nil pointer */
       +        e = s + 1024;
       +
       +        p = seprint(p, e, "debug: %d\n", debug);
       +        p = seprint(p, e, "autodiscover: %d\n", autodiscover);
       +        p = seprint(p, e, "rediscover: %d\n", rediscover);
       +
       +        for(i = 0; i < Nnetlink; i++){
       +                n = netlinks.nl+i;
       +                if(n->cc == 0)
       +                        continue;
       +                p = seprint(p, e, "if%d path: %s\n", i, n->path);
       +                p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
       +                p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
       +                p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
       +                p = seprint(p, e, "if%d datamtu: %d\n", i, n->datamtu);
       +        }
       +
       +        if(p - s < len)
       +                len = p - s;
       +        i = readstr(off, db, len, s);
       +        free(s);
       +        return i;
       +}
       +
       +/*
       + * top-level read entry point: dispatches on the file type
       + * encoded in the channel's qid.  directories go to devdirread
       + * with aoegen, the top ctl and log files have their own readers,
       + * per-unit files go to unitread and devlink files to devlinkread.
       + * any other type raises Eperm.
       + */
       +static long
       +aoeread(Chan *c, void *db, long n, vlong off)
       +{
       +        switch(TYPE(c->qid)){
       +        default:
       +                /* NOTE(review): relies on error() not returning; otherwise this falls into the directory case */
       +                error(Eperm);
       +        case Qzero:
       +        case Qtopdir:
       +        case Qunitdir:
       +        case Qdevlinkdir:
       +                return devdirread(c, db, n, 0, 0, aoegen);
       +        case Qtopctl:
       +                return topctlread(c, db, n, off);
       +        case Qtoplog:
       +                return eventlogread(db, n);
       +        case Qctl:
       +        case Qdata:
       +        case Qconfig:
       +        case Qident:
       +                return unitread(c, db, n, off);
       +        case Qdevlink:
       +                return devlinkread(c, db, n, off);
       +        }
       +}
       +
       +/*
       + * send a new config string (db, len bytes) to the target with an
       + * AQCfset query-config request and wait for the reply.
       + * on success the reply data is stored in d->config, copied back
       + * into db, and len is returned.  raises Enotup if the device is
       + * down, Etoobig if len exceeds d->config, and srb->error if the
       + * request fails.  returns 0 if no header could be built (hset
       + * failure).
       + *
       + * error-label bookkeeping: the first waserror owns srb and s for
       + * the whole function; the waserror inside the loop (and the one
       + * near the end) only guards the qlock on d.
       + */
       +static long
       +configwrite(Aoedev *d, void *db, long len)
       +{
       +        char *s;
       +        Aoeqc *ch;
       +        Frame *f;
       +        Srb *srb;
       +
       +        if(!UP(d))
       +                error(Enotup);
       +        if(len > sizeof d->config)
       +                error(Etoobig);
       +        srb = srballoc(len);
       +        s = malloc(len);
       +        memmove(s, db, len);
       +        if(waserror()){
       +                srbfree(srb);
       +                free(s);
       +                nexterror();
       +        }
       +        /* poll for a free frame, dropping the lock while sleeping */
       +        for (;;) {
       +                QLOCK(d);
       +                if(waserror()){
       +                        QUNLOCK(d);
       +                        nexterror();
       +                }
       +                f = freeframe(d);
       +                if(f != nil)
       +                        break;
       +                poperror();
       +                QUNLOCK(d);
       +                if(waserror())
       +                        nexterror();
       +                tsleep(&up->sleep, return0, 0, 100);
       +                poperror();
       +        }
       +        f->nhdr = Szaoeqc;
       +        memset(f->hdr, 0, f->nhdr);
       +        ch = (Aoeqc*)f->hdr;
       +        if(hset(d, f, (Aoehdr*)ch, ACconfig) == -1){
       +                /*
       +                 * unwind everything taken so far: the loop's error
       +                 * label and qlock, then the first label and the
       +                 * buffers it owns.  returning without this would
       +                 * leave d locked and the error stack unbalanced.
       +                 */
       +                poperror();
       +                QUNLOCK(d);
       +                poperror();
       +                srbfree(srb);
       +                free(s);
       +                return 0;
       +        }
       +        f->srb = srb;
       +        f->dp = s;
       +        ch->verccmd = AQCfset;
       +        hnputs(ch->cslen, len);
       +        d->nout++;
       +        srb->nout++;
       +        f->dl->npkt++;
       +        f->dlen = len;
       +        /*
       +         * these refer to qlock & waserror in the above for loop.
       +         * there's still the first waserror outstanding.
       +         */
       +        poperror();
       +        QUNLOCK(d);
       +
       +        devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
       +        SLEEP(srb, srbready, srb);
       +        if(srb->error)
       +                error(srb->error);
       +
       +        /* the reply handler copied the returned string into s (f->dp) */
       +        QLOCK(d);
       +        if(waserror()){
       +                QUNLOCK(d);
       +                nexterror();
       +        }
       +        memmove(d->config, s, len);
       +        d->nconfig = len;
       +        poperror();
       +        QUNLOCK(d);
       +
       +        poperror();                        /* pop first waserror */
       +
       +        srbfree(srb);
       +        memmove(db, s, len);
       +        free(s);
       +        return len;
       +}
       +
       +/*
       + * read the interface's mtu file through channel m and return the
       + * number found starting at byte 12 of the text.
       + * falls back to 1514 when m is nil, when the read raises an
       + * error, or when 12 bytes or fewer come back.
       + */
       +static int
       +getmtu(Chan *m)
       +{
       +        int nr;
       +        char text[36];
       +
       +        if(m == nil || waserror())
       +                return 1514;
       +        nr = devtab[m->type]->read(m, text, sizeof text - 1, 0);
       +        poperror();
       +        if(nr <= 12)
       +                return 1514;
       +        text[nr] = 0;
       +        return strtoul(text + 12, 0, 0);
       +}
       +
       +/*
       + * largest data payload usable on every live path to d: the
       + * minimum, over devlinks that are up on netlinks that are up, of
       + * the interface mtu and the link's datamtu (1514 if no path
       + * qualifies), less the aoe/ata header and rounded down to a
       + * whole number of sectors.
       + */
       +static int
       +devmaxdata(Aoedev *d)
       +{
       +        int i, cand, best;
       +        Devlink *l;
       +        Netlink *n;
       +
       +        best = 100000;
       +        for(i = 0; i < d->ndl; i++){
       +                l = &d->dl[i];
       +                n = l->nl;
       +                if((l->flag & Dup) != 0 && (n->flag & Dup) != 0){
       +                        cand = getmtu(n->mtu);
       +                        if(cand > l->datamtu)
       +                                cand = l->datamtu;
       +                        if(cand < best)
       +                                best = cand;
       +                }
       +        }
       +        /* sentinel untouched: nothing usable was found */
       +        if(best == 100000)
       +                best = 1514;
       +        best -= Szaoeata;
       +        return best - best % Aoesectsz;
       +}
       +
       +/*
       + * value for an on/off setting: with an argument, 1 iff it is
       + * exactly "on"; without one, the opposite of init.
       + */
       +static int
       +toggle(char *s, int init)
       +{
       +        if(s != nil)
       +                return strcmp(s, "on") == 0;
       +        return init ^ 1;
       +}
       +
       +static void ataident(Aoedev*);
       +
       +/*
       + * handle a write to a unit's ctl file.  the text is parsed as a
       + * command and executed with d locked:
       + *        failio                mark the device down
       + *        identify        reissue ata identify
       + *        jumbo [on|off]        set or toggle Djumbo
       + *        maxbno [n]        set max transfer in sectors
       + *        mtu [n]                set max transfer from a frame size
       + *        setsize [n]        override the reported size (default: real size)
       + * returns n; bad commands or arguments raise an error.
       + */
       +static long
       +unitctlwrite(Aoedev *d, void *db, long n)
       +{
       +        uint maxbcnt, m;
       +        uvlong bsize;
       +        enum {
       +                Failio,
       +                Ident,
       +                Jumbo,
       +                Maxbno,
       +                Mtu,
       +                Setsize,
       +        };
       +        Cmdbuf *cb;
       +        Cmdtab *ct;
       +        static Cmdtab cmds[] = {
       +                {Failio,         "failio",         1 },
       +                {Ident,         "identify",         1 },
       +                {Jumbo,         "jumbo",         0 },
       +                {Maxbno,        "maxbno",        0 },
       +                {Mtu,                "mtu",                0 },
       +                {Setsize,         "setsize",         0 },
       +        };
       +
       +        cb = parsecmd(db, n);
       +        QLOCK(d);
       +        if(waserror()){
       +                QUNLOCK(d);
       +                free(cb);
       +                nexterror();
       +        }
       +        ct = lookupcmd(cb, cmds, nelem(cmds));
       +        switch(ct->index){
       +        case Failio:
       +                downdev(d, "i/o failure");
       +                break;
       +        case Ident:
       +                ataident(d);
       +                break;
       +        case Jumbo:
       +                m = 0;
       +                if(d->flag & Djumbo)
       +                        m = 1;
       +                /* keep toggle's answer; discarding it made the command a no-op */
       +                m = toggle(cb->f[1], m);
       +                if(m)
       +                        d->flag |= Djumbo;
       +                else
       +                        d->flag &= ~Djumbo;
       +                break;
       +        case Maxbno:
       +        case Mtu:
       +                maxbcnt = devmaxdata(d);
       +                if(cb->nf > 2)
       +                        error(Ecmdargs);
       +                if(cb->nf == 2){
       +                        m = strtoul(cb->f[1], 0, 0);
       +                        if(ct->index == Maxbno)
       +                                m *= Aoesectsz;
       +                        else{
       +                                /* frame size: strip the header, round down to sectors */
       +                                m -= Szaoeata;
       +                                m &= ~(Aoesectsz-1);
       +                        }
       +                        /* m is unsigned, so an undersized mtu wraps and is rejected here */
       +                        if(m == 0 || m > maxbcnt)
       +                                cmderror(cb, "invalid mtu");
       +                        maxbcnt = m;
       +                        d->maxmtu = m;
       +                } else
       +                        d->maxmtu = Maxmtu;
       +                d->maxbcnt = maxbcnt;
       +                break;
       +        case Setsize:
       +                bsize = d->realbsize;
       +                if(cb->nf > 2)
       +                        error(Ecmdargs);
       +                if(cb->nf == 2){
       +                        bsize = strtoull(cb->f[1], 0, 0);
       +                        if(bsize % Aoesectsz)
       +                                cmderror(cb, "disk size must be sector aligned");
       +                }
       +                d->bsize = bsize;
       +                break;
       +        default:
       +                cmderror(cb, "unknown aoe control message");
       +        }
       +        poperror();
       +        QUNLOCK(d);
       +        free(cb);
       +        return n;
       +}
       +
       +/*
       + * write handler for per-unit files.  ctl writes are commands,
       + * data writes go to rw, ident is read-only (Eperm) and anything
       + * unexpected raises Ebadarg.  a config write is merged into a
       + * copy of the current config string at offset off and the whole
       + * prefix (off + n bytes) is sent with configwrite.
       + */
       +static long
       +unitwrite(Chan *c, void *db, long n, vlong off)
       +{
       +        long nw;
       +        char *cfg;
       +        Aoedev *d;
       +
       +        d = unit2dev(UNIT(c->qid));
       +        switch(TYPE(c->qid)){
       +        default:
       +                error(Ebadarg);
       +        case Qctl:
       +                return unitctlwrite(d, db, n);
       +        case Qident:
       +                error(Eperm);
       +        case Qdata:
       +                return rw(d, Write, db, n, off);
       +        case Qconfig:
       +                break;
       +        }
       +
       +        /* Qconfig: splice the new bytes over the existing string */
       +        if(off + n > sizeof d->config)
       +                error(Etoobig);
       +        cfg = malloc(sizeof d->config);
       +        if(waserror()){
       +                free(cfg);
       +                nexterror();
       +        }
       +        memmove(cfg, d->config, d->nconfig);
       +        memmove(cfg + off, db, n);
       +        nw = configwrite(d, cfg, n + off);
       +        poperror();
       +        free(cfg);
       +        return nw;
       +}
       +
       +/*
       + * claim the first unused netlink slot (cc == nil) and fill it in
       + * with the interface's path, control, data and mtu channels and
       + * its ethernet address, then mark it up.
       + * raises "out of netlink structures" when every slot is taken.
       + */
       +static Netlink*
       +addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
       +{
       +        Netlink *nl, *e;
       +
       +        LOCK(&netlinks);
       +        if(waserror()){
       +                UNLOCK(&netlinks);
       +                nexterror();
       +        }
       +        nl = netlinks.nl;
       +        e = nl + nelem(netlinks.nl);
       +        for(; nl < e && nl->cc; nl++)
       +                continue;
       +        if(nl == e)
       +                error("out of netlink structures");
       +        nl->cc = cc;
       +        nl->dc = dc;
       +        nl->mtu = mtu;
       +        /*
       +         * kstrcpy rather than strncpy: a path that fills the
       +         * buffer must still end up NUL-terminated, since it is
       +         * later printed with %s and compared with strcmp.
       +         */
       +        kstrcpy(nl->path, path, sizeof nl->path);
       +        memmove(nl->ea, ea, sizeof nl->ea);
       +        poperror();
       +        nl->flag |= Dup;
       +        UNLOCK(&netlinks);
       +        return nl;
       +}
       +
       +/*
       + * hand out the next unit number, or -1 once Maxunits have been
       + * allocated.  the counter is protected by the units lock.
       + */
       +static int
       +newunit(void)
       +{
       +        int x;
       +
       +        x = -1;
       +        LOCK(&units);
       +        if(units.ref != Maxunits)
       +                x = units.ref++;
       +        UNLOCK(&units);
       +        return x;
       +}
       +
       +/*
       + * give back one unit number; returns the count remaining.
       + */
       +static int
       +dropunit(void)
       +{
       +        int left;
       +
       +        LOCK(&units);
       +        units.ref--;
       +        left = units.ref;
       +        UNLOCK(&units);
       +        return left;
       +}
       +
       +/*
       + * allocate and initialise a device for shelf.slot major.minor
       + * with n frames in use.
       + * always allocate max frames.  maxout may change.
       + * raises an error if memory or unit numbers run out; nothing is
       + * leaked in either case.
       + */
       +static Aoedev*
       +newdev(long major, long minor, int n)
       +{
       +        Aoedev *d;
       +        Frame *f, *e;
       +
       +        d = malloc(sizeof *d);
       +        f = malloc(sizeof *f*Maxframes);
       +        if(!d || !f) {
       +                free(d);
       +                free(f);
       +                error("aoe device allocation failure");
       +        }
       +        d->nframes = n;
       +        d->frames = f;
       +        /* note: f is advanced to the end of the array by this loop */
       +        for (e = f + Maxframes; f < e; f++)
       +                f->tag = Tfree;
       +        d->maxout = n;
       +        d->major = major;
       +        d->minor = minor;
       +        d->maxbcnt = Dbcnt;
       +        d->flag = Djumbo;
       +        d->maxmtu = Maxmtu;
       +        d->unit = newunit();                /* bzzt.  inaccurate if units removed */
       +        if(d->unit == -1){
       +                /* release the frame array before the struct that points to it */
       +                free(d->frames);
       +                free(d);
       +                error("too many units");
       +        }
       +        d->dl = d->dltab;
       +        return d;
       +}
       +
       +/*
       + * look up a known device by major.minor under the devs read
       + * lock.  a miss is logged and nil is returned.
       + */
       +static Aoedev*
       +mm2dev(int major, int minor)
       +{
       +        Aoedev *d;
       +
       +        RLOCK(&devs);
       +        d = devs.d;
       +        while(d != nil && !(d->major == major && d->minor == minor))
       +                d = d->next;
       +        RUNLOCK(&devs);
       +        if(d == nil)
       +                eventlog("mm2dev: %d.%d not found\n", major, minor);
       +        return d;
       +}
       +
       +/*
       + * return the device for major.minor, creating it with n frames
       + * and pushing it on the front of the list if it is new.
       + * the broadcast addresses (major 0xffff or minor 0xff) name no
       + * device and yield 0.  errors from newdev propagate after the
       + * write lock is dropped.
       + */
       +static Aoedev*
       +getdev(long major, long minor, int n)
       +{
       +        Aoedev *d;
       +
       +        if(major == 0xffff || minor == 0xff)
       +                return 0;
       +        WLOCK(&devs);
       +        if(waserror()){
       +                WUNLOCK(&devs);
       +                nexterror();
       +        }
       +        d = devs.d;
       +        while(d != nil){
       +                if(d->major == major && d->minor == minor)
       +                        break;
       +                d = d->next;
       +        }
       +        if(d == nil) {
       +                d = newdev(major, minor, n);
       +                d->next = devs.d;
       +                devs.d = d;
       +        }
       +        poperror();
       +        WUNLOCK(&devs);
       +        return d;
       +}
       +
       +/*
       + * fetch a 16-bit little-endian value from a (any alignment).
       + */
       +static ushort
       +gbit16(void *a)
       +{
       +        uchar *b;
       +
       +        b = a;
       +        return b[0] | b[1] << 8;
       +}
       +
       +/*
       + * fetch a 32-bit little-endian value from a (any alignment).
       + * each byte is widened to ulong before shifting: shifting the
       + * promoted int left by 24 would land in the sign bit and, where
       + * ulong is wider than 32 bits, sign-extend into the result.
       + */
       +static ulong
       +gbit32(void *a)
       +{
       +        ulong j;
       +        uchar *i;
       +
       +        i = a;
       +        j  = (ulong)i[3] << 24;
       +        j |= (ulong)i[2] << 16;
       +        j |= (ulong)i[1] << 8;
       +        j |= (ulong)i[0];
       +        return j;
       +}
       +
       +/*
       + * fetch a 64-bit little-endian value from a as two 32-bit halves.
       + */
       +static uvlong
       +gbit64(void *a)
       +{
       +        uvlong hi, lo;
       +
       +        hi = gbit32((uchar*)a + 4);
       +        lo = gbit32(a);
       +        return hi << 32 | lo;
       +}
       +
       +/*
       + * queue an ata identify-device command to d.  quietly does
       + * nothing if no frame is free or no header can be built.
       + * the frame gets an srb from srbkalloc(0, 0); 512 bytes are
       + * expected back (bcnt) and none are sent (dlen).
       + */
       +static void
       +ataident(Aoedev *d)
       +{
       +        Aoeata *ah;
       +        Block *pkt;
       +        Frame *f;
       +
       +        if((f = freeframe(d)) == nil)
       +                return;
       +        f->nhdr = Szaoeata;
       +        memset(f->hdr, 0, f->nhdr);
       +        ah = (Aoeata*)f->hdr;
       +        if(hset(d, f, (Aoehdr*)ah, ACata) == -1)
       +                return;
       +        f->srb = srbkalloc(0, 0);
       +
       +        /* ata 6, page 110 */
       +        ah->cmdstat = Cid;
       +        ah->scnt = 1;
       +        ah->lba[3] = 0xa0;
       +
       +        d->nout++;
       +        f->dl->npkt++;
       +        f->bcnt = 512;
       +        f->dlen = 0;
       +        pkt = allocfb(f);
       +        devtab[f->nl->dc->type]->bwrite(f->nl->dc, pkt, 0);
       +}
       +
       +/*
       + * record ethernet address ea on devlink l.  returns the index of
       + * the matching entry, appending a new one if it was not already
       + * known, or -1 if the table (Nea entries) is full.
       + */
       +static int
       +newdlea(Devlink *l, uchar *ea)
       +{
       +        int i;
       +        uchar *slot;
       +
       +        for(i = 0; i < Nea; i++){
       +                slot = l->eatab[i];
       +                if(i != l->nea){
       +                        if(memcmp(slot, ea, Eaddrlen) == 0)
       +                                return i;
       +                        continue;
       +                }
       +                /* reached the first unused slot without a match */
       +                memmove(slot, ea, Eaddrlen);
       +                return l->nea++;
       +        }
       +        return -1;
       +}
       +
       +/*
       + * find or create d's devlink for netlink n, using the config
       + * response c for the target's address and sector count.
       + * an existing link is refreshed (address, datamtu, Dup); a new
       + * one also gets its netlink and initial timer values.
       + * logs and returns 0 when all Ndevlink slots belong to other
       + * netlinks.
       + */
       +static Devlink*
       +newdevlink(Aoedev *d, Netlink *n, Aoeqc *c)
       +{
       +        int i, fresh;
       +        Devlink *l;
       +
       +        for(i = 0; i < Ndevlink; i++){
       +                l = &d->dl[i];
       +                fresh = i == d->ndl;
       +                if(!fresh && l->nl != n)
       +                        continue;
       +                if(fresh)
       +                        d->ndl++;
       +                newdlea(l, c->src);
       +                l->datamtu = c->scnt*Aoesectsz;
       +                l->flag |= Dup;
       +                if(fresh){
       +                        l->nl = n;
       +                        l->mintimer = Rtmin;
       +                        l->rttavg = Rtmax;
       +                }
       +                return l;
       +        }
       +        eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, c->src);
       +        return 0;
       +}
       +
       +/*
       + * an aoe error response arrived in b: fail the outstanding frame
       + * it answers with message s.  management and free tags, unknown
       + * devices and unknown frames are ignored.
       + */
       +static void
       +errrsp(Block *b, char *s)
       +{
       +        int tag;
       +        Aoedev *d;
       +        Aoehdr *h;
       +        Frame *f;
       +
       +        h = (Aoehdr*)b->rp;
       +        tag = nhgetl(h->tag);
       +        if(tag == Tmgmt || tag == Tfree)
       +                return;
       +        d = mm2dev(nhgets(h->major), h->minor);
       +        if(d == 0)
       +                return;
       +        f = getframe(d, tag);
       +        if(f != nil)
       +                frameerror(d, f, s);
       +}
       +
       +/*
       + * process a query-config response received on netlink nl.
       + *
       + * a response whose tag is not Tmgmt answers a request made by
       + * configwrite: its config string is copied into the waiting
       + * frame's buffer, the srb is woken and the frame is freed.
       + *
       + * a Tmgmt response is a discovery reply: the device is looked up
       + * (or created), this interface is added as a devlink, the
       + * firmware version and config string are recorded, the transfer
       + * size is recomputed and, if nobody has the device open, an
       + * identify is issued.
       + *
       + * all lengths taken from the packet are clamped to both the
       + * bytes actually received and the destination buffer.
       + */
       +static void
       +qcfgrsp(Block *b, Netlink *nl)
       +{
       +        int major, cmd, cslen, blen;
       +        unsigned n;
       +        Aoedev *d;
       +        Aoeqc *ch;
       +        Devlink *l;
       +        Frame *f;
       +
       +        ch = (Aoeqc*)b->rp;
       +        major = nhgets(ch->major);
       +        n = nhgetl(ch->tag);
       +        if(n != Tmgmt){
       +                d = mm2dev(major, ch->minor);
       +                if(d == nil)
       +                        return;
       +                QLOCK(d);
       +                f = getframe(d, n);
       +                if(f == nil){
       +                        QUNLOCK(d);
       +                        eventlog("%æ: unknown response tag %ux\n", d, n);
       +                        return;
       +                }
       +                cslen = nhgets(ch->cslen);
       +                blen = BLEN(b) - Szaoeqc;
       +                /* a packet shorter than the header carries no string */
       +                if(blen < 0)
       +                        blen = 0;
       +                if(cslen < blen)
       +                        eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
       +                                d, n, cslen, blen);
       +                if(cslen > blen){
       +                        eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
       +                                d, n, cslen, blen);
       +                        cslen = blen;
       +                }
       +                /*
       +                 * f->dp was sized by the sender (configwrite sets
       +                 * dlen to the buffer length); the target must not
       +                 * be able to write past it.
       +                 */
       +                if(cslen > f->dlen)
       +                        cslen = f->dlen;
       +                memmove(f->dp, ch + 1, cslen);
       +                f->srb->nout--;
       +                WAKEUP(f->srb);
       +                d->nout--;
       +                f->srb = nil;
       +                f->tag = Tfree;
       +                QUNLOCK(d);
       +                return;
       +        }
       +
       +        cmd = ch->verccmd & 0xf;
       +        if(cmd != 0){
       +                eventlog("aoe%d.%d: cfgrsp: bad command %d\n", major, ch->minor, cmd);
       +                return;
       +        }
       +        n = nhgets(ch->bufcnt);
       +        if(n > Maxframes)
       +                n = Maxframes;
       +
       +        /* a failure to create the device is logged and swallowed here */
       +        if(waserror()){
       +                eventlog("getdev: %d.%d ignored: %s\n", major, ch->minor, up->errstr);
       +                return;
       +        }
       +        d = getdev(major, ch->minor, n);
       +        poperror();
       +        if(d == 0)
       +                return;
       +
       +        QLOCK(d);
       +        *up->errstr = 0;
       +        if(waserror()){
       +                QUNLOCK(d);
       +                eventlog("%æ: %s\n", d, up->errstr);
       +                nexterror();
       +        }
       +
       +        l = newdevlink(d, nl, ch);                /* add this interface. */
       +
       +        d->fwver = nhgets(ch->fwver);
       +        n = nhgets(ch->cslen);
       +        if(n > sizeof d->config)
       +                n = sizeof d->config;
       +        /* don't read a config string longer than the packet holds */
       +        blen = BLEN(b) - Szaoeqc;
       +        if(blen < 0)
       +                blen = 0;
       +        if(n > blen)
       +                n = blen;
       +        d->nconfig = n;
       +        memmove(d->config, ch + 1, n);
       +
       +        /* manually set mtu may be reset lower if conditions warrant */
       +        if(l){
       +                n = devmaxdata(d);
       +                if(!(d->flag & Djumbo))
       +                        n = Dbcnt;
       +                if(n > d->maxmtu)
       +                        n = d->maxmtu;
       +                if(n != d->maxbcnt){
       +                        eventlog("%æ: setting %d byte mtu on %s:%E\n",
       +                                d, n, nl->path, nl->ea);
       +                        d->maxbcnt = n;
       +                }
       +        }
       +        if(d->nopen == 0)
       +                ataident(d);
       +        poperror();
       +        QUNLOCK(d);
       +}
       +
       +/*
       + * copy an n-byte ata identify string from a into p.  each 16-bit
       + * word holds two characters with the first in the high byte.
       + * the result is NUL-terminated with leading and trailing spaces
       + * removed, and every byte after the text up to the terminator
       + * is zero, so two copies of the same string compare equal with
       + * memcmp.  p must have room for the characters plus the NUL.
       + */
       +static void
       +idmove(char *p, ushort *a, unsigned n)
       +{
       +        int i, len, total;
       +        char *op, *e;
       +
       +        op = p;
       +        for(i = 0; i < n / 2; i++){
       +                *p++ = a[i] >> 8;
       +                *p++ = a[i];
       +        }
       +        *p = 0;
       +        total = p - op + 1;                /* bytes written, including the NUL */
       +        while(p > op && *--p == ' ')
       +                *p = 0;
       +        e = p;                                /* last character kept (op if empty) */
       +        p = op;
       +        while(*p == ' ')
       +                p++;
       +        /*
       +         * slide the text to the front and clear the rest.  the
       +         * length is counted from the text itself; using
       +         * n - (e - p) here read beyond the buffer whenever the
       +         * leading spaces outnumbered the remaining characters.
       +         */
       +        len = e - p + 1;
       +        memmove(op, p, len);
       +        memset(op + len, 0, total - len);
       +}
       +
       +/*
       + * digest ata identify data id for device d: recompute the
       + * capability flags (Dllba, Dpower, Dsmart, Dnop), mark the
       + * device up, keep a copy of the data in d->ident and return the
       + * sector count (words 100.. with 48-bit addressing, otherwise
       + * words 60..).
       + */
       +static vlong
       +aoeidentify(Aoedev *d, ushort *id)
       +{
       +        int w82, w83;
       +        vlong nsect;
       +
       +        d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup);
       +
       +        w83 = gbit16(id+83);
       +        if((w83 | gbit16(id+86)) & (1<<10)){
       +                d->flag |= Dllba;
       +                nsect = gbit64(id+100);
       +        }else
       +                nsect = gbit32(id+60);
       +
       +        /* the feature words are only trusted when bits 15:14 of word 83 read 01 */
       +        if((w83>>14) == 1) {
       +                if(w83 & (1<<3))
       +                        d->flag |= Dpower;
       +                w82 = gbit16(id+82);
       +                if(w82 & 1)
       +                        d->flag |= Dsmart;
       +                if(w82 & (1<<14))
       +                        d->flag |= Dnop;
       +        }
       +        d->flag |= Dup;
       +        memmove(d->ident, id, sizeof d->ident);
       +        return nsect;
       +}
       +
       +/*
       + * stamp d with the next value of the global drive version
       + * counter, taken under the drivevers lock.
       + */
       +static void
       +newvers(Aoedev *d)
       +{
       +        LOCK(&drivevers);
       +        d->vers = drivevers.ref;
       +        drivevers.ref++;
       +        UNLOCK(&drivevers);
       +}
       +
       +/*
       + * apply an identify response to d: refresh flags and the serial,
       + * firmware and model strings, and if the size or serial number
       + * differs from what was known, adopt the new size and bump the
       + * device version.  returns -1 if aoeidentify reports failure,
       + * 0 otherwise.
       + */
       +static int
       +identify(Aoedev *d, ushort *id)
       +{
       +        vlong prevsize, size;
       +        uchar prevserial[21];
       +
       +        size = aoeidentify(d, id);
       +        if(size == -1)
       +                return -1;
       +
       +        /* remember the old identity before it is overwritten */
       +        prevsize = d->realbsize;
       +        memmove(prevserial, d->serial, sizeof d->serial);
       +
       +        idmove(d->serial, id+10, 20);
       +        idmove(d->firmware, id+23, 8);
       +        idmove(d->model, id+27, 40);
       +
       +        size *= Aoesectsz;
       +        if(prevsize == size && memcmp(prevserial, d->serial, sizeof prevserial) == 0)
       +                return 0;
       +        d->bsize = size;
       +        d->realbsize = size;
       +        newvers(d);
       +        return 0;
       +}
       +
       +/*
       + * process an ata response in b.  the frame it answers is found
       + * by tag; its round-trip time updates the link estimate.
       + * an ata error marks the srb failed.  otherwise read data is
       + * copied to the caller's buffer, and a transfer with bytes still
       + * outstanding is advanced and resent on the same frame; identify
       + * data is handed to identify().
       + * when the frame is finished the srb is woken (once its last
       + * frame is in and nothing remains to send), the frame is freed
       + * and more work is started.  runt packets are logged and left
       + * for the frame to be retried.
       + */
       +static void
       +atarsp(Block *b)
       +{
       +        unsigned n;
       +        int major;                /* not short: shelf numbers >= 0x8000 must stay positive */
       +        Aoeata *ahin, *ahout;
       +        Aoedev *d;
       +        Frame *f;
       +        Srb *srb;
       +
       +        ahin = (Aoeata*)b->rp;
       +        major = nhgets(ahin->major);
       +        d = mm2dev(major, ahin->minor);
       +        if(d == nil)
       +                return;
       +        QLOCK(d);
       +        if(waserror()){
       +                QUNLOCK(d);
       +                nexterror();
       +        }
       +        n = nhgetl(ahin->tag);
       +        f = getframe(d, n);
       +        if(f == nil){
       +                dprint("%æ: unexpected response; tag %ux\n", d, n);
       +                goto bail;
       +        }
       +        rtupdate(f->dl, tsince(f->tag));
       +        ahout = (Aoeata*)f->hdr;
       +        srb = f->srb;
       +
       +        if(ahin->cmdstat & 0xa9){
       +                eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
       +                        d, ahout->cmdstat, ahin->cmdstat);
       +                if(srb)
       +                        srb->error = Eio;
       +        } else {
       +                /* size of the piece this frame asked for */
       +                n = ahout->scnt * Aoesectsz;
       +                switch(ahout->cmdstat){
       +                case Crd:
       +                case Crdext:
       +                        if(BLEN(b) - Szaoeata < n){
       +                                eventlog("%æ: runt read blen %ld expect %d\n",
       +                                        d, BLEN(b), n);
       +                                goto bail;
       +                        }
       +                        memmove(f->dp, b->rp + Szaoeata, n);
       +                        /* fall through: reads and writes share the bookkeeping */
       +                case Cwr:
       +                case Cwrext:
       +                        if(n > Dbcnt)
       +                                f->nl->lostjumbo = 0;
       +                        if(f->bcnt -= n){
       +                                /* more to transfer: advance and reuse the frame */
       +                                f->lba += n / Aoesectsz;
       +                                f->dp = (uchar*)f->dp + n;
       +                                resend(d, f);
       +                                goto bail;
       +                        }
       +                        break;
       +                case Cid:
       +                        if(BLEN(b) - Szaoeata < 512){
       +                                eventlog("%æ: runt identify blen %ld expect %d\n",
       +                                        d, BLEN(b), n);
       +                                goto bail;
       +                        }
       +                        identify(d, (ushort*)(b->rp + Szaoeata));
       +                        break;
       +                default:
       +                        eventlog("%æ: unknown ata command %.2ux \n",
       +                                d, ahout->cmdstat);
       +                }
       +        }
       +
       +        if(srb && --srb->nout == 0 && srb->len == 0)
       +                WAKEUP(srb);
       +        f->srb = nil;
       +        f->tag = Tfree;
       +        d->nout--;
       +
       +        work(d);
       +bail:
       +        poperror();
       +        QUNLOCK(d);
       +}
       +
       +/*
       + * kernel process body: the reader for one netlink (v).
       + * marks itself running in netlinks.reader, optionally broadcasts
       + * a discover, then loops reading blocks from the netlink's data
       + * channel and dispatching aoe responses: error responses to
       + * errrsp, ata responses to atarsp, config responses to qcfgrsp.
       + * any error (including the netlink being marked down) lands in
       + * the waserror branch, which logs, clears the reader flag, wakes
       + * the netlink's rendez and exits the process.
       + */
       +static void
       +netrdaoeproc(void *v)
       +{
       +        int idx;
       +        char name[Maxpath+1], *s;
       +        Aoehdr *h;
       +        Block *b;
       +        Netlink *nl;
       +
       +        nl = (Netlink*)v;
       +        idx = nl - netlinks.nl;
       +        netlinks.reader[idx] = 1;
       +        /* keep a private copy of the path for the exit message */
       +        kstrcpy(name, nl->path, Maxpath);
       +
       +        if(waserror()){
       +                eventlog("netrdaoe@%s: exiting: %s\n", name, up->errstr);
       +                netlinks.reader[idx] = 0;
       +                wakeup(netlinks.rendez + idx);
       +                pexit(up->errstr, 1);
       +        }
       +        if(autodiscover)
       +                discover(0xffff, 0xff);
       +        for (;;) {
       +                /* the flag is only re-checked once per pass, i.e. after each read returns */
       +                if(!(nl->flag & Dup))
       +                        error("netlink is down");
       +                if(nl->dc == nil)
       +                        panic("netrdaoe: nl->dc == nil");
       +                b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0);
       +                if(b == nil)
       +                        error("network read");
       +                h = (Aoehdr*)b->rp;
       +                /* only responses are of interest; everything else is dropped */
       +                if(h->verflag & AFrsp)
       +                        if(s = aoeerror(h)){
       +                                eventlog("%s: %s\n", nl->path, s);
       +                                errrsp(b, s);
       +                        }else if(h->cmd == ACata)
       +                                atarsp(b);
       +                        else if(h->cmd == ACconfig)
       +                                qcfgrsp(b, nl);
       +                        else if((h->cmd & 0xf0) == 0){
       +                                eventlog("%s: unknown cmd %d\n",
       +                                        nl->path, h->cmd);
       +                                errrsp(b, "unknown command");
       +                        }
       +                freeb(b);
       +        }
       +}
       +
       +/*
       + * read the ethernet address of the interface at path from its
       + * addr file and parse it into ea.  the file is closed whether or
       + * not the read succeeds; an unparsable address raises
       + * "parseether failure".
       + */
       +static void
       +getaddr(char *path, uchar *ea)
       +{
       +        int nr;
       +        char text[2*Eaddrlen+1];
       +        Chan *ac;
       +
       +        uprint("%s/addr", path);
       +        ac = namec(up->genbuf, Aopen, OREAD, 0);
       +        if(waserror()) {
       +                cclose(ac);
       +                nexterror();
       +        }
       +        if(ac == nil)
       +                panic("æ: getaddr: c == nil");
       +        nr = devtab[ac->type]->read(ac, text, sizeof text-1, 0);
       +        poperror();
       +        cclose(ac);
       +
       +        text[nr] = 0;
       +        if(parseether(ea, text) < 0)
       +                error("parseether failure");
       +}
       +
       +/*
       + * bind the ethernet interface at path for aoe use: dial the aoe
       + * ethertype on it, open its mtu file if it has one (optional),
       + * read its address, register a netlink and start the reader
       + * process for it.
       + * on any error the channels opened so far are closed and the
       + * error is passed on.
       + */
       +static void
       +netbind(char *path)
       +{
       +        char addr[Maxpath];
       +        uchar ea[2*Eaddrlen+1];
       +        Chan *dc, *cc, *mtu;
       +        Netlink *nl;
       +
       +        snprint(addr, sizeof addr, "%s!0x%x", path, Aoetype);
       +        dc = chandial(addr, nil, nil, &cc);
       +        snprint(addr, sizeof addr, "%s/mtu", path);
       +        /* a missing mtu file is not an error; getmtu copes with nil */
       +        if(waserror())
       +                mtu = nil;
       +        else {
       +                mtu = namec(addr, Aopen, OREAD, 0);
       +                poperror();
       +        }
       +
       +        if(waserror()){
       +                /*
       +                 * Enonexist below is raised exactly when one of
       +                 * these is nil, so each must be checked before
       +                 * closing, as mtu already was.
       +                 */
       +                if(dc)
       +                        cclose(dc);
       +                if(cc)
       +                        cclose(cc);
       +                if(mtu)
       +                        cclose(mtu);
       +                nexterror();
       +        }
       +        if(dc == nil  || cc == nil)
       +                error(Enonexist);
       +        getaddr(path, ea);
       +        nl = addnet(path, cc, dc, mtu, ea);
       +        snprint(addr, sizeof addr, "netrdaoe@%s", path);
       +        kproc(addr, netrdaoeproc, nl);
       +        poperror();
       +}
       +
       +/* sleep condition: true once the int that v points at is non-zero */
       +static int
       +unbound(void *v)
       +{
       +        int *flag;
       +
       +        flag = v;
       +        return *flag != 0;
       +}
       +
       +/*
       + * Unbind the network interface at path from the AoE driver.
       + *
       + * Steps, in order: find the Netlink, mark it and every Devlink that
       + * uses it as down, wait for its reader to exit, resend outstanding
       + * frames that were sent on it, drop its Devlinks, close its channels
       + * and finally free devices that are left without any Devlink.
       + * Raises "device not bound" when no bound Netlink matches path.
       + */
       +static void
       +netunbind(char *path)
       +{
       +        int i, idx;
       +        Aoedev *d, **pp;
       +        Chan *dc, *cc, *mtu;
       +        Devlink *l;
       +        Frame *f;
       +        Netlink *n, *e;
       +
       +        n = netlinks.nl;
       +        e = n + nelem(netlinks.nl);
       +
       +        LOCK(&netlinks);
       +        for(; n < e; n++)
       +                if(n->dc && strcmp(n->path, path) == 0)
       +                        break;
       +        UNLOCK(&netlinks);
       +        if(n == e)
       +                error("device not bound");
       +
       +        /*
       +         * hunt down devices using this interface; disable
       +         * this also terminates the reader.
       +         */
       +        idx = n - netlinks.nl;
       +        WLOCK(&devs);
       +        for(d = devs.d; d; d = d->next){
       +                QLOCK(d);
       +                for(i = 0; i < d->ndl; i++){
       +                        l = d->dl + i;
       +                        if(l->nl == n)
       +                                l->flag &= ~Dup;
       +                }
       +                QUNLOCK(d);
       +        }
       +        n->flag &= ~Dup;
       +        WUNLOCK(&devs);
       +
       +        /* confirm reader is down; retry the sleep if it is interrupted. */
       +        while(waserror())
       +                ;
       +        sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx);
       +        poperror();
       +
       +        /* reschedule packets. */
       +        WLOCK(&devs);
       +        for(d = devs.d; d; d = d->next){
       +                QLOCK(d);
       +                for(i = 0; i < d->nframes; i++){
       +                        f = d->frames + i;
       +                        if(f->tag != Tfree && f->nl == n)
       +                                resend(d, f);
       +                }
       +                QUNLOCK(d);
       +        }
       +        WUNLOCK(&devs);
       +
       +        /*
       +         * squeeze devlink pool.  (we assert nobody is using them now)
       +         * i only advances when slot i is kept: after a removal the
       +         * entry shifted down into slot i must be examined too.
       +         */
       +        WLOCK(&devs);
       +        for(d = devs.d; d; d = d->next){
       +                QLOCK(d);
       +                for(i = 0; i < d->ndl; ){
       +                        l = d->dl + i;
       +                        if(l->nl == n)
       +                                memmove(l, l + 1, sizeof *l * (--d->ndl - i));
       +                        else
       +                                i++;
       +                }
       +                QUNLOCK(d);
       +        }
       +        WUNLOCK(&devs);
       +
       +        /*
       +         * close device link.  the channels are only detached while
       +         * the lock is held and closed after it has been released.
       +         */
       +        LOCK(&netlinks);
       +        dc = n->dc;
       +        cc = n->cc;
       +        mtu = n->mtu;
       +        memset(n, 0, sizeof *n);
       +        UNLOCK(&netlinks);
       +
       +        if(mtu)
       +                cclose(mtu);
       +        cclose(dc);
       +        cclose(cc);
       +
       +        /*
       +         * squeeze orphan devices.  pp always addresses the link that
       +         * points at d, so unlinking is correct for the head and for
       +         * any later element alike.
       +         */
       +        WLOCK(&devs);
       +        for(pp = &devs.d; (d = *pp) != nil; ){
       +                if(d->ndl > 0){
       +                        pp = &d->next;
       +                        continue;
       +                }
       +                QLOCK(d);
       +                downdev(d, "orphan");
       +                QUNLOCK(d);
       +                *pp = d->next;
       +                free(d->frames);
       +                free(d);
       +                dropunit();
       +        }
       +        WUNLOCK(&devs);
       +}
       +
       +/*
       + * Parse "shelf" or "shelf.slot" from f into *shelf and *slot.
       + * A nil f, or a missing ".slot" part, leaves the wildcard values
       + * 0xffff (shelf) and 0xff (slot).  Raises "bad shelf" or "bad slot"
       + * when a number is missing or out of range.
       + */
       +static void
       +strtoss(char *f, ushort *shelf, ushort *slot)
       +{
       +        ulong sh, sl;
       +        char *s;
       +
       +        *shelf = 0xffff;
       +        *slot = 0xff;
       +        if(!f)
       +                return;
       +        sh = strtol(f, &s, 0);
       +        if(s == f || sh > 0xffff)
       +                error("bad shelf");
       +        *shelf = sh;
       +        f = s;
       +        if(*f++ == '.'){
       +                /* range-check the full value before narrowing to ushort */
       +                sl = strtol(f, &s, 0);
       +                if(s == f || sl > 0xff)
       +                        error("bad slot");
       +                *slot = sl;
       +        }
       +}
       +
       +/* parse a "shelf[.slot]" string and start discovery for that target */
       +static void
       +discoverstr(char *f)
       +{
       +        ushort major, minor;
       +
       +        strtoss(f, &major, &minor);
       +        discover(major, minor);
       +}
       +
       +/*
       + * Take device d down, fail all of its frames with Enotup, unlink it
       + * from devs.d and free it.  The whole operation runs under the devs
       + * write lock.
       + */
       +static void
       +removedev(Aoedev *d)
       +{
       +        int i;
       +        Aoedev *p;
       +
       +        WLOCK(&devs);
       +        /* p becomes d's predecessor, or stays 0 when d is the list head */
       +        /* NOTE(review): p also ends up 0 if d is not on the list at all — confirm callers */
       +        p = 0;
       +        if(d != devs.d)
       +        for(p = devs.d; p; p = p->next)
       +                if(p->next == d)
       +                        break;
       +        QLOCK(d);
       +        d->flag &= ~Dup;
       +        newvers(d);
       +        d->ndl = 0;
       +        QUNLOCK(d);
       +        for(i = 0; i < d->nframes; i++)
       +                frameerror(d, d->frames+i, Enotup);
       +
       +        if(p)
       +                p->next = d->next;
       +        else
       +                devs.d = d->next;
       +        free(d->frames);
       +        free(d);
       +        dropunit();
       +        WUNLOCK(&devs);
       +}
       +
       +
       +/*
       + * Dev remove entry point: only a unit directory can be removed,
       + * which removes the corresponding device; anything else is Eperm.
       + */
       +static void
       +aoeremove(Chan *c)
       +{
       +        if(TYPE(c->qid) != Qunitdir)
       +                error(Eperm);
       +        removedev(unit2dev(UNIT(c->qid)));
       +}
       +
       +/*
       + * Remove the device whose major/minor match the "shelf[.slot]"
       + * string f; raises "device not bound" when none matches.
       + */
       +static void
       +removestr(char *f)
       +{
       +        ushort shelf, slot;
       +        Aoedev *d;
       +
       +        strtoss(f, &shelf, &slot);
       +        WLOCK(&devs);
       +        for(d = devs.d; d; d = d->next)
       +                if(shelf == d->major && slot == d->minor){
       +                        /*
       +                         * removedev takes the devs lock itself, so it is
       +                         * dropped here; d is unprotected in between.
       +                         */
       +                        WUNLOCK(&devs);        /* BOTCH */
       +                        removedev(d);
       +                        return;
       +                }
       +        WUNLOCK(&devs);
       +        error("device not bound");
       +}
       +
       +/*
       + * Handle a write of n bytes at db to the top-level ctl file.
       + * The text is parsed as one command from cmds[] and dispatched;
       + * the first argument, if any, is passed on as f.  Returns n.
       + * The parsed command buffer is freed on both the normal and the
       + * error path.
       + */
       +static long
       +topctlwrite(void *db, long n)
       +{
       +        enum {
       +                Autodiscover,
       +                Bind,
       +                Debug,
       +                Discover,
       +                Closewait,
       +                Rediscover,
       +                Remove,
       +                Unbind,
       +        };
       +        char *f;
       +        Cmdbuf *cb;
       +        Cmdtab *ct;
       +        /* Closewait has no entry here, so it can never be selected */
       +        static Cmdtab cmds[] = {
       +                { Autodiscover,        "autodiscover",        0        },
       +                { Bind,         "bind",         2        },
       +                { Debug,         "debug",         0        },
       +                { Discover,         "discover",         0        },
       +                { Rediscover,        "rediscover",        0        },
       +                { Remove,        "remove",        2        },
       +                { Unbind,        "unbind",        2        },
       +        };
       +
       +        cb = parsecmd(db, n);
       +        if(waserror()){
       +                free(cb);
       +                nexterror();
       +        }
       +        ct = lookupcmd(cb, cmds, nelem(cmds));
       +        f = cb->f[1];
       +        switch(ct->index){
       +        case Autodiscover:
       +                autodiscover = toggle(f, autodiscover);
       +                break;
       +        case Bind:
       +                netbind(f);
       +                break;
       +        case Debug:
       +                debug = toggle(f, debug);
       +                break;
       +        case Discover:
       +                discoverstr(f);
       +                break;
       +        case Rediscover:
       +                rediscover = toggle(f, rediscover);
       +                break;
       +        case Remove:
       +                removestr(f);        /* deprecated */
       +                break;
       +        case Unbind:
       +                netunbind(f);
       +                break;
       +        default:
       +                cmderror(cb, "unknown aoe control message");
       +        }
       +        poperror();
       +        free(cb);
       +        return n;
       +}
       +
       +/*
       + * Dev write entry point.  The top-level ctl file goes to
       + * topctlwrite, the per-unit files go to unitwrite, and every
       + * other file type is refused with Eperm.
       + */
       +static long
       +aoewrite(Chan *c, void *db, long n, vlong off)
       +{
       +        switch(TYPE(c->qid)){
       +        case Qtopctl:
       +                return topctlwrite(db, n);
       +        case Qctl:
       +        case Qdata:
       +        case Qconfig:
       +        case Qident:
       +                return unitwrite(c, db, n, off);
       +        default:
       +                error(Eperm);
       +        }
       +        return 0;        /* not reached: error does not return */
       +}
       +
       +/*
       + * Device table entry for the AoE driver: device character 'æ',
       + * name "aoe".  Entries named aoe* are implemented in this file;
       + * the dev* entries are presumably the kernel's generic defaults.
       + */
       +Dev aoedevtab = {
       +        L'æ',
       +        "aoe",
       +
       +        devreset,
       +        devinit,
       +        devshutdown,
       +        aoeattach,
       +        aoewalk,
       +        aoestat,
       +        aoeopen,
       +        devcreate,
       +        aoeclose,
       +        aoeread,
       +        devbread,
       +        aoewrite,
       +        devbwrite,
       +        aoeremove,
       +        devwstat,
       +        devpower,
       +        devconfig,
       +};
 (DIR) diff --git a/src/9vx/a/devcons.c b/src/9vx/a/devcons.c
       @@ -784,6 +784,7 @@ consread(Chan *c, void *buf, long n, vlong off)
                        while(!qcanread(lineq)){
                                if(qread(kbdq, &ch, 1) == 0)
                                        continue;
       +                        //XXX TODO: startup blocks here
                                send = 0;
                                if(ch == 0){
                                        /* flush output on rawoff -> rawon */
 (DIR) diff --git a/src/9vx/a/devether.c b/src/9vx/a/devether.c
       @@ -0,0 +1,542 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "io.h"
       +#include "ureg.h"
       +#include "error.h"
       +#include "netif.h"
       +
       +#include "etherif.h"
       +
       +#define MEMSIZE (256<<20)        // same as ../mmu.c:29 (TODO: var)
       +
       +static Ether *etherxx[MaxEther];
       +
       +/*
       + * Attach to the Ethernet controller selected by spec.
       + * An empty or nil spec selects controller 0; otherwise spec must be
       + * a number below MaxEther with no trailing text (else Ebadarg).
       + * Raises Enodev when no controller is registered in that slot.
       + * Calls the controller's attach hook, if it has one.
       + */
       +Chan*
       +etherattach(char* spec)
       +{
       +        ulong ctlrno;
       +        char *p;
       +        Chan *chan;
       +
       +        ctlrno = 0;
       +        if(spec && *spec){
       +                ctlrno = strtoul(spec, &p, 0);
       +                if((ctlrno == 0 && p == spec) || *p || (ctlrno >= MaxEther))
       +                        error(Ebadarg);
       +        }
       +        if(etherxx[ctlrno] == 0)
       +                error(Enodev);
       +
       +        chan = devattach('l', spec);
       +        if(waserror()){
       +                /* the attach hook raised: give the new channel back */
       +                chanfree(chan);
       +                nexterror();
       +        }
       +        chan->dev = ctlrno;
       +        if(etherxx[ctlrno]->attach)
       +                etherxx[ctlrno]->attach(etherxx[ctlrno]);
       +        poperror();
       +        return chan;
       +}
       +
       +/* walk within the netif file tree of the controller chan refers to */
       +static Walkqid*
       +etherwalk(Chan* chan, Chan* nchan, char** name, int nname)
       +{
       +        Ether *ether;
       +
       +        ether = etherxx[chan->dev];
       +        return netifwalk(&ether->ni, chan, nchan, name, nname);
       +}
       +
       +/* stat a file of the controller chan refers to via netifstat */
       +static int
       +etherstat(Chan* chan, uchar* dp, int n)
       +{
       +        Ether *ether;
       +
       +        ether = etherxx[chan->dev];
       +        return netifstat(&ether->ni, chan, dp, n);
       +}
       +
       +/* open a file of the controller chan refers to via netifopen */
       +static Chan*
       +etheropen(Chan* chan, int omode)
       +{
       +        Ether *ether;
       +
       +        ether = etherxx[chan->dev];
       +        return netifopen(&ether->ni, chan, omode);
       +}
       +
       +/* create is a deliberate no-op for this device: all arguments are ignored */
       +static void
       +ethercreate(Chan* ch, char* c, int i, ulong ul)
       +{
       +}
       +
       +/* close a file of the controller chan refers to via netifclose */
       +static void
       +etherclose(Chan* chan)
       +{
       +        Ether *ether;
       +
       +        ether = etherxx[chan->dev];
       +        netifclose(&ether->ni, chan);
       +}
       +
       +/*
       + * Read from a file of the controller chan refers to.
       + * When the controller supplies an ifstat hook, a read of the ifstat
       + * file is answered by the hook alone, and a read of the stats file
       + * first calls the hook with a zero length before the generic
       + * netifread runs.
       + */
       +static long
       +etherread(Chan* chan, void* buf, long n, vlong off)
       +{
       +        Ether *ether;
       +        ulong offset;
       +
       +        offset = off;
       +        ether = etherxx[chan->dev];
       +        if(ether->ifstat && (chan->qid.type & QTDIR) == 0){
       +                /*
       +                 * With some controllers it is necessary to reach
       +                 * into the chip to extract statistics.
       +                 */
       +                switch(NETTYPE(chan->qid.path)){
       +                case Nifstatqid:
       +                        return ether->ifstat(ether, buf, n, offset);
       +                case Nstatqid:
       +                        ether->ifstat(ether, buf, 0, offset);
       +                        break;
       +                }
       +        }
       +
       +        return netifread(&ether->ni, chan, buf, n, offset);
       +}
       +
       +/* block read from the controller chan refers to via netifbread */
       +static Block*
       +etherbread(Chan* chan, long n, ulong offset)
       +{
       +        Ether *ether;
       +
       +        ether = etherxx[chan->dev];
       +        return netifbread(&ether->ni, chan, n, offset);
       +}
       +
       +/* wstat a file of the controller chan refers to via netifwstat */
       +static int
       +etherwstat(Chan* chan, uchar* dp, int n)
       +{
       +        Ether *ether;
       +
       +        ether = etherxx[chan->dev];
       +        return netifwstat(&ether->ni, chan, dp, n);
       +}
       +
       +/*
       + * Queue a 64-byte trace record for a headers-only connection:
       + * up to 58 bytes copied from the start of the packet, then the
       + * packet length (2 bytes) and a millisecond timestamp (4 bytes),
       + * both most significant byte first.  Nothing is queued when the
       + * connection's input queue has no window or no block is available.
       + */
       +static void
       +etherrtrace(Netfile* f, Etherpkt* pkt, int len)
       +{
       +        int ms, ncopy;
       +        Block *b;
       +
       +        if(qwindow(f->in) <= 0)
       +                return;
       +        b = iallocb(64);
       +        if(b == nil)
       +                return;
       +        ncopy = len > 58 ? 58 : len;
       +        memmove(b->wp, pkt->d, ncopy);
       +        ms = TK2MS(MACHP(0)->tscticks);
       +        b->wp[58] = len>>8;
       +        b->wp[59] = len;
       +        b->wp[60] = ms>>24;
       +        b->wp[61] = ms>>16;
       +        b->wp[62] = ms>>8;
       +        b->wp[63] = ms;
       +        b->wp += 64;
       +        qpass(f->in, b);
       +}
       +
       +/*
       + * Deliver the received packet in bp to every connection on ether
       + * that wants it.
       + *
       + * fromwire != 0 means the caller gives the block away: it is either
       + * passed to one connection or freed, and 0 is returned.
       + * fromwire == 0 means the caller keeps the block: connections get
       + * copies and bp is returned unchanged.
       + */
       +Block*
       +etheriq(Ether* ether, Block* bp, int fromwire)
       +{
       +        Etherpkt *pkt;
       +        ushort type;
       +        int len, multi, tome, fromme;
       +        Netfile **ep, *f, **fp, *fx;
       +        Block *xbp;
       +
       +        ether->ni.inpackets++;
       +
       +        pkt = (Etherpkt*)bp->rp;
       +        len = BLEN(bp);
       +        type = (pkt->type[0]<<8)|pkt->type[1];
       +        fx = 0;
       +        ep = &ether->ni.f[Ntypes];
       +
       +        /* low bit of the first destination byte marks multicast/broadcast */
       +        multi = pkt->d[0] & 1;
       +        /* check for valid multicast addresses */
       +        if(multi && memcmp(pkt->d, ether->ni.bcast, sizeof(pkt->d)) != 0 && ether->ni.prom == 0){
       +                if(!activemulti(&ether->ni, pkt->d, sizeof(pkt->d))){
       +                        if(fromwire){
       +                                freeb(bp);
       +                                bp = 0;
       +                        }
       +                        return bp;
       +                }
       +        }
       +
       +        /* is it for me? */
       +        tome = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
       +        fromme = memcmp(pkt->s, ether->ea, sizeof(pkt->s)) == 0;
       +        // if(tome||fromme)
       +        //        iprint("XXX PACK: %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux -> %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux%s[%d]%s\n",
       +        //        pkt->s[0], pkt->s[1], pkt->s[2],pkt->s[3], pkt->s[4], pkt->s[5],
       +        //        pkt->d[0], pkt->d[1], pkt->d[2],pkt->d[3], pkt->d[4], pkt->d[5],
       +        //        (tome ? " <<--" : ""), len, (fromme ? " -->>" : ""));
       +        /*
       +         * Multiplex the packet to all the connections which want it.
       +         * If the packet is not to be used subsequently (fromwire != 0),
       +         * attempt to simply pass it into one of the connections, thereby
       +         * saving a copy of the data (usual case hopefully).
       +         */
       +        for(fp = ether->ni.f; fp < ep; fp++){
       +                if((f = *fp) != nil)
       +                if(f->type == type || f->type < 0)
       +                if(tome || multi || f->prom){
       +                        /* Don't want to hear bridged packets */
       +                        if(f->bridge && !fromwire && !fromme)
       +                                continue;
       +                        if(!f->headersonly){
       +                                /* first taker of a wire packet gets the original block, later */
       +                                if(fromwire && fx == 0)
       +                                        fx = f;
       +                                else if((xbp = iallocb(len)) != nil){
       +                                        memmove(xbp->wp, pkt, len);
       +                                        xbp->wp += len;
       +                                        if(qpass(f->in, xbp) < 0)
       +                                                ether->ni.soverflows++;
       +                                }
       +                                else
       +                                        ether->ni.soverflows++;
       +                        }
       +                        else
       +                                etherrtrace(f, pkt, len);
       +                }
       +        }
       +
       +        /* hand over the original only now, after all copies were made from it */
       +        if(fx){
       +                if(qpass(fx->in, bp) < 0)
       +                        ether->ni.soverflows++;
       +                return 0;
       +        }
       +        if(fromwire){
       +                freeb(bp);
       +                return 0;
       +        }
       +
       +        return bp;
       +}
       +
       +/*
       + * Send the packet in bp out through ether and return its length.
       + * Takes ownership of bp: it is either queued on ether->oq or freed.
       + */
       +static int
       +etheroq(Ether* ether, Block* bp)
       +{
       +        int len, loopback, s;
       +        Etherpkt *pkt;
       +
       +        ether->ni.outpackets++;
       +
       +        /*
       +         * Check if the packet has to be placed back onto the input queue,
       +         * i.e. if it's a loopback or broadcast packet or the interface is
       +         * in promiscuous mode.
       +         * If it's a loopback packet indicate to etheriq that the data isn't
       +         * needed and return, etheriq will pass-on or free the block.
       +         * To enable bridging to work, only packets that were originated
       +         * by this interface are fed back.
       +         */
       +        pkt = (Etherpkt*)bp->rp;
       +        len = BLEN(bp);
       +        loopback = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
       +        if(loopback || memcmp(pkt->d, ether->ni.bcast, sizeof(pkt->d)) == 0 || ether->ni.prom){
       +                s = splhi();
       +                /* fromwire is 0 here, so etheriq only copies and bp stays ours */
       +                etheriq(ether, bp, 0);
       +                splx(s);
       +        }
       +
       +        if(!loopback){
       +                qbwrite(ether->oq, bp);
       +                if(ether->transmit != nil)
       +                        ether->transmit(ether);
       +        } else
       +                freeb(bp);
       +
       +        return len;
       +}
       +
       +/*
       + * Dev write entry point.
       + * Writes to anything but the data file are control messages: they
       + * are offered to netifwrite first, then checked for "nonblocking",
       + * then passed to the controller's ctl hook; Ebadctl if none takes it.
       + * Writes to the data file send one packet of n bytes, which must lie
       + * within [minmtu, maxmtu]; the source address is overwritten with
       + * the controller's own address.
       + */
       +static long
       +etherwrite(Chan* chan, void* buf, long n, vlong v)
       +{
       +        Ether *ether;
       +        Block *bp;
       +        int nn, onoff;
       +        Cmdbuf *cb;
       +
       +        ether = etherxx[chan->dev];
       +        if(NETTYPE(chan->qid.path) != Ndataqid) {
       +                nn = netifwrite(&ether->ni, chan, buf, n);
       +                if(nn >= 0)
       +                        return nn;
       +                cb = parsecmd(buf, n);
       +                if(cb->f[0] && strcmp(cb->f[0], "nonblocking") == 0){
       +                        /* a bare "nonblocking" switches it on */
       +                        if(cb->nf <= 1)
       +                                onoff = 1;
       +                        else
       +                                onoff = atoi(cb->f[1]);
       +                        qnoblock(ether->oq, onoff);
       +                        free(cb);
       +                        return n;
       +                }
       +                free(cb);
       +                if(ether->ctl!=nil)
       +                        return ether->ctl(ether,buf,n);
       +
       +                error(Ebadctl);
       +        }
       +
       +        if(n > ether->maxmtu)
       +                error(Etoobig);
       +        if(n < ether->minmtu)
       +                error(Etoosmall);
       +
       +        bp = allocb(n);
       +        /* free the block if copying from buf raises */
       +        if(waserror()){
       +                freeb(bp);
       +                nexterror();
       +        }
       +        memmove(bp->rp, buf, n);
       +        memmove(bp->rp+Eaddrlen, ether->ea, Eaddrlen);
       +        poperror();
       +        bp->wp += n;
       +
       +        return etheroq(ether, bp);
       +}
       +
       +/*
       + * Dev block-write entry point.  Takes ownership of bp on every path.
       + * Non-data files are handled by etherwrite on the block's contents,
       + * after which the block is freed.  Data writes are size-checked
       + * against [minmtu, maxmtu] and the block itself is sent by etheroq.
       + */
       +static long
       +etherbwrite(Chan* chan, Block* bp, ulong u)
       +{
       +        Ether *ether;
       +        long n;
       +
       +        n = BLEN(bp);
       +        if(NETTYPE(chan->qid.path) != Ndataqid){
       +                if(waserror()) {
       +                        freeb(bp);
       +                        nexterror();
       +                }
       +                n = etherwrite(chan, bp->rp, n, 0);
       +                poperror();
       +                freeb(bp);
       +                return n;
       +        }
       +        ether = etherxx[chan->dev];
       +
       +        if(n > ether->maxmtu){
       +                freeb(bp);
       +                error(Etoobig);
       +        }
       +        if(n < ether->minmtu){
       +                freeb(bp);
       +                error(Etoosmall);
       +        }
       +
       +        return etheroq(ether, bp);
       +}
       +
       +/*
       + * Registered card drivers, filled in order by addethercard.
       + * addethercard stops at MaxEther entries, so the extra slot always
       + * keeps a nil type and terminates scans of the table.
       + */
       +static struct {
       +        char*        type;
       +        int        (*reset)(Ether*);
       +} cards[MaxEther+1];
       +
       +/*
       + * Register a card driver: t is its type name, r its reset routine.
       + * Panics once MaxEther drivers have been registered.
       + */
       +void
       +addethercard(char* t, int (*r)(Ether*))
       +{
       +        static int ncard;
       +        int slot;
       +
       +        if(ncard == MaxEther)
       +                panic("too many ether cards");
       +        slot = ncard++;
       +        cards[slot].reset = r;
       +        cards[slot].type = t;
       +}
       +
       +/*
       + * Convert a textual Ethernet address in from to Eaddrlen bytes in to.
       + * Each byte is two characters read as hexadecimal; one ':' after a
       + * byte is skipped if present.  Returns 0 on success, -1 if the
       + * string ends before Eaddrlen bytes have been read.
       + */
       +int
       +parseether(uchar *to, char *from)
       +{
       +        char hex[3];
       +        char *s;
       +        int k;
       +
       +        s = from;
       +        k = 0;
       +        while(k < Eaddrlen){
       +                if(s[0] == 0 || s[1] == 0)
       +                        return -1;
       +                hex[0] = s[0];
       +                hex[1] = s[1];
       +                hex[2] = 0;
       +                s += 2;
       +                to[k++] = strtoul(hex, 0, 16);
       +                if(*s == ':')
       +                        s++;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * Allocate and set up an Ether for controller slot ctlrno using card
       + * driver cardno (a negative cardno selects the first registered
       + * driver).  Calls the driver's reset routine, prints a one-line
       + * description and creates the output queue.
       + * Returns the new Ether, or nil when no memory is available, there
       + * is no such driver, or its reset routine fails.
       + */
       +static Ether*
       +etherprobe(int cardno, int ctlrno)
       +{
       +        int i, lg;
       +        ulong mb, bsz;
       +        Ether *ether;
       +        char buf[128], name[32];
       +
       +        ether = malloc(sizeof(Ether));
       +        if(ether == nil)
       +                return nil;
       +        memset(ether, 0, sizeof(Ether));
       +        ether->ctlrno = ctlrno;
       +        ether->tbdf = BUSUNKNOWN;
       +        ether->ni.mbps = 100;
       +        ether->minmtu = ETHERMINTU;
       +        ether->maxmtu = ETHERMAXTU;
       +
       +        if(cardno < 0){
       +                /* the unconditional break means only cards[0] is ever chosen here */
       +                for(cardno = 0; cards[cardno].type; cardno++){
       +                        for(i = 0; i < ether->isac.nopt; i++){
       +                                if(strncmp(ether->isac.opt[i], "ea=", 3))
       +                                        continue;
       +                                if(parseether(ether->ea, &ether->isac.opt[i][3]))
       +                                        memset(ether->ea, 0, Eaddrlen);
       +                        }
       +                        break;
       +                }
       +        }
       +
       +        if(cardno >= MaxEther || cards[cardno].type == nil){
       +                free(ether);
       +                return nil;
       +        }
       +        if(cards[cardno].reset(ether) < 0){
       +                free(ether);
       +                return nil;
       +        }
       +
       +        /*
       +         * IRQ2 doesn't really exist, it's used to gang the interrupt
       +         * controllers together. A device set to IRQ2 will appear on
       +         * the second interrupt controller as IRQ9.
       +         */
       +        if(ether->isac.irq == 2)
       +                ether->isac.irq = 9;
       +        snprint(name, sizeof(name), "ether%d", ctlrno);
       +
       +        /* every piece is bounded by the space left in buf */
       +        i = snprint(buf, sizeof(buf), "#l%d: %s: %dMbps port 0x%luX irq %d",
       +                ctlrno, cards[cardno].type, ether->ni.mbps, ether->isac.port, ether->isac.irq);
       +        if(ether->isac.mem)
       +                i += snprint(buf+i, sizeof(buf)-i, " addr 0x%luX", ether->isac.mem);
       +        if(ether->isac.size)
       +                i += snprint(buf+i, sizeof(buf)-i, " size 0x%luX", ether->isac.size);
       +        i += snprint(buf+i, sizeof(buf)-i, ": %2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux",
       +                ether->ea[0], ether->ea[1], ether->ea[2],
       +                ether->ea[3], ether->ea[4], ether->ea[5]);
       +        snprint(buf+i, sizeof(buf)-i, "\n");
       +        /* buf is data, not a format: a '%' in the card type must not be interpreted */
       +        print("%s", buf);
       +
       +        /* compute log10(ether->ni.mbps) into lg */
       +        for(lg = 0, mb = ether->ni.mbps; mb >= 10; lg++)
       +                mb /= 10;
       +        if (lg > 0)
       +                lg--;
       +        if (lg > 14)                        /* 2^(14+17) = 2^31 */
       +                lg = 14;
       +        /* allocate larger output queues for higher-speed interfaces */
       +        bsz = 1UL << (lg + 17);                /* 2^17 = 128K, bsz = 2^lg × 128K */
       +        while (bsz > MEMSIZE && bsz >= 128*1024)
       +                bsz /= 2;
       +
       +        netifinit(&ether->ni, name, Ntypes, bsz);
       +        /* keep halving the queue size until qopen succeeds */
       +        while (ether->oq == nil && bsz >= 128*1024) {
       +                bsz /= 2;
       +                ether->oq = qopen(bsz, Qmsg, 0, 0);
       +                ether->ni.limit = bsz;
       +        }
       +        if(ether->oq == nil)
       +                panic("etherreset %s", name);
       +        ether->ni.alen = Eaddrlen;
       +        memmove(ether->ni.addr, ether->ea, Eaddrlen);
       +        memset(ether->ni.bcast, 0xFF, Eaddrlen);
       +
       +        // iprint("XXX EADDR: %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux\n",
       +        // ether->ea[0], ether->ea[1], ether->ea[2],ether->ea[3], ether->ea[4], ether->ea[5]);
       +
       +        return ether;
       +}
       +
       +/*
       + * Fill the controller table.  The first pass probes every slot with
       + * card number -1; the second pass walks the registered card drivers
       + * and probes each still-empty slot, moving on to the next driver
       + * whenever a probe fails.
       + */
       +static void
       +etherreset(void)
       +{
       +        Ether *ether;
       +        int cardno, ctlrno;
       +
       +        for(ctlrno = 0; ctlrno < MaxEther; ctlrno++){
       +                ether = etherprobe(-1, ctlrno);
       +                if(ether != nil)
       +                        etherxx[ctlrno] = ether;
       +        }
       +
       +        cardno = 0;
       +        ctlrno = 0;
       +        while(cards[cardno].type != nil && ctlrno < MaxEther){
       +                if(etherxx[ctlrno] == nil){
       +                        ether = etherprobe(cardno, ctlrno);
       +                        if(ether == nil){
       +                                cardno++;
       +                                continue;
       +                        }
       +                        etherxx[ctlrno] = ether;
       +                }
       +                ctlrno++;
       +        }
       +}
       +
       +/*
       + * Call the shutdown hook of every configured controller; a
       + * controller without one only gets a console message.
       + */
       +static void
       +ethershutdown(void)
       +{
       +        Ether *e;
       +        int slot;
       +
       +        slot = 0;
       +        while(slot < MaxEther){
       +                e = etherxx[slot];
       +                if(e != nil){
       +                        if(e->shutdown != nil)
       +                                (*e->shutdown)(e);
       +                        else
       +                                print("#l%d: no shutdown fuction\n", slot);
       +                }
       +                slot++;
       +        }
       +}
       +
       +
       +#define POLY 0xedb88320
       +
       +/*
       + * Bit-at-a-time 32-bit CRC over len bytes at p, using POLY with a
       + * right-shifting register that starts at 0xffffffff.  The result is
       + * returned as it stands, with no final inversion.
       + */
       +ulong
       +ethercrc(uchar *p, int len)
       +{
       +        int n, bit;
       +        ulong crc, b;
       +
       +        crc = 0xffffffff;
       +        for(n = 0; n < len; n++){
       +                b = p[n];
       +                for(bit = 0; bit < 8; bit++){
       +                        if((crc ^ b) & 1)
       +                                crc = (crc>>1) ^ POLY;
       +                        else
       +                                crc = (crc>>1) ^ 0;
       +                        b >>= 1;
       +                }
       +        }
       +        return crc;
       +}
       +
       +/*
       + * Device table entry for the Ethernet driver: device character 'l',
       + * name "ether".  The initializer stops after the wstat entry, so any
       + * later members of Dev are left zero.
       + */
       +Dev etherdevtab = {
       +        'l',
       +        "ether",
       +
       +        etherreset,
       +        devinit,
       +        ethershutdown,
       +        etherattach,
       +        etherwalk,
       +        etherstat,
       +        etheropen,
       +        ethercreate,
       +        etherclose,
       +        etherread,
       +        etherbread,
       +        etherwrite,
       +        etherbwrite,
       +        devremove,
       +        etherwstat,
       +};
 (DIR) diff --git a/src/9vx/a/devsd.c b/src/9vx/a/devsd.c
       @@ -72,7 +72,7 @@ enum {
                                                 ((p)<<PartSHIFT)|((t)<<TypeSHIFT))
        
        
       -static void
       +void
        sdaddpart(SDunit* unit, char* name, uvlong start, uvlong end)
        {
                SDpart *pp;
       @@ -135,6 +135,19 @@ sdaddpart(SDunit* unit, char* name, uvlong start, uvlong end)
                pp->valid = 1;
        }
        
       +/* return the partition of unit whose name equals name, or nil */
       +SDpart*
       +sdfindpart(SDunit *unit, char *name)
       +{
       +        int k;
       +        SDpart *pp;
       +
       +        for(k = 0; k < unit->npart; k++)
       +                if(pp = &unit->part[k], strcmp(pp->perm.name, name) == 0)
       +                        return pp;
       +        return nil;
       +}
       +
        static void
        sddelpart(SDunit* unit, char* name)
        {
       @@ -198,6 +211,7 @@ sdinitpart(SDunit* unit)
                if(unit->sectors){
                        sdincvers(unit);
                        sdaddpart(unit, "data", 0, unit->sectors);
       +                partition(unit);
        #if 0
                        /*
                         * Use partitions passed from boot program,
 (DIR) diff --git a/src/9vx/a/dosfs.h b/src/9vx/a/dosfs.h
       @@ -0,0 +1,62 @@
       +typedef struct Dosboot        Dosboot;
       +typedef struct Dos        Dos;
       +typedef struct Dosdir        Dosdir;
       +typedef struct Dosfile        Dosfile;
       +typedef struct Dospart        Dospart;
       +
       +struct Dospart        /* one partition record; every member is uchar, 16 bytes in all */
       +{
       +        uchar flag;                /* active flag */
       +        uchar shead;                /* starting head */
       +        uchar scs[2];                /* starting cylinder/sector */
       +        uchar type;                /* partition type */
       +        uchar ehead;                /* ending head */
       +        uchar ecs[2];                /* ending cylinder/sector */
       +        uchar start[4];                /* starting sector */
       +        uchar len[4];                /* length in sectors */
       +};
       +
       +#define FAT12        0x01
       +#define FAT16        0x04
       +#define EXTEND        0x05
       +#define FATHUGE        0x06
       +#define FAT32        0x0b
       +#define FAT32X        0x0c
       +#define EXTHUGE        0x0f
       +#define DMDDO        0x54
       +#define PLAN9        0x39
       +#define LEXTEND 0x85
       +
       +struct Dosfile{
       +        Dos        *dos;                /* owning dos file system */
       +        char        name[8];
       +        char        ext[3];
       +        uchar        attr;
       +        long        length;
       +        long        pstart;                /* physical start cluster address */
       +        long        pcurrent;        /* physical current cluster address */
       +        long        lcurrent;        /* logical current cluster address */
       +        long        offset;
       +};
       +
       +struct Dos{
       +        long        start;                /* start of file system */
       +        int        sectsize;        /* in bytes */
       +        int        clustsize;        /* in sectors */
       +        int        clustbytes;        /* in bytes */
       +        int        nresrv;                /* sectors */
       +        int        nfats;                /* usually 2 */
       +        int        rootsize;        /* number of entries */
       +        int        volsize;        /* in sectors */
       +        int        mediadesc;
       +        int        fatsize;        /* in sectors */
       +        int        fatclusters;
       +        int        fatbits;        /* 12 or 16 */
       +        long        fataddr;        /* sector number */
       +        long        rootaddr;
       +        long        rootclust;
       +        long        dataaddr;
       +        long        freeptr;
       +};
       +
       +extern int        dosinit(Fs*);
 (DIR) diff --git a/src/9vx/a/etherif.h b/src/9vx/a/etherif.h
       @@ -0,0 +1,39 @@
       +enum {
       +        MaxEther        = 48,
       +        Ntypes                = 8,
       +};
       +
       +typedef struct Ether Ether;
       +struct Ether {
       +        ISAConf isac;
       +
       +        int        ctlrno;
       +        int        tbdf;                        /* type+busno+devno+funcno */
       +        int        minmtu;
       +        int         maxmtu;
       +        uchar        ea[Eaddrlen];
       +
       +        void        (*attach)(Ether*);        /* filled in by reset routine */
       +        void        (*detach)(Ether*);
       +        void        (*transmit)(Ether*);
       +        void        (*interrupt)(Ureg*, void*);
       +        long        (*ifstat)(Ether*, void*, long, ulong);
       +        long         (*ctl)(Ether*, void*, long); /* custom ctl messages */
       +        void        (*power)(Ether*, int);        /* power on/off */
       +        void        (*shutdown)(Ether*);        /* shutdown hardware before reboot */
       +        void        *ctlr;
       +
       +        Queue*        oq;
       +
       +        Netif        ni;
       +};
       +
       +extern Block* etheriq(Ether*, Block*, int);
       +extern void addethercard(char*, int(*)(Ether*));
       +extern ulong ethercrc(uchar*, int);
       +extern int parseether(uchar*, char*);
       +
       +#define NEXT(x, l)        (((uint)(x)+1)%(l))        /* x+1 modulo l, with x cast to uint first */
       +#define PREV(x, l)        (((x) == 0) ? (l)-1: (x)-1)        /* x-1, wrapping 0 to l-1; expands x twice */
       +#define        HOWMANY(x, y)        (((x)+((y)-1))/(y))        /* (x + y-1) / y; expands y twice */
       +#define ROUNDUP(x, y)        (HOWMANY((x), (y))*(y))        /* HOWMANY(x, y) * y; expands y three times */
 (DIR) diff --git a/src/9vx/a/fns.ed b/src/9vx/a/fns.ed
       @@ -16,4 +16,54 @@ int        tailkmesg(char*, int);
        void        trap(Ureg*);
        void        uartecho(char*, int);
        void        uartinit(int);
       +
       +#define GSHORT(p)        (((p)[1]<<8)|(p)[0])        /* 16-bit value, low byte at p[0] */
       +#define GLONG(p)        ((GSHORT((p)+2)<<16)|GSHORT(p))        /* two GSHORTs, low half at p; p now parenthesized */
       +
       +void        __plock(Psleep*);
       +void        __punlock(Psleep*);
       +void        __pwakeup(Psleep*);
       +void        __psleep(Psleep*);
       +
       +extern int tracelock;
       +
       +#define lockfngen(type)        __ ## type
       +
       +#define lockgen(type, arg)                                                                \
       +        do {                                                                                \
       +                /* Optional trace: operation name, lock address, call site. */                \
       +                if (tracelock)                                                                \
       +                        iprint("%s %p %s %d\n", (#type), (arg), __FILE__, __LINE__);        \
       +                /* The __-prefixed routine named by type is called */                        \
       +                /* whether or not tracing is enabled. */                                \
       +                lockfngen(type)((arg));                                                        \
       +        } while (0)
       +
       +#define qlock(x)        lockgen(qlock, (x))
       +#define qunlock(x)        lockgen(qunlock, (x))
       +#define rlock(x)        lockgen(rlock, (x))
       +#define runlock(x)        lockgen(runlock, (x))
       +#define wlock(x)        lockgen(wlock, (x))
       +#define wunlock(x)        lockgen(wunlock, (x))
       +#define plock(x)        lockgen(plock, (x))
       +#define punlock(x)        lockgen(punlock, (x))
       +#define pwakeup(x)        lockgen(pwakeup, (x))
       +#define psleep(x)        lockgen(psleep, (x))
       +// #define lock(x)                lockgen(lock, (x))
       +// #define unlock(x)        lockgen(unlock, (x))
       +#define lock(x) __lock(x)
       +#define unlock(x) __unlock(x)
       +#define canqlock        __canqlock
       +#define canrlock        __canrlock
       +
       +#define        LOCK(x)                lock(&((x)->lk))
       +#define        UNLOCK(x)        unlock(&((x)->lk))
       +#define CANQLOCK(x)        canqlock(&((x)->qlock))
       +#define        QLOCK(x)        qlock(&((x)->qlock))
       +#define        QUNLOCK(x)        qunlock(&((x)->qlock))
       +#define CANRLOCK(x)        canrlock(&((x)->rwlock))
       +#define        RLOCK(x)        rlock(&((x)->rwlock))
       +#define        RUNLOCK(x)        runlock(&((x)->rwlock))
       +#define        WLOCK(x)        wlock(&((x)->rwlock))
       +#define        WUNLOCK(x)        wunlock(&((x)->rwlock))
        .
 (DIR) diff --git a/src/9vx/a/fns.h b/src/9vx/a/fns.h
       @@ -167,8 +167,53 @@ void        *uvalidaddr(ulong addr, ulong len, int write);
        int        isuaddr(void*);
        void        setsigsegv(int invx32);
        
       -void        plock(Psleep*);
       -void        punlock(Psleep*);
       -void        pwakeup(Psleep*);
       -void        psleep(Psleep*);
       +#define GSHORT(p)        (((p)[1]<<8)|(p)[0])        /* 16-bit value, low byte at p[0] */
       +#define GLONG(p)        ((GSHORT((p)+2)<<16)|GSHORT(p))        /* two GSHORTs, low half at p; p now parenthesized */
       +
       +void        __plock(Psleep*);
       +void        __punlock(Psleep*);
       +void        __pwakeup(Psleep*);
       +void        __psleep(Psleep*);
       +
       +extern int tracelock;
       +
       +#define lockfngen(type)        __ ## type
       +
       +#define lockgen(type, arg)                                                                \
       +        do {                                                                                \
       +                /* Optional trace: operation name, lock address, call site. */                \
       +                if (tracelock)                                                                \
       +                        iprint("%s %p %s %d\n", (#type), (arg), __FILE__, __LINE__);        \
       +                /* The __-prefixed routine named by type is called */                        \
       +                /* whether or not tracing is enabled. */                                \
       +                lockfngen(type)((arg));                                                        \
       +        } while (0)
       +
       +#define qlock(x)        lockgen(qlock, (x))
       +#define qunlock(x)        lockgen(qunlock, (x))
       +#define rlock(x)        lockgen(rlock, (x))
       +#define runlock(x)        lockgen(runlock, (x))
       +#define wlock(x)        lockgen(wlock, (x))
       +#define wunlock(x)        lockgen(wunlock, (x))
       +#define plock(x)        lockgen(plock, (x))
       +#define punlock(x)        lockgen(punlock, (x))
       +#define pwakeup(x)        lockgen(pwakeup, (x))
       +#define psleep(x)        lockgen(psleep, (x))
       +// #define lock(x)                lockgen(lock, (x))
       +// #define unlock(x)        lockgen(unlock, (x))
       +#define lock(x) __lock(x)
       +#define unlock(x) __unlock(x)
       +#define canqlock        __canqlock
       +#define canrlock        __canrlock
       +
       +#define        LOCK(x)                lock(&((x)->lk))
       +#define        UNLOCK(x)        unlock(&((x)->lk))
       +#define CANQLOCK(x)        canqlock(&((x)->qlock))
       +#define        QLOCK(x)        qlock(&((x)->qlock))
       +#define        QUNLOCK(x)        qunlock(&((x)->qlock))
       +#define CANRLOCK(x)        canrlock(&((x)->rwlock))
       +#define        RLOCK(x)        rlock(&((x)->rwlock))
       +#define        RUNLOCK(x)        runlock(&((x)->rwlock))
       +#define        WLOCK(x)        wlock(&((x)->rwlock))
       +#define        WUNLOCK(x)        wunlock(&((x)->rwlock))
        
 (DIR) diff --git a/src/9vx/a/fs.h b/src/9vx/a/fs.h
       @@ -0,0 +1,38 @@
       +typedef struct File File;
       +typedef struct Fs Fs;
       +
       +#include "dosfs.h"
       +#include "kfs.h"
       +
       +struct File{
       +        union{
       +                Dosfile        dos;
       +                Kfsfile        kfs;
       +                int walked;
       +        };
       +        Fs        *fs;
       +        char        *path;
       +};
       +
       +struct Fs{
       +        union {
       +                Dos dos;
       +                Kfs kfs;
       +        };
       +        int        dev;                                /* device id */
       +        long        (*diskread)(Fs*, void*, long);        /* disk read routine */
       +        vlong        (*diskseek)(Fs*, vlong);        /* disk seek routine */
       +        long        (*read)(File*, void*, long);
       +        int        (*walk)(File*, char*);
       +        File        root;
       +};
       +
       +/*
       +extern int chatty;
       +extern int dotini(Fs*);
       +extern int fswalk(Fs*, char*, File*);
       +extern int fsread(File*, void*, long);
       +extern int fsboot(Fs*, char*, Boot*);
       +*/
       +
       +#define BADPTR(x) ((ulong)(x) < 0x80000000)        /* x parenthesized so the cast covers the whole argument */
 (DIR) diff --git a/src/9vx/a/ip.ed b/src/9vx/a/ip.ed
       @@ -0,0 +1,2297 @@
       +diff -e ip.orig/arp.c ip/arp.c
       +643c
       +        QUNLOCK(arp);
       +.
       +613,614c
       +        RUNLOCK(ifc);
       +        QLOCK(arp);
       +.
       +609c
       +        QUNLOCK(arp);        /* for icmpns */
       +.
       +589c
       +                if((a->rxtsrem <= 0) || !(CANRLOCK(ifc)) || (a->ifcid != ifc->ifcid)){
       +.
       +574c
       +        QLOCK(arp);
       +.
       +557c
       +                QUNLOCK(arp);
       +.
       +554c
       +                QLOCK(arp);
       +.
       +511c
       +                QUNLOCK(arp);
       +.
       +481c
       +                QLOCK(arp);
       +.
       +444c
       +                QUNLOCK(arp);
       +.
       +426c
       +                QLOCK(arp);
       +.
       +398c
       +        QUNLOCK(arp);
       +.
       +380c
       +                                        RUNLOCK(ifc);
       +.
       +375c
       +                                        RLOCK(ifc);
       +.
       +372c
       +                                                RUNLOCK(ifc);
       +.
       +366c
       +                        QUNLOCK(arp);
       +.
       +337c
       +        QLOCK(arp);
       +.
       +292c
       +        QUNLOCK(arp);
       +.
       +260c
       +        QUNLOCK(arp);
       +.
       +258c
       +arprelease(Arp *arp, Arpent* ae)
       +.
       +250c
       +        QUNLOCK(arp);
       +.
       +219c
       +        QLOCK(arp);
       +.
       +50c
       +int         ReTransTimer = RETRANS_TIMER;
       +.
       +48c
       +#define haship(s) ((ulong)((s)[IPaddrlen-1])%NHASH)
       +.
       +36c
       +        QLock        qlock;
       +.
       +14d
       +6c
       +#include "error.h"
       +.
       +2c
       +#include "lib.h"
       +.
       +diff -e ip.orig/chandial.c ip/chandial.c
       +6,7c
       +#include        "error.h"
       +#include        "ip/ip.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/devip.c ip/devip.c
       +1430c
       +        QUNLOCK(c);
       +.
       +1418c
       +                QUNLOCK(c);
       +.
       +1404,1411c
       +                QUNLOCK(c);
       +.
       +1399c
       +        QLOCK(c);
       +.
       +1349c
       +        QUNLOCK(c);
       +.
       +1326,1328d
       +1322,1323d
       +1318c
       +                        QUNLOCK(c);
       +.
       +1310c
       +                if(CANQLOCK(c)){
       +.
       +1294c
       +                        QLOCK(c);
       +.
       +1185c
       +                QUNLOCK(c);
       +.
       +1130c
       +                        QUNLOCK(c);
       +.
       +1128c
       +                QLOCK(c);
       +.
       +1033c
       +        QLOCK(c);
       +.
       +1029c
       +                QLOCK(c);
       +.
       +1027c
       +        QUNLOCK(c);
       +.
       +980c
       +        QLOCK(c);
       +.
       +976c
       +                QLOCK(c);
       +.
       +974c
       +        QUNLOCK(c);
       +.
       +831c
       +        QUNLOCK(p);
       +.
       +820,826c
       +        QUNLOCK(p);
       +.
       +793c
       +        QLOCK(p);
       +.
       +765c
       +        QUNLOCK(p);
       +.
       +760c
       +                        QUNLOCK(p);
       +.
       +748c
       +        QLOCK(p);
       +.
       +582c
       +        QUNLOCK(cv);
       +.
       +561c
       +                QUNLOCK(cv);
       +.
       +558c
       +        QLOCK(cv);
       +.
       +516c
       +ipremove(Chan* _)
       +.
       +510c
       +ipcreate(Chan* _, char* __, int ___, ulong ____)
       +.
       +494c
       +                        QUNLOCK(cv);
       +.
       +487c
       +                        QLOCK(cv);
       +.
       +470c
       +                QUNLOCK(cv);
       +.
       +468c
       +                QLOCK(cv);
       +.
       +447,448c
       +                QUNLOCK(cv);
       +                QUNLOCK(p);
       +.
       +431,432c
       +                        QUNLOCK(cv);
       +                        QUNLOCK(p);
       +.
       +429c
       +                QLOCK(cv);
       +.
       +427c
       +                QLOCK(p);
       +.
       +415c
       +                QUNLOCK(p);
       +.
       +411c
       +                        QUNLOCK(p);
       +.
       +409c
       +                QLOCK(p);
       +.
       +174c
       +ipgen(Chan *c, char* __ch, Dirtab* __dt, int __i, int s, Dir *dp)
       +.
       +50c
       +#define QID(p, c, y)         ( ((uint)(p)<<(Shiftproto)) | ((uint)(c)<<Shiftconv) | (y) )
       +.
       +6,7c
       +#include        "error.h"
       +#include        "ip/ip.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/esp.c ip/esp.c
       +1106a
       +
       +
       +#ifdef notdef
       +enum {
       +        RC4forward= 10*1024*1024,        /* maximum skip forward */
       +        RC4back = 100*1024,        /* maximum look back */
       +};
       +
       +typedef struct Esprc4 Esprc4;
       +struct Esprc4
       +{
       +        ulong        cseq;                /* current byte sequence number */
       +        RC4state current;
       +
       +        int        ovalid;                /* old is valid */
       +        ulong        lgseq;                /* last good sequence */
       +        ulong        oseq;                /* old byte sequence number */
       +        RC4state old;
       +};
       +
       +static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
       +
       +static int
       +rc4cipher(Espcb *ecb, uchar *p, int n)
       +{
       +        Esprc4 *esprc4;
       +        RC4state tmpstate;
       +        ulong seq;
       +        long d, dd;
       +
       +        if(n < 4)
       +                return 0;
       +
       +        esprc4 = ecb->espstate;
       +        if(ecb->incoming) {
       +                seq = nhgetl(p);
       +                p += 4;
       +                n -= 4;
       +                d = seq-esprc4->cseq;
       +                if(d == 0) {
       +                        rc4(&esprc4->current, p, n);
       +                        esprc4->cseq += n;
       +                        if(esprc4->ovalid) {
       +                                dd = esprc4->cseq - esprc4->lgseq;
       +                                if(dd > RC4back)
       +                                        esprc4->ovalid = 0;
       +                        }
       +                } else if(d > 0) {
       +print("esp rc4cipher: missing packet: %uld %ld\n", seq, d); /* this link is hosed */
       +                        if(d > RC4forward) {
       +                                strcpy(up->errstr, "rc4cipher: skipped too much");
       +                                return 0;
       +                        }
       +                        esprc4->lgseq = seq;
       +                        if(!esprc4->ovalid) {
       +                                esprc4->ovalid = 1;
       +                                esprc4->oseq = esprc4->cseq;
       +                                memmove(&esprc4->old, &esprc4->current,
       +                                        sizeof(RC4state));
       +                        }
       +                        rc4skip(&esprc4->current, d);
       +                        rc4(&esprc4->current, p, n);
       +                        esprc4->cseq = seq+n;
       +                } else {
       +print("esp rc4cipher: reordered packet: %uld %ld\n", seq, d);
       +                        dd = seq - esprc4->oseq;
       +                        if(!esprc4->ovalid || -d > RC4back || dd < 0) {
       +                                strcpy(up->errstr, "rc4cipher: too far back");
       +                                return 0;
       +                        }
       +                        memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
       +                        rc4skip(&tmpstate, dd);
       +                        rc4(&tmpstate, p, n);
       +                        return 1;
       +                }
       +
       +                /* move old state up */
       +                if(esprc4->ovalid) {
       +                        dd = esprc4->cseq - RC4back - esprc4->oseq;
       +                        if(dd > 0) {
       +                                rc4skip(&esprc4->old, dd);
       +                                esprc4->oseq += dd;
       +                        }
       +                }
       +        } else {
       +                hnputl(p, esprc4->cseq);
       +                p += 4;
       +                n -= 4;
       +                rc4(&esprc4->current, p, n);
       +                esprc4->cseq += n;
       +        }
       +        return 1;
       +}
       +
       +static void
       +rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
       +{
       +        Esprc4 *esprc4;
       +
       +        /* bits to bytes */
       +        n = (n+7)>>3;
       +        esprc4 = smalloc(sizeof(Esprc4));
       +        memset(esprc4, 0, sizeof(Esprc4));
       +        setupRC4state(&esprc4->current, k, n);
       +        ecb->espalg = name;
       +        ecb->espblklen = 4;
       +        ecb->espivlen = 4;
       +        ecb->cipher = rc4cipher;
       +        ecb->espstate = esprc4;
       +}
       +#endif
       +.
       +1056,1081d
       +1048,1050c
       +        ecb->espblklen = 8;
       +        ecb->espivlen = 8;
       +.
       +1045c
       +        for(i=0; i<8; i++)
       +.
       +1040,1042c
       +        /* bits to bytes */
       +        n = (n+7)>>3;
       +        if(n > 8)
       +                n = 8;
       +.
       +1037c
       +        uchar key[8], ivec[8];
       +.
       +1035c
       +desespinit(Espcb *ecb, char *name, uchar *k, int n)
       +.
       +1019,1033d
       +1013,1014c
       +                memmove(p, ds->ivec, 8);
       +                for(p += 8; p < ep; p += 8){
       +                        pp = p;
       +                        ip = ds->ivec;
       +                        for(eip = ip+8; ip < eip; )
       +                                *pp++ ^= *ip++;
       +                        block_cipher(ds->expanded, p, 0);
       +                        memmove(ds->ivec, p, 8);
       +                }
       +.
       +1010,1011c
       +                memmove(ds->ivec, p, 8);
       +                p += 8;
       +                while(p < ep){
       +                        memmove(tmp, p, 8);
       +                        block_cipher(ds->expanded, p, 1);
       +                        tp = tmp;
       +                        ip = ds->ivec;
       +                        for(eip = ip+8; ip < eip; ){
       +                                *p++ ^= *ip;
       +                                *ip++ = *tp++;
       +                        }
       +                }
       +.
       +1008a
       +        ep = p + n;
       +.
       +1006a
       +        uchar tmp[8];
       +        uchar *pp, *tp, *ip, *eip, *ep;
       +.
       +999,1003d
       +993c
       +        ecb->ahlen = 12;
       +.
       +990c
       +        klen >>= 3;                /* convert to bytes */
       +
       +.
       +986c
       +md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
       +.
       +979c
       +        seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
       +.
       +968c
       +        digest = md5(opad, 64, nil, nil);
       +.
       +966c
       +        digest = md5(ipad, 64, nil, nil);
       +.
       +959,962c
       +        for(i=0; i<64; i++){
       +                ipad[i] = 0x36;
       +                opad[i] = 0x5c;
       +        }
       +        ipad[64] = opad[64] = 0;
       +        for(i=0; i<klen; i++){
       +.
       +957a
       +        uchar innerhash[MD5dlen];
       +.
       +956d
       +954a
       +        uchar ipad[65], opad[65];
       +.
       +796,952c
       +void
       +.
       +790c
       +        ecb->ahlen = 12;
       +.
       +786c
       +        klen >>= 3;                /* convert bits to bytes (8 bits per byte) */
       +.
       +782c
       +shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
       +.
       +775c
       +        seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
       +.
       +772a
       +        int r;
       +.
       +771d
       +764c
       +        digest = sha1(opad, 64, nil, nil);
       +.
       +762c
       +        digest = sha1(ipad, 64, nil, nil);
       +.
       +755,758c
       +        for(i=0; i<64; i++){
       +                ipad[i] = 0x36;
       +                opad[i] = 0x5c;
       +        }
       +        ipad[64] = opad[64] = 0;
       +        for(i=0; i<klen; i++){
       +.
       +753a
       +        uchar innerhash[SHA1dlen];
       +.
       +752d
       +750a
       +        uchar ipad[65], opad[65];
       +.
       +743,748c
       +void
       +.
       +735c
       +nullahinit(Espcb *ecb, char *name, uchar* _, int __)
       +.
       +729c
       +nullauth(Espcb* _, uchar* __, int ___, uchar* ____)
       +.
       +720c
       +nullespinit(Espcb *ecb, char *name, uchar* _, int __)
       +.
       +714c
       +nullcipher(Espcb* _, uchar* __, int ___)
       +.
       +708,712d
       +647c
       +        QUNLOCK(c);
       +.
       +642c
       +        QLOCK(c);
       +.
       +632c
       +        QUNLOCK(c);
       +.
       +627c
       +        QLOCK(c);
       +.
       +606c
       +        QUNLOCK(esp);
       +.
       +600,601c
       +        spi = nhgetl(h->espspi);        /* espspi is 32 bits: nhgetl/hnputl are used on it elsewhere */
       +        QLOCK(esp);
       +        c = convlookup(esp, spi);
       +.
       +597,598c
       +        h = (Esp4hdr*)(bp->rp);
       +.
       +595c
       +        ulong spi;
       +.
       +593a
       +        Esp4hdr *h;
       +.
       +590d
       +568c
       +        QUNLOCK(c);
       +.
       +565c
       +                qpass(c->rq, bp);
       +.
       +560,561c
       +                netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
       +                        laddr, spi);
       +.
       +557,558d
       +547c
       +        bp->rp += hdrlen + ecb->espivlen;
       +.
       +539,541c
       +                QUNLOCK(c);
       +                netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n",
       +                        raddr, laddr, spi);
       +.
       +535c
       +        et = (Esptail*)(bp->rp + hdrlen + payload);
       +.
       +523,529c
       +        if(!ecb->cipher(ecb, bp->rp + hdrlen, payload)) {
       +                QUNLOCK(c);
       +print("esp: cipher failed %I -> %I!%ld: %s\n", raddr, laddr, spi, up->errstr);
       +                netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %s\n", raddr,
       +                        laddr, spi, up->errstr);
       +.
       +517,519c
       +                QUNLOCK(c);
       +                netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n",
       +                        raddr, laddr, spi, payload, BLEN(bp));
       +.
       +515c
       +        payload = BLEN(bp) - hdrlen - ecb->ahlen;
       +.
       +507,510c
       +                QUNLOCK(c);
       +print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
       +                netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
       +                        laddr, spi);
       +.
       +502,505c
       +        espspi = version == V4? ((Esp4hdr*)bp->rp)->espspi:
       +                                ((Esp6hdr*)bp->rp)->espspi;
       +.
       +493,496c
       +        if(BLEN(bp) < hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
       +                QUNLOCK(c);
       +                netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
       +                        laddr, spi);
       +.
       +485,486c
       +        QLOCK(c);
       +        QUNLOCK(esp);
       +.
       +477,479c
       +                QUNLOCK(esp);
       +                netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
       +                        laddr, spi);
       +.
       +475c
       +        c = convlookup(esp, spi);
       +.
       +473c
       +        if (version == V4) {
       +                eh4 = (Esp4hdr*)bp->rp;
       +                spi = nhgetl(eh4->espspi);
       +                v4tov6(raddr, eh4->espsrc);
       +                v4tov6(laddr, eh4->espdst);
       +        } else {
       +                eh6 = (Esp6hdr*)bp->rp;
       +                spi = nhgetl(eh6->espspi);
       +                ipmove(raddr, eh6->src);
       +                ipmove(laddr, eh6->dst);
       +        }
       +
       +        QLOCK(esp);
       +.
       +471d
       +464,466c
       +        bp = pullupblock(bp, hdrlen + Esptaillen);
       +.
       +462a
       +        if (bp == nil || BLEN(bp) == 0) {
       +                /* get enough to identify the IP version */
       +                bp = pullupblock(bp, IP4HDR);
       +                if(bp == nil) {
       +                        netlog(f, Logesp, "esp: short packet\n");
       +                        return;
       +                }
       +        }
       +        eh4 = (Esp4hdr*)bp->rp;
       +        version = ((eh4->vihl & 0xf0) == IP_VER4? V4: V6);
       +        hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
       +.
       +459,460c
       +        uchar *auth, *espspi;
       +        ulong spi;
       +        int payload, nexthdr, version, hdrlen;
       +.
       +457c
       +        uchar raddr[IPaddrlen], laddr[IPaddrlen];
       +.
       +453,454c
       +        Esp4hdr *eh4;
       +        Esp6hdr *eh6;
       +        Esptail *et;
       +        Userhdr *uh;
       +.
       +451c
       +espiput(Proto *esp, Ipifc* _, Block *bp)
       +.
       +446,449d
       +440c
       +        if (version == V4)
       +.
       +438c
       +        QUNLOCK(c);
       +.
       +434,435c
       +        ecb->auth(ecb, bp->rp + iphdrlen, (hdrlen - iphdrlen) +
       +.
       +429,431d
       +425a
       +                hnputl(eh6->espspi, ecb->spi);
       +                hnputl(eh6->espseq, ++ecb->seq);
       +.
       +424d
       +420,422d
       +414a
       +                hnputl(eh4->espspi, ecb->spi);
       +                hnputl(eh4->espseq, ++ecb->seq);
       +.
       +411,413c
       +        /* fill in head */
       +        if (version == V4) {
       +.
       +407,409c
       +        ecb->cipher(ecb, bp->rp + hdrlen, payload + pad + Esptaillen);
       +        auth = bp->rp + hdrlen + payload + pad + Esptaillen;
       +.
       +401c
       +        eh4 = (Esp4hdr *)bp->rp;
       +        eh6 = (Esp6hdr *)bp->rp;
       +        et = (Esptail*)(bp->rp + hdrlen + payload + pad);
       +.
       +383,384c
       +        bp = padblock(bp, hdrlen + ecb->espivlen);
       +.
       +370c
       +                        QUNLOCK(c);
       +.
       +363c
       +        QLOCK(c);
       +.
       +358c
       +        version = ipvers(c);
       +        iphdrlen = version == V4? IP4HDR: IP6HDR;
       +        hdrlen =   version == V4? Esp4hdrlen: Esp6hdrlen;
       +
       +.
       +356c
       +        Espcb *ecb;
       +        Block *bp;
       +        int nexthdr, payload, pad, align, version, hdrlen, iphdrlen;
       +        uchar *auth;
       +.
       +353d
       +347,349d
       +299,344d
       +284,297d
       +274c
       +ipvers(Conv *c)
       +.
       +221c
       +                        QUNLOCK(c->p);
       +.
       +215c
       +                        QLOCK(c->p);
       +.
       +207,210c
       +                parseip(c->raddr, argv[1]);
       +.
       +192c
       +        char *p, *pp;
       +        char *e = nil;
       +.
       +182,186c
       +        "null",                        0,        nullahinit,
       +        "hmac_sha1_96",                128,        shaahinit,        /* rfc2404 */
       +//        "aes_xcbc_mac_96",        128,        aesahinit,        /* rfc3566 */
       +        "hmac_md5_96",                128,        md5ahinit,        /* rfc2403 */
       +        nil,                        0,        nil,
       +.
       +170,177c
       +        "null",                        0,        nullespinit,
       +//        "des3_cbc",                192,        des3espinit,        /* rfc2451 */
       +//        "aes_128_cbc",                128,        aescbcespinit,        /* rfc3602 */
       +//        "aes_ctr",                128,        aesctrespinit,        /* rfc3686 */
       +        "des_56_cbc",                64,        desespinit,        /* rfc2405, deprecated */
       +//        "rc4_128",                128,        rc4espinit,        /* gone in rfc4305 */
       +        nil,                        0,        nil,
       +.
       +163,166c
       +static        void nullahinit(Espcb*, char*, uchar *key, int keylen);
       +static        void shaahinit(Espcb*, char*, uchar *key, int keylen);
       +static        void md5ahinit(Espcb*, char*, uchar *key, int keylen);
       +.
       +157,161c
       +static        void nullespinit(Espcb*, char*, uchar *key, int keylen);
       +static        void desespinit(Espcb *ecb, char *name, uchar *k, int n);
       +.
       +150c
       +        void        (*init)(Espcb*, char* name, uchar *key, int keylen);
       +.
       +143d
       +137d
       +131d
       +127c
       +        int        header;                /* user user level header */
       +.
       +96,107d
       +86,87c
       +        /* Ip6hdr; */
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40 */
       +        uchar        proto;                /* next header type */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];
       +        uchar        dst[IPaddrlen];
       +
       +        /* Esphdr; */
       +        uchar        espspi[4];        /* Security parameter index */
       +        uchar        espseq[4];        /* Sequence number */
       +.
       +80c
       +        /* Esphdr; */
       +        uchar        espspi[4];        /* Security parameter index */
       +        uchar        espseq[4];        /* Sequence number */
       +.
       +58,64c
       + * tunnel-mode layout:                IP | ESP | TCP/UDP | user data.
       + * transport-mode layout is:        ESP | IP | TCP/UDP | user data.
       +.
       +54d
       +42,47d
       +32,35c
       +enum
       +{
       +.
       +30a
       +typedef struct Esppriv Esppriv;
       +typedef struct Espcb Espcb;
       +typedef struct Algorithm Algorithm;
       +.
       +26,28d
       +20,23c
       +typedef struct Esphdr Esphdr;
       +.
       +14c
       +#include        "error.h"
       +.
       +10c
       +#include        "lib.h"
       +.
       +6,7c
       + * TODO: update to match rfc4303.
       +.
       +3,4d
       +diff -e ip.orig/ethermedium.c ip/ethermedium.c
       +536c
       +        if((sflag = ipv6anylocal(ifc, ipsrc)) != 0)
       +.
       +429c
       +etherremmulti(Ipifc *ifc, uchar *a, uchar *_)
       +.
       +407c
       +etheraddmulti(Ipifc *ifc, uchar *a, uchar *_)
       +.
       +401c
       +                RUNLOCK(ifc);
       +.
       +392c
       +                        RUNLOCK(ifc);
       +.
       +387c
       +                if(!CANRLOCK(ifc)){
       +.
       +362c
       +                RUNLOCK(ifc);
       +.
       +353c
       +                        RUNLOCK(ifc);
       +.
       +348c
       +                if(!CANRLOCK(ifc)){
       +.
       +269c
       + *  called by ipoput with a single block to write with ifc RLOCK'd
       +.
       +123a
       +
       +.
       +8c
       +#include "netif.h"
       +.
       +6c
       +#include "error.h"
       +.
       +2c
       +#include "lib.h"
       +.
       +diff -e ip.orig/gre.c ip/gre.c
       +968c
       +        gre->ptclsize = 0;
       +.
       +919,948d
       +894,916c
       +        return "unknown control request";
       +.
       +885,892d
       +881,883c
       +                else if(strcmp(f[0], "cooked") == 0){
       +                        gpriv->raw = 0;
       +                        return nil;
       +.
       +696,879c
       +        gpriv = c->p->priv;
       +        if(n == 1){
       +                if(strcmp(f[0], "raw") == 0){
       +                        gpriv->raw = 1;
       +                        return nil;
       +.
       +694c
       +        GREpriv *gpriv;
       +.
       +691,692c
       +char*
       +grectl(Conv *c, char **f, int n)
       +.
       +681,688c
       +        return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
       +.
       +675,679d
       +659,660c
       +        if(qlen(c->rq) > 64*1024)
       +                freeblist(bp);
       +.
       +651d
       +648d
       +645c
       +                freeblist(bp);
       +.
       +643c
       +        len = nhgets(ghp->len) - GRE_IPONLY;
       +.
       +639a
       +        QUNLOCK(gre);
       +
       +.
       +633,636c
       +        if(*p == nil) {
       +                QUNLOCK(gre);
       +                freeblist(bp);
       +.
       +590,629c
       +                if(c->rport == eproto && 
       +                        (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
       +.
       +587d
       +553,585c
       +        /* Look for a conversation structure for this port and address */
       +        c = nil;
       +        for(p = gre->conv; *p; p++) {
       +.
       +547,551c
       +        v4tov6(raddr, ghp->src);
       +        eproto = nhgets(ghp->eproto);
       +        QLOCK(gre);
       +.
       +536,545c
       +        gpriv = gre->priv;
       +        ghp = (GREhdr*)(bp->rp);
       +.
       +534d
       +531,532c
       +        ushort eproto;
       +        uchar raddr[IPaddrlen];
       +.
       +336,529c
       +        int len;
       +        GREhdr *ghp;
       +.
       +334c
       +greiput(Proto *gre, Ipifc* __, Block *bp)
       +.
       +328,329d
       +325,326c
       +        ghp->proto = IP_GREPROTO;
       +        ghp->frag[0] = 0;
       +        ghp->frag[1] = 0;
       +.
       +322c
       +                hnputs(ghp->eproto, c->rport);
       +.
       +318,320c
       +                                findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
       +                        memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
       +.
       +314,315c
       +                        memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
       +                v4tov6(laddr, ghp->src);
       +.
       +311,312c
       +        if(!((GREpriv*)c->p->priv)->raw){
       +                v4tov6(raddr, ghp->dst);
       +.
       +308,309c
       +        ghp = (GREhdr *)(bp->rp);
       +        ghp->vihl = IP_VER4;
       +.
       +295,297d
       +287,289c
       +        Conv *c = x;
       +        GREhdr *ghp;
       +.
       +283a
       +int drop;
       +
       +.
       +281c
       +        c->lport = 0;
       +        c->rport = 0;
       +.
       +247,278c
       +        qclose(c->rq);
       +        qclose(c->wq);
       +        qclose(c->eq);
       +.
       +241c
       +        return "pktifc does not support announce";
       +.
       +239c
       +greannounce(Conv* _, char** __, int ___)
       +.
       +218,235c
       +        USED(c);
       +        return snprint(state, n, "%s\n", "Datagram");
       +.
       +211c
       +        c->rq = qopen(64*1024, Qmsg, 0, c);
       +.
       +199c
       +        QUNLOCK(p);
       +.
       +184c
       +        QLOCK(p);
       +.
       +138,171c
       +static char*
       +.
       +136d
       +71,134d
       +68c
       +        ulong                csumerr;                /* checksum errors */
       +        ulong                lenerr;                        /* short packet */
       +.
       +66c
       +struct GREpriv
       +{
       +        int                raw;                        /* Raw GRE mode */
       +
       +.
       +63c
       +} GREhdr;
       +.
       +54c
       +        uchar        Unused;        
       +.
       +46,47c
       +typedef struct GREhdr
       +{
       +.
       +21,43d
       +13c
       +enum
       +{
       +.
       +9c
       +#include "error.h"
       +.
       +5c
       +#include "lib.h"
       +.
       +diff -e ip.orig/icmp.c ip/icmp.c
       +350c
       +        if(iplen > n || ((uint)iplen % 1)){
       +.
       +339,341c
       +        netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
       +.
       +324c
       +icmpiput(Proto *icmp, Ipifc* __, Block *bp)
       +.
       +6c
       +#include "error.h"
       +.
       +2c
       +#include "lib.h"
       +.
       +diff -e ip.orig/icmp6.c ip/icmp6.c
       +781c
       +                        bp->rp -= sizeof(IPICMP);
       +.
       +770c
       +                        bp->rp += sizeof(IPICMP);
       +.
       +762c
       +                bp->rp -= sizeof(IPICMP);
       +.
       +750c
       +                bp->rp += sizeof(IPICMP);
       +.
       +711c
       +        RUNLOCK(ifc);
       +.
       +707c
       +                        RUNLOCK(ifc);
       +.
       +700c
       +                RUNLOCK(ifc);
       +.
       +698c
       +        RLOCK(ifc);
       +.
       +666c
       +                        sz = sizeof(IPICMP) + 8;
       +.
       +661c
       +                        if(pktsz - sizeof(Ip6hdr) < 8) {
       +.
       +649c
       +                        sz = sizeof(IPICMP) + 8;
       +.
       +641c
       +                        if(pktsz - sizeof(Ip6hdr) < 16) {
       +.
       +575c
       +        if(iplen > n - IP6HDR || ((uint)iplen % 1) != 0) {
       +.
       +568c
       +        if(n < sizeof(IPICMP)) {
       +.
       +546c
       +        memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
       +.
       +537c
       +                netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n",
       +.
       +534c
       +                netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n",
       +.
       +518c
       +        int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
       +.
       +506c
       +        memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
       +.
       +498c
       +                netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n",
       +.
       +495c
       +                netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n",
       +.
       +479c
       +        int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
       +.
       +471c
       +        RUNLOCK(ifc);
       +.
       +457c
       +        memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
       +.
       +445c
       +                netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n",
       +.
       +442c
       +                netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n",
       +.
       +440c
       +        RLOCK(ifc);
       +.
       +425c
       +        int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
       +.
       +397c
       +        nbp = newIPICMP(sizeof(Ndpkt));
       +.
       +375c
       +                nbp->wp -= sizeof(Ndpkt) - sizeof(NdiscC);
       +.
       +354c
       +        nbp = newIPICMP(sizeof(Ndpkt));
       +.
       +260c
       +        if(blocklen(bp) < sizeof(IPICMP)){
       +.
       +257c
       +                bp = padblock(bp, sizeof(Ip6hdr));
       +.
       +122c
       +        QLock        qlock;
       +.
       +109,110d
       +106d
       +101a
       +
       +.
       +99,100c
       +        /* ICMPpkt; */
       +        uchar        type;
       +        uchar        code;
       +        uchar        cksum[2];
       +        uchar        icmpid[2];
       +        uchar        seq[2];
       +
       +.
       +97c
       +struct Ndpkt
       +{
       +        /* NdiscC; */
       +        /* IPICMP; */
       +        /* Ip6hdr; */
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40 */
       +        uchar        proto;                /* next header type */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];
       +        uchar        dst[IPaddrlen];
       +.
       +94d
       +91,92c
       +        /* ICMPpkt; */
       +        uchar        type;
       +        uchar        code;
       +        uchar        cksum[2];
       +        uchar        icmpid[2];
       +        uchar        seq[2];
       +
       +.
       +89c
       +struct NdiscC
       +{
       +        /* IPICMP; */
       +        /* Ip6hdr; */
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40 */
       +        uchar        proto;                /* next header type */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];
       +        uchar        dst[IPaddrlen];
       +.
       +85,86c
       +        /* Ip6hdr; */
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40 */
       +        uchar        proto;                /* next header type */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];
       +        uchar        dst[IPaddrlen];
       +
       +        /* ICMPpkt; */
       +        uchar        type;
       +        uchar        code;
       +        uchar        cksum[2];
       +        uchar        icmpid[2];
       +        uchar        seq[2];
       +.
       +75,82c
       +struct ICMPpkt {
       +        uchar        type;
       +        uchar        code;
       +        uchar        cksum[2];
       +        uchar        icmpid[2];
       +        uchar        seq[2];
       +};
       +.
       +70c
       +typedef struct ICMPpkt ICMPpkt;
       +.
       +9c
       +#include "error.h"
       +.
       +5c
       +#include "lib.h"
       +.
       +diff -e ip.orig/igmp.c ip/igmp.c
       +217c
       +                mp = Mediacopymulti(m);
       +.
       +177c
       +igmpiput(Media *m, Ipifc *, Block *bp)
       +.
       +123c
       +        byte ip[IPaddrlen];
       +.
       +97,99c
       +        bp->wp += sizeof(IGMPpkt);
       +        memset(bp->rp, 0, sizeof(IGMPpkt));
       +        hnputl(p->src, Mediagetaddr(m));
       +.
       +87c
       +igmpsendreport(Media *m, byte *addr)
       +.
       +68c
       +        Lock lk;
       +
       +.
       +60c
       +        Media                *m;
       +.
       +51,52d
       +43,48c
       +        byte        vertype;        /* version and type */
       +        byte        unused;
       +        byte        igmpcksum[2];                /* checksum of igmp portion */
       +        byte        group[IPaddrlen];        /* multicast group */
       +.
       +31,40c
       +        byte        vihl;                /* Version and header length */
       +        byte        tos;                /* Type of service */
       +        byte        len[2];                /* packet length (including headers) */
       +        byte        id[2];                /* Identification */
       +        byte        frag[2];        /* Fragment information */
       +        byte        Unused;        
       +        byte        proto;                /* Protocol */
       +        byte        cksum[2];        /* checksum of ip portion */
       +        byte        src[IPaddrlen];                /* Ip source */
       +        byte        dst[IPaddrlen];                /* Ip destination */
       +.
       +27a
       +typedef char byte;
       +
       +.
       +10c
       +#include "error.h"
       +.
       +6c
       +#include "lib.h"
       +.
       +1,4d
       +diff -e ip.orig/inferno.c ip/inferno.c
       +28a
       +
       +Medium tripmedium =
       +{
       +        "trip",
       +};
       +.
       +25c
       +bootpread(char* _, ulong __, int ___)
       +.
       +23a
       +char*
       +bootp(Ipifc* _)
       +{
       +        return "unimplmented";
       +}
       +
       +.
       +17a
       +Chan*
       +commonfdtochan(int fd, int mode, int a, int b)
       +{
       +        return fdtochan(fd, mode, a, b);
       +}
       +
       +.
       +6c
       +#include        "error.h"
       +#include        "ip.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/ip.c ip/ip.c
       +522,524c
       +        if(bp->base+sizeof(Ipfrag) >= bp->rp){
       +                bp = padblock(bp, sizeof(Ipfrag));
       +                bp->rp += sizeof(Ipfrag);
       +.
       +466,467c
       +        for(i = 0; i < Nstats; i++)
       +                p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
       +.
       +383c
       +                        freeb(bp);
       +.
       +381a
       +                Conv conv;
       +
       +.
       +322d
       +320d
       +301c
       +        RUNLOCK(ifc);
       +.
       +213c
       +                RUNLOCK(ifc);
       +.
       +211d
       +196,199c
       +        medialen = ifc->maxtu - ifc->m->hsize;
       +.
       +189c
       +                RUNLOCK(ifc);
       +.
       +186c
       +        if(!CANRLOCK(ifc))
       +.
       +11a
       +/* MIB II counters */
       +enum
       +{
       +        Forwarding,
       +        DefaultTTL,
       +        InReceives,
       +        InHdrErrors,
       +        InAddrErrors,
       +        ForwDatagrams,
       +        InUnknownProtos,
       +        InDiscards,
       +        InDelivers,
       +        OutRequests,
       +        OutDiscards,
       +        OutNoRoutes,
       +        ReasmTimeout,
       +        ReasmReqds,
       +        ReasmOKs,
       +        ReasmFails,
       +        FragOKs,
       +        FragFails,
       +        FragCreates,
       +
       +        Nstats,
       +};
       +
       +struct Fragment4
       +{
       +        Block*        blist;
       +        Fragment4*        next;
       +        ulong         src;
       +        ulong         dst;
       +        ushort        id;
       +        ulong         age;
       +};
       +
       +struct Fragment6
       +{
       +        Block*        blist;
       +        Fragment6*        next;
       +        uchar         src[IPaddrlen];
       +        uchar         dst[IPaddrlen];
       +        uint        id;
       +        ulong         age;
       +};
       +
       +struct Ipfrag
       +{
       +        ushort        foff;
       +        ushort        flen;
       +};
       +
       +/* an instance of IP */
       +struct IP
       +{
       +        ulong                stats[Nstats];
       +
       +        QLock                fraglock4;
       +        Fragment4*        flisthead4;
       +        Fragment4*        fragfree4;
       +        Ref                id4;
       +
       +        QLock                fraglock6;
       +        Fragment6*        flisthead6;
       +        Fragment6*        fragfree6;
       +        Ref                id6;
       +
       +        int                iprouting;        /* true if we route like a gateway */
       +};
       +
       +.
       +9a
       +typedef struct Fragment4        Fragment4;
       +typedef struct Fragment6        Fragment6;
       +typedef struct Ipfrag                Ipfrag;
       +
       +.
       +6c
       +#include        "error.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/ip.h ip/ip.h
       +732a
       +Chan*                commonfdtochan(int, int, int, int);
       +.
       +727a
       +extern char*        bootp(Ipifc*);
       +.
       +676a
       +extern Medium        tripmedium;
       +.
       +669c
       +#define        NOW        msec()
       +.
       +578c
       +/*        RouteTree; */
       +        Route*        right;
       +        Route*        left;
       +        Route*        mid;
       +        uchar        depth;
       +        uchar        type;
       +        uchar        ifcid;                /* must match ifc->id */
       +        Ipifc        *ifc;
       +        char        tag[4];
       +        int        ref;
       +.
       +516,517d
       +491a
       +        Logilmsg=        1<<8,
       +.
       +488a
       +        Logil=                1<<4,
       +.
       +423c
       +        RWlock        rwlock;
       +
       +        Conv        *conv;                /* link to its conversation structure */
       +.
       +386c
       +        QLock                qlock;
       +
       +.
       +374c
       +        Lock        lk;
       +
       +.
       +312c
       +        RWlock        rwlock;
       +.
       +173c
       +        QLock        qlock;
       +.
       +153a
       +typedef struct Ip4hdr                Ip4hdr;
       +.
       +79,152d
       +41c
       +        Maxincall=        5,
       +.
       +30,35d
       +8,9d
       +2,3d
       +diff -e ip.orig/ipaux.c ip/ipaux.c
       +366c
       +        UNLOCK(ht);
       +.
       +363c
       +                UNLOCK(ht);
       +.
       +352c
       +                        UNLOCK(ht);
       +.
       +340c
       +                        UNLOCK(ht);
       +.
       +328c
       +                        UNLOCK(ht);
       +.
       +316c
       +                        UNLOCK(ht);
       +.
       +309c
       +        LOCK(ht);
       +.
       +290c
       +        UNLOCK(ht);
       +.
       +282c
       +        LOCK(ht);
       +.
       +272c
       +        UNLOCK(ht);
       +.
       +269c
       +        LOCK(ht);
       +.
       +241c
       +        return (ulong)(sa[IPaddrlen-1]<<24 ^ sp<< 16 ^ da[IPaddrlen-1]<<8 ^ dp) % Nhash;
       +.
       +6c
       +#include        "error.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/ipifc.c ip/ipifc.c
       +1575c
       +                        RUNLOCK(nifc);
       +.
       +1565c
       +                                RUNLOCK(nifc);
       +.
       +1562c
       +                        RLOCK(nifc);
       +.
       +1555c
       +                        RUNLOCK(nifc);
       +.
       +1541c
       +                                RUNLOCK(nifc);
       +.
       +1538c
       +                        RLOCK(nifc);
       +.
       +1518d
       +1511d
       +1498c
       +                WUNLOCK(ifc);
       +.
       +1494c
       +                WLOCK(ifc);
       +.
       +1491c
       +                        WUNLOCK(ifc);
       +.
       +1455c
       +                WUNLOCK(ifc);
       +.
       +1451c
       +                WLOCK(ifc);
       +.
       +1448c
       +                        WUNLOCK(ifc);
       +.
       +1301c
       +        QUNLOCK(f->ipifc);
       +.
       +1265,1266c
       +                                if((atypel > atype && atype < atyper) ||
       +                                   (atypel < atype && atype > atyper)){
       +.
       +1232,1234c
       +        QLOCK(f->ipifc);
       +.
       +1154c
       +            (isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop))
       +.
       +1054c
       +        QUNLOCK(f->self);
       +.
       +1040c
       +        QLOCK(f->self);
       +.
       +1021c
       +        QUNLOCK(f->self);
       +.
       +951c
       +        QLOCK(f->self);
       +.
       +888c
       +        QUNLOCK(f->self);
       +.
       +839c
       +        QLOCK(f->self);
       +.
       +689c
       +        WUNLOCK(ifc);
       +.
       +683c
       +        WLOCK(ifc);
       +.
       +680c
       +                WUNLOCK(ifc);
       +.
       +619c
       +        WUNLOCK(ifc);
       +.
       +604c
       +        WLOCK(ifc);
       +.
       +539c
       + *  always called with ifc WLOCK'd
       +.
       +531c
       +        WUNLOCK(ifc);
       +.
       +417c
       +        WLOCK(ifc);
       +.
       +319c
       +        c->sq = qopen(2*QMAX, 0, 0, 0);
       +.
       +306c
       +        RUNLOCK(ifc);
       +.
       +299c
       +                RUNLOCK(ifc);
       +.
       +294c
       +        if(!CANRLOCK(ifc)){
       +.
       +266c
       +        RUNLOCK(ifc);
       +.
       +259c
       +        RLOCK(ifc);
       +.
       +244c
       +        RUNLOCK(ifc);
       +.
       +238c
       +        RLOCK(ifc);
       +.
       +212c
       +        WUNLOCK(ifc);
       +.
       +181c
       +        WLOCK(ifc);
       +.
       +178c
       +                WUNLOCK(ifc);
       +.
       +162c
       +        WUNLOCK(ifc);
       +.
       +124c
       +                WUNLOCK(ifc);
       +.
       +120c
       +                WUNLOCK(ifc);
       +.
       +118c
       +        WLOCK(ifc);
       +.
       +58c
       +#define hashipa(a) ( (ulong)(((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1])%NHASH )
       +.
       +39c
       +        QLock        qlock;
       +.
       +18c
       +        QMAX                = 64*1024-1,
       +.
       +6c
       +#include "error.h"
       +.
       +2c
       +#include "lib.h"
       +.
       +diff -e ip.orig/ipmux.c ip/ipmux.c
       +811c
       +        RUNLOCK(f);
       +.
       +809c
       +        RLOCK(f);
       +.
       +742c
       +        RUNLOCK(f);
       +.
       +680c
       +        RLOCK(f);
       +.
       +631,633c
       +        WLOCK(f);
       +        i = (Ipmux *)c->p->priv;
       +        ipmuxremove(&i, r->chain);
       +        WUNLOCK(f);
       +.
       +617a
       +        Ipmux *i;
       +.
       +610c
       +ipmuxannounce(Conv* _, char** __, int ___)
       +.
       +583c
       +        WUNLOCK(f);
       +.
       +581c
       +        WLOCK(f);
       +.
       +9c
       +#include "error.h"
       +.
       +5c
       +#include "lib.h"
       +.
       +diff -e ip.orig/iproute.c ip/iproute.c
       +469c
       +                                while((p = f->queue) != nil) {
       +.
       +425c
       +                                while((p = f->queue) != nil) {
       +.
       +359c
       +                while((p = f->queue) != nil) {
       +.
       +313c
       +                while((p = f->queue) != nil) {
       +.
       +213,214c
       +        dl = 0; if((l = p->left) != nil) dl = l->depth;
       +        dr = 0; if((r = p->right) != nil) dr = r->depth;
       +.
       +6c
       +#include        "error.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/ipv6.c ip/ipv6.c
       +506,508c
       +        if(bp->base+sizeof(Ipfrag) >= bp->rp){
       +                bp = padblock(bp, sizeof(Ipfrag));
       +                bp->rp += sizeof(Ipfrag);
       +.
       +218c
       +        RUNLOCK(ifc);
       +.
       +122c
       +                RUNLOCK(ifc);
       +.
       +110c
       +                RUNLOCK(ifc);
       +.
       +106c
       +        if(!CANRLOCK(ifc))
       +.
       +29a
       +/* MIB II counters */
       +enum
       +{
       +        Forwarding,
       +        DefaultTTL,
       +        InReceives,
       +        InHdrErrors,
       +        InAddrErrors,
       +        ForwDatagrams,
       +        InUnknownProtos,
       +        InDiscards,
       +        InDelivers,
       +        OutRequests,
       +        OutDiscards,
       +        OutNoRoutes,
       +        ReasmTimeout,
       +        ReasmReqds,
       +        ReasmOKs,
       +        ReasmFails,
       +        FragOKs,
       +        FragFails,
       +        FragCreates,
       +
       +        Nstats,
       +};
       +
       +static char *statnames[] =
       +{
       +[Forwarding]        "Forwarding",
       +[DefaultTTL]        "DefaultTTL",
       +[InReceives]        "InReceives",
       +[InHdrErrors]        "InHdrErrors",
       +[InAddrErrors]        "InAddrErrors",
       +[ForwDatagrams]        "ForwDatagrams",
       +[InUnknownProtos]        "InUnknownProtos",
       +[InDiscards]        "InDiscards",
       +[InDelivers]        "InDelivers",
       +[OutRequests]        "OutRequests",
       +[OutDiscards]        "OutDiscards",
       +[OutNoRoutes]        "OutNoRoutes",
       +[ReasmTimeout]        "ReasmTimeout",
       +[ReasmReqds]        "ReasmReqds",
       +[ReasmOKs]        "ReasmOKs",
       +[ReasmFails]        "ReasmFails",
       +[FragOKs]        "FragOKs",
       +[FragFails]        "FragFails",
       +[FragCreates]        "FragCreates",
       +};
       +
       +struct Fragment4
       +{
       +        Block*        blist;
       +        Fragment4*        next;
       +        ulong         src;
       +        ulong         dst;
       +        ushort        id;
       +        ulong         age;
       +};
       +
       +struct Fragment6
       +{
       +        Block*        blist;
       +        Fragment6*        next;
       +        uchar         src[IPaddrlen];
       +        uchar         dst[IPaddrlen];
       +        uint        id;
       +        ulong         age;
       +};
       +
       +struct Ipfrag
       +{
       +        ushort        foff;
       +        ushort        flen;
       +};
       +
       +/* an instance of IP */
       +struct IP
       +{
       +        ulong                stats[Nstats];
       +
       +        QLock                fraglock4;
       +        Fragment4*        flisthead4;
       +        Fragment4*        fragfree4;
       +        Ref                id4;
       +
       +        QLock                fraglock6;
       +        Fragment6*        flisthead6;
       +        Fragment6*        fragfree6;
       +        Ref                id6;
       +
       +        int                iprouting;        /* true if we route like a gateway */
       +};
       +
       +.
       +22a
       +typedef struct        Fragment4        Fragment4;
       +typedef struct        Fragment6        Fragment6;
       +typedef struct        Ipfrag        Ipfrag;
       +
       +.
       +6c
       +#include        "error.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/ipv6.h ip/ipv6.h
       +145c
       +struct        Routinghdr {
       +.
       +134c
       +struct        Opthdr {
       +.
       +130,131c
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40 */
       +        uchar        proto;                /* next header type */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];
       +        uchar        dst[IPaddrlen];
       +.
       +120,128d
       +81c
       +        IP6HDR                = 20,                /* sizeof(Ip6hdr) */
       +.
       +26a
       +#undef ESP
       +
       +.
       +diff -e ip.orig/loopbackmedium.c ip/loopbackmedium.c
       +99c
       +                RUNLOCK(ifc);
       +.
       +92c
       +                        RUNLOCK(ifc);
       +.
       +87c
       +                if(!CANRLOCK(ifc)){
       +.
       +58c
       +loopbackbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
       +.
       +26c
       +loopbackbind(Ipifc *ifc, int _, char** __)
       +.
       +6c
       +#include "error.h"
       +.
       +2c
       +#include "lib.h"
       +.
       +diff -e ip.orig/netdevmedium.c ip/netdevmedium.c
       +144c
       +                RUNLOCK(ifc);
       +.
       +136c
       +                        RUNLOCK(ifc);
       +.
       +131c
       +                if(!CANRLOCK(ifc)){
       +.
       +85c
       +netdevbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
       +.
       +6c
       +#include "error.h"
       +.
       +2c
       +#include "lib.h"
       +.
       +diff -e ip.orig/netlog.c ip/netlog.c
       +260c
       +        wakeup(&f->alog->rendez);
       +.
       +258c
       +        UNLOCK(f->alog);
       +.
       +242c
       +        LOCK(f->alog);
       +.
       +228c
       +        char buf[128], *t, *fp;
       +.
       +185c
       +        set = 1;
       +.
       +160c
       +        QUNLOCK(f->alog);
       +.
       +157c
       +                sleep(&f->alog->rendez, netlogready, f);
       +.
       +155c
       +                        UNLOCK(f->alog);
       +.
       +146c
       +                        UNLOCK(f->alog);
       +.
       +134c
       +                LOCK(f->alog);
       +.
       +129c
       +                QUNLOCK(f->alog);
       +.
       +127c
       +        QLOCK(f->alog);
       +.
       +122c
       +netlogread(Fs *f, void *a, ulong _, long n)
       +.
       +109c
       +        UNLOCK(f->alog);
       +.
       +101c
       +                UNLOCK(f->alog);
       +.
       +99c
       +        LOCK(f->alog);
       +.
       +92c
       +        UNLOCK(f->alog);
       +.
       +82c
       +                UNLOCK(f->alog);
       +.
       +80c
       +        LOCK(f->alog);
       +.
       +28,29c
       +        QLock        qlock;
       +        Rendez        rendez;
       +.
       +17c
       +        Lock        lk;
       +.
       +6,7c
       +#include        "error.h"
       +#include        "ip/ip.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/nullmedium.c ip/nullmedium.c
       +22c
       +nullbwrite(Ipifc* _, Block* __, int ___, uchar* ____)
       +.
       +17c
       +nullunbind(Ipifc* _)
       +.
       +11c
       +nullbind(Ipifc* _, int __, char** ___)
       +.
       +6c
       +#include "error.h"
       +.
       +2c
       +#include "lib.h"
       +.
       +diff -e ip.orig/pktmedium.c ip/pktmedium.c
       +51c
       +pktbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
       +.
       +43c
       +pktunbind(Ipifc* _)
       +.
       +36d
       +34c
       +pktbind(Ipifc* _, int argc, char **argv)
       +.
       +6c
       +#include "error.h"
       +.
       +2c
       +#include "lib.h"
       +.
       +diff -e ip.orig/ptclbsum.c ip/ptclbsum.c
       +68c
       +        while((hisum = losum>>16))
       +.
       +6c
       +#include        "error.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/rudp.c ip/rudp.c
       +693c
       +        rudp->nc = 16;
       +.
       +11c
       +#include        "error.h"
       +.
       +7c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/tcp.c ip/tcp.c
       +3171c
       +                QUNLOCK(c);
       +.
       +3154c
       +                if(!CANQLOCK(c))
       +.
       +3127c
       +                p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
       +.
       +3101c
       +/* called with c QLOCKed */
       +.
       +3085c
       +        QUNLOCK(tcp);
       +.
       +3080c
       +                        QUNLOCK(s);
       +.
       +3073,3074c
       +                        QLOCK(s);
       +                        QUNLOCK(tcp);
       +.
       +3064c
       +        QLOCK(tcp);
       +.
       +2871,2873d
       +2869c
       +        if(seg->mss != 0 && seg->mss < tcb->mss)
       +.
       +2859d
       +2842c
       +        QUNLOCK(s);
       +.
       +2830c
       +                netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
       +.
       +2817c
       +        QLOCK(s);
       +.
       +2814c
       +                QUNLOCK(s);
       +.
       +2768c
       +tcpsetchecksum(Conv *s, char **f, int _)
       +.
       +2737c
       +        QUNLOCK(s);
       +.
       +2728c
       +        QLOCK(s);
       +.
       +2725c
       +                QUNLOCK(s);
       +.
       +2641c
       +                        QLOCK(s);
       +.
       +2638,2639c
       +                if((uint)(msgs%4) == 1){
       +                        QUNLOCK(s);
       +.
       +2563c
       +                        netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
       +.
       +2421c
       +        QUNLOCK(s);
       +.
       +2417c
       +        QUNLOCK(s);
       +.
       +2351c
       +                                QUNLOCK(s);
       +.
       +2189c
       +                QUNLOCK(s);
       +.
       +2172,2174d
       +2144c
       +                QUNLOCK(s);
       +.
       +2095,2096c
       +        QLOCK(s);
       +        QUNLOCK(tcp);
       +.
       +2092c
       +                QUNLOCK(s);
       +.
       +2072c
       +                        QUNLOCK(tcp);
       +.
       +2064c
       +                        QUNLOCK(tcp);
       +.
       +2053c
       +                QUNLOCK(tcp);
       +.
       +2050,2051c
       +                netlog(f, Logtcp, "iphtlook failed\n");
       +.
       +2045c
       +        QLOCK(tcp);
       +.
       +1942c
       +tcpiput(Proto *tcp, Ipifc* _, Block *bp)
       +.
       +1862c
       +                netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
       +.
       +1817c
       +                netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
       +.
       +1685,1686d
       +1683c
       +        if(lp->mss != 0 && lp->mss < tcb->mss)
       +.
       +1626c
       +                netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d\n",
       +.
       +1562c
       +        QUNLOCK(tcp);
       +.
       +1529c
       +        if(!CANQLOCK(tcp))
       +.
       +1421,1422d
       +1334c
       + *  called with s QLOCKed
       +.
       +1245,1246d
       +1231,1232d
       +1210,1211d
       +1208c
       +                        if(optlen == MSS_LENGTH)
       +.
       +995d
       +873c
       + *  called with s QLOCKed
       +.
       +861,862d
       +805d
       +609c
       +        QUNLOCK(s);
       +.
       +603c
       +        QLOCK(s);
       +.
       +600c
       +                QUNLOCK(s);
       +.
       +583,584d
       +569c
       +        QUNLOCK(s);
       +.
       +551c
       +        QLOCK(s);
       +.
       +548c
       +                QUNLOCK(s);
       +.
       +352c
       +        ulong        stats[Nstats];
       +.
       +317d
       +293d
       +231c
       +        ulong        window;                        /* Receive window */
       +.
       +229c
       +        ushort        mss;                        /* Mean segment size */
       +.
       +193c
       + *  the QLOCK in the Conv locks this structure
       +.
       +49,50c
       +        DEF_MSS                = 1460,                /* Default mean segment */
       +        DEF_MSS6        = 1280,                /* Default mean segment (min) for v6 */
       +.
       +44c
       +        MSS_LENGTH        = 4,                /* Mean segment size */
       +.
       +6c
       +#include        "error.h"
       +.
       +2c
       +#include        "lib.h"
       +.
       +diff -e ip.orig/udp.c ip/udp.c
       +590,591c
       +        return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
       +.
       +580c
       +        QUNLOCK(udp);
       +.
       +575c
       +                        QUNLOCK(s);
       +.
       +571,572c
       +                        QLOCK(s);
       +                        QUNLOCK(udp);
       +.
       +562c
       +        QLOCK(udp);
       +.
       +510c
       +        QUNLOCK(c);
       +.
       +502c
       +                QUNLOCK(c);
       +.
       +475c
       +                QUNLOCK(c);
       +.
       +456,457c
       +        QLOCK(c);
       +        QUNLOCK(udp);
       +.
       +447c
       +                                QUNLOCK(udp);
       +.
       +410c
       +                QUNLOCK(udp);
       +.
       +404c
       +        QLOCK(udp);
       +.
       +197c
       +        netlog(c->p->f, Logudp, "udp: kick\n");
       +.
       +103c
       +        QLock        qlock;
       +.
       +78c
       +        ulong        udpOutDatagrams;
       +.
       +75c
       +        ulong        udpInDatagrams;
       +.
       +6c
       +#include        "error.h"
       +.
       +2c
       +#include        "lib.h"
       +.
 (DIR) diff --git a/src/9vx/a/ip/arp.c b/src/9vx/a/ip/arp.c
       @@ -0,0 +1,684 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +#include "ipv6.h"
       +
       +/*
       + *  address resolution tables
       + *
       + *  NHASH is the number of hash buckets (Arp.hash) and NCACHE the
       + *  number of cache slots (Arp.cache).  AOK and AWAIT are Arpent
       + *  states; state 0 marks an unused slot (arpread skips such
       + *  entries and cleanarpent resets state to 0).
       + */
       +enum
       +{
       +        NHASH                = (1<<6),
       +        NCACHE                = 256,
       +
       +        AOK                = 1,
       +        AWAIT                = 2,
       +};
       +
       +/* printable names for Arpent states; arpread indexes this by a->state */
       +char *arpstate[] =
       +{
       +        "UNUSED",
       +        "OK",
       +        "WAIT",
       +};
       +
       +/*
       + *  one per Fs
       + *
       + *  qlock is taken (QLOCK/QUNLOCK) around accesses to the tables
       + *  below by the functions in this file.
       + */
       +struct Arp
       +{
       +        QLock        qlock;
       +        Fs        *f;                /* owning Fs, set by arpinit */
       +        Arpent        *hash[NHASH];        /* buckets indexed by haship(ip), chained via Arpent.hash */
       +        Arpent        cache[NCACHE];        /* entry storage; newarp6 recycles the slot with the smallest utime */
       +        Arpent        *rxmt;                /* head of the re-transmit chain, linked via Arpent.nextrxt */
       +        Proc        *rxmitp;        /* neib sol re-transmit proc */
       +        Rendez        rxmtq;                /* rxmitproc sleeps here; woken when rxmt or dropf gets work */
       +        Block         *dropf, *dropl;        /* first/last of the blocks queued for icmphostunr in rxmitsols */
       +};
       +
       +/* error string raised by arpwrite for malformed requests */
       +char *Ebadarp = "bad arp";
       +
       +/* hash bucket for an address: its last byte modulo NHASH */
       +#define haship(s) ((ulong)((s)[IPaddrlen-1])%NHASH)
       +
       +/* interval added to NOW to compute an entry's next re-transmit time (a->rtime) */
       +int         ReTransTimer = RETRANS_TIMER;
       +
       +/* kproc body started by arpinit; defined at the end of this file */
       +static void         rxmitproc(void *v);
       +
       +/*
       + *  allocate the Arp state for f, clear its re-transmit and drop
       + *  lists, and start the rxmitproc kproc that services them.
       + */
       +void
       +arpinit(Fs *f)
       +{
       +        Arp *arp;
       +
       +        arp = smalloc(sizeof(Arp));
       +        f->arp = arp;
       +        arp->f = f;
       +        arp->rxmt = nil;
       +        arp->dropl = nil;
       +        arp->dropf = nil;
       +        kproc("rxmitproc", rxmitproc, arp);
       +}
       +
       +/*
       + *  create a new arp entry for an ip address.
       + *
       + *  The cache slot with the smallest utime is recycled: packets still
       + *  held on it are freed (v4) or queued on arp->dropf/dropl so that
       + *  rxmitproc can send icmp unreachables for them (v6), and the slot
       + *  is moved from its old hash chain to the chain for ip.  When addrxt
       + *  is non-zero and ip is not multicast, the entry is also appended to
       + *  the re-transmit chain.  The caller sets a->state.
       + *
       + *  Both callers in this file (arpget, arpenter) hold the arp qlock.
       + */
       +static Arpent*
       +newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
       +{
       +        uint t;
       +        Block *next, *xp;
       +        Arpent *a, *e, *f, **l;
       +        Medium *m = ifc->m;
       +        int empty;
       +
       +        /* find oldest entry */
       +        e = &arp->cache[NCACHE];
       +        a = arp->cache;
       +        t = a->utime;
       +        for(f = a; f < e; f++){
       +                if(f->utime < t){
       +                        t = f->utime;
       +                        a = f;
       +                }
       +        }
       +
       +        /* dump waiting packets */
       +        xp = a->hold;
       +        a->hold = nil;
       +
       +        if(isv4(a->ip)){
       +                while(xp){
       +                        next = xp->list;
       +                        freeblist(xp);
       +                        xp = next;
       +                }
       +        }
       +        else { /* queue icmp unreachable for rxmitproc later on, w/o arp lock */
       +                if(xp){
       +                        if(arp->dropl == nil)
       +                                arp->dropf = xp;
       +                        else
       +                                arp->dropl->list = xp;
       +
       +                        /* advance to the last queued block so dropl stays the tail */
       +                        for(next = xp->list; next; next = next->list)
       +                                xp = next;
       +                        arp->dropl = xp;
       +                        wakeup(&arp->rxmtq);
       +                }
       +        }
       +
       +        /* take out of current chain */
       +        l = &arp->hash[haship(a->ip)];
       +        for(f = *l; f; f = f->hash){
       +                if(f == a){
       +                        *l = a->hash;
       +                        break;
       +                }
       +                l = &f->hash;
       +        }
       +
       +        /* insert into new chain */
       +        l = &arp->hash[haship(ip)];
       +        a->hash = *l;
       +        *l = a;
       +
       +        memmove(a->ip, ip, sizeof(a->ip));
       +        a->utime = NOW;
       +        a->ctime = 0;
       +        a->type = m;
       +
       +        a->rtime = NOW + ReTransTimer;
       +        a->rxtsrem = MAX_MULTICAST_SOLICIT;
       +        a->ifc = ifc;
       +        a->ifcid = ifc->ifcid;
       +
       +        /*
       +         * Always unlink the recycled entry from the re-transmit chain
       +         * before clearing nextrxt.  Doing so only when the entry is about
       +         * to be re-appended would, for addrxt == 0 or a multicast ip,
       +         * leave the entry on the chain and cut off everything after it.
       +         */
       +        l = &arp->rxmt;
       +        empty = (*l==nil);
       +        for(f = *l; f; f = f->nextrxt){
       +                if(f == a){
       +                        *l = a->nextrxt;
       +                        break;
       +                }
       +                l = &f->nextrxt;
       +        }
       +        a->nextrxt = nil;
       +
       +        /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
       +        if(!ipismulticast(a->ip) && addrxt){
       +                for(l = &arp->rxmt; *l; l = &(*l)->nextrxt)
       +                        ;
       +                *l = a;
       +                /* the chain was empty on entry: wake the sleeper on rxmtq */
       +                if(empty)
       +                        wakeup(&arp->rxmtq);
       +        }
       +
       +        return a;
       +}
       +
       +/*
       + * called with arp qlocked
       + *
       + * Return entry a to the unused state: unlink it from its hash
       + * bucket and from the re-transmit chain, then reset its
       + * bookkeeping fields.  a->hold is only cleared, not freed.
       + */
       +
       +void
       +cleanarpent(Arp *arp, Arpent *a)
       +{
       +        Arpent **pp;
       +
       +        /* unlink from the hash bucket selected by the entry's address */
       +        for(pp = &arp->hash[haship(a->ip)]; *pp != nil; pp = &(*pp)->hash){
       +                if(*pp == a){
       +                        *pp = a->hash;
       +                        break;
       +                }
       +        }
       +
       +        /* unlink from the re-transmit chain */
       +        for(pp = &arp->rxmt; *pp != nil; pp = &(*pp)->nextrxt){
       +                if(*pp == a){
       +                        *pp = a->nextrxt;
       +                        break;
       +                }
       +        }
       +
       +        /* state 0 marks the slot unused */
       +        a->state = 0;
       +        a->type = 0;
       +        a->ctime = 0;
       +        a->utime = 0;
       +
       +        a->hash = nil;
       +        a->nextrxt = nil;
       +        a->hold = nil;
       +        a->last = nil;
       +        a->ifc = nil;
       +}
       +
       +/*
       + *  Look up ip (on ifc's medium) in the arp tables.
       + *
       + *  If the entry is resolved, copy its media address to mac and
       + *  return nil with the arp qlock released.  Otherwise return the
       + *  Arpent tracking the resolution (creating one in state AWAIT if
       + *  none exists), after appending bp, when non-nil, to the entry's
       + *  list of held packets.  In that case the arp qlock is still held
       + *  on return.
       + */
       +Arpent*
       +arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac)
       +{
       +        Arpent *ent;
       +        Medium *med;
       +        uchar ip6[IPaddrlen];
       +
       +        med = ifc->m;
       +
       +        /* the tables are keyed by v6-format addresses */
       +        if(version == V4){
       +                v4tov6(ip6, ip);
       +                ip = ip6;
       +        }
       +
       +        QLOCK(arp);
       +        for(ent = arp->hash[haship(ip)]; ent != nil; ent = ent->hash){
       +                if(memcmp(ip, ent->ip, sizeof(ent->ip)) == 0 && med == ent->type)
       +                        break;
       +        }
       +
       +        if(ent == nil){
       +                ent = newarp6(arp, ip, ifc, (version != V4));
       +                ent->state = AWAIT;
       +        }
       +        ent->utime = NOW;
       +
       +        if(ent->state != AWAIT){
       +                memmove(mac, ent->mac, ent->type->maclen);
       +
       +                /* remove old entries */
       +                if(NOW - ent->ctime > 15*60*1000)
       +                        cleanarpent(arp, ent);
       +
       +                QUNLOCK(arp);
       +                return nil;
       +        }
       +
       +        /* still resolving: park the packet at the tail of the hold list */
       +        if(bp != nil){
       +                if(ent->hold != nil)
       +                        ent->last->list = bp;
       +                else
       +                        ent->hold = bp;
       +                ent->last = bp;
       +                bp->list = nil;
       +        }
       +        return ent;                /* return with arp qlocked */
       +}
       +
       +/*
       + * called with arp locked
       + *
       + * Drops the arp qlock, e.g. the one arpget leaves held when it
       + * returns an entry.  ae is not used.
       + */
       +void
       +arprelease(Arp *arp, Arpent* ae)
       +{
       +        QUNLOCK(arp);
       +}
       +
       +/*
       + * Record mac as the media address of entry a, mark it AOK and
       + * hand back the list of blocks that were waiting to be sent to
       + * that address.
       + *
       + * called with arp locked; the lock is released before returning
       + */
       +Block*
       +arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
       +{
       +        Block *held;
       +        Arpent **pp;
       +
       +        /* a non-v4 entry may sit on the re-transmit chain; unlink it */
       +        if(!isv4(a->ip)){
       +                for(pp = &arp->rxmt; *pp != nil; pp = &(*pp)->nextrxt){
       +                        if(*pp == a){
       +                                *pp = a->nextrxt;
       +                                break;
       +                        }
       +                }
       +        }
       +
       +        a->state = AOK;
       +        a->type = type;
       +        memmove(a->mac, mac, type->maclen);
       +        a->utime = NOW;
       +
       +        held = a->hold;
       +        a->hold = nil;
       +        QUNLOCK(arp);
       +
       +        return held;
       +}
       +
       +/*
       + * Enter the ip -> mac mapping into fs's arp tables.
       + *
       + * Only 6-byte media addresses (n == 6) are accepted; anything else
       + * is silently ignored, as is an address with no route.  The route
       + * found for ip selects the interface and medium.  If an entry for
       + * ip on that medium already exists (state AWAIT or AOK) it is
       + * marked AOK, updated, and every block held on it is written out
       + * through the medium's bwrite after the arp qlock is dropped.
       + * Otherwise a new AOK entry is created, but only when refresh is 0.
       + */
       +void
       +arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
       +{
       +        Arp *arp;
       +        Route *r;
       +        Arpent *a, *f, **l;
       +        Ipifc *ifc;
       +        Medium *type;
       +        Block *bp, *next;
       +        uchar v6ip[IPaddrlen];
       +
       +        arp = fs->arp;
       +
       +        if(n != 6){
       +//                print("arp: len = %d\n", n);
       +                return;
       +        }
       +
       +        /* find the route; from here on ip is in v6 format */
       +        switch(version){
       +        case V4:
       +                r = v4lookup(fs, ip, nil);
       +                v4tov6(v6ip, ip);
       +                ip = v6ip;
       +                break;
       +        case V6:
       +                r = v6lookup(fs, ip, nil);
       +                break;
       +        default:
       +                panic("arpenter: version %d", version);
       +                return;        /* to suppress warnings */
       +        }
       +
       +        if(r == nil){
       +//                print("arp: no route for entry\n");
       +                return;
       +        }
       +
       +        ifc = r->ifc;
       +        type = ifc->m;
       +
       +        QLOCK(arp);
       +        for(a = arp->hash[haship(ip)]; a; a = a->hash){
       +                /* skip entries for other media and unused slots */
       +                if(a->type != type || (a->state != AWAIT && a->state != AOK))
       +                        continue;
       +
       +                if(ipcmp(a->ip, ip) == 0){
       +                        a->state = AOK;
       +                        memmove(a->mac, mac, type->maclen);
       +
       +                        if(version == V6){
       +                                /* take out of re-transmit chain */
       +                                l = &arp->rxmt;
       +                                for(f = *l; f; f = f->nextrxt){
       +                                        if(f == a){
       +                                                *l = a->nextrxt;
       +                                                break;
       +                                        }
       +                                        l = &f->nextrxt;
       +                                }
       +                        }
       +
       +                        a->ifc = ifc;
       +                        a->ifcid = ifc->ifcid;
       +                        /* detach the held packets so they can be sent without the arp lock */
       +                        bp = a->hold;
       +                        a->hold = nil;
       +                        /* bwrite below is handed the v4 part of the address for V4 */
       +                        if(version == V4)
       +                                ip += IPv4off;
       +                        a->utime = NOW;
       +                        a->ctime = a->utime;
       +                        QUNLOCK(arp);
       +
       +                        /*
       +                         * NOTE(review): ifc was already dereferenced above
       +                         * (type = ifc->m), so the ifc != nil test below cannot
       +                         * fail here.  Also, if bwrite raises an error the blocks
       +                         * remaining on the list appear to be left unfreed -
       +                         * confirm.
       +                         */
       +                        while(bp){
       +                                next = bp->list;
       +                                if(ifc != nil){
       +                                        if(waserror()){
       +                                                RUNLOCK(ifc);
       +                                                nexterror();
       +                                        }
       +                                        RLOCK(ifc);
       +                                        if(ifc->m != nil)
       +                                                ifc->m->bwrite(ifc, bp, version, ip);
       +                                        else
       +                                                freeb(bp);
       +                                        RUNLOCK(ifc);
       +                                        poperror();
       +                                } else
       +                                        freeb(bp);
       +                                bp = next;
       +                        }
       +                        return;
       +                }
       +        }
       +
       +        /* no existing entry: create one unless this is only a refresh */
       +        if(refresh == 0){
       +                a = newarp6(arp, ip, ifc, 0);
       +                a->state = AOK;
       +                a->type = type;
       +                a->ctime = NOW;
       +                memmove(a->mac, mac, type->maclen);
       +        }
       +
       +        QUNLOCK(arp);
       +}
       +
       +/*
       + * Handle a control message written to the arp file.
       + *
       + * s/len is the request text; at most sizeof(buf)-1 bytes are used
       + * and one trailing newline is stripped.  Recognised requests:
       + *
       + *        flush                        empty the whole table
       + *        add ip mac                add an entry on the medium routing to ip
       + *        add medium ip mac        add an entry on the named medium
       + *        del ip                        remove the entry for ip
       + *
       + * Returns len (after clamping) on success; malformed requests
       + * raise an error through error().
       + */
       +int
       +arpwrite(Fs *fs, char *s, int len)
       +{
       +        int n;
       +        Route *r;
       +        Arp *arp;
       +        Block *bp;
       +        Arpent *a, *fl, **l;
       +        Medium *m;
       +        char *f[4], buf[256];
       +        uchar ip[IPaddrlen], mac[MAClen];
       +
       +        arp = fs->arp;
       +
       +        if(len <= 0)
       +                error(Ebadarp);
       +        if(len >= sizeof(buf))
       +                len = sizeof(buf)-1;
       +        strncpy(buf, s, len);
       +        buf[len] = 0;
       +        if(len > 0 && buf[len-1] == '\n')
       +                buf[len-1] = 0;
       +
       +        n = getfields(buf, f, 4, 1, " ");
       +        /* an empty or all-blank request yields no fields; f[0] would be garbage */
       +        if(n < 1)
       +                error(Ebadarp);
       +
       +        if(strcmp(f[0], "flush") == 0){
       +                QLOCK(arp);
       +                for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
       +                        memset(a->ip, 0, sizeof(a->ip));
       +                        memset(a->mac, 0, sizeof(a->mac));
       +                        a->hash = nil;
       +                        a->state = 0;
       +                        a->utime = 0;
       +                        while(a->hold != nil){
       +                                bp = a->hold->list;
       +                                freeblist(a->hold);
       +                                a->hold = bp;
       +                        }
       +                }
       +                memset(arp->hash, 0, sizeof(arp->hash));
       +                /* clear all pkts on these lists (rxmt, dropf/l) */
       +                arp->rxmt = nil;
       +                /* blocks queued for rxmitsols are owned by arp: free, don't just forget */
       +                while(arp->dropf != nil){
       +                        bp = arp->dropf->list;
       +                        freeblist(arp->dropf);
       +                        arp->dropf = bp;
       +                }
       +                arp->dropl = nil;
       +                QUNLOCK(arp);
       +        } else if(strcmp(f[0], "add") == 0){
       +                switch(n){
       +                default:
       +                        error(Ebadarg);
       +                case 3:
       +                        /* add ip mac: the medium comes from the route to ip */
       +                        if (parseip(ip, f[1]) == -1)
       +                                error(Ebadip);
       +                        if(isv4(ip))
       +                                r = v4lookup(fs, ip+IPv4off, nil);
       +                        else
       +                                r = v6lookup(fs, ip, nil);
       +                        if(r == nil)
       +                                error("Destination unreachable");
       +                        m = r->ifc->m;
       +                        n = parsemac(mac, f[2], m->maclen);
       +                        break;
       +                case 4:
       +                        /* add medium ip mac: the medium is named explicitly */
       +                        m = ipfindmedium(f[1]);
       +                        if(m == nil)
       +                                error(Ebadarp);
       +                        if (parseip(ip, f[2]) == -1)
       +                                error(Ebadip);
       +                        n = parsemac(mac, f[3], m->maclen);
       +                        break;
       +                }
       +
       +                if(m->ares == nil)
       +                        error(Ebadarp);
       +
       +                m->ares(fs, V6, ip, mac, n, 0);
       +        } else if(strcmp(f[0], "del") == 0){
       +                if(n != 2)
       +                        error(Ebadarg);
       +
       +                if (parseip(ip, f[1]) == -1)
       +                        error(Ebadip);
       +                QLOCK(arp);
       +
       +                /* unlink the first entry for ip from its hash chain */
       +                l = &arp->hash[haship(ip)];
       +                for(a = *l; a; a = a->hash){
       +                        if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
       +                                *l = a->hash;
       +                                break;
       +                        }
       +                        l = &a->hash;
       +                }
       +
       +                if(a){
       +                        /* take out of re-transmit chain */
       +                        l = &arp->rxmt;
       +                        for(fl = *l; fl; fl = fl->nextrxt){
       +                                if(fl == a){
       +                                        *l = a->nextrxt;
       +                                        break;
       +                                }
       +                                l = &fl->nextrxt;
       +                        }
       +
       +                        /* free packets still waiting on this entry, as flush does */
       +                        while(a->hold != nil){
       +                                bp = a->hold->list;
       +                                freeblist(a->hold);
       +                                a->hold = bp;
       +                        }
       +
       +                        a->nextrxt = nil;
       +                        a->hash = nil;
       +                        a->last = nil;
       +                        a->ifc = nil;
       +                        memset(a->ip, 0, sizeof(a->ip));
       +                        memset(a->mac, 0, sizeof(a->mac));
       +                }
       +                QUNLOCK(arp);
       +        } else
       +                error(Ebadarp);
       +
       +        return len;
       +}
       +
       +/*
       + * arpread emits fixed-width lines.  The field widths in aformat
       + * (6, 8, 40 and 32) plus three separating blanks and the newline
       + * add up to Alinelen.
       + */
       +enum
       +{
       +        Alinelen=        90,
       +};
       +
       +/* columns: medium name, state name, ip address, mac address in hex */
       +char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
       +
       +/*
       + * format the n bytes at mac as two hex digits each into p
       + */
       +static void
       +convmac(char *p, uchar *mac, int n)
       +{
       +        int i;
       +
       +        for(i = 0; i < n; i++)
       +                p += sprint(p, "%2.2ux", mac[i]);
       +}
       +
       +/*
       + * Format the in-use arp entries into p, one Alinelen-byte line per
       + * entry.
       + *
       + * offset must be a multiple of Alinelen (otherwise 0 is returned)
       + * and selects how many in-use entries to skip; len bounds the
       + * output and is rounded down to whole lines.  Returns the number
       + * of bytes written.
       + *
       + * The arp qlock is held across the whole scan so that the state
       + * test and the use of a->type see the same entry contents:
       + * cleanarpent zeroes both under the lock, and testing state outside
       + * it could be followed by a nil a->type dereference.
       + */
       +int
       +arpread(Arp *arp, char *p, ulong offset, int len)
       +{
       +        Arpent *a;
       +        int n;
       +        char mac[2*MAClen+1];
       +
       +        if(offset % Alinelen)
       +                return 0;
       +
       +        offset = offset/Alinelen;
       +        len = len/Alinelen;
       +
       +        n = 0;
       +        QLOCK(arp);
       +        for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
       +                /* state 0 is an unused slot */
       +                if(a->state == 0)
       +                        continue;
       +                if(offset > 0){
       +                        offset--;
       +                        continue;
       +                }
       +                len--;
       +                convmac(mac, a->mac, a->type->maclen);
       +                n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
       +        }
       +        QUNLOCK(arp);
       +
       +        return n;
       +}
       +
       +/*
       + * Service the re-transmit chain once and flush the drop list.
       + *
       + * Entries at the head of arp->rxmt that have no re-transmissions
       + * left, whose interface cannot be rlocked, or whose interface id
       + * has changed are discarded: their held packets move to the drop
       + * list and the entry is cleaned.  For the first entry that is still
       + * live a neighbor solicitation is sent (with the arp qlock dropped),
       + * after which the entry goes to the tail of the chain with one
       + * fewer re-transmission remaining.  Finally every block on the drop
       + * list is passed to icmphostunr.
       + *
       + * Returns the time until the head of the chain is next due, or 0
       + * if the chain was found empty.
       + */
       +extern int
       +rxmitsols(Arp *arp)
       +{
       +        uint sflag;
       +        Block *next, *xp;
       +        Arpent *a, *b, *nexta, **l;
       +        Fs *f;
       +        uchar ipsrc[IPaddrlen];
       +        Ipifc *ifc = nil;
       +        long nrxt;
       +
       +        QLOCK(arp);
       +        f = arp->f;
       +
       +        a = arp->rxmt;
       +        if(a==nil){
       +                nrxt = 0;
       +                goto dodrops;                 /* return nrxt; */
       +        }
       +        nrxt = a->rtime - NOW;
       +        if(nrxt > 3*ReTransTimer/4)
       +                goto dodrops;                 /* return nrxt; */
       +
       +        for(; a; a = nexta){
       +                /* cleanarpent clears a->nextrxt, so fetch the successor first */
       +                nexta = a->nextrxt;
       +                ifc = a->ifc;
       +                assert(ifc != nil);
       +                if(a->rxtsrem > 0 && CANRLOCK(ifc)){
       +                        if(a->ifcid == ifc->ifcid)
       +                                break;                /* live entry; ifc stays rlocked */
       +                        /* interface was rebound: don't leak the rlock just taken */
       +                        RUNLOCK(ifc);
       +                }
       +
       +                /* give up on this entry: queue its packets for icmp unreachable */
       +                xp = a->hold;
       +                a->hold = nil;
       +
       +                if(xp){
       +                        if(arp->dropl == nil)
       +                                arp->dropf = xp;
       +                        else
       +                                arp->dropl->list = xp;
       +
       +                        /* keep dropl at the tail, as newarp6 does */
       +                        for(next = xp->list; next; next = next->list)
       +                                xp = next;
       +                        arp->dropl = xp;
       +                }
       +
       +                cleanarpent(arp, a);
       +        }
       +        if(a == nil)
       +                goto dodrops;
       +
       +
       +        /*
       +         * NOTE(review): other code may modify or recycle a while the arp
       +         * lock is dropped here; the entry is re-queued below regardless.
       +         */
       +        QUNLOCK(arp);        /* for icmpns */
       +        if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC)
       +                icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
       +
       +        RUNLOCK(ifc);
       +        QLOCK(arp);
       +
       +        /* put to the end of re-transmit chain */
       +        l = &arp->rxmt;
       +        for(b = *l; b; b = b->nextrxt){
       +                if(b == a){
       +                        *l = a->nextrxt;
       +                        break;
       +                }
       +                l = &b->nextrxt;
       +        }
       +        for(b = *l; b; b = b->nextrxt){
       +                l = &b->nextrxt;
       +        }
       +        *l = a;
       +        a->rxtsrem--;
       +        a->nextrxt = nil;
       +        a->rtime = NOW + ReTransTimer;
       +
       +        a = arp->rxmt;
       +        if(a==nil)
       +                nrxt = 0;
       +        else
       +                nrxt = a->rtime - NOW;
       +
       +dodrops:
       +        /* detach the drop list under the lock, report the blocks without it */
       +        xp = arp->dropf;
       +        arp->dropf = nil;
       +        arp->dropl = nil;
       +        QUNLOCK(arp);
       +
       +        for(; xp; xp = next){
       +                next = xp->list;
       +                icmphostunr(f, ifc, xp, Icmp6_adr_unreach, 1);
       +        }
       +
       +        return nrxt;
       +
       +}
       +
       +/*
       + * sleep condition for rxmitproc: true when the re-transmit chain
       + * or the drop list has something on it
       + */
       +static int
       +rxready(void *v)
       +{
       +        Arp *arp;
       +
       +        arp = v;
       +        if(arp->rxmt != nil)
       +                return 1;
       +        return arp->dropf != nil;
       +}
       +
       +/*
       + *  kernel process body: repeatedly calls rxmitsols() and sleeps
       + *  according to the value it returns.  On error the process clears
       + *  arp->rxmitp and exits.
       + */
       +static void
       +rxmitproc(void *v)
       +{
       +        Arp *arp;
       +        long delay;
       +
       +        arp = v;
       +        arp->rxmitp = up;
       +        if(waserror()){
       +                arp->rxmitp = 0;
       +                pexit("hangup", 1);
       +        }
       +        for(;;){
       +                delay = rxmitsols(arp);
       +                if(delay == 0){
       +                        /* nothing scheduled: wait until rxready() says there is work */
       +                        sleep(&arp->rxmtq, rxready, v);
       +                        continue;
       +                }
       +                /* short delays are not slept on; the loop simply runs again */
       +                if(delay > ReTransTimer/4)
       +                        tsleep(&arp->rxmtq, return0, 0, delay);
       +        }
       +}
       +
 (DIR) diff --git a/src/9vx/a/ip/chandial.c b/src/9vx/a/ip/chandial.c
       @@ -0,0 +1,124 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +#include        "ip/ip.h"
       +
       +typedef struct DS DS;
       +static Chan*        call(char*, char*, DS*);
       +static void        _dial_string_parse(char*, DS*);
       +
       +enum
       +{
       +        Maxstring=        128,
       +};
       +
       +struct DS
       +{
       +        char        buf[Maxstring];                        /* private copy of the dial string; netdir, proto and rem may point into it */
       +        char        *netdir;                        /* network directory, or 0 if the dial string named none */
       +        char        *proto;                                /* protocol name */
       +        char        *rem;                                /* remote part: everything after the '!' that ends proto */
       +        char        *local;                                /* other args */
       +        char        *dir;                                /* if non-nil, call() stores the conversation directory path here */
       +        Chan        **ctlp;                                /* if non-nil, call() stores the open ctl channel here */
       +};
       +
       +/*
       + *  Dial dest, a dialstring of the form '[/net/]proto!dest', and
       + *  return the channel that call() opens for it.  local, dir and ctlp
       + *  are handed through to call() unchanged; when the dialstring names
       + *  no network directory, "/net" is used.
       + */
       +Chan*
       +chandial(char *dest, char *local, char *dir, Chan **ctlp)
       +{
       +        char clone[Maxpath];
       +        DS ds;
       +
       +        _dial_string_parse(dest, &ds);
       +        ds.ctlp = ctlp;
       +        ds.dir = dir;
       +        ds.local = local;
       +        if(ds.netdir == 0)
       +                ds.netdir = "/net";
       +
       +        /* no connection server, don't translate */
       +        snprint(clone, sizeof(clone), "%s/%s/clone", ds.netdir, ds.proto);
       +        return call(clone, ds.rem, &ds);
       +}
       +
       +static Chan*        /*
       +                 *  Open the clone file, read the conversation number from it,
       +                 *  write a "connect" request to it and open the conversation's
       +                 *  data file, which is returned.  clone is modified in place
       +                 *  (its last path element is cut off).  If ds->dir is set the
       +                 *  conversation directory path is stored there; if ds->ctlp is
       +                 *  set the clone (ctl) channel is stored there, otherwise it is
       +                 *  closed.  On any error after the first open the clone channel
       +                 *  is closed and the error is re-raised.
       +                 */
       +call(char *clone, char *dest, DS *ds)
       +{
       +        int n;
       +        Chan *dchan, *cchan;
       +        char name[Maxpath], data[Maxpath], *p;
       +
       +        cchan = namec(clone, Aopen, ORDWR, 0);
       +
       +        /* get directory name */
       +        if(waserror()){
       +                cclose(cchan);
       +                nexterror();
       +        }
       +        n = devtab[cchan->type]->read(cchan, name, sizeof(name)-1, 0);
       +        name[n] = 0;
       +        for(p = name; *p == ' '; p++)        /* skip leading blanks */
       +                ;
       +        sprint(name, "%lud", strtoul(p, 0, 0));        /* rewrite name as a plain decimal number */
       +        p = strrchr(clone, '/');
       +        *p = 0;        /* clone now names the directory that held the clone file */
       +        if(ds->dir)
       +                snprint(ds->dir, Maxpath, "%s/%s", clone, name);
       +        snprint(data, sizeof(data), "%s/%s/data", clone, name);
       +
       +        /* connect */
       +        if(ds->local)
       +                snprint(name, sizeof(name), "connect %s %s", dest, ds->local);
       +        else
       +                snprint(name, sizeof(name), "connect %s", dest);
       +        devtab[cchan->type]->write(cchan, name, strlen(name), 0);
       +
       +        /* open data connection */
       +        dchan = namec(data, Aopen, ORDWR, 0);
       +        if(ds->ctlp)
       +                *ds->ctlp = cchan;        /* caller takes over the ctl channel */
       +        else
       +                cclose(cchan);
       +        poperror();
       +        return dchan;
       +
       +}
       +
       +/*
       + *  parse a dial string of the form '[/net/]proto!dest'.
       + *
       + *  str is copied (truncated to Maxstring-1 bytes) into ds->buf and
       + *  split in place:
       + *        no '!':                  netdir = 0, proto = "net", rem = whole string
       + *        "proto!rem":                  netdir = 0
       + *        "/dir/proto!rem":          netdir = "/dir"
       + *  A string that starts with '#' but has no '/' before the '!' is
       + *  treated as having no netdir; the search for '/' never looks in
       + *  front of ds->buf.
       + */
       +static void
       +_dial_string_parse(char *str, DS *ds)
       +{
       +        char *p, *p2;
       +
       +        strncpy(ds->buf, str, Maxstring);
       +        ds->buf[Maxstring-1] = 0;
       +
       +        p = strchr(ds->buf, '!');
       +        if(p == 0) {
       +                ds->netdir = 0;
       +                ds->proto = "net";
       +                ds->rem = ds->buf;
       +                return;
       +        }
       +
       +        ds->netdir = 0;
       +        ds->proto = ds->buf;
       +        if(*ds->buf == '/' || *ds->buf == '#'){
       +                /* last '/' before the '!', stopping at the start of buf */
       +                for(p2 = p; p2 > ds->buf && *p2 != '/'; p2--)
       +                        ;
       +                if(*p2 == '/'){
       +                        *p2++ = 0;
       +                        ds->netdir = ds->buf;
       +                        ds->proto = p2;
       +                }
       +        }
       +        *p = 0;
       +        ds->rem = p + 1;
       +}
 (DIR) diff --git a/src/9vx/a/ip/devip.c b/src/9vx/a/ip/devip.c
       @@ -0,0 +1,1439 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +#include        "ip/ip.h"
       +
       +enum
       +{
       +        Qtopdir=        1,                /* top level directory */
       +        Qtopbase,                        /* first file type generated inside the top level directory */
       +        Qarp=                Qtopbase,
       +        Qbootp,
       +        Qndb,
       +        Qiproute,
       +        Qipselftab,
       +        Qlog,
       +
       +        Qprotodir,                        /* directory for a protocol */
       +        Qprotobase,                        /* first file type generated inside a protocol directory */
       +        Qclone=                Qprotobase,
       +        Qstats,
       +
       +        Qconvdir,                        /* directory for a conversation */
       +        Qconvbase,                        /* first file type generated inside a conversation directory */
       +        Qctl=                Qconvbase,
       +        Qdata,
       +        Qerr,
       +        Qlisten,
       +        Qlocal,
       +        Qremote,
       +        Qstatus,
       +        Qsnoop,
       +
       +        Logtype=        5,                /* bits of qid.path holding the file type (one of the Q* values) */
       +        Masktype=        (1<<Logtype)-1,
       +        Logconv=        12,                /* bits of qid.path holding the conversation index */
       +        Maskconv=        (1<<Logconv)-1,
       +        Shiftconv=        Logtype,
       +        Logproto=        8,                /* bits of qid.path holding the protocol index */
       +        Maskproto=        (1<<Logproto)-1,
       +        Shiftproto=        Logtype + Logconv,
       +
       +        Nfs=                128,                /* size of ipfs[]; device numbers must be below this */
       +};
       +#define TYPE(x)         ( ((ulong)(x).path) & Masktype )        /* file type field of a qid */
       +#define CONV(x)         ( (((ulong)(x).path) >> Shiftconv) & Maskconv )        /* conversation index field of a qid */
       +#define PROTO(x)         ( (((ulong)(x).path) >> Shiftproto) & Maskproto )        /* protocol index field of a qid */
       +#define QID(p, c, y)         ( ((uint)(p)<<(Shiftproto)) | ((uint)(c)<<Shiftconv) | (y) )        /* build a qid path from protocol p, conversation c, type y */
       +
       +static char network[] = "network";
       +
       +QLock        fslock;
       +Fs        *ipfs[Nfs];        /* attached fs's */
       +Queue        *qlog;
       +
       +extern        void nullmediumlink(void);
       +extern        void pktmediumlink(void);
       +        long ndbwrite(Fs *f, char *a, ulong off, int n);
       +
       +/*
       + *  generate the directory entry for file type i inside the
       + *  conversation directory named by c->qid.  Returns 1 when dp has
       + *  been filled in, -1 when i names no file (snoop exists only for
       + *  conversations of the "ipifc" protocol).
       + */
       +static int
       +ip3gen(Chan *c, int i, Dir *dp)
       +{
       +        Conv *cv;
       +        Qid q;
       +        char *name;
       +        vlong len;
       +        long perm;
       +
       +        cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
       +        if(cv->owner == nil)
       +                kstrdup(&cv->owner, eve);
       +        mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
       +
       +        /* local, remote and status are read-only and report no length */
       +        len = 0;
       +        perm = 0444;
       +        switch(i) {
       +        case Qctl:
       +                name = "ctl";
       +                perm = cv->perm;
       +                break;
       +        case Qdata:
       +                name = "data";
       +                len = qlen(cv->rq);
       +                perm = cv->perm;
       +                break;
       +        case Qerr:
       +                name = "err";
       +                len = qlen(cv->eq);
       +                perm = cv->perm;
       +                break;
       +        case Qlisten:
       +                name = "listen";
       +                perm = cv->perm;
       +                break;
       +        case Qlocal:
       +                name = "local";
       +                break;
       +        case Qremote:
       +                name = "remote";
       +                break;
       +        case Qstatus:
       +                name = "status";
       +                break;
       +        case Qsnoop:
       +                if(strcmp(cv->p->name, "ipifc") != 0)
       +                        return -1;
       +                name = "snoop";
       +                len = qlen(cv->sq);
       +                perm = 0400;
       +                break;
       +        default:
       +                return -1;
       +        }
       +        devdir(c, q, name, len, cv->owner, perm, dp);
       +        return 1;
       +}
       +
       +/*
       + *  generate the directory entry for file type i (clone or stats)
       + *  inside the protocol directory named by c->qid.  Returns 1 when
       + *  dp has been filled in, -1 for any other i.
       + */
       +static int
       +ip2gen(Chan *c, int i, Dir *dp)
       +{
       +        Qid q;
       +        char *name;
       +        long perm;
       +
       +        if(i == Qclone){
       +                name = "clone";
       +                perm = 0666;
       +        }else if(i == Qstats){
       +                name = "stats";
       +                perm = 0444;
       +        }else
       +                return -1;
       +
       +        mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
       +        devdir(c, q, name, 0, network, perm, dp);
       +        return 1;
       +}
       +
       +/*
       + *  generate the directory entry for file type i in the top level
       + *  directory.  Returns 1 when dp has been filled in, -1 when i names
       + *  no top level file.  The ndb entry reports the length and version
       + *  of f->ndb, and its mtime when that is later than kerndate.
       + */
       +static int
       +ip1gen(Chan *c, int i, Dir *dp)
       +{
       +        extern ulong        kerndate;
       +        Fs *f;
       +        Qid q;
       +        char *name;
       +        int perm, len;
       +
       +        f = ipfs[c->dev];
       +        len = 0;
       +        perm = 0666;
       +        mkqid(&q, QID(0, 0, i), 0, QTFILE);
       +
       +        switch(i) {
       +        case Qarp:
       +                name = "arp";
       +                perm = 0664;
       +                break;
       +        case Qiproute:
       +                name = "iproute";
       +                perm = 0664;
       +                break;
       +        case Qipselftab:
       +                name = "ipselftab";
       +                perm = 0444;
       +                break;
       +        case Qbootp:
       +                name = "bootp";
       +                break;
       +        case Qlog:
       +                name = "log";
       +                break;
       +        case Qndb:
       +                name = "ndb";
       +                len = strlen(f->ndb);
       +                q.vers = f->ndbvers;
       +                break;
       +        default:
       +                return -1;
       +        }
       +
       +        devdir(c, q, name, len, network, perm, dp);
       +        if(i == Qndb && f->ndbmtime > kerndate)
       +                dp->mtime = f->ndbmtime;
       +        return 1;
       +}
       +
       +static int        /*
       +                 *  Directory generator handed to devwalk, devstat and devdirread.
       +                 *  Dispatches on the type of c->qid: for the three directory
       +                 *  levels s selects the entry (DEVDOTDOT asks for the parent);
       +                 *  for plain files the entry for the file itself is produced.
       +                 *  Returns 1 when dp is filled in, 0 for a slot that is skipped,
       +                 *  -1 when there is no such entry.  The __ch, __dt and __i
       +                 *  arguments are not used.
       +                 */
       +ipgen(Chan *c, char* __ch, Dirtab* __dt, int __i, int s, Dir *dp)
       +{
       +        Qid q;
       +        Conv *cv;
       +        Fs *f;
       +
       +        f = ipfs[c->dev];
       +
       +        switch(TYPE(c->qid)) {
       +        case Qtopdir:
       +                if(s == DEVDOTDOT){
       +                        mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
       +                        sprint(up->genbuf, "#I%lud", c->dev);
       +                        devdir(c, q, up->genbuf, 0, network, 0555, dp);
       +                        return 1;
       +                }
       +                if(s < f->np) {        /* first f->np slots: one directory per protocol */
       +                        if(f->p[s]->connect == nil)
       +                                return 0;        /* protocol with no user interface */
       +                        mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
       +                        devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
       +                        return 1;
       +                }
       +                s -= f->np;        /* remaining slots: the top level files */
       +                return ip1gen(c, s+Qtopbase, dp);
       +        case Qarp:
       +        case Qbootp:
       +        case Qndb:
       +        case Qlog:
       +        case Qiproute:
       +        case Qipselftab:
       +                return ip1gen(c, TYPE(c->qid), dp);
       +        case Qprotodir:
       +                if(s == DEVDOTDOT){
       +                        mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
       +                        sprint(up->genbuf, "#I%lud", c->dev);
       +                        devdir(c, q, up->genbuf, 0, network, 0555, dp);
       +                        return 1;
       +                }
       +                if(s < f->p[PROTO(c->qid)]->ac) {        /* first ac slots: one directory per conversation */
       +                        cv = f->p[PROTO(c->qid)]->conv[s];
       +                        sprint(up->genbuf, "%d", s);
       +                        mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
       +                        devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
       +                        return 1;
       +                }
       +                s -= f->p[PROTO(c->qid)]->ac;        /* remaining slots: clone and stats */
       +                return ip2gen(c, s+Qprotobase, dp);
       +        case Qclone:
       +        case Qstats:
       +                return ip2gen(c, TYPE(c->qid), dp);
       +        case Qconvdir:
       +                if(s == DEVDOTDOT){
       +                        s = PROTO(c->qid);        /* s is reused as the protocol index */
       +                        mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
       +                        devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
       +                        return 1;
       +                }
       +                return ip3gen(c, s+Qconvbase, dp);
       +        case Qctl:
       +        case Qdata:
       +        case Qerr:
       +        case Qlisten:
       +        case Qlocal:
       +        case Qremote:
       +        case Qstatus:
       +        case Qsnoop:
       +                return ip3gen(c, TYPE(c->qid), dp);
       +        }
       +        return -1;
       +}
       +
       +/*
       + *  device reset: link in the null and pkt media and install eipfmt
       + *  as the print routine for the i, I, E, V and M format verbs.
       + */
       +static void
       +ipreset(void)
       +{
       +        static char verbs[] = "iIEVM";
       +        char *v;
       +
       +        nullmediumlink();
       +        pktmediumlink();
       +
       +        for(v = verbs; *v != 0; v++)
       +                fmtinstall(*v, eipfmt);
       +}
       +
       +/*
       + *  return the Fs for device number dev, creating and initialising
       + *  it (ip, arp, netlog and every protocol in ipprotoinit[]) on first
       + *  use.  Returns nil when dev is outside 0..Nfs-1.  Creation is
       + *  serialised by fslock.
       + */
       +static Fs*
       +ipgetfs(int dev)
       +{
       +        extern void (*ipprotoinit[])(Fs*);
       +        Fs *f;
       +        int i;
       +
       +        /* a negative dev would index in front of ipfs[] */
       +        if(dev < 0 || dev >= Nfs)
       +                return nil;
       +
       +        qlock(&fslock);
       +        if(ipfs[dev] == nil){
       +                f = smalloc(sizeof(Fs));
       +                ip_init(f);
       +                arpinit(f);
       +                netloginit(f);
       +                for(i = 0; ipprotoinit[i]; i++)
       +                        ipprotoinit[i](f);
       +                f->dev = dev;
       +                ipfs[dev] = f;
       +        }
       +        qunlock(&fslock);
       +
       +        return ipfs[dev];
       +}
       +
       +/*
       + *  allocate an IPaux holding a copy of owner and of tag.  The tag
       + *  field is blank padded and is not NUL terminated when tag fills
       + *  it completely.  tag itself may therefore be such an unterminated
       + *  field (ipwalk passes a->tag), so at most sizeof(a->tag) bytes of
       + *  it are examined.
       + */
       +IPaux*
       +newipaux(char *owner, char *tag)
       +{
       +        IPaux *a;
       +        int n;
       +
       +        a = smalloc(sizeof(*a));
       +        kstrdup(&a->owner, owner);
       +        memset(a->tag, ' ', sizeof(a->tag));
       +        /* bounded length: never read tag beyond the size of the field */
       +        for(n = 0; n < sizeof(a->tag) && tag[n] != 0; n++)
       +                ;
       +        memmove(a->tag, tag, n);
       +        return a;
       +}
       +
       +#define ATTACHER(c) (((IPaux*)((c)->aux))->owner)
       +
       +/*
       + *  attach to ip device number spec (a decimal string).  Raises
       + *  "bad specification" when the number is outside 0..Nfs-1.  The
       + *  returned channel refers to the top level directory and carries a
       + *  fresh IPaux owned by commonuser() with tag "none".
       + */
       +static Chan*
       +ipattach(char* spec)
       +{
       +        Chan *c;
       +        int dev;
       +
       +        dev = atoi(spec);
       +        /* atoi can return a negative number, which must not index ipfs[] */
       +        if(dev < 0 || dev >= Nfs)
       +                error("bad specification");
       +
       +        ipgetfs(dev);
       +        c = devattach('I', spec);
       +        mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
       +        c->dev = dev;
       +
       +        c->aux = newipaux(commonuser(), "none");
       +
       +        return c;
       +}
       +
       +/*
       + *  walk via devwalk/ipgen; when the walk produced a clone channel,
       + *  give it its own IPaux copied from the owner and tag of c's.
       + */
       +static Walkqid*
       +ipwalk(Chan* c, Chan *nc, char **name, int nname)
       +{
       +        Walkqid *wq;
       +        IPaux *old;
       +
       +        old = c->aux;
       +        wq = devwalk(c, nc, name, nname, nil, 0, ipgen);
       +        if(wq == nil || wq->clone == nil)
       +                return wq;
       +        wq->clone->aux = newipaux(old->owner, old->tag);
       +        return wq;
       +}
       +
       +
       +/*
       + *  stat: handled entirely by devstat using the ipgen generator.
       + */
       +static int
       +ipstat(Chan* c, uchar* db, int n)
       +{
       +        int nstat;
       +
       +        nstat = devstat(c, db, n, nil, 0, ipgen);
       +        return nstat;
       +}
       +
       +/*
       + *  sleep condition for listeners: non-zero when the conversation
       + *  passed as arg has at least one queued incoming call.
       + */
       +static int
       +incoming(void* arg)
       +{
       +        return ((Conv*)arg)->incall != nil;
       +}
       +
       +/*
       + *  open mode (omode&3) to the rwx permission bits it requires.
       + *  All four possible index values have an entry, so m2p[omode&3]
       + *  is always inside the table.
       + */
       +static int m2p[] = {
       +        [OREAD]                4,
       +        [OWRITE]        2,
       +        [ORDWR]                6,
       +        [OEXEC]                1
       +};
       +
       +static Chan*        /*
       +                 *  Open the file named by c->qid with mode omode.
       +                 *  Depending on the file type this checks permissions, and:
       +                 *        clone        allocates a new conversation and turns c into its ctl file;
       +                 *        ctl, data, err        take a reference (inuse) on the conversation;
       +                 *        listen        blocks until a call arrives and turns c into the
       +                 *                ctl file of the new conversation;
       +                 *        snoop        counts the opener in cv->snoopers;
       +                 *        log        calls netlogopen.
       +                 *  Failures are raised with error().  On success c is marked
       +                 *  open and returned.
       +                 */
       +ipopen(Chan* c, int omode)
       +{
       +        Conv *cv, *nc;
       +        Proto *p;
       +        int perm;
       +        Fs *f;
       +
       +        perm = m2p[omode&3];        /* permission bits this open mode needs */
       +
       +        f = ipfs[c->dev];
       +
       +        switch(TYPE(c->qid)) {
       +        default:
       +                break;
       +        case Qndb:
       +                if(omode & (OWRITE|OTRUNC) && !iseve())
       +                        error(Eperm);
       +                if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
       +                        f->ndb[0] = 0;        /* truncate: empty the ndb string */
       +                break;
       +        case Qlog:
       +                netlogopen(f);
       +                break;
       +        case Qiproute:
       +        case Qarp:
       +                if(omode != OREAD && !iseve())
       +                        error(Eperm);
       +                break;
       +        case Qtopdir:
       +        case Qprotodir:
       +        case Qconvdir:
       +        case Qstatus:
       +        case Qremote:
       +        case Qlocal:
       +        case Qstats:
       +        case Qbootp:
       +        case Qipselftab:
       +                if(omode != OREAD)
       +                        error(Eperm);
       +                break;
       +        case Qsnoop:
       +                if(omode != OREAD)
       +                        error(Eperm);
       +                p = f->p[PROTO(c->qid)];
       +                cv = p->conv[CONV(c->qid)];
       +                if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
       +                        error(Eperm);
       +                incref(&cv->snoopers);
       +                break;
       +        case Qclone:
       +                p = f->p[PROTO(c->qid)];
       +                QLOCK(p);
       +                if(waserror()){
       +                        QUNLOCK(p);
       +                        nexterror();
       +                }
       +                cv = Fsprotoclone(p, ATTACHER(c));
       +                QUNLOCK(p);
       +                poperror();
       +                if(cv == nil) {
       +                        error(Enodev);
       +                        break;
       +                }
       +                mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);        /* c now names the new conversation's ctl */
       +                break;
       +        case Qdata:
       +        case Qctl:
       +        case Qerr:
       +                p = f->p[PROTO(c->qid)];
       +                QLOCK(p);
       +                cv = p->conv[CONV(c->qid)];
       +                QLOCK(cv);
       +                if(waserror()) {
       +                        QUNLOCK(cv);
       +                        QUNLOCK(p);
       +                        nexterror();
       +                }
       +                if((perm & (cv->perm>>6)) != perm) {        /* owner bits do not grant the access */
       +                        if(strcmp(ATTACHER(c), cv->owner) != 0)
       +                                error(Eperm);
       +                         if((perm & cv->perm) != perm)
       +                                error(Eperm);
       +
       +                }
       +                cv->inuse++;
       +                if(cv->inuse == 1){        /* first opener becomes the owner */
       +                        kstrdup(&cv->owner, ATTACHER(c));
       +                        cv->perm = 0660;
       +                }
       +                QUNLOCK(cv);
       +                QUNLOCK(p);
       +                poperror();
       +                break;
       +        case Qlisten:
       +                cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
       +                if((perm & (cv->perm>>6)) != perm) {
       +                        if(strcmp(ATTACHER(c), cv->owner) != 0)
       +                                error(Eperm);
       +                         if((perm & cv->perm) != perm)
       +                                error(Eperm);
       +
       +                }
       +
       +                if(cv->state != Announced)
       +                        error("not announced");
       +
       +                if(waserror()){
       +                        closeconv(cv);        /* drop the reference taken just below */
       +                        nexterror();
       +                }
       +                QLOCK(cv);
       +                cv->inuse++;        /* hold the conversation while listening */
       +                QUNLOCK(cv);
       +
       +                nc = nil;
       +                while(nc == nil) {
       +                        /* give up if we got a hangup */
       +                        if(qisclosed(cv->rq))
       +                                error("listen hungup");
       +
       +                        qlock(&cv->listenq);
       +                        if(waserror()) {
       +                                qunlock(&cv->listenq);
       +                                nexterror();
       +                        }
       +
       +                        /* wait for a connect */
       +                        sleep(&cv->listenr, incoming, cv);
       +
       +                        QLOCK(cv);
       +                        nc = cv->incall;
       +                        if(nc != nil){
       +                                cv->incall = nc->next;        /* unlink the first queued call */
       +                                mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
       +                                kstrdup(&cv->owner, ATTACHER(c));
       +                        }
       +                        QUNLOCK(cv);
       +
       +                        qunlock(&cv->listenq);
       +                        poperror();
       +                }
       +                closeconv(cv);        /* release the listening reference */
       +                poperror();
       +                break;
       +        }
       +        c->mode = openmode(omode);
       +        c->flag |= COPEN;
       +        c->offset = 0;
       +        return c;
       +}
       +
       +static void        /* create is not supported: always raises Eperm */
       +ipcreate(Chan* _, char* __, int ___, ulong ____)
       +{
       +        error(Eperm);
       +}
       +
       +static void        /* remove is not supported: always raises Eperm */
       +ipremove(Chan* _)
       +{
       +        error(Eperm);
       +}
       +
       +/*
       + *  wstat on a conversation's ctl or data file: lets eve or the
       + *  conversation's owner change the owner and the permission bits.
       + *  Any other file raises Eperm.  Returns the value of convM2D.
       + *
       + *  A mode of ~0 in the stat buffer is the wstat convention for
       + *  "leave unchanged" and does not touch cv->perm.
       + *
       + *  NOTE(review): convM2D is called with a nil string buffer; whether
       + *  d.uid can ever be non-empty in that case should be confirmed
       + *  against convM2D.
       + */
       +static int
       +ipwstat(Chan *c, uchar *dp, int n)
       +{
       +        Dir d;
       +        Conv *cv;
       +        Fs *f;
       +        Proto *p;
       +
       +        f = ipfs[c->dev];
       +        switch(TYPE(c->qid)) {
       +        default:
       +                error(Eperm);
       +                break;
       +        case Qctl:
       +        case Qdata:
       +                break;
       +        }
       +
       +        n = convM2D(dp, n, &d, nil);
       +        if(n > 0){
       +                p = f->p[PROTO(c->qid)];
       +                cv = p->conv[CONV(c->qid)];
       +                if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
       +                        error(Eperm);
       +                if(d.uid[0])
       +                        kstrdup(&cv->owner, d.uid);
       +                /* only change the permissions when a mode was supplied */
       +                if(d.mode != ~(ulong)0)
       +                        cv->perm = d.mode & 0777;
       +        }
       +        return n;
       +}
       +
       +void        /*
       +         *  Drop one reference (inuse) on cv.  When the last reference goes,
       +         *  close every queued incoming call, give the conversation back to
       +         *  the "network" owner with mode 0660, leave all multicast groups,
       +         *  call the protocol's close routine and mark the conversation Idle.
       +         */
       +closeconv(Conv *cv)
       +{
       +        Conv *nc;
       +        Ipmulti *mp;
       +
       +        QLOCK(cv);
       +
       +        if(--cv->inuse > 0) {        /* still in use by someone else */
       +                QUNLOCK(cv);
       +                return;
       +        }
       +
       +        /* close all incoming calls since no listen will ever happen */
       +        for(nc = cv->incall; nc; nc = cv->incall){
       +                cv->incall = nc->next;
       +                closeconv(nc);
       +        }
       +        cv->incall = nil;
       +
       +        kstrdup(&cv->owner, network);
       +        cv->perm = 0660;
       +
       +        while((mp = cv->multi) != nil)        /* loops until cv->multi is empty; presumably ipifcremmulti unlinks the entry */
       +                ipifcremmulti(cv, mp->ma, mp->ia);
       +
       +        cv->r = nil;
       +        cv->rgen = 0;
       +        cv->p->close(cv);
       +        cv->state = Idle;
       +        QUNLOCK(cv);
       +}
       +
       +/*
       + *  close a channel.  For channels that were actually opened this
       + *  undoes what ipopen did (netlogclose for log, closeconv for ctl,
       + *  data and err, decref of snoopers for snoop).  The channel's IPaux
       + *  is always freed.
       + */
       +static void
       +ipclose(Chan* c)
       +{
       +        Fs *f;
       +        Conv *cv;
       +        IPaux *a;
       +
       +        f = ipfs[c->dev];
       +        if(c->flag & COPEN){
       +                switch(TYPE(c->qid)) {
       +                case Qlog:
       +                        netlogclose(f);
       +                        break;
       +                case Qdata:
       +                case Qctl:
       +                case Qerr:
       +                        cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
       +                        closeconv(cv);
       +                        break;
       +                case Qsnoop:
       +                        cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
       +                        decref(&cv->snoopers);
       +                        break;
       +                default:
       +                        break;
       +                }
       +        }
       +
       +        a = c->aux;
       +        free(a->owner);
       +        free(a);
       +}
       +
       +enum
       +{
       +        Statelen=        32*1024,
       +};
       +
       +static long        /*
       +                 *  Read up to n bytes at offset off from the file named by
       +                 *  ch->qid into a.  Directories are read through devdirread;
       +                 *  the top level files are delegated to their own read
       +                 *  routines; ctl, remote, local, status and stats format
       +                 *  text into a temporary buffer and return the requested
       +                 *  slice of it; data, err and snoop read from the
       +                 *  conversation's rq, eq and sq queues.  Any other file type
       +                 *  raises Eperm.
       +                 */
       +ipread(Chan *ch, void *a, long n, vlong off)
       +{
       +        Conv *c;
       +        Proto *x;
       +        char *buf, *p;
       +        long rv;
       +        Fs *f;
       +        ulong offset = off;
       +
       +        f = ipfs[ch->dev];
       +
       +        p = a;
       +        switch(TYPE(ch->qid)) {
       +        default:
       +                error(Eperm);
       +        case Qtopdir:
       +        case Qprotodir:
       +        case Qconvdir:
       +                return devdirread(ch, a, n, 0, 0, ipgen);
       +        case Qarp:
       +                return arpread(f->arp, a, offset, n);
       +         case Qbootp:
       +                 return bootpread(a, offset, n);
       +         case Qndb:
       +                return readstr(offset, a, n, f->ndb);
       +        case Qiproute:
       +                return routeread(f, a, offset, n);
       +        case Qipselftab:
       +                return ipselftabread(f, a, offset, n);
       +        case Qlog:
       +                return netlogread(f, a, offset, n);
       +        case Qctl:
       +                buf = smalloc(16);
       +                sprint(buf, "%lud", CONV(ch->qid));        /* ctl reads back the conversation number */
       +                rv = readstr(offset, p, n, buf);
       +                free(buf);
       +                return rv;
       +        case Qremote:
       +                buf = smalloc(Statelen);
       +                x = f->p[PROTO(ch->qid)];
       +                c = x->conv[CONV(ch->qid)];
       +                if(x->remote == nil) {        /* no protocol specific formatter: print address!port */
       +                        sprint(buf, "%I!%d\n", c->raddr, c->rport);
       +                } else {
       +                        (*x->remote)(c, buf, Statelen-2);
       +                }
       +                rv = readstr(offset, p, n, buf);
       +                free(buf);
       +                return rv;
       +        case Qlocal:
       +                buf = smalloc(Statelen);
       +                x = f->p[PROTO(ch->qid)];
       +                c = x->conv[CONV(ch->qid)];
       +                if(x->local == nil) {        /* no protocol specific formatter: print address!port */
       +                        sprint(buf, "%I!%d\n", c->laddr, c->lport);
       +                } else {
       +                        (*x->local)(c, buf, Statelen-2);
       +                }
       +                rv = readstr(offset, p, n, buf);
       +                free(buf);
       +                return rv;
       +        case Qstatus:
       +                buf = smalloc(Statelen);
       +                x = f->p[PROTO(ch->qid)];
       +                c = x->conv[CONV(ch->qid)];
       +                (*x->state)(c, buf, Statelen-2);
       +                rv = readstr(offset, p, n, buf);
       +                free(buf);
       +                return rv;
       +        case Qdata:
       +                c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
       +                return qread(c->rq, a, n);
       +        case Qerr:
       +                c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
       +                return qread(c->eq, a, n);
       +        case Qsnoop:
       +                c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
       +                return qread(c->sq, a, n);
       +        case Qstats:
       +                x = f->p[PROTO(ch->qid)];
       +                if(x->stats == nil)
       +                        error("stats not implemented");
       +                buf = smalloc(Statelen);
       +                (*x->stats)(x, buf, Statelen);
       +                rv = readstr(offset, p, n, buf);
       +                free(buf);
       +                return rv;
       +        }
       +}
       +
       +/*
       + *  block read: a data file hands back a block straight from the
       + *  conversation's read queue; everything else goes through devbread.
       + */
       +static Block*
       +ipbread(Chan* ch, long n, ulong offset)
       +{
       +        Fs *f;
       +        Conv *cv;
       +
       +        if(TYPE(ch->qid) != Qdata)
       +                return devbread(ch, n, offset);
       +
       +        f = ipfs[ch->dev];
       +        cv = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
       +        return qbread(cv->rq, n);
       +}
       +
       +/*
       + *  choose the conversation's local address: findlocalip fills in
       + *  c->laddr with the address of the ifc closest to c->raddr
       + */
       +static void
       +setladdr(Conv* c)
       +{
       +        Proto *p;
       +
       +        p = c->p;
       +        findlocalip(p->f, c->laddr, c->raddr);
       +}
       +
       +/*
       + *  set c's local port to lport, provided no other Connected or
       + *  Announced conversation of the same protocol already has the same
       + *  raddr,rport,laddr,lport quad.  Returns nil on success or the
       + *  string "address in use".  The protocol is locked during the scan.
       + */
       +char*
       +setluniqueport(Conv* c, int lport)
       +{
       +        Proto *p;
       +        Conv *other;
       +        char *err;
       +        int i;
       +
       +        p = c->p;
       +        err = nil;
       +
       +        QLOCK(p);
       +        for(i = 0; i < p->nc; i++){
       +                other = p->conv[i];
       +                if(other == nil)
       +                        break;        /* end of the conversations in use */
       +                if(other == c)
       +                        continue;
       +                if(other->state != Connected && other->state != Announced)
       +                        continue;
       +                if(other->lport != lport || other->rport != c->rport)
       +                        continue;
       +                if(ipcmp(other->raddr, c->raddr) != 0)
       +                        continue;
       +                if(ipcmp(other->laddr, c->laddr) != 0)
       +                        continue;
       +                err = "address in use";
       +                break;
       +        }
       +        if(err == nil)
       +                c->lport = lport;
       +        QUNLOCK(p);
       +
       +        return err;
       +}
       +
       +/*
       + * does any conversation of p already have lport as its local port?
       + */
       +static int
       +lportinuse(Proto *p, ushort lport)
       +{
       +        int i, found;
       +
       +        found = 0;
       +        for(i = 0; !found && i < p->nc && p->conv[i] != nil; i++)
       +                found = p->conv[i]->lport == lport;
       +        return found;
       +}
       +
       +/*
       + *  choose a free local port for c and store it in c->lport
       + */
       +char *
       +setlport(Conv* c)
       +{
       +        Proto *pr;
       +        int tries, port, found;
       +
       +        pr = c->p;
       +        port = 0;
       +        found = 0;
       +
       +        QLOCK(pr);
       +        if(c->restricted){
       +                /* Restricted ports cycle between 600 and 1024. */
       +                for(tries = 1024-600; tries > 0 && !found; tries--){
       +                        if(pr->nextrport < 600 || pr->nextrport >= 1024)
       +                                pr->nextrport = 600;
       +                        port = pr->nextrport++;
       +                        found = !lportinuse(pr, port);
       +                }
       +        }else{
       +                /*
       +                 * Unrestricted ports are chosen randomly
       +                 * between 2^15 and 2^16.  There are at most
       +                 * 4*Nchan = 4096 ports in use at any given time,
       +                 * so even in the worst case, a random probe has a
       +                 * 1 - 4096/2^15 = 87% chance of success.
       +                 * If 64 successive probes fail, there is a bug somewhere
       +                 * (or a once in 10^58 event has happened, but that's
       +                 * less likely than a venti collision).
       +                 */
       +                for(tries = 64; tries > 0 && !found; tries--){
       +                        port = (1<<15) + nrand(1<<15);
       +                        found = !lportinuse(pr, port);
       +                }
       +        }
       +
       +        /* c->lport is only touched when a free port was found */
       +        if(found)
       +                c->lport = port;
       +        QUNLOCK(pr);
       +        return found ? nil : "no ports available";
       +}
       +
       +/*
       + *  set a local address and port from a string of the form
       + *        [address!]port[!r]
       + */
       +char*
       +setladdrport(Conv* c, char* str, int announcing)
       +{
       +        char *p;
       +        int lport;        /* int, not ushort: keeps atoi's sign and range testable */
       +        uchar addr[IPaddrlen];
       +
       +        /*
       +         *  ignore restricted part if it exists.  it's
       +         *  meaningless on local ports.
       +         */
       +        p = strchr(str, '!');
       +        if(p != nil){
       +                *p++ = 0;
       +                if(strcmp(p, "r") == 0)
       +                        p = nil;
       +        }
       +
       +        c->lport = 0;
       +        if(p == nil){
       +                /* no address part: wildcard when announcing, else derive it from raddr */
       +                if(announcing)
       +                        ipmove(c->laddr, IPnoaddr);
       +                else
       +                        setladdr(c);
       +                p = str;
       +        } else {
       +                if(strcmp(str, "*") == 0)
       +                        ipmove(c->laddr, IPnoaddr);
       +                else {
       +                        if(parseip(addr, str) == -1)
       +                                return Ebadip;
       +                        if(ipforme(c->p->f, addr))
       +                                ipmove(c->laddr, addr);
       +                        else
       +                                return "not a local IP address";
       +                }
       +        }
       +
       +        /* one process can get all connections */
       +        if(announcing && strcmp(p, "*") == 0){
       +                if(!iseve())
       +                        error(Eperm);
       +                return setluniqueport(c, 0);
       +        }
       +
       +        /* ports are 16 bits; <= 0 (including non-numeric text) means "pick one for me" */
       +        lport = atoi(p);
       +        if(lport > 0xFFFF)
       +                return "bad port";
       +        if(lport <= 0)
       +                return setlport(c);
       +        return setluniqueport(c, lport);
       +}
       +
       +static char*
       +setraddrport(Conv* c, char* str)
       +{
       +        char *port, *flags;
       +
       +        port = strchr(str, '!');
       +        if(port == nil)
       +                return "malformed address";
       +        *port = 0;
       +        port++;
       +        if(parseip(c->raddr, str) == -1)
       +                return Ebadip;
       +        c->rport = atoi(port);
       +        /* a "!r" after the port marks the conversation restricted */
       +        flags = strchr(port, '!');
       +        if(flags != nil && strstr(flags, "!r") != nil)
       +                c->restricted = 1;
       +        return nil;
       +}
       +
       +/*
       + *  called by protocol connect routine to set addresses:
       + *  argv[1] is the remote address!port[!r], optional argv[2] the local one
       + */
       +char*
       +Fsstdconnect(Conv *c, char *argv[], int argc)
       +{
       +        char *err;
       +
       +        if(argc != 2 && argc != 3)
       +                return "bad args to connect";
       +
       +        err = setraddrport(c, argv[1]);
       +        if(err != nil)
       +                return err;
       +
       +        /* local end: taken from argv[2] if given, otherwise picked for the caller */
       +        if(argc == 3)
       +                err = setladdrport(c, argv[2], 0);
       +        else{
       +                setladdr(c);
       +                err = setlport(c);
       +        }
       +        if(err != nil)
       +                return err;
       +
       +        /*
       +         *  V4 when both ends carry the v4 prefix or no remote address is set
       +         */
       +        if(ipcmp(c->raddr, IPnoaddr) == 0)
       +                c->ipversion = V4;
       +        else if(memcmp(c->raddr, v4prefix, IPv4off) == 0
       +        && memcmp(c->laddr, v4prefix, IPv4off) == 0)
       +                c->ipversion = V4;
       +        else
       +                c->ipversion = V6;
       +        return nil;
       +}
       +/*
       + *  initiate connection and sleep till it's set up; connected() is the sleep condition
       + */
       +static int
       +connected(void* a)
       +{
       +        return ((Conv*)a)->state == Connected;
       +}
       +static void
       +connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb)        /* "connect" ctl request; ipwrite calls this with c qlocked */
       +{
       +        char *p;
       +
       +        if(c->state != 0)        /* only a conversation in state 0 may start connecting */
       +                error(Econinuse);
       +        c->state = Connecting;        /* NOTE(review): nothing here resets this if an error() below fires - confirm intended */
       +        c->cerr[0] = '\0';        /* forget any earlier error text */
       +        if(x->connect == nil)
       +                error("connect not supported");
       +        p = x->connect(c, cb->f, cb->nf);        /* nil on success, otherwise an error string */
       +        if(p != nil)
       +                error(p);
       +
       +        QUNLOCK(c);        /* c is not held while sleeping */
       +        if(waserror()){
       +                QLOCK(c);        /* error during the sleep: retake c before passing the error on */
       +                nexterror();
       +        }
       +        sleep(&c->cr, connected, c);        /* until c->state == Connected; Fsconnected does the wakeup on c->cr */
       +        QLOCK(c);
       +        poperror();
       +
       +        if(c->cerr[0] != '\0')        /* an error message was left in cerr (Fsconnected copies its msg there) */
       +                error(c->cerr);
       +}
       +
       +/*
       + *  called by protocol announce routine to set addresses:
       + *  clears the remote end, then sets the local end from argv[1]
       + */
       +char*
       +Fsstdannounce(Conv* c, char* argv[], int argc)
       +{
       +        /* the remote side is reset even if argc turns out to be wrong */
       +        memset(c->raddr, 0, sizeof(c->raddr));
       +        c->rport = 0;
       +
       +        if(argc != 2)
       +                return "bad args to announce";
       +        /* last argument 1 = announcing */
       +        return setladdrport(c, argv[1], 1);
       +}
       +
       +/*
       + *  initiate announcement and sleep till it's set up; announced() is the sleep condition
       + */
       +static int
       +announced(void* a)
       +{
       +        return ((Conv*)a)->state == Announced;
       +}
       +static void
       +announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb)        /* "announce" ctl request; ipwrite calls this with c qlocked */
       +{
       +        char *p;
       +
       +        if(c->state != 0)        /* only a conversation in state 0 may start announcing */
       +                error(Econinuse);
       +        c->state = Announcing;        /* NOTE(review): nothing here resets this if an error() below fires - confirm intended */
       +        c->cerr[0] = '\0';        /* forget any earlier error text */
       +        if(x->announce == nil)
       +                error("announce not supported");
       +        p = x->announce(c, cb->f, cb->nf);        /* nil on success, otherwise an error string */
       +        if(p != nil)
       +                error(p);
       +
       +        QUNLOCK(c);        /* c is not held while sleeping */
       +        if(waserror()){
       +                QLOCK(c);        /* error during the sleep: retake c before passing the error on */
       +                nexterror();
       +        }
       +        sleep(&c->cr, announced, c);        /* until c->state == Announced; Fsconnected does the wakeup on c->cr */
       +        QLOCK(c);
       +        poperror();
       +
       +        if(c->cerr[0] != '\0')        /* an error message was left in cerr (Fsconnected copies its msg there) */
       +                error(c->cerr);
       +}
       +
       +/*
       + *  called by protocol bind routine to set addresses;
       + *  argv[1] is [address!]port[!r], handled as a non-announcing local end
       + */
       +char*
       +Fsstdbind(Conv* c, char* argv[], int argc)
       +{
       +        char *err;
       +
       +        err = "bad args to bind";
       +        if(argc == 2)
       +                err = setladdrport(c, argv[1], 0);
       +        return err;
       +}
       +
       +static void
       +bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
       +{
       +        char *err;
       +
       +        /* a protocol without its own bind routine gets Fsstdbind */
       +        err = x->bind != nil
       +                ? x->bind(c, cb->f, cb->nf)
       +                : Fsstdbind(c, cb->f, cb->nf);
       +        if(err != nil)
       +                error(err);
       +}
       +
       +static void
       +tosctlmsg(Conv *c, Cmdbuf *cb)
       +{
       +        int tos;
       +
       +        tos = cb->nf >= 2 ? atoi(cb->f[1]) : 0;        /* bare "tos" resets to 0 */
       +        c->tos = tos;
       +}
       +
       +static void
       +ttlctlmsg(Conv *c, Cmdbuf *cb)
       +{
       +        int ttl;
       +
       +        ttl = cb->nf >= 2 ? atoi(cb->f[1]) : MAXTTL;        /* bare "ttl" resets to MAXTTL */
       +        c->ttl = ttl;
       +}
       +
       +static long
       +ipwrite(Chan* ch, void *v, long n, vlong off)        /* write entry point: dispatches on the file type in ch->qid */
       +{
       +        Conv *c;
       +        Proto *x;
       +        char *p;
       +        Cmdbuf *cb;
       +        uchar ia[IPaddrlen], ma[IPaddrlen];
       +        Fs *f;
       +        char *a;
       +        ulong offset = off;        /* only the Qndb case uses the offset */
       +
       +        a = v;
       +        f = ipfs[ch->dev];
       +
       +        switch(TYPE(ch->qid)){
       +        default:
       +                error(Eperm);        /* relies on error() not returning; otherwise this would fall into Qdata */
       +        case Qdata:
       +                x = f->p[PROTO(ch->qid)];
       +                c = x->conv[CONV(ch->qid)];
       +
       +                if(c->wq == nil)        /* no write queue on this conversation */
       +                        error(Eperm);
       +
       +                qwrite(c->wq, a, n);
       +                break;
       +        case Qarp:
       +                return arpwrite(f, a, n);
       +        case Qiproute:
       +                return routewrite(f, ch, a, n);
       +        case Qlog:
       +                netlogctl(f, a, n);
       +                return n;
       +        case Qndb:
       +                return ndbwrite(f, a, offset, n);
       +                break;        /* not reached */
       +        case Qctl:
       +                x = f->p[PROTO(ch->qid)];
       +                c = x->conv[CONV(ch->qid)];
       +                cb = parsecmd(a, n);
       +
       +                QLOCK(c);        /* the ctl handlers below run with c locked */
       +                if(waserror()) {        /* any error() below lands here: unlock, free cb, pass it on */
       +                        QUNLOCK(c);
       +                        free(cb);
       +                        nexterror();
       +                }
       +                if(cb->nf < 1)
       +                        error("short control request");
       +                if(strcmp(cb->f[0], "connect") == 0)
       +                        connectctlmsg(x, c, cb);
       +                else if(strcmp(cb->f[0], "announce") == 0)
       +                        announcectlmsg(x, c, cb);
       +                else if(strcmp(cb->f[0], "bind") == 0)
       +                        bindctlmsg(x, c, cb);
       +                else if(strcmp(cb->f[0], "ttl") == 0)
       +                        ttlctlmsg(c, cb);
       +                else if(strcmp(cb->f[0], "tos") == 0)
       +                        tosctlmsg(c, cb);
       +                else if(strcmp(cb->f[0], "ignoreadvice") == 0)
       +                        c->ignoreadvice = 1;
       +                else if(strcmp(cb->f[0], "addmulti") == 0){
       +                        if(cb->nf < 2)
       +                                error("addmulti needs interface address");
       +                        if(cb->nf == 2){        /* one argument: interface address; the group is c->raddr */
       +                                if(!ipismulticast(c->raddr))
       +                                        error("addmulti for a non multicast address");
       +                                if (parseip(ia, cb->f[1]) == -1)
       +                                        error(Ebadip);
       +                                ipifcaddmulti(c, c->raddr, ia);
       +                        } else {        /* two or more: interface address, then multicast address */
       +                                if (parseip(ia, cb->f[1]) == -1 ||
       +                                    parseip(ma, cb->f[2]) == -1)
       +                                        error(Ebadip);
       +                                if(!ipismulticast(ma))
       +                                        error("addmulti for a non multicast address");
       +                                ipifcaddmulti(c, ma, ia);
       +                        }
       +                } else if(strcmp(cb->f[0], "remmulti") == 0){
       +                        if(cb->nf < 2)
       +                                error("remmulti needs interface address");
       +                        if(!ipismulticast(c->raddr))
       +                                error("remmulti for a non multicast address");
       +                        if (parseip(ia, cb->f[1]) == -1)
       +                                error(Ebadip);
       +                        ipifcremmulti(c, c->raddr, ia);
       +                } else if(strcmp(cb->f[0], "maxfragsize") == 0){
       +                        if(cb->nf < 2)
       +                                error("maxfragsize needs size");
       +
       +                        c->maxfragsize = (int)strtol(cb->f[1], nil, 0);        /* base 0: decimal, octal or hex */
       +                        
       +                } else if(x->ctl != nil) {        /* anything else goes to the protocol's own ctl routine */
       +                        p = x->ctl(c, cb->f, cb->nf);
       +                        if(p != nil)
       +                                error(p);
       +                } else
       +                        error("unknown control request");
       +                QUNLOCK(c);
       +                free(cb);
       +                poperror();
       +        }
       +        return n;        /* Qdata and Qctl report the whole write as consumed */
       +}
       +
       +static long
       +ipbwrite(Chan* ch, Block* bp, ulong offset)
       +{
       +        Fs *fs;
       +        Proto *proto;
       +        Conv *conv;
       +        int len;
       +
       +        /* only Qdata files are written here; everything else is handed to devbwrite */
       +        if(TYPE(ch->qid) != Qdata)
       +                return devbwrite(ch, bp, offset);
       +
       +        fs = ipfs[ch->dev];
       +        proto = fs->p[PROTO(ch->qid)];
       +        conv = proto->conv[CONV(ch->qid)];
       +        if(conv->wq == nil)
       +                error(Eperm);
       +
       +        /* a chain of blocks is collapsed into one with concatblock */
       +        /* the length is taken before the block is passed to qbwrite */
       +        if(bp->next != nil)
       +                bp = concatblock(bp);
       +        len = BLEN(bp);
       +        qbwrite(conv->wq, bp);
       +        return len;
       +}
       +
       +Dev ipdevtab = {        /* Dev entry for the ip device; positional initializer, order must match the Dev declaration */
       +        'I',
       +        "ip",
       +
       +        ipreset,
       +        devinit,
       +        devshutdown,
       +        ipattach,
       +        ipwalk,
       +        ipstat,
       +        ipopen,
       +        ipcreate,
       +        ipclose,
       +        ipread,
       +        ipbread,        /* Block read: handles Qdata itself, else devbread */
       +        ipwrite,
       +        ipbwrite,        /* Block write: handles Qdata itself, else devbwrite */
       +        ipremove,
       +        ipwstat,
       +};
       +
       +int
       +Fsproto(Fs *f, Proto *p)
       +{
       +        int slot;
       +
       +        slot = f->np;
       +        if(slot >= Maxproto)
       +                return -1;
       +        p->f = f;
       +        /* a protocol with an IP protocol number also gets a t2p[] entry, which must be free */
       +        if(p->ipproto > 0){
       +                if(f->t2p[p->ipproto] != nil)
       +                        return -1;
       +                f->t2p[p->ipproto] = p;
       +        }
       +        p->conv = malloc(sizeof(Conv*)*(p->nc+1));
       +        if(p->conv == nil)
       +                panic("Fsproto");
       +        p->qid.type = QTDIR;
       +        p->qid.path = QID(slot, 0, Qprotodir);
       +        p->x = slot;
       +        p->nextrport = 600;
       +        f->p[slot] = p;
       +        f->np = slot+1;
       +        return 0;
       +}
       +
       +/*
       + *  return true if this protocol is
       + *  built in, i.e. f->t2p has an entry for proto
       + */
       +int
       +Fsbuiltinproto(Fs* f, uchar proto)
       +{
       +        return f->t2p[proto] != nil;
       +}
       +
       +/*
       + *  called with protocol locked; returns a Conv of p reset for user, or nil if none is available
       + */
       +Conv*
       +Fsprotoclone(Proto *p, char *user)
       +{
       +        Conv *c, **pp, **ep;
       +
       +retry:
       +        c = nil;
       +        ep = &p->conv[p->nc];
       +        for(pp = p->conv; pp < ep; pp++) {
       +                c = *pp;
       +                if(c == nil){        /* first empty slot: allocate a new Conv and install it here */
       +                        c = malloc(sizeof(Conv));
       +                        if(c == nil)
       +                                error(Enomem);
       +                        QLOCK(c);        /* stays locked until the QUNLOCK at the end */
       +                        c->p = p;
       +                        c->x = pp - p->conv;        /* index of this Conv in p->conv */
       +                        if(p->ptclsize != 0){
       +                                c->ptcl = malloc(p->ptclsize);
       +                                if(c->ptcl == nil) {
       +                                        free(c);
       +                                        error(Enomem);
       +                                }
       +                        }
       +                        *pp = c;
       +                        p->ac++;
       +                        c->eq = qopen(1024, Qmsg, 0, 0);
       +                        (*p->create)(c);        /* protocol-specific setup of the new Conv */
       +                        break;
       +                }
       +                if(CANQLOCK(c)){        /* a Conv that cannot be locked right now is skipped */
       +                        /*
       +                         *  make sure both processes and protocol
       +                         *  are done with this Conv
       +                         */
       +                        if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0))
       +                                break;        /* reuse it; c is still locked */
       +
       +                        QUNLOCK(c);
       +                }
       +        }
       +        if(pp >= ep) {        /* the scan ran off the end without choosing a Conv */
       +                if(p->gc != nil && (*p->gc)(p))        /* rescan if the protocol's gc hook returns nonzero */
       +                        goto retry;
       +                return nil;
       +        }
       +
       +        c->inuse = 1;        /* from here on c is locked, whichever path chose it */
       +        kstrdup(&c->owner, user);
       +        c->perm = 0660;
       +        c->state = Idle;
       +        ipmove(c->laddr, IPnoaddr);
       +        ipmove(c->raddr, IPnoaddr);
       +        c->r = nil;
       +        c->rgen = 0;
       +        c->lport = 0;
       +        c->rport = 0;
       +        c->restricted = 0;
       +        c->maxfragsize = 0;
       +        c->ttl = MAXTTL;
       +        qreopen(c->rq);
       +        qreopen(c->wq);
       +        qreopen(c->eq);
       +
       +        QUNLOCK(c);
       +        return c;
       +}
       +
       +int
       +Fsconnected(Conv* c, char* msg)        /* record msg (if any) in c->cerr, finish a pending announce/connect, wake sleepers on c->cr */
       +{
       +        if(msg != nil && *msg != '\0'){
       +                strncpy(c->cerr, msg, ERRMAX-1);
       +                c->cerr[ERRMAX-1] = '\0';        /* strncpy leaves a truncated copy unterminated; assumes cerr holds ERRMAX bytes, as the bound implies */
       +        }
       +        switch(c->state){        /* any other state is left unchanged */
       +        case Announcing:
       +                c->state = Announced;
       +                break;
       +
       +        case Connecting:
       +                c->state = Connected;
       +                break;
       +        }
       +
       +        wakeup(&c->cr);        /* connectctlmsg/announcectlmsg sleep on c->cr */
       +        return 0;
       +}
       +
       +Proto*
       +Fsrcvpcol(Fs* f, uchar proto)
       +{
       +        /* when f->ipmux is set it is returned for every protocol number */
       +        if(f->ipmux == nil)
       +                return f->t2p[proto];
       +        return f->ipmux;
       +}
       +
       +Proto*
       +Fsrcvpcolx(Fs *f, uchar proto)
       +{
       +        return f->t2p[proto];        /* same table lookup as Fsrcvpcol, but f->ipmux is never consulted */
       +}
       +
       +/*
       + *  called with protocol locked
       + *  appends a new Connected conversation to c's incall list and wakes c->listenr
       + */
       +Conv*
       +Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version)
       +{
       +        Conv *call, **tail;
       +        int pending;
       +
       +        QLOCK(c);
       +        /* walk to the end of the incall list, counting what is already queued */
       +        pending = 0;
       +        tail = &c->incall;
       +        while(*tail != nil){
       +                pending++;
       +                tail = &(*tail)->next;
       +        }
       +
       +        /* give up when Maxincall calls are queued or no conversation can be cloned */
       +        call = nil;
       +        if(pending < Maxincall)
       +                call = Fsprotoclone(c->p, network);
       +        if(call == nil){
       +                QUNLOCK(c);
       +                return nil;
       +        }
       +
       +        ipmove(call->raddr, raddr);
       +        call->rport = rport;
       +        ipmove(call->laddr, laddr);
       +        call->lport = lport;
       +        call->next = nil;
       +        *tail = call;
       +        call->state = Connected;
       +        call->ipversion = version;
       +        QUNLOCK(c);
       +        wakeup(&c->listenr);
       +        return call;
       +}
       +
       +long
       +ndbwrite(Fs *f, char *a, ulong off, int n)
       +{
       +        /* off may not lie past the current text, and the result must fit with its NUL */
       +        if(off > strlen(f->ndb) || off+n >= sizeof(f->ndb))
       +                error(Eio);
       +
       +        memmove(f->ndb+off, a, n);
       +        f->ndb[off+n] = 0;
       +        f->ndbvers++;
       +        f->ndbmtime = seconds();
       +        return n;
       +}
       +
       +ulong
       +scalednconv(void)
       +{
       +        /* Nchans*4 on a cpuserver where conf.npage*BY2PG reaches 128*MB, else Nchans */
       +        return (cpuserver && conf.npage*BY2PG >= 128*MB)
       +                ? Nchans*4 : Nchans;
       +}
 (DIR) diff --git a/src/9vx/a/ip/eipconvtest.c b/src/9vx/a/ip/eipconvtest.c
       @@ -0,0 +1,152 @@
       +#include <u.h>
       +#include <libc.h>
       +
       +enum
       +{
       +        Isprefix= 16,        /* flag or'ed into prefixvals entries; eipconv masks it off to get the bit count */
       +};
       +
       +uchar prefixvals[256] =        /* indexed by a mask byte: number of leading one bits, | Isprefix if the byte is a run of ones then zeros */
       +{
       +[0x00] 0 | Isprefix,
       +[0x80] 1 | Isprefix,
       +[0xC0] 2 | Isprefix,
       +[0xE0] 3 | Isprefix,
       +[0xF0] 4 | Isprefix,
       +[0xF8] 5 | Isprefix,
       +[0xFC] 6 | Isprefix,
       +[0xFE] 7 | Isprefix,
       +[0xFF] 8 | Isprefix,
       +};
       +
       +uchar v4prefix[16] = {        /* eipconv compares only the first 12 bytes: ten zero bytes then ff ff */
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0xff, 0xff,
       +        0, 0, 0, 0
       +};
       +
       +void
       +hnputl(void *p, ulong v)
       +{
       +        uchar *a;
       +        int i;
       +
       +        /* write the low 32 bits of v into a[0..3], most significant byte first */
       +        a = p;
       +        for(i = 3; i >= 0; i--, v >>= 8)
       +                a[i] = v;
       +}
       +
       +int
       +eipconv(va_list *arg, Fconv *f)        /* formats one address argument according to f->chr: E, I, i, V or M */
       +{
       +        char buf[8*5];        /* 8 groups of 4 hex digits + 7 colons + NUL = 40 */
       +        static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
       +        static char *ifmt = "%d.%d.%d.%d";
       +        uchar *p, ip[16];
       +        ulong *lp;
       +        ushort s;
       +        int i, j, n, eln, eli;
       +
       +        switch(f->chr) {
       +        case 'E':                /* Ethernet address */
       +                p = va_arg(*arg, uchar*);
       +                sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
       +                break;
       +        case 'I':                /* Ip address */
       +                p = va_arg(*arg, uchar*);
       +common:                /* also entered by goto from the 'i' and 'M' cases, with p already set */
       +                if(memcmp(p, v4prefix, 12) == 0)        /* v4 prefix: print the last 4 bytes as a dotted quad */
       +                        sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
       +                else {
       +                        /* find longest elision: eli = byte offset of the zero run, eln = its length in bytes */
       +                        eln = eli = -1;
       +                        for(i = 0; i < 16; i += 2){
       +                                for(j = i; j < 16; j += 2)
       +                                        if(p[j] != 0 || p[j+1] != 0)
       +                                                break;
       +                                if(j > i && j - i > eln){        /* strictly longer only, so the earliest of equal runs wins */
       +                                        eli = i;
       +                                        eln = j - i;
       +                                }
       +                        }
       +
       +                        /* print with possible elision */
       +                        n = 0;
       +                        for(i = 0; i < 16; i += 2){
       +                                if(i == eli){
       +                                        n += sprint(buf+n, "::");
       +                                        i += eln;        /* skip the zero run */
       +                                        if(i >= 16)
       +                                                break;
       +                                } else if(i != 0)
       +                                        n += sprint(buf+n, ":");
       +                                s = (p[i]<<8) + p[i+1];
       +                                n += sprint(buf+n, "%ux", s);
       +                        }
       +                }
       +                break;
       +        case 'i':                /* v6 address as 4 longs */
       +                lp = va_arg(*arg, ulong*);
       +                for(i = 0; i < 4; i++)
       +                        hnputl(ip+4*i, *lp++);        /* repack into 16 bytes so the 'I' code can print it */
       +                p = ip;
       +                goto common;
       +        case 'V':                /* v4 ip address */
       +                p = va_arg(*arg, uchar*);
       +                sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
       +                break;
       +        case 'M':                /* ip mask */
       +                p = va_arg(*arg, uchar*);
       +
       +                /* look for a prefix mask: leading 0xff bytes, one partial byte, then zeros */
       +                for(i = 0; i < 16; i++)
       +                        if(p[i] != 0xff)
       +                                break;
       +                if(i < 16){
       +                        if((prefixvals[p[i]] & Isprefix) == 0)
       +                                goto common;        /* not a prefix: print it like an address */
       +                        for(j = i+1; j < 16; j++)
       +                                if(p[j] != 0)
       +                                        goto common;
       +                        n = 8*i + (prefixvals[p[i]] & ~Isprefix);
       +                } else
       +                        n = 8*16;
       +
       +                /* got one, use /xx format */
       +                sprint(buf, "/%d", n);
       +                break;
       +        default:
       +                strcpy(buf, "(eipconv)");
       +        }
       +        strconv(buf, f);
       +        return sizeof(uchar*);        /* presumably the size of the argument consumed - confirm against the fmtinstall contract */
       +}
       +
       +uchar testvec[11][16] =        /* sample 16-byte values; main prints each one with both %I and %M */
       +{
       + { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
       + { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
       + { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
       + { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
       + { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
       + { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
       + { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
       + { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
       + { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
       + { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
       + { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
       +};
       +
       +void
       +main(void)
       +{
       +        uchar (*v)[16];
       +
       +        fmtinstall('I', eipconv);
       +        fmtinstall('M', eipconv);
       +        for(v = testvec; v < testvec+11; v++)
       +                print("%I\n%M\n", *v, *v);
       +        exits(0);
       +}
 (DIR) diff --git a/src/9vx/a/ip/esp.c b/src/9vx/a/ip/esp.c
       @@ -0,0 +1,951 @@
       +/*
       + * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
       + *        currently only implements tunnel mode.
       + * TODO: update to match rfc4303.
       + */
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +
       +#include        "ip.h"
       +#include        "ipv6.h"
       +#include        "libsec.h"
       +
       +typedef struct Esphdr Esphdr;
       +typedef struct Esp4hdr Esp4hdr;
       +typedef struct Esp6hdr Esp6hdr;
       +typedef struct Esptail Esptail;
       +typedef struct Userhdr Userhdr;
       +typedef struct Esppriv Esppriv;
       +typedef struct Espcb Espcb;
       +typedef struct Algorithm Algorithm;
       +
       +enum
       +{
       +        IP_ESPPROTO        = 50,        /* IP v4 and v6 protocol number */
       +        Esp4hdrlen        = IP4HDR + 8,        /* IPv4 header plus the 8 bytes of espspi and espseq */
       +        Esp6hdrlen        = IP6HDR + 8,        /* IPv6 header plus the 8 bytes of espspi and espseq */
       +
       +        Esptaillen        = 2,        /* does not include pad or auth data */
       +        Userhdrlen        = 4,        /* user-visible header size - if enabled */
       +};
       +
       +struct Esphdr        /* the 8-byte ESP header; Esp4hdr and Esp6hdr end with the same two fields */
       +{
       +        uchar        espspi[4];        /* Security parameter index */
       +        uchar        espseq[4];        /* Sequence number */
       +};
       +
       +/*
       + * tunnel-mode layout:                new IP | ESP | orig IP | TCP/UDP | user data.
       + * transport-mode layout is:        orig IP | ESP | TCP/UDP | user data.
       + */
       +struct Esp4hdr
       +{
       +        /* ipv4 header */
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        length[2];        /* packet length */
       +        uchar        id[2];                /* Identification */
       +        uchar        frag[2];        /* Fragment information */
       +        uchar        Unused;                /* byte 8, where an IPv4 header keeps its TTL */
       +        uchar        espproto;        /* Protocol */
       +        uchar        espplen[2];        /* Header plus data length; bytes 10-11, where an IPv4 header keeps its checksum */
       +        uchar        espsrc[4];        /* Ip source */
       +        uchar        espdst[4];        /* Ip destination */
       +
       +        /* Esphdr; */
       +        uchar        espspi[4];        /* Security parameter index */
       +        uchar        espseq[4];        /* Sequence number */
       +};
       +
       +/* tunnel-mode layout: IPv6 header fields followed by the Esphdr fields */
       +struct Esp6hdr
       +{
       +        /* Ip6hdr; */
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40 */
       +        uchar        proto;                /* next header type */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];
       +        uchar        dst[IPaddrlen];
       +
       +        /* Esphdr; */
       +        uchar        espspi[4];        /* Security parameter index */
       +        uchar        espseq[4];        /* Sequence number */
       +};
       +
       +/*
       + * Trailer that espkick writes after the padded payload and that
       + * espiput reads back after decryption.
       + */
       +struct Esptail
       +{
       +        uchar        pad;                /* number of padding bytes preceding the tail */
       +        uchar        nexthdr;        /* protocol number of the protected payload */
       +};
       +
       +/* header as seen by the user */
       +/*
       + * Optional header exchanged with the user when Espcb.header is set:
       + * espkick strips it from outgoing data, espiput prepends it to
       + * incoming data.
       + */
       +struct Userhdr
       +{
       +        uchar        nexthdr;        /* next protocol */
       +        uchar        unused[3];        /* zeroed by espiput */
       +};
       +
       +/* Per-protocol counters, allocated in espinit and printed by espstats. */
       +struct Esppriv
       +{
       +        ulong        in;                /* first value reported by espstats */
       +        ulong        inerrors;        /* second value reported by espstats */
       +};
       +
       +/*
       + *  protocol specific part of Conv
       + */
       +struct Espcb
       +{
       +        int        incoming;        /* nonzero: receiving side; convlookup matches only these */
       +        int        header;                /* nonzero: user data carries a Userhdr ("header" ctl) */
       +        ulong        spi;                /* security parameter index of this conversation */
       +        ulong        seq;                /* last seq sent */
       +        ulong        window;                /* for replay attacks */
       +        char        *espalg;        /* name of the current cipher algorithm */
       +        void        *espstate;        /* other state for esp */
       +        int        espivlen;        /* in bytes */
       +        int        espblklen;        /* cipher block length; drives padding alignment */
       +        int        (*cipher)(Espcb*, uchar *buf, int len);        /* returns nonzero on success */
       +        char        *ahalg;                /* name of the current authentication algorithm */
       +        void        *ahstate;        /* other state for ah (the hmac key) */
       +        int        ahlen;                /* auth data length in bytes */
       +        int        ahblklen;        /* must divide the padding alignment (see espkick) */
       +        int        (*auth)(Espcb*, uchar *buf, int len, uchar *hash);        /* returns nonzero if hash matched */
       +};
       +
       +/*
       + * One entry of the espalg/ahalg tables searched by setalg.
       + * A nil name terminates a table.
       + */
       +struct Algorithm
       +{
       +        char         *name;                /* name matched against the ctl argument */
       +        int        keylen;                /* in bits */
       +        void        (*init)(Espcb*, char* name, uchar *key, int keylen);        /* installs the algorithm in an Espcb */
       +};
       +
       +/* forward declarations for helpers defined later in this file */
       +static        Conv* convlookup(Proto *esp, ulong spi);
       +static        char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
       +static        void espkick(void *x);
       +
       +/* cipher initialisers referenced by the espalg table */
       +static        void nullespinit(Espcb*, char*, uchar *key, int keylen);
       +static        void desespinit(Espcb *ecb, char *name, uchar *k, int n);
       +
       +/* authenticator initialisers referenced by the ahalg table */
       +static        void nullahinit(Espcb*, char*, uchar *key, int keylen);
       +static        void shaahinit(Espcb*, char*, uchar *key, int keylen);
       +static        void md5ahinit(Espcb*, char*, uchar *key, int keylen);
       +
       +/*
       + * Ciphers selectable with the "esp" ctl message: name, key length
       + * in bits, initialiser.  Commented-out entries are not available.
       + * The table ends with a nil name.
       + */
       +static Algorithm espalg[] =
       +{
       +        "null",                        0,        nullespinit,
       +//        "des3_cbc",                192,        des3espinit,        /* rfc2451 */
       +//        "aes_128_cbc",                128,        aescbcespinit,        /* rfc3602 */
       +//        "aes_ctr",                128,        aesctrespinit,        /* rfc3686 */
       +        "des_56_cbc",                64,        desespinit,        /* rfc2405, deprecated */
       +//        "rc4_128",                128,        rc4espinit,        /* gone in rfc4305 */
       +        nil,                        0,        nil,
       +};
       +
       +/*
       + * Authenticators selectable with the "ah" ctl message: name, key
       + * length in bits, initialiser.  The table ends with a nil name.
       + */
       +static Algorithm ahalg[] =
       +{
       +        "null",                        0,        nullahinit,
       +        "hmac_sha1_96",                128,        shaahinit,        /* rfc2404 */
       +//        "aes_xcbc_mac_96",        128,        aesahinit,        /* rfc3566 */
       +        "hmac_md5_96",                128,        md5ahinit,        /* rfc2403 */
       +        nil,                        0,        nil,
       +};
       +
       +/*
       + * Connect a conversation to "addr!spi" (argv[1]).
       + *
       + * An spi of "*" makes this the incoming side: an spi not used by any
       + * other incoming conversation is picked at random and the write queue
       + * is hung up.  A decimal spi makes it the outgoing side and hangs up
       + * the read queue.  Either way the null cipher and null authenticator
       + * are installed.
       + *
       + * Fsconnected is always called with the result; the return value is
       + * nil on success or a static error string.
       + */
       +static char*
       +espconnect(Conv *c, char **argv, int argc)
       +{
       +        char *bang, *end, *err;
       +        ulong spi;
       +        Espcb *ecb;
       +
       +        ecb = (Espcb*)c->ptcl;
       +        err = nil;
       +
       +        if(argc != 2){
       +                err = "bad args to connect";
       +                goto out;
       +        }
       +
       +        /* split "addr!spi" in place */
       +        bang = strchr(argv[1], '!');
       +        if(bang == nil){
       +                err = "malformed address";
       +                goto out;
       +        }
       +        *bang++ = 0;
       +
       +        parseip(c->raddr, argv[1]);
       +        findlocalip(c->p->f, c->laddr, c->raddr);
       +        ecb->incoming = 0;
       +        ecb->seq = 0;
       +
       +        if(strcmp(bang, "*") != 0){
       +                /* explicit spi: outgoing side */
       +                spi = strtoul(bang, &end, 10);
       +                if(end == bang){
       +                        err = "malformed address";
       +                        goto out;
       +                }
       +                ecb->spi = spi;
       +                qhangup(c->rq, nil);
       +        }else{
       +                /* wildcard: draw random spis until one is unused */
       +                QLOCK(c->p);
       +                do
       +                        spi = nrand(1<<16) + 256;
       +                while(convlookup(c->p, spi) != nil);
       +                QUNLOCK(c->p);
       +                ecb->spi = spi;
       +                ecb->incoming = 1;
       +                qhangup(c->wq, nil);
       +        }
       +        nullespinit(ecb, "null", nil, 0);
       +        nullahinit(ecb, "null", nil, 0);
       +
       +out:
       +        Fsconnected(c, err);
       +        return err;
       +}
       +
       +
       +/*
       + * Write the conversation state into state[0..n): "Open\n" while the
       + * conversation is in use, "Closed\n" otherwise.  Returns what
       + * snprint returns.
       + */
       +static int
       +espstate(Conv *c, char *state, int n)
       +{
       +        char *s;
       +
       +        s = "Closed\n";
       +        if(c->inuse)
       +                s = "Open\n";
       +        return snprint(state, n, "%s", s);
       +}
       +
       +/*
       + * Allocate the read and write queues of a new conversation.
       + * Writes to the write queue kick espkick with the conversation.
       + */
       +static void
       +espcreate(Conv *c)
       +{
       +        int limit;
       +
       +        limit = 64*1024;
       +        c->rq = qopen(limit, Qmsg, 0, 0);
       +        c->wq = qopen(limit, Qkick, espkick, c);
       +}
       +
       +/*
       + * Tear down a conversation: close its queues, clear both addresses,
       + * release cipher/authenticator state and zero the control block.
       + */
       +static void
       +espclose(Conv *c)
       +{
       +        Espcb *cb;
       +
       +        cb = (Espcb*)c->ptcl;
       +
       +        qclose(c->rq);
       +        qclose(c->wq);
       +        qclose(c->eq);
       +
       +        ipmove(c->laddr, IPnoaddr);
       +        ipmove(c->raddr, IPnoaddr);
       +
       +        free(cb->espstate);
       +        free(cb->ahstate);
       +        memset(cb, 0, sizeof *cb);
       +}
       +
       +/*
       + * Decide which IP version a conversation uses.  It is V4 when the
       + * remote address is unset (IPnoaddr) or when both addresses start
       + * with the v4-in-v6 prefix; otherwise it is V6.
       + */
       +static int
       +ipvers(Conv *c)
       +{
       +        int v4remote, v4local;
       +
       +        if(ipcmp(c->raddr, IPnoaddr) == 0)
       +                return V4;
       +
       +        v4remote = memcmp(c->raddr, v4prefix, IPv4off) == 0;
       +        v4local = memcmp(c->laddr, v4prefix, IPv4off) == 0;
       +        if(v4remote && v4local)
       +                return V4;
       +        return V6;
       +}
       +
       +/*
       + * Write-queue kick routine (installed by espcreate): take one block
       + * from the conversation's write queue, wrap it in an ESP packet and
       + * hand it to ipoput4 or ipoput6.
       + *
       + * Steps: strip the optional Userhdr, prepend room for the IP+ESP
       + * header and IV, append padding, Esptail and room for the auth
       + * data, run the cipher over payload+pad+tail, fill in the header,
       + * then run the authenticator, which stores its result in the space
       + * reserved at the end.  The conversation is locked while the block
       + * is being built and unlocked before the packet is sent.
       + */
       +static void
       +espkick(void *x)
       +{
       +        Conv *c = x;
       +        Esp4hdr *eh4;
       +        Esp6hdr *eh6;
       +        Esptail *et;
       +        Userhdr *uh;
       +        Espcb *ecb;
       +        Block *bp;
       +        int nexthdr, payload, pad, align, version, hdrlen, iphdrlen;
       +        uchar *auth;
       +
       +        version = ipvers(c);
       +        iphdrlen = version == V4? IP4HDR: IP6HDR;
       +        hdrlen =   version == V4? Esp4hdrlen: Esp6hdrlen;
       +
       +        bp = qget(c->wq);
       +        if(bp == nil)
       +                return;
       +
       +        QLOCK(c);
       +        ecb = c->ptcl;
       +
       +        if(ecb->header) {
       +                /* make sure the message has a User header */
       +                bp = pullupblock(bp, Userhdrlen);
       +                if(bp == nil) {
       +                        QUNLOCK(c);
       +                        return;
       +                }
       +                uh = (Userhdr*)bp->rp;
       +                nexthdr = uh->nexthdr;
       +                bp->rp += Userhdrlen;
       +        } else {
       +                nexthdr = 0;        /* what should this be? */
       +        }
       +
       +        /* payload counts the IV plus the user data */
       +        payload = BLEN(bp) + ecb->espivlen;
       +
       +        /* Make space to fit ip header */
       +        bp = padblock(bp, hdrlen + ecb->espivlen);
       +
       +        /* pad to the larger of 4 bytes and the cipher block length */
       +        align = 4;
       +        if(ecb->espblklen > align)
       +                align = ecb->espblklen;
       +        if(align % ecb->ahblklen != 0)
       +                panic("espkick: ahblklen is important after all");
       +        /* smallest pad making payload+pad+Esptaillen a multiple of align */
       +        pad = (align-1) - (payload + Esptaillen-1)%align;
       +
       +        /*
       +         * Make space for tail
       +         * this is done by calling padblock with a negative size
       +         * Padblock does not change bp->wp!
       +         */
       +        bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
       +        bp->wp += pad+Esptaillen+ecb->ahlen;
       +
       +        /* both header views alias the start of the block; only one is used */
       +        eh4 = (Esp4hdr *)bp->rp;
       +        eh6 = (Esp6hdr *)bp->rp;
       +        et = (Esptail*)(bp->rp + hdrlen + payload + pad);
       +
       +        /* fill in tail */
       +        et->pad = pad;
       +        et->nexthdr = nexthdr;
       +
       +        /* cipher covers IV, data, padding and tail; auth data follows it */
       +        ecb->cipher(ecb, bp->rp + hdrlen, payload + pad + Esptaillen);
       +        auth = bp->rp + hdrlen + payload + pad + Esptaillen;
       +
       +        /* fill in head */
       +        if (version == V4) {
       +                eh4->vihl = IP_VER4;
       +                hnputl(eh4->espspi, ecb->spi);
       +                hnputl(eh4->espseq, ++ecb->seq);
       +                v6tov4(eh4->espsrc, c->laddr);
       +                v6tov4(eh4->espdst, c->raddr);
       +                eh4->espproto = IP_ESPPROTO;
       +                eh4->frag[0] = 0;
       +                eh4->frag[1] = 0;
       +        } else {
       +                eh6->vcf[0] = IP_VER6;
       +                hnputl(eh6->espspi, ecb->spi);
       +                hnputl(eh6->espseq, ++ecb->seq);
       +                ipmove(eh6->src, c->laddr);
       +                ipmove(eh6->dst, c->raddr);
       +                eh6->proto = IP_ESPPROTO;
       +        }
       +
       +        /* authenticate from the ESP header (just past the IP header) through the tail */
       +        ecb->auth(ecb, bp->rp + iphdrlen, (hdrlen - iphdrlen) +
       +                payload + pad + Esptaillen, auth);
       +
       +        QUNLOCK(c);
       +        /* print("esp: pass down: %uld\n", BLEN(bp)); */
       +        if (version == V4)
       +                ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
       +        else
       +                ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
       +}
       +
       +/*
       + * Receive routine (installed as esp->rcv): process one incoming ESP
       + * packet.
       + *
       + * The IP version is taken from the first header byte, the SPI and
       + * addresses from the matching header layout.  The packet goes to the
       + * incoming conversation with that SPI; with no match, icmpnoconv is
       + * called and the packet dropped.  The authenticator is checked over
       + * everything from the SPI up to the auth data, the payload is
       + * decrypted in place, and header, IV, padding, tail and auth data
       + * are trimmed off.  If the conversation wants a user header, one is
       + * built in front of the data.  The result is passed to the read
       + * queue unless that queue is full.
       + *
       + * Every failure path logs, frees the block and returns.
       + */
       +void
       +espiput(Proto *esp, Ipifc* _, Block *bp)
       +{
       +        Esp4hdr *eh4;
       +        Esp6hdr *eh6;
       +        Esptail *et;
       +        Userhdr *uh;
       +        Conv *c;
       +        Espcb *ecb;
       +        uchar raddr[IPaddrlen], laddr[IPaddrlen];
       +        Fs *f;
       +        uchar *auth, *espspi;
       +        ulong spi;
       +        int payload, nexthdr, version, hdrlen;
       +
       +        f = esp->f;
       +        /* NOTE(review): this pullup only runs when bp is nil or empty - confirm that is intended */
       +        if (bp == nil || BLEN(bp) == 0) {
       +                /* get enough to identify the IP version */
       +                bp = pullupblock(bp, IP4HDR);
       +                if(bp == nil) {
       +                        netlog(f, Logesp, "esp: short packet\n");
       +                        return;
       +                }
       +        }
       +        eh4 = (Esp4hdr*)bp->rp;
       +        version = ((eh4->vihl & 0xf0) == IP_VER4? V4: V6);
       +        hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
       +
       +        bp = pullupblock(bp, hdrlen + Esptaillen);
       +        if(bp == nil) {
       +                netlog(f, Logesp, "esp: short packet\n");
       +                return;
       +        }
       +
       +        /* pull spi and addresses out of the version-specific header */
       +        if (version == V4) {
       +                eh4 = (Esp4hdr*)bp->rp;
       +                spi = nhgetl(eh4->espspi);
       +                v4tov6(raddr, eh4->espsrc);
       +                v4tov6(laddr, eh4->espdst);
       +        } else {
       +                eh6 = (Esp6hdr*)bp->rp;
       +                spi = nhgetl(eh6->espspi);
       +                ipmove(raddr, eh6->src);
       +                ipmove(laddr, eh6->dst);
       +        }
       +
       +        QLOCK(esp);
       +        /* Look for a conversation structure for this port */
       +        c = convlookup(esp, spi);
       +        if(c == nil) {
       +                QUNLOCK(esp);
       +                netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
       +                        laddr, spi);
       +                icmpnoconv(f, bp);
       +                freeblist(bp);
       +                return;
       +        }
       +
       +        /* take the conversation lock before dropping the protocol lock */
       +        QLOCK(c);
       +        QUNLOCK(esp);
       +
       +        ecb = c->ptcl;
       +        /* too hard to do decryption/authentication on block lists */
       +        if(bp->next)
       +                bp = concatblock(bp);
       +
       +        if(BLEN(bp) < hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
       +                QUNLOCK(c);
       +                netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
       +                        laddr, spi);
       +                freeb(bp);
       +                return;
       +        }
       +
       +        /* auth data is the last ahlen bytes; it covers spi .. start of auth data */
       +        auth = bp->wp - ecb->ahlen;
       +        espspi = version == V4? ((Esp4hdr*)bp->rp)->espspi:
       +                                ((Esp6hdr*)bp->rp)->espspi;
       +        if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
       +                QUNLOCK(c);
       +print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
       +                netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
       +                        laddr, spi);
       +                freeb(bp);
       +                return;
       +        }
       +
       +        /* what remains between header and auth data must be whole cipher blocks */
       +        payload = BLEN(bp) - hdrlen - ecb->ahlen;
       +        if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
       +                QUNLOCK(c);
       +                netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n",
       +                        raddr, laddr, spi, payload, BLEN(bp));
       +                freeb(bp);
       +                return;
       +        }
       +        if(!ecb->cipher(ecb, bp->rp + hdrlen, payload)) {
       +                QUNLOCK(c);
       +print("esp: cipher failed %I -> %I!%ld: %s\n", raddr, laddr, spi, up->errstr);
       +                netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %s\n", raddr,
       +                        laddr, spi, up->errstr);
       +                freeb(bp);
       +                return;
       +        }
       +
       +        /* the tail sits at the end of the decrypted region; drop tail, pad and IV */
       +        payload -= Esptaillen;
       +        et = (Esptail*)(bp->rp + hdrlen + payload);
       +        payload -= et->pad + ecb->espivlen;
       +        nexthdr = et->nexthdr;
       +        if(payload <= 0) {
       +                QUNLOCK(c);
       +                netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n",
       +                        raddr, laddr, spi);
       +                freeb(bp);
       +                return;
       +        }
       +
       +        /* trim packet */
       +        bp->rp += hdrlen + ecb->espivlen;
       +        bp->wp = bp->rp + payload;
       +        if(ecb->header) {
       +                /* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
       +                bp->rp -= Userhdrlen;
       +                uh = (Userhdr*)bp->rp;
       +                memset(uh, 0, Userhdrlen);
       +                uh->nexthdr = nexthdr;
       +        }
       +
       +        if(qfull(c->rq)){
       +                netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
       +                        laddr, spi);
       +                freeblist(bp);
       +        }else {
       +//                print("esp: pass up: %uld\n", BLEN(bp));
       +                qpass(c->rq, bp);
       +        }
       +
       +        QUNLOCK(c);
       +}
       +
       +/*
       + * Handle a ctl message for a conversation.  f[0] selects the request:
       + *        esp alg key        install a cipher (see setalg)
       + *        ah alg key        install an authenticator (see setalg)
       + *        header                user data carries a Userhdr
       + *        noheader        user data is bare
       + * Returns nil on success or a static error string.
       + */
       +char*
       +espctl(Conv *c, char **f, int n)
       +{
       +        Espcb *ecb;
       +        char *cmd;
       +
       +        ecb = c->ptcl;
       +        cmd = f[0];
       +
       +        if(strcmp(cmd, "esp") == 0)
       +                return setalg(ecb, f, n, espalg);
       +        if(strcmp(cmd, "ah") == 0)
       +                return setalg(ecb, f, n, ahalg);
       +        if(strcmp(cmd, "header") == 0){
       +                ecb->header = 1;
       +                return nil;
       +        }
       +        if(strcmp(cmd, "noheader") == 0){
       +                ecb->header = 0;
       +                return nil;
       +        }
       +        return "unknown control request";
       +}
       +
       +/*
       + * Advise routine (installed as esp->advise): bp holds the start of
       + * a packet this protocol is being told about.  If an incoming
       + * conversation has the packet's SPI, hang up both of its queues
       + * with msg.  The block is always freed.
       + *
       + * NOTE(review): the block is always read through the IPv4 layout
       + * (Esp4hdr) - confirm whether IPv6 packets can reach this routine.
       + */
       +void
       +espadvise(Proto *esp, Block *bp, char *msg)
       +{
       +        Esp4hdr *h;
       +        Conv *c;
       +        ulong spi;
       +
       +        h = (Esp4hdr*)(bp->rp);
       +
       +        /*
       +         * espspi is a 4-byte field and Espcb.spi holds the full
       +         * value, so read all 32 bits; a 16-bit read would look at
       +         * the top half only and never match.
       +         */
       +        spi = nhgetl(h->espspi);
       +        QLOCK(esp);
       +        c = convlookup(esp, spi);
       +        if(c != nil) {
       +                qhangup(c->rq, msg);
       +                qhangup(c->wq, msg);
       +        }
       +        QUNLOCK(esp);
       +        freeblist(bp);
       +}
       +
       +/*
       + * Format the protocol counters (in, inerrors) into buf as one line.
       + * Returns what snprint returns.
       + */
       +int
       +espstats(Proto *esp, char *buf, int len)
       +{
       +        Esppriv *priv;
       +
       +        priv = esp->priv;
       +        return snprint(buf, len, "%lud %lud\n", priv->in, priv->inerrors);
       +}
       +
       +/*
       + * Format the local end of a conversation into buf.  The SPI is
       + * shown with the local address on the incoming side only.
       + * Returns what snprint returns.
       + */
       +static int
       +esplocal(Conv *c, char *buf, int len)
       +{
       +        Espcb *cb;
       +        int n;
       +
       +        cb = c->ptcl;
       +
       +        QLOCK(c);
       +        if(!cb->incoming)
       +                n = snprint(buf, len, "%I\n", c->laddr);
       +        else
       +                n = snprint(buf, len, "%I!%uld\n", c->laddr, cb->spi);
       +        QUNLOCK(c);
       +
       +        return n;
       +}
       +
       +/*
       + * Format the remote end of a conversation into buf.  The SPI is
       + * shown with the remote address on the outgoing side only.
       + * Returns what snprint returns.
       + */
       +static int
       +espremote(Conv *c, char *buf, int len)
       +{
       +        Espcb *cb;
       +        int n;
       +
       +        cb = c->ptcl;
       +
       +        QLOCK(c);
       +        if(!cb->incoming)
       +                n = snprint(buf, len, "%I!%uld\n", c->raddr, cb->spi);
       +        else
       +                n = snprint(buf, len, "%I\n", c->raddr);
       +        QUNLOCK(c);
       +
       +        return n;
       +}
       +
       +/*
       + * Find the incoming conversation with the given spi by walking the
       + * nil-terminated esp->conv array.  Returns nil if there is none.
       + * Callers in this file hold the protocol lock around the call.
       + */
       +static        Conv*
       +convlookup(Proto *esp, ulong spi)
       +{
       +        Conv **pp;
       +        Espcb *cb;
       +
       +        pp = esp->conv;
       +        while(*pp != nil){
       +                cb = (*pp)->ptcl;
       +                if(cb->incoming && cb->spi == spi)
       +                        return *pp;
       +                pp++;
       +        }
       +        return nil;
       +}
       +
       +/*
       + * Install the algorithm named f[1] from table alg into ecb, keyed
       + * with the hex string f[2].
       + *
       + * f[2] is converted in place from hex characters to nibble values
       + * and then packed into a key of (keylen+7)/8 bytes.  Digits are
       + * consumed from the END of the string: the last digit is the low
       + * nibble of key[0], the one before it the high nibble, and so on.
       + * Missing digits leave the remaining key bytes zero; surplus leading
       + * digits are ignored.
       + *
       + * Returns nil on success or a static error string.
       + */
       +static char *
       +setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
       +{
       +        uchar *key;
       +        int c, i, nbyte, nchar;
       +
       +        if(n < 2)
       +                return "bad format";
       +        for(; alg->name; alg++)
       +                if(strcmp(f[1], alg->name) == 0)
       +                        break;
       +        if(alg->name == nil)
       +                return "unknown algorithm";
       +
       +        if(n != 3)
       +                return "bad format";
       +        nbyte = (alg->keylen + 7) >> 3;
       +        nchar = strlen(f[2]);
       +        /* hex characters -> nibble values, in place */
       +        for(i=0; i<nchar; i++) {
       +                c = f[2][i];
       +                if(c >= '0' && c <= '9')
       +                        f[2][i] -= '0';
       +                else if(c >= 'a' && c <= 'f')
       +                        f[2][i] -= 'a'-10;
       +                else if(c >= 'A' && c <= 'F')
       +                        f[2][i] -= 'A'-10;
       +                else
       +                        return "bad character in key";
       +        }
       +        /* the |= below relies on smalloc handing back zeroed memory */
       +        key = smalloc(nbyte);
       +        /*
       +         * Two nibbles per key byte, so nibble i lands in byte i/2.
       +         * The bound must be on i/2; bounding on i*2 would stop after
       +         * nbyte/2 digits and fill only a quarter of the key.
       +         */
       +        for(i=0; i<nchar && i/2<nbyte; i++) {
       +                c = f[2][nchar-i-1];
       +                if(i&1)
       +                        c <<= 4;
       +                key[i>>1] |= c;
       +        }
       +
       +        alg->init(ecb, alg->name, key, alg->keylen);
       +        free(key);
       +        return nil;
       +}
       +
       +/* Cipher for the "null" algorithm: leaves the buffer alone and reports success. */
       +static int
       +nullcipher(Espcb* _, uchar* __, int ___)
       +{
       +        return 1;
       +}
       +
       +/*
       + * Install the null cipher in ecb: no IV, block length 1.
       + * The key arguments are ignored.
       + */
       +static void
       +nullespinit(Espcb *ecb, char *name, uchar* _, int __)
       +{
       +        ecb->cipher = nullcipher;
       +        ecb->espivlen = 0;
       +        ecb->espblklen = 1;
       +        ecb->espalg = name;
       +}
       +
       +/* Authenticator for the "null" algorithm: checks nothing and reports success. */
       +static int
       +nullauth(Espcb* _, uchar* __, int ___, uchar* ____)
       +{
       +        return 1;
       +}
       +
       +/*
       + * Install the null authenticator in ecb: no auth data, block
       + * length 1.  The key arguments are ignored.
       + */
       +static void
       +nullahinit(Espcb *ecb, char *name, uchar* _, int __)
       +{
       +        ecb->auth = nullauth;
       +        ecb->ahlen = 0;
       +        ecb->ahblklen = 1;
       +        ecb->ahalg = name;
       +}
       +
       +/*
       + * Keyed SHA1 of t[0..tlen) into hash: an inner digest over
       + * (key ^ 0x36 pad) followed by the text, then an outer digest over
       + * (key ^ 0x5c pad) followed by the inner digest.  The pads are 64
       + * bytes, so klen must not exceed 64.
       + */
       +void
       +seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
       +{
       +        uchar inner[SHA1dlen];
       +        uchar ipad[65], opad[65];
       +        DigestState *ds;
       +        int k;
       +
       +        memset(ipad, 0x36, 64);
       +        memset(opad, 0x5c, 64);
       +        ipad[64] = 0;
       +        opad[64] = 0;
       +        for(k = 0; k < klen; k++){
       +                ipad[k] ^= key[k];
       +                opad[k] ^= key[k];
       +        }
       +
       +        /* inner: H(ipad, text) */
       +        ds = sha1(ipad, 64, nil, nil);
       +        sha1(t, tlen, inner, ds);
       +
       +        /* outer: H(opad, inner) */
       +        ds = sha1(opad, 64, nil, nil);
       +        sha1(inner, SHA1dlen, hash, ds);
       +}
       +
       +/*
       + * hmac_sha1_96 authenticator.  Computes the keyed hash of
       + * t[0..tlen) with the 16-byte key in ecb->ahstate, compares its
       + * first ahlen bytes with auth, then overwrites auth with them.
       + * Returns 1 if auth matched before being overwritten, else 0.
       + * The overwrite is how the sending side obtains the auth data.
       + */
       +static int
       +shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
       +{
       +        uchar digest[SHA1dlen];
       +        int ok;
       +
       +        memset(digest, 0, sizeof digest);
       +        seanq_hmac_sha1(digest, t, tlen, (uchar*)ecb->ahstate, 16);
       +
       +        ok = 1;
       +        if(memcmp(auth, digest, ecb->ahlen) != 0)
       +                ok = 0;
       +        memmove(auth, digest, ecb->ahlen);
       +        return ok;
       +}
       +
       +/*
       + * Install the hmac_sha1_96 authenticator in ecb with a copy of key.
       + * klen is the key length in bits and must be 128; anything else
       + * panics.  Auth data is 12 bytes per packet.
       + */
       +static void
       +shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
       +{
       +        if(klen != 128)
       +                panic("shaahinit: bad keylen");
       +        /*
       +         * bits to bytes is a shift by 3.  A shift by 8 would give 0,
       +         * so no key would be stored and shaauth would read 16 bytes
       +         * from a zero-length allocation.
       +         */
       +        klen >>= 3;                /* convert to bytes */
       +
       +        ecb->ahalg = name;
       +        ecb->ahblklen = 1;
       +        ecb->ahlen = 12;
       +        ecb->auth = shaauth;
       +        ecb->ahstate = smalloc(klen);
       +        memmove(ecb->ahstate, key, klen);
       +}
       +
       +/*
       + * Keyed MD5 of t[0..tlen) into hash: an inner digest over
       + * (key ^ 0x36 pad) followed by the text, then an outer digest over
       + * (key ^ 0x5c pad) followed by the inner digest.  The pads are 64
       + * bytes, so klen must not exceed 64.
       + */
       +void
       +seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
       +{
       +        uchar inner[MD5dlen];
       +        uchar ipad[65], opad[65];
       +        DigestState *ds;
       +        int k;
       +
       +        memset(ipad, 0x36, 64);
       +        memset(opad, 0x5c, 64);
       +        ipad[64] = 0;
       +        opad[64] = 0;
       +        for(k = 0; k < klen; k++){
       +                ipad[k] ^= key[k];
       +                opad[k] ^= key[k];
       +        }
       +
       +        /* inner: H(ipad, text) */
       +        ds = md5(ipad, 64, nil, nil);
       +        md5(t, tlen, inner, ds);
       +
       +        /* outer: H(opad, inner) */
       +        ds = md5(opad, 64, nil, nil);
       +        md5(inner, MD5dlen, hash, ds);
       +}
       +
       +/*
       + * hmac_md5_96 authenticator.  Computes the keyed hash of
       + * t[0..tlen) with the 16-byte key in ecb->ahstate, compares its
       + * first ahlen bytes with auth, then overwrites auth with them.
       + * Returns 1 if auth matched before being overwritten, else 0.
       + * The overwrite is how the sending side obtains the auth data.
       + */
       +static int
       +md5auth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
       +{
       +        uchar digest[MD5dlen];
       +        int ok;
       +
       +        memset(digest, 0, sizeof digest);
       +        seanq_hmac_md5(digest, t, tlen, (uchar*)ecb->ahstate, 16);
       +
       +        ok = 1;
       +        if(memcmp(auth, digest, ecb->ahlen) != 0)
       +                ok = 0;
       +        memmove(auth, digest, ecb->ahlen);
       +        return ok;
       +}
       +
       +/*
       + * Install the hmac_md5_96 authenticator in ecb with a copy of key.
       + * klen is the key length in bits and must be 128; anything else
       + * panics.  Auth data is 12 bytes per packet.
       + */
       +static void
       +md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
       +{
       +        int nbytes;
       +
       +        if(klen != 128)
       +                panic("md5ahinit: bad keylen");
       +        nbytes = klen >> 3;        /* bits to bytes */
       +
       +        ecb->ahalg = name;
       +        ecb->ahblklen = 1;
       +        ecb->ahlen = 12;
       +        ecb->auth = md5auth;
       +
       +        ecb->ahstate = smalloc(nbytes);
       +        memmove(ecb->ahstate, key, nbytes);
       +}
       +
       +/*
       + * DES cipher for "des_56_cbc", working in place on p[0..n).
       + * The first 8 bytes of the buffer carry the IV; the rest is
       + * processed in 8-byte blocks chained through ds->ivec.
       + *
       + * Incoming: load the IV from the buffer, then for each block save
       + * the ciphertext, run block_cipher with flag 1, xor with the
       + * previous ciphertext (or the IV).
       + * Outgoing: store the current ivec as the IV, then for each block
       + * xor with ivec, run block_cipher with flag 0, and keep the result
       + * as the next ivec.
       + *
       + * Always returns 1.  The caller is expected to pass n as a multiple
       + * of 8 (espiput checks payload % espblklen before calling).
       + */
       +static int
       +descipher(Espcb *ecb, uchar *p, int n)
       +{
       +        uchar tmp[8];
       +        uchar *pp, *tp, *ip, *eip, *ep;
       +        DESstate *ds = ecb->espstate;
       +
       +        ep = p + n;
       +        if(ecb->incoming) {
       +                memmove(ds->ivec, p, 8);
       +                p += 8;
       +                while(p < ep){
       +                        /* keep the ciphertext: it chains into the next block */
       +                        memmove(tmp, p, 8);
       +                        block_cipher(ds->expanded, p, 1);
       +                        tp = tmp;
       +                        ip = ds->ivec;
       +                        /* xor with previous ciphertext and advance ivec in one pass */
       +                        for(eip = ip+8; ip < eip; ){
       +                                *p++ ^= *ip;
       +                                *ip++ = *tp++;
       +                        }
       +                }
       +        } else {
       +                memmove(p, ds->ivec, 8);
       +                for(p += 8; p < ep; p += 8){
       +                        pp = p;
       +                        ip = ds->ivec;
       +                        for(eip = ip+8; ip < eip; )
       +                                *pp++ ^= *ip++;
       +                        block_cipher(ds->expanded, p, 0);
       +                        /* this ciphertext block chains into the next one */
       +                        memmove(ds->ivec, p, 8);
       +                }
       +        }
       +        return 1;
       +}
       +
       +/*
       + * Install the DES cipher in ecb.  n is the key length in bits; at
       + * most 8 bytes of k are used and a shorter key is zero-padded.
       + * The initial IV is random.  Block and IV length are both 8 bytes.
       + */
       +static void
       +desespinit(Espcb *ecb, char *name, uchar *k, int n)
       +{
       +        uchar deskey[8], iv[8];
       +        int j, nbytes;
       +
       +        /* bits to bytes, capped at the DES key size */
       +        nbytes = (n+7)>>3;
       +        if(nbytes > 8)
       +                nbytes = 8;
       +        memset(deskey, 0, sizeof deskey);
       +        memmove(deskey, k, nbytes);
       +
       +        j = 0;
       +        while(j < 8)
       +                iv[j++] = nrand(256);
       +
       +        ecb->espalg = name;
       +        ecb->espblklen = 8;
       +        ecb->espivlen = 8;
       +        ecb->cipher = descipher;
       +        ecb->espstate = smalloc(sizeof(DESstate));
       +        setupDESstate(ecb->espstate, deskey, iv);
       +}
       +
       +/*
       + * Create the "esp" protocol, hook up its entry points and register
       + * it with the given Fs via Fsproto.
       + */
       +void
       +espinit(Fs *fs)
       +{
       +        Proto *p;
       +
       +        p = smalloc(sizeof(Proto));
       +        p->priv = smalloc(sizeof(Esppriv));
       +
       +        /* identity and sizing */
       +        p->name = "esp";
       +        p->ipproto = IP_ESPPROTO;
       +        p->nc = Nchans;
       +        p->ptclsize = sizeof(Espcb);
       +
       +        /* conversation lifecycle */
       +        p->create = espcreate;
       +        p->connect = espconnect;
       +        p->announce = nil;
       +        p->close = espclose;
       +        p->ctl = espctl;
       +
       +        /* packet input and error advice */
       +        p->rcv = espiput;
       +        p->advise = espadvise;
       +
       +        /* status reporting */
       +        p->state = espstate;
       +        p->stats = espstats;
       +        p->local = esplocal;
       +        p->remote = espremote;
       +
       +        Fsproto(fs, p);
       +}
       +
       +
       +#ifdef notdef
       +/* limits, in bytes of keystream, used by rc4cipher to resynchronise */
       +enum {
       +        RC4forward= 10*1024*1024,        /* maximum skip forward */
       +        RC4back = 100*1024,        /* maximum look back */
       +};
       +
       +/*
       + * RC4 cipher state kept in Espcb.espstate: the current keystream
       + * position plus an older snapshot, which rc4cipher uses to decrypt
       + * packets that arrive out of order.
       + */
       +typedef struct Esprc4 Esprc4;
       +struct Esprc4
       +{
       +        ulong        cseq;                /* current byte sequence number */
       +        RC4state current;        /* keystream state at cseq */
       +
       +        int        ovalid;                /* old is valid */
       +        ulong        lgseq;                /* last good sequence */
       +        ulong        oseq;                /* old byte sequence number */
       +        RC4state old;                /* keystream state at oseq */
       +};
       +
       +static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
       +
       +/*
       + * RC4 cipher, working in place on p[0..n).  The first 4 bytes of
       + * the buffer carry the keystream byte offset of the packet; the
       + * remaining n-4 bytes are the data.
       + *
       + * Outgoing: store the current offset, encrypt, advance.
       + * Incoming: compare the packet's offset with the expected one:
       + *        equal        decrypt with the current state;
       + *        ahead        packets were lost: snapshot the current state into
       + *                "old" (if no snapshot is held), skip forward, decrypt;
       + *                refuse skips larger than RC4forward;
       + *        behind        reordered packet: decrypt with a copy of the old
       + *                snapshot, if it reaches back far enough.
       + * Afterwards the old snapshot is dragged forward so that it stays
       + * within RC4back bytes of the current position.
       + *
       + * Returns 1 on success; on failure returns 0, with up->errstr set
       + * except when n < 4.
       + */
       +static int
       +rc4cipher(Espcb *ecb, uchar *p, int n)
       +{
       +        Esprc4 *esprc4;
       +        RC4state tmpstate;
       +        ulong seq;
       +        long d, dd;
       +
       +        if(n < 4)
       +                return 0;
       +
       +        esprc4 = ecb->espstate;
       +        if(ecb->incoming) {
       +                seq = nhgetl(p);
       +                p += 4;
       +                n -= 4;
       +                /* signed distance between the packet and the expected position */
       +                d = seq-esprc4->cseq;
       +                if(d == 0) {
       +                        rc4(&esprc4->current, p, n);
       +                        esprc4->cseq += n;
       +                        if(esprc4->ovalid) {
       +                                /* enough in-order data since the gap: drop the snapshot */
       +                                dd = esprc4->cseq - esprc4->lgseq;
       +                                if(dd > RC4back)
       +                                        esprc4->ovalid = 0;
       +                        }
       +                } else if(d > 0) {
       +print("esp rc4cipher: missing packet: %uld %ld\n", seq, d); /* this link is hosed */
       +                        if(d > RC4forward) {
       +                                strcpy(up->errstr, "rc4cipher: skipped too much");
       +                                return 0;
       +                        }
       +                        esprc4->lgseq = seq;
       +                        if(!esprc4->ovalid) {
       +                                esprc4->ovalid = 1;
       +                                esprc4->oseq = esprc4->cseq;
       +                                memmove(&esprc4->old, &esprc4->current,
       +                                        sizeof(RC4state));
       +                        }
       +                        rc4skip(&esprc4->current, d);
       +                        rc4(&esprc4->current, p, n);
       +                        esprc4->cseq = seq+n;
       +                } else {
       +print("esp rc4cipher: reordered packet: %uld %ld\n", seq, d);
       +                        dd = seq - esprc4->oseq;
       +                        if(!esprc4->ovalid || -d > RC4back || dd < 0) {
       +                                strcpy(up->errstr, "rc4cipher: too far back");
       +                                return 0;
       +                        }
       +                        /* work on a copy so the snapshot itself is not advanced */
       +                        memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
       +                        rc4skip(&tmpstate, dd);
       +                        rc4(&tmpstate, p, n);
       +                        return 1;
       +                }
       +
       +                /* move old state up */
       +                if(esprc4->ovalid) {
       +                        dd = esprc4->cseq - RC4back - esprc4->oseq;
       +                        if(dd > 0) {
       +                                rc4skip(&esprc4->old, dd);
       +                                esprc4->oseq += dd;
       +                        }
       +                }
       +        } else {
       +                hnputl(p, esprc4->cseq);
       +                p += 4;
       +                n -= 4;
       +                rc4(&esprc4->current, p, n);
       +                esprc4->cseq += n;
       +        }
       +        return 1;
       +}
       +
       +/*
       + * Install the RC4 cipher in ecb, keyed with the first (n+7)/8 bytes
       + * of k (n is the key length in bits).  Block and IV length are
       + * both 4 bytes.
       + */
       +static void
       +rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
       +{
       +        Esprc4 *st;
       +        int nbytes;
       +
       +        /* bits to bytes */
       +        nbytes = (n+7)>>3;
       +
       +        st = smalloc(sizeof *st);
       +        memset(st, 0, sizeof *st);
       +        setupRC4state(&st->current, k, nbytes);
       +
       +        ecb->espalg = name;
       +        ecb->espblklen = 4;
       +        ecb->espivlen = 4;
       +        ecb->cipher = rc4cipher;
       +        ecb->espstate = st;
       +}
       +#endif
 (DIR) diff --git a/src/9vx/a/ip/ethermedium.c b/src/9vx/a/ip/ethermedium.c
       @@ -0,0 +1,766 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "netif.h"
       +#include "ip.h"
       +#include "ipv6.h"
       +
       +typedef struct Etherhdr Etherhdr;
       +struct Etherhdr                /* 14 bytes (6+6+2) overlaid on bp->rp by etherbwrite */
       +{
       +        uchar        d[6];                /* destination mac; etherbwrite copies the resolved mac here */
       +        uchar        s[6];                /* source mac; etherbwrite copies ifc->mac here */
       +        uchar        t[2];                /* ether type; etherbwrite stores 0x0800 (V4) or 0x86DD (V6) */
       +};
       +
       +static uchar ipbroadcast[IPaddrlen] = {        /* all-ones IP address; recvarp refuses to enter it */
       +        0xff,0xff,0xff,0xff,
       +        0xff,0xff,0xff,0xff,
       +        0xff,0xff,0xff,0xff,
       +        0xff,0xff,0xff,0xff,
       +};
       +
       +static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };        /* all-ones mac; recvarp refuses it as a sender hardware address */
       +
       +static void        etherread4(void *a);
       +static void        etherread6(void *a);
       +static void        etherbind(Ipifc *ifc, int argc, char **argv);
       +static void        etherunbind(Ipifc *ifc);
       +static void        etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
       +static void        etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
       +static void        etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
       +static Block*        multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
       +static void        sendarp(Ipifc *ifc, Arpent *a);
       +static void        sendgarp(Ipifc *ifc, uchar*);
       +static int        multicastea(uchar *ea, uchar *ip);
       +static void        recvarpproc(void*);
       +static void        resolveaddr6(Ipifc *ifc, Arpent *a);
       +static void        etherpref2addr(uchar *pref, uchar *ea);
       +
       +Medium ethermedium =                /* registered by ethermediumlink */
       +{
       +.name=                "ether",
       +.hsize=                14,                /* matches the Etherhdr layout: 6+6+2 bytes */
       +.mintu=                60,
       +.maxtu=                1514,
       +.maclen=        6,
       +.bind=                etherbind,
       +.unbind=        etherunbind,
       +.bwrite=        etherbwrite,
       +.addmulti=        etheraddmulti,
       +.remmulti=        etherremmulti,
       +.ares=                arpenter,
       +.areg=                sendgarp,        /* sends a gratuitous arp for an address */
       +.pref2addr=        etherpref2addr,
       +};
       +
       +Medium gbemedium =                /* same handlers as ethermedium; only name and maxtu differ */
       +{
       +.name=                "gbe",
       +.hsize=                14,                /* matches the Etherhdr layout: 6+6+2 bytes */
       +.mintu=                60,
       +.maxtu=                9014,                /* presumably 9000-byte jumbo payload plus hsize -- confirm */
       +.maclen=        6,
       +.bind=                etherbind,
       +.unbind=        etherunbind,
       +.bwrite=        etherbwrite,
       +.addmulti=        etheraddmulti,
       +.remmulti=        etherremmulti,
       +.ares=                arpenter,
       +.areg=                sendgarp,        /* sends a gratuitous arp for an address */
       +.pref2addr=        etherpref2addr,
       +};
       +
       +typedef struct        Etherrock Etherrock;
       +struct Etherrock                /* per-interface state; etherbind stores it in ifc->arg */
       +{
       +        Fs        *f;                /* file system we belong to (ifc->conv->p->f) */
       +        Proc        *arpp;                /* arp process; set by recvarpproc, zeroed when it exits */
       +        Proc        *read4p;        /* reading process (v4); set by etherread4, zeroed when it exits */
       +        Proc        *read6p;        /* reading process (v6); set by etherread6, zeroed when it exits */
       +        Chan        *mchan4;        /* Data channel for v4 (dialed as dev!0x800) */
       +        Chan        *achan;                /* Arp channel (dialed as dev!0x806) */
       +        Chan        *cchan4;        /* Control channel for v4 */
       +        Chan        *mchan6;        /* Data channel for v6 (dialed as dev!0x86DD) */
       +        Chan        *cchan6;        /* Control channel for v6 */
       +};
       +
       +/*
       + *  arp opcodes, as stored in and read from Etherarp.op
       + */
       +enum
       +{
       +        ARPREQUEST        = 1,        /* written by sendarp and sendgarp; answered by recvarp */
       +        ARPREPLY        = 2,        /* written by recvarp when answering a request */
       +};
       +
       +typedef struct Etherarp Etherarp;
       +struct Etherarp                        /* ethernet header followed by an arp packet for v4 over ethernet */
       +{
       +        uchar        d[6];                /* ethernet destination */
       +        uchar        s[6];                /* ethernet source */
       +        uchar        type[2];        /* ether type; senders here store ETARP */
       +        uchar        hrd[2];                /* senders here store 1 */
       +        uchar        pro[2];                /* senders here store ETIP4 */
       +        uchar        hln;                /* senders here store sizeof(sha) */
       +        uchar        pln;                /* senders here store sizeof(spa) */
       +        uchar        op[2];                /* ARPREQUEST or ARPREPLY */
       +        uchar        sha[6];                /* sender's mac address */
       +        uchar        spa[4];                /* sender's v4 address */
       +        uchar        tha[6];                /* target's mac address; left zero in requests built here */
       +        uchar        tpa[4];                /* target's v4 address */
       +};
       +
       +static char *nbmsg = "nonblocking";        /* control message etherbind writes to cchan4 and cchan6 */
       +
       +/*
       + *  called to bind an IP ifc to an ethernet device
       + *  called with ifc wlock'd
       + */
       +
       +static void
       +etherbind(Ipifc *ifc, int argc, char **argv)
       +{
       +        Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6, *schan;
       +        char addr[Maxpath];        //char addr[2*KNAMELEN];
       +        char dir[Maxpath];        //char dir[2*KNAMELEN];
       +        char *buf;
       +        int n;
       +        char *ptr;
       +        Etherrock *er;
       +
       +        if(argc < 3)                /* argv[2], the device to dial, is read below */
       +                error(Ebadarg);
       +
       +        mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
       +        buf = nil;
       +        if(waserror()){                /* on any error below: undo whatever was opened so far */
       +                if(mchan4 != nil)
       +                        cclose(mchan4);
       +                if(cchan4 != nil)
       +                        cclose(cchan4);
       +                if(achan != nil)
       +                        cclose(achan);
       +                if(mchan6 != nil)
       +                        cclose(mchan6);
       +                if(cchan6 != nil)
       +                        cclose(cchan6);
       +                if(buf != nil)
       +                        free(buf);
       +                nexterror();
       +        }
       +
       +        /*
       +         *  open ipv4 conversation
       +         *
       +         *  the dial will fail if the type is already open on
       +         *  this device.
       +         */
       +        snprint(addr, sizeof(addr), "%s!0x800", argv[2]);        /* ETIP4 */
       +        mchan4 = chandial(addr, nil, dir, &cchan4);
       +
       +        /*
       +         *  make it non-blocking
       +         */
       +        devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
       +
       +        /*
       +         *  get mac address and speed
       +         */
       +        snprint(addr, sizeof(addr), "%s/stats", argv[2]);
       +        buf = smalloc(512);
       +        schan = namec(addr, Aopen, OREAD, 0);
       +        if(waserror()){                /* schan is not covered by the outer handler */
       +                cclose(schan);
       +                nexterror();
       +        }
       +        n = devtab[schan->type]->read(schan, buf, 511, 0);        /* 511: leave room for the terminator */
       +        cclose(schan);
       +        poperror();
       +        buf[n] = 0;
       +
       +        ptr = strstr(buf, "addr: ");
       +        if(!ptr)
       +                error(Eio);
       +        ptr += 6;                /* skip past "addr: " */
       +        parsemac(ifc->mac, ptr, 6);
       +
       +        ptr = strstr(buf, "mbps: ");
       +        if(ptr){
       +                ptr += 6;        /* skip past "mbps: " */
       +                ifc->mbps = atoi(ptr);
       +        } else
       +                ifc->mbps = 100;        /* default when the stats file reports no speed */
       +
       +        /*
       +          *  open arp conversation
       +         */
       +        snprint(addr, sizeof(addr), "%s!0x806", argv[2]);        /* ETARP */
       +        achan = chandial(addr, nil, nil, nil);
       +
       +        /*
       +         *  open ipv6 conversation
       +         *
       +         *  the dial will fail if the type is already open on
       +         *  this device.
       +         */
       +        snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);        /* ETIP6 */
       +        mchan6 = chandial(addr, nil, dir, &cchan6);
       +
       +        /*
       +         *  make it non-blocking
       +         */
       +        devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
       +
       +        er = smalloc(sizeof(*er));        /* the channels now belong to the rock; etherunbind closes them */
       +        er->mchan4 = mchan4;
       +        er->cchan4 = cchan4;
       +        er->achan = achan;
       +        er->mchan6 = mchan6;
       +        er->cchan6 = cchan6;
       +        er->f = ifc->conv->p->f;
       +        ifc->arg = er;
       +
       +        free(buf);
       +        poperror();
       +
       +        kproc("etherread4", etherread4, ifc);        /* each helper records itself in the rock */
       +        kproc("recvarpproc", recvarpproc, ifc);
       +        kproc("etherread6", etherread6, ifc);
       +}
       +
       +/*
       + *  called with ifc wlock'd
       + */
       +static void
       +etherunbind(Ipifc *ifc)
       +{
       +        Etherrock *rock = ifc->arg;
       +
       +        /* post an "unbind" note to every helper recorded in the rock */
       +        if(rock->read4p != 0)
       +                postnote(rock->read4p, 1, "unbind", 0);
       +        if(rock->read6p != 0)
       +                postnote(rock->read6p, 1, "unbind", 0);
       +        if(rock->arpp != 0)
       +                postnote(rock->arpp, 1, "unbind", 0);
       +
       +        /* poll until all three have zeroed their slot on the way out */
       +        while(!(rock->arpp == 0 && rock->read4p == 0 && rock->read6p == 0))
       +                tsleep(&up->sleep, return0, 0, 300);
       +
       +        if(rock->mchan4 != nil)
       +                cclose(rock->mchan4);
       +        if(rock->achan != nil)
       +                cclose(rock->achan);
       +        if(rock->cchan4 != nil)
       +                cclose(rock->cchan4);
       +        if(rock->mchan6 != nil)
       +                cclose(rock->mchan6);
       +        if(rock->cchan6 != nil)
       +                cclose(rock->cchan6);
       +        free(rock);
       +}
       +
       +/*
       + *  called by ipoput with a single block to write with ifc RLOCK'd
       + */
       +static void
       +etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
       +{
       +        Etherhdr *eh;
       +        Arpent *a;
       +        uchar mac[6];
       +        Etherrock *er = ifc->arg;
       +
       +        /* get mac address of destination; a == nil is taken to mean mac is filled in */
       +        a = arpget(er->f->arp, bp, version, ifc, ip, mac);
       +        if(a){
       +                /* check for broadcast or multicast; bp becomes whatever block comes back */
       +                bp = multicastarp(er->f, a, ifc->m, mac);
       +                if(bp==nil){
       +                        switch(version){        /* nothing to send yet: start address resolution */
       +                        case V4:
       +                                sendarp(ifc, a);
       +                                break;
       +                        case V6:
       +                                resolveaddr6(ifc, a);
       +                                break;
       +                        default:
       +                                panic("etherbwrite: version %d", version);
       +                        }
       +                        return;
       +                }
       +        }
       +
       +        /* make it a single block with space for the ether header */
       +        bp = padblock(bp, ifc->m->hsize);
       +        if(bp->next)
       +                bp = concatblock(bp);
       +        if(BLEN(bp) < ifc->mintu)        /* grow short frames to the interface minimum */
       +                bp = adjustblock(bp, ifc->mintu);
       +        eh = (Etherhdr*)bp->rp;
       +
       +        /* copy in mac addresses and ether type */
       +        memmove(eh->s, ifc->mac, sizeof(eh->s));
       +        memmove(eh->d, mac, sizeof(eh->d));
       +
       +         switch(version){        /* ether type bytes and data channel depend on the IP version */
       +        case V4:
       +                eh->t[0] = 0x08;
       +                eh->t[1] = 0x00;
       +                devtab[er->mchan4->type]->bwrite(er->mchan4, bp, 0);
       +                break;
       +        case V6:
       +                eh->t[0] = 0x86;
       +                eh->t[1] = 0xDD;
       +                devtab[er->mchan6->type]->bwrite(er->mchan6, bp, 0);
       +                break;
       +        default:
       +                panic("etherbwrite2: version %d", version);
       +        }
       +        ifc->out++;        /* count the packet handed to the device */
       +}
       +
       +
       +/*
       + *  process to read from the ethernet
       + */
       +static void
       +etherread4(void *a)
       +{
       +        Ipifc *ifc;
       +        Block *bp;
       +        Etherrock *er;
       +
       +        ifc = a;
       +        er = ifc->arg;
       +        er->read4p = up;        /* hide identity under a rock for unbind */
       +        if(waserror()){                /* any error ends the process */
       +                er->read4p = 0;        /* etherunbind polls for this */
       +                pexit("hangup", 1);
       +        }
       +        for(;;){
       +                bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
       +                if(!CANRLOCK(ifc)){        /* could not get the read lock: drop the packet */
       +                        freeb(bp);
       +                        continue;
       +                }
       +                if(waserror()){                /* release the read lock before passing the error on */
       +                        RUNLOCK(ifc);
       +                        nexterror();
       +                }
       +                ifc->in++;
       +                bp->rp += ifc->m->hsize;        /* step over the medium header */
       +                if(ifc->lifc == nil)                /* no logical interface on this ifc: discard */
       +                        freeb(bp);
       +                else
       +                        ipiput4(er->f, ifc, bp);
       +                RUNLOCK(ifc);
       +                poperror();
       +        }
       +}
       +
       +
       +/*
       + *  process to read from the ethernet, IPv6
       + */
       +static void
       +etherread6(void *a)
       +{
       +        Ipifc *ifc;
       +        Block *bp;
       +        Etherrock *er;
       +
       +        ifc = a;
       +        er = ifc->arg;
       +        er->read6p = up;        /* hide identity under a rock for unbind */
       +        if(waserror()){                /* any error ends the process */
       +                er->read6p = 0;        /* etherunbind polls for this */
       +                pexit("hangup", 1);
       +        }
       +        for(;;){
       +                bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
       +                if(!CANRLOCK(ifc)){        /* could not get the read lock: drop the packet */
       +                        freeb(bp);
       +                        continue;
       +                }
       +                if(waserror()){                /* release the read lock before passing the error on */
       +                        RUNLOCK(ifc);
       +                        nexterror();
       +                }
       +                ifc->in++;
       +                bp->rp += ifc->m->hsize;        /* step over the medium header */
       +                if(ifc->lifc == nil)                /* no logical interface on this ifc: discard */
       +                        freeb(bp);
       +                else
       +                        ipiput6(er->f, ifc, bp);
       +                RUNLOCK(ifc);
       +                poperror();
       +        }
       +}
       +
       +static void
       +etheraddmulti(Ipifc *ifc, uchar *a, uchar *_)
       +{
       +        Etherrock *er = ifc->arg;
       +        char cmd[64];
       +        uchar ea[6];
       +        int v;
       +
       +        /* derive the ethernet multicast address for a; */
       +        /* multicastea also reports which IP version a is */
       +        v = multicastea(ea, a);
       +        sprint(cmd, "addmulti %E", ea);
       +
       +        /* the request goes to the control channel of that version */
       +        if(v == V4)
       +                devtab[er->cchan4->type]->write(er->cchan4, cmd, strlen(cmd), 0);
       +        else if(v == V6)
       +                devtab[er->cchan6->type]->write(er->cchan6, cmd, strlen(cmd), 0);
       +        else
       +                panic("etheraddmulti: version %d", v);
       +}
       +
       +static void
       +etherremmulti(Ipifc *ifc, uchar *a, uchar *_)
       +{
       +        Etherrock *er = ifc->arg;
       +        char cmd[64];
       +        uchar ea[6];
       +        int v;
       +
       +        /* derive the ethernet multicast address for a; */
       +        /* multicastea also reports which IP version a is */
       +        v = multicastea(ea, a);
       +        sprint(cmd, "remmulti %E", ea);
       +
       +        /* the request goes to the control channel of that version */
       +        if(v == V4)
       +                devtab[er->cchan4->type]->write(er->cchan4, cmd, strlen(cmd), 0);
       +        else if(v == V6)
       +                devtab[er->cchan6->type]->write(er->cchan6, cmd, strlen(cmd), 0);
       +        else
       +                panic("etherremmulti: version %d", v);
       +}
       +
       +/*
       + *  send an ethernet arp
       + *  (only v4, v6 uses the neighbor discovery, rfc1970)
       + */
       +static void
       +sendarp(Ipifc *ifc, Arpent *a)
       +{
       +        int n;
       +        Block *bp;
       +        Etherarp *e;
       +        Etherrock *er = ifc->arg;
       +
       +        /* don't do anything if it's been less than a second since the last */
       +        if(NOW - a->ctime < 1000){
       +                arprelease(er->f->arp, a);        /* every exit path releases the entry exactly once */
       +                return;
       +        }
       +
       +        /* remove all but the last message */
       +        while((bp = a->hold) != nil){
       +                if(bp == a->last)
       +                        break;
       +                a->hold = bp->list;
       +                freeblist(bp);
       +        }
       +
       +        /* try to keep it around for a second more */
       +        a->ctime = NOW;
       +        arprelease(er->f->arp, a);        /* a's fields are not written after this; a->ip and a->type are still read */
       +
       +        n = sizeof(Etherarp);
       +        if(n < a->type->mintu)                /* pad the request up to the medium's minimum size */
       +                n = a->type->mintu;
       +        bp = allocb(n);
       +        memset(bp->rp, 0, n);                /* zero the whole frame, padding included */
       +        e = (Etherarp*)bp->rp;
       +        memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
       +        ipv4local(ifc, e->spa);
       +        memmove(e->sha, ifc->mac, sizeof(e->sha));
       +        memset(e->d, 0xff, sizeof(e->d));                /* ethernet broadcast */
       +        memmove(e->s, ifc->mac, sizeof(e->s));
       +
       +        hnputs(e->type, ETARP);
       +        hnputs(e->hrd, 1);
       +        hnputs(e->pro, ETIP4);
       +        e->hln = sizeof(e->sha);
       +        e->pln = sizeof(e->spa);
       +        hnputs(e->op, ARPREQUEST);
       +        bp->wp += n;
       +
       +        devtab[er->achan->type]->bwrite(er->achan, bp, 0);        /* out on the arp channel */
       +}
       +
       +static void
       +resolveaddr6(Ipifc *ifc, Arpent *a)
       +{
       +        int sflag;
       +        Block *bp;
       +        Etherrock *er = ifc->arg;
       +        uchar ipsrc[IPaddrlen];
       +
       +        /* don't do anything if it's been less than ReTransTimer since the last */
       +        if(NOW - a->ctime < ReTransTimer){
       +                arprelease(er->f->arp, a);        /* every exit path releases the entry exactly once */
       +                return;
       +        }
       +
       +        /* remove all but the last message */
       +        while((bp = a->hold) != nil){
       +                if(bp == a->last)
       +                        break;
       +                a->hold = bp->list;
       +                freeblist(bp);
       +        }
       +
       +        /* restart the timers; give up once the retransmit budget is spent */
       +        a->ctime = NOW;
       +        a->rtime = NOW + ReTransTimer;
       +        if(a->rxtsrem <= 0) {
       +                arprelease(er->f->arp, a);
       +                return;
       +        }
       +
       +        a->rxtsrem--;                        /* one fewer solicitation left for this entry */
       +        arprelease(er->f->arp, a);        /* a's fields are not written after this; a->ip is still read */
       +
       +        if((sflag = ipv6anylocal(ifc, ipsrc)) != 0)        /* solicit only when ipv6anylocal reports nonzero */
       +                icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
       +}
       +
       +/*
       + *  send a gratuitous arp to refresh arp caches
       + */
       +static void
       +sendgarp(Ipifc *ifc, uchar *ip)
       +{
       +        int n;
       +        Block *bp;
       +        Etherarp *e;
       +        Etherrock *er = ifc->arg;
       +
       +        /* don't arp for our initial non address */
       +        if(ipcmp(ip, IPnoaddr) == 0)
       +                return;
       +
       +        n = sizeof(Etherarp);
       +        if(n < ifc->m->mintu)                /* pad the request up to the medium's minimum size */
       +                n = ifc->m->mintu;
       +        bp = allocb(n);
       +        memset(bp->rp, 0, n);                /* zero the whole frame, padding included */
       +        e = (Etherarp*)bp->rp;
       +        memmove(e->tpa, ip+IPv4off, sizeof(e->tpa));        /* target and sender are both ip */
       +        memmove(e->spa, ip+IPv4off, sizeof(e->spa));
       +        memmove(e->sha, ifc->mac, sizeof(e->sha));
       +        memset(e->d, 0xff, sizeof(e->d));                /* ethernet broadcast */
       +        memmove(e->s, ifc->mac, sizeof(e->s));
       +
       +        hnputs(e->type, ETARP);
       +        hnputs(e->hrd, 1);
       +        hnputs(e->pro, ETIP4);
       +        e->hln = sizeof(e->sha);
       +        e->pln = sizeof(e->spa);
       +        hnputs(e->op, ARPREQUEST);        /* sent as a request, not a reply */
       +        bp->wp += n;
       +
       +        devtab[er->achan->type]->bwrite(er->achan, bp, 0);        /* out on the arp channel */
       +}
       +
       +static void
       +recvarp(Ipifc *ifc)
       +{
       +        int n;
       +        Block *ebp, *rbp;
       +        Etherarp *e, *r;
       +        uchar ip[IPaddrlen];
       +        static uchar eprinted[4];        /* last v4 address a duplicate-ip warning was printed for */
       +        Etherrock *er = ifc->arg;
       +
       +        ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);        /* one arp frame per call */
       +        if(ebp == nil)
       +                return;
       +
       +        e = (Etherarp*)ebp->rp;
       +        switch(nhgets(e->op)) {
       +        default:
       +                break;
       +
       +        case ARPREPLY:
       +                /* check for machine using my ip address */
       +                v4tov6(ip, e->spa);
       +                if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
       +                        if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
       +                                print("arprep: 0x%E/0x%E also has ip addr %V\n",
       +                                        e->s, e->sha, e->spa);
       +                                break;
       +                        }
       +                }
       +
       +                /* make sure we're not entering broadcast addresses */
       +                if(ipcmp(ip, ipbroadcast) == 0 ||
       +                        !memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
       +                        print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
       +                                e->s, e->sha, ip);        /* %I needs the 16-byte form, not the 4-byte spa */
       +                        break;
       +                }
       +
       +                arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
       +                break;
       +
       +        case ARPREQUEST:
       +                /* don't answer arps till we know who we are */
       +                if(ifc->lifc == 0)
       +                        break;
       +
       +                /* check for machine using my ip or ether address */
       +                v4tov6(ip, e->spa);
       +                if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
       +                        if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
       +                                if (memcmp(eprinted, e->spa, sizeof(e->spa))){
       +                                        /* print only once */
       +                                        print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
       +                                        memmove(eprinted, e->spa, sizeof(e->spa));
       +                                }
       +                        }
       +                } else {
       +                        if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
       +                                print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
       +                                break;
       +                        }
       +                }
       +
       +                /* refresh what we know about sender */
       +                arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
       +
       +                /* answer only requests for our address or systems we're proxying for */
       +                v4tov6(ip, e->tpa);
       +                if(!iplocalonifc(ifc, ip))
       +                if(!ipproxyifc(er->f, ifc, ip))
       +                        break;
       +
       +                n = sizeof(Etherarp);
       +                if(n < ifc->mintu)                /* pad the reply up to the interface minimum */
       +                        n = ifc->mintu;
       +                rbp = allocb(n);
       +                r = (Etherarp*)rbp->rp;
       +                memset(r, 0, n);                /* zero the padding too, as sendarp and sendgarp do */
       +                hnputs(r->type, ETARP);
       +                hnputs(r->hrd, 1);
       +                hnputs(r->pro, ETIP4);
       +                r->hln = sizeof(r->sha);
       +                r->pln = sizeof(r->spa);
       +                hnputs(r->op, ARPREPLY);
       +                memmove(r->tha, e->sha, sizeof(r->tha));        /* requester becomes the target */
       +                memmove(r->tpa, e->spa, sizeof(r->tpa));
       +                memmove(r->sha, ifc->mac, sizeof(r->sha));        /* we answer with our own mac */
       +                memmove(r->spa, e->tpa, sizeof(r->spa));
       +                memmove(r->d, e->sha, sizeof(r->d));                /* sent directly to the requester */
       +                memmove(r->s, ifc->mac, sizeof(r->s));
       +                rbp->wp += n;
       +
       +                devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
       +        }
       +        freeb(ebp);                /* every break above lands here */
       +}
       +
       +static void
       +recvarpproc(void *v)
       +{
       +        Ipifc *ifc = v;
       +        Etherrock *er = ifc->arg;
       +
       +        er->arpp = up;                /* recorded so etherunbind can post a note to this process */
       +        if(waserror()){
       +                er->arpp = 0;        /* etherunbind polls for this before closing the channels */
       +                pexit("hangup", 1);
       +        }
       +        for(;;)                        /* no normal exit; the loop ends only through the error path */
       +                recvarp(ifc);
       +}
       +
       +static int
       +multicastea(uchar *ea, uchar *ip)
       +{
       +        int version;
       +
       +        version = ipismulticast(ip);
       +        switch(version){
       +        case V4:
       +                /* 01:00:5e, then the low 23 bits of the address */
       +                ea[0] = 0x01;
       +                ea[1] = 0x00;
       +                ea[2] = 0x5e;
       +                memmove(ea+3, ip+13, 3);
       +                ea[3] &= 0x7f;
       +                break;
       +        case V6:
       +                /* 33:33, then the last four bytes of the address */
       +                ea[0] = 0x33;
       +                ea[1] = 0x33;
       +                memmove(ea+2, ip+12, 4);
       +                break;
       +        }
       +        /* ea is left untouched for any other result */
       +        return version;
       +}
       +
       +/*
       + *  fill in an arp entry for broadcast or multicast
       + *  addresses.  Return the first queued packet for the
       + *  IP address.
       + */
       +static Block*
       +multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
       +{
       +        int kind, version;
       +
       +        /* Runi: nothing to fill in here, the caller must resolve it */
       +        kind = ipforme(f, a->ip);
       +        if(kind == Runi)
       +                return nil;
       +
       +        /* broadcast maps to the all-ones ethernet address */
       +        if(kind == Rbcast){
       +                memset(mac, 0xff, 6);
       +                return arpresolve(f->arp, a, medium, mac);
       +        }
       +
       +        /* multicast: multicastea fills in mac and reports the version */
       +        version = multicastea(mac, a->ip);
       +        if(version == V4 || version == V6)
       +                return arpresolve(f->arp, a, medium, mac);
       +
       +        /* neither broadcast nor multicast: let arp take care of it */
       +        return nil;
       +}
       +
       +void
       +ethermediumlink(void)                /* the only non-static function in this file's visible part */
       +{
       +        addipmedium(&ethermedium);        /* "ether", maxtu 1514 */
       +        addipmedium(&gbemedium);        /* "gbe", maxtu 9014 */
       +}
       +
       +
       +static void
       +etherpref2addr(uchar *pref, uchar *ea)
       +{
       +        /* bytes 8..15 of pref: ea[0..2], ff, fe, ea[3..5] */
       +        memmove(pref+8, ea, 3);
       +        memmove(pref+13, ea+3, 3);
       +        pref[11] = 0xFF;
       +        pref[12] = 0xFE;
       +
       +        /* 0x2 is always set (not inverted) in the first byte */
       +        pref[8] |= 0x2;
       +}
 (DIR) diff --git a/src/9vx/a/ip/gre.c b/src/9vx/a/ip/gre.c
       @@ -0,0 +1,283 @@
       +/*
       + * Generic Routing Encapsulation over IPv4, rfc1702
       + */
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +
       +enum
       +{
       +        GRE_IPONLY        = 12,                /* bytes grekick prepends: GREhdr vihl through cksum */
       +        GRE_IPPLUSGRE        = 12,                /* bytes that follow: GREhdr src, dst, flags, eproto */
       +        IP_GREPROTO        = 47,                /* grekick stores this in the proto field */
       +
       +        GRErxms                = 200,
       +        GREtickms        = 100,
       +        GREmaxxmit        = 10,
       +};
       +
       +typedef struct GREhdr                /* 24 bytes: 20 of v4 header followed by 4 of gre header */
       +{
       +        /* ip header */
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        len[2];                /* packet length (including headers) */
       +        uchar        id[2];                /* Identification */
       +        uchar        frag[2];        /* Fragment information */
       +        uchar        Unused;                /* NOTE(review): this offset holds the TTL in an IPv4 header -- confirm */
       +        uchar        proto;                /* Protocol */
       +        uchar        cksum[2];        /* checksum */
       +        uchar        src[4];                /* Ip source */
       +        uchar        dst[4];                /* Ip destination */
       +
       +        /* gre header */
       +        uchar        flags[2];
       +        uchar        eproto[2];        /* encapsulation protocol */
       +} GREhdr;
       +
       +typedef struct GREpriv GREpriv;
       +struct GREpriv                                /* reached as c->p->priv in grekick */
       +{
       +        int                raw;                        /* Raw GRE mode: grekick leaves src, dst and eproto alone */
       +
       +        /* non-MIB stats */
       +        ulong                csumerr;                /* checksum errors */
       +        ulong                lenerr;                        /* short packet */
       +};
       +
       +static void grekick(void *x, Block *bp);
       +
       +static char*
       +greconnect(Conv *c, char **argv, int argc)
       +{
       +        Proto *gre;
       +        char *msg;
       +        Conv *other;
       +        int i, nconv;
       +
       +        msg = Fsstdconnect(c, argv, argc);
       +        if(msg != nil)
       +                return msg;
       +
       +        /* refuse a second conversation to the same remote addr/proto */
       +        gre = c->p;
       +        QLOCK(gre);
       +        nconv = gre->nc;
       +        for(i = 0; i < nconv && (other = gre->conv[i]) != nil; i++){
       +                if(other == c)
       +                        continue;
       +                if(other->rport != c->rport || ipcmp(other->raddr, c->raddr) != 0)
       +                        continue;
       +                /* clash: reset this conversation's addresses and stop looking */
       +                msg = "already connected to that addr/proto";
       +                ipmove(c->laddr, IPnoaddr);
       +                ipmove(c->raddr, IPnoaddr);
       +                break;
       +        }
       +        QUNLOCK(gre);
       +
       +        if(msg != nil)
       +                return msg;
       +
       +        /* no clash found */
       +        Fsconnected(c, nil);
       +        return nil;
       +}
       +
       +static void
       +grecreate(Conv *c)                /* sets up the two queues of a new conversation */
       +{
       +        c->rq = qopen(64*1024, Qmsg, 0, c);        /* read queue: 64K limit, Qmsg flag */
       +        c->wq = qbypass(grekick, c);                /* presumably passes written blocks straight to grekick -- confirm */
       +}
       +
       +static int
       +grestate(Conv *c, char *state, int n)        /* formats at most n bytes into state; returns snprint's result */
       +{
       +        USED(c);                /* the text does not depend on the conversation */
       +        return snprint(state, n, "%s\n", "Datagram");
       +}
       +
       +static char*
       +greannounce(Conv* _, char** __, int ___)        /* all arguments ignored; always returns an error string */
       +{
       +        return "pktifc does not support announce";        /* NOTE(review): message names pktifc, not gre -- confirm intent */
       +}
       +
       +/*
       + * Tear down conversation c: close its rq, wq and eq queues and
       + * reset both addresses to IPnoaddr and both ports to 0.
       + */
       +static void
       +greclose(Conv *c)
       +{
       +        /* shut the queues */
       +        qclose(c->rq);
       +        qclose(c->wq);
       +        qclose(c->eq);
       +
       +        /* forget both endpoints */
       +        c->lport = 0;
       +        c->rport = 0;
       +        ipmove(c->laddr, IPnoaddr);
       +        ipmove(c->raddr, IPnoaddr);
       +}
       +
       +/* NOTE(review): no use of this global is visible in this part of the file -- confirm it is still needed */
       +int drop;
       +
       +/*
       + * Output routine bound to the conversation's write queue (see grecreate).
       + * x is the Conv, bp the block written by the user, which is expected to
       + * start with the GRE header.  Room for the IP header is added in front,
       + * the IP fields are filled in and the packet is handed to ipoput4.
       + * A nil bp is ignored.
       + */
       +static void
       +grekick(void *x, Block *bp)
       +{
       +        Conv *c = x;
       +        GREhdr *ghp;
       +        uchar laddr[IPaddrlen], raddr[IPaddrlen];
       +
       +        if(bp == nil)
       +                return;
       +
       +        /* Make space to fit ip header (gre header already there) */
       +        bp = padblock(bp, GRE_IPONLY);
       +        if(bp == nil)
       +                return;
       +
       +        /* make sure the message has a GRE header */
       +        bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
       +        if(bp == nil)
       +                return;
       +
       +        ghp = (GREhdr *)(bp->rp);
       +        ghp->vihl = IP_VER4;
       +
       +        /*
       +         * Unless the protocol is in raw mode, fill in whatever the user
       +         * left unset: a header address that converts to v4prefix is
       +         * replaced by the conversation's address, and the GRE protocol
       +         * field is overwritten with the conversation's remote port.
       +         */
       +        if(!((GREpriv*)c->p->priv)->raw){
       +                v4tov6(raddr, ghp->dst);
       +                if(ipcmp(raddr, v4prefix) == 0)
       +                        memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
       +                v4tov6(laddr, ghp->src);
       +                if(ipcmp(laddr, v4prefix) == 0){
       +                        /*
       +                         * NOTE(review): raddr still holds the value read from the
       +                         * header before the substitution above, so when the header
       +                         * destination was unset findlocalip is not given c->raddr
       +                         * -- confirm this is intended.
       +                         */
       +                        if(ipcmp(c->laddr, IPnoaddr) == 0)
       +                                findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
       +                        memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
       +                }
       +                hnputs(ghp->eproto, c->rport);
       +        }
       +
       +        /* set in both raw and cooked mode */
       +        ghp->proto = IP_GREPROTO;
       +        ghp->frag[0] = 0;
       +        ghp->frag[1] = 0;
       +
       +        ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
       +}
       +
       +/*
       + * Receive routine for protocol gre: deliver the packet in bp to the
       + * conversation whose remote port equals the GRE protocol field and,
       + * unless the protocol is in raw mode, whose remote address equals the
       + * packet's source.  The IP header is trimmed off before queueing.
       + * The packet is freed when no conversation matches, when it is too
       + * short (counted in lenerr) or when the read queue is already over
       + * 64KB.
       + */
       +static void
       +greiput(Proto *gre, Ipifc* __, Block *bp)
       +{
       +        int len;
       +        GREhdr *ghp;
       +        Conv *c, **p;
       +        ushort eproto;
       +        uchar raddr[IPaddrlen];
       +        GREpriv *gpriv;
       +
       +        gpriv = gre->priv;
       +        ghp = (GREhdr*)(bp->rp);
       +
       +        v4tov6(raddr, ghp->src);
       +        eproto = nhgets(ghp->eproto);
       +        QLOCK(gre);
       +
       +        /* Look for a conversation structure for this port and address */
       +        c = nil;
       +        for(p = gre->conv; *p; p++) {
       +                c = *p;
       +                if(c->inuse == 0)
       +                        continue;
       +                if(c->rport == eproto &&
       +                        (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
       +                        break;
       +        }
       +
       +        /* ran off the end of the table: nobody wants this packet */
       +        if(*p == nil) {
       +                QUNLOCK(gre);
       +                freeblist(bp);
       +                return;
       +        }
       +
       +        QUNLOCK(gre);
       +
       +        /*
       +         * Trim the packet down to data size.  A length field too small
       +         * to hold the GRE header is a short packet and is counted as
       +         * such, like a failed trim below.
       +         */
       +        len = nhgets(ghp->len) - GRE_IPONLY;
       +        if(len < GRE_IPPLUSGRE){
       +                gpriv->lenerr++;
       +                freeblist(bp);
       +                return;
       +        }
       +        bp = trimblock(bp, GRE_IPONLY, len);
       +        if(bp == nil){
       +                gpriv->lenerr++;
       +                return;
       +        }
       +
       +        /*
       +         *  Can't delimit packet so pull it all into one block.
       +         */
       +        if(qlen(c->rq) > 64*1024)
       +                freeblist(bp);
       +        else{
       +                bp = concatblock(bp);
       +                if(bp == nil)
       +                        panic("greiput");
       +                qpass(c->rq, bp);
       +        }
       +}
       +
       +/*
       + * Write the protocol statistics into buf (size len); only the
       + * lenerr counter is reported.  Returns snprint's result.
       + */
       +int
       +grestats(Proto *gre, char *buf, int len)
       +{
       +        GREpriv *priv = gre->priv;
       +
       +        return snprint(buf, len, "gre: len %lud\n", priv->lenerr);
       +}
       +
       +/*
       + * Handle a control request of n fields f.  The single-word requests
       + * "raw" and "cooked" set and clear the raw flag in the protocol's
       + * private data (so the setting is shared by every conversation of the
       + * protocol).  Returns nil on success, an error string otherwise.
       + */
       +char*
       +grectl(Conv *c, char **f, int n)
       +{
       +        GREpriv *priv;
       +
       +        priv = c->p->priv;
       +        if(n != 1)
       +                return "unknown control request";
       +
       +        if(strcmp(f[0], "raw") == 0)
       +                priv->raw = 1;
       +        else if(strcmp(f[0], "cooked") == 0)
       +                priv->raw = 0;
       +        else
       +                return "unknown control request";
       +        return nil;
       +}
       +
       +/*
       + * Allocate the "gre" protocol descriptor and its private data, fill
       + * in its entry points and register it with fs through Fsproto.
       + */
       +void
       +greinit(Fs *fs)
       +{
       +        Proto *p;
       +
       +        p = smalloc(sizeof(Proto));
       +        p->priv = smalloc(sizeof(GREpriv));
       +
       +        /* identity and sizing */
       +        p->name = "gre";
       +        p->ipproto = IP_GREPROTO;
       +        p->nc = 64;
       +        p->ptclsize = 0;
       +
       +        /* entry points */
       +        p->create = grecreate;
       +        p->connect = greconnect;
       +        p->announce = greannounce;
       +        p->close = greclose;
       +        p->state = grestate;
       +        p->stats = grestats;
       +        p->ctl = grectl;
       +        p->rcv = greiput;
       +        p->advise = nil;
       +
       +        Fsproto(fs, p);
       +}
 (DIR) diff --git a/src/9vx/a/ip/icmp.c b/src/9vx/a/ip/icmp.c
       @@ -0,0 +1,490 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +
       +/*
       + * An ICMP message as it is laid over a block in this file: the 20-byte
       + * IPv4 header (vihl through dst), the 8-byte ICMP header (type through
       + * seq) and the start of the payload.
       + */
       +typedef struct Icmp {
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        length[2];        /* packet length */
       +        uchar        id[2];                /* Identification */
       +        uchar        frag[2];        /* Fragment information */
       +        uchar        ttl;                /* Time to live */
       +        uchar        proto;                /* Protocol */
       +        uchar        ipcksum[2];        /* Header checksum */
       +        uchar        src[4];                /* Ip source */
       +        uchar        dst[4];                /* Ip destination */
       +        uchar        type;                /* ICMP message type (see Packet Types) */
       +        uchar        code;                /* type-specific code */
       +        uchar        cksum[2];        /* ICMP checksum */
       +        uchar        icmpid[2];        /* matched against a conversation's lport */
       +        uchar        seq[2];
       +        uchar        data[1];        /* first byte of the payload */
       +} Icmp;
       +
       +/*
       + * ICMP message type numbers.  They index icmpnames and the per-type
       + * in/out counters of Icmppriv, so Maxtype must stay the largest value.
       + */
       +enum {                        /* Packet Types */
       +        EchoReply        = 0,
       +        Unreachable        = 3,
       +        SrcQuench        = 4,
       +        Redirect        = 5,
       +        EchoRequest        = 8,
       +        TimeExceed        = 11,
       +        InParmProblem        = 12,
       +        Timestamp        = 13,
       +        TimestampReply        = 14,
       +        InfoRequest        = 15,
       +        InfoReply        = 16,
       +        AddrMaskRequest = 17,
       +        AddrMaskReply   = 18,
       +
       +        Maxtype                = 18,
       +};
       +
       +/*
       + * icmpiput compares MinAdvise with the bytes left after the outer
       + * IP and ICMP headers before calling another protocol's advise routine.
       + */
       +enum
       +{
       +        MinAdvise        = 24,        /* minimum needed for us to advise another protocol */ 
       +};
       +
       +/*
       + * Printable names of the ICMP message types, indexed by type number.
       + * Types without an entry stay nil; icmpstats prints those by number.
       + */
       +char *icmpnames[Maxtype+1] =
       +{
       +[EchoReply]                "EchoReply",
       +[Unreachable]                "Unreachable",
       +[SrcQuench]                "SrcQuench",
       +[Redirect]                "Redirect",
       +[EchoRequest]                "EchoRequest",
       +[TimeExceed]                "TimeExceed",
       +[InParmProblem]                "InParmProblem",
       +[Timestamp]                "Timestamp",
       +[TimestampReply]        "TimestampReply",
       +[InfoRequest]                "InfoRequest",
       +[InfoReply]                "InfoReply",
       +[AddrMaskRequest]        "AddrMaskRequest",
       +[AddrMaskReply]                "AddrMaskReply",
       +};
       +
       +/* Protocol number and header sizes used throughout this file. */
       +enum {
       +        IP_ICMPPROTO        = 1,        /* value stored in the IP header's proto field */
       +        ICMP_IPSIZE        = 20,        /* bytes of IP header in front of the ICMP header */
       +        ICMP_HDRSIZE        = 8,        /* bytes of ICMP header (type through seq) */
       +};
       +
       +/* Indices into Icmppriv.stats and statnames; Nstats is the count. */
       +enum
       +{
       +        InMsgs,
       +        InErrors,
       +        OutMsgs,
       +        CsumErrs,
       +        LenErrs,
       +        HlenErrs,
       +
       +        Nstats,
       +};
       +
       +/* Labels printed by icmpstats for each Icmppriv.stats counter. */
       +static char *statnames[Nstats] =
       +{
       +[InMsgs]        "InMsgs",
       +[InErrors]        "InErrors",
       +[OutMsgs]        "OutMsgs",
       +[CsumErrs]        "CsumErrs",
       +[LenErrs]        "LenErrs",
       +[HlenErrs]        "HlenErrs",
       +};
       +
       +/* Per-protocol private data: the counters reported by icmpstats. */
       +typedef struct Icmppriv Icmppriv;
       +struct Icmppriv
       +{
       +        ulong        stats[Nstats];                /* indexed by InMsgs .. HlenErrs */
       +
       +        /* message counts */
       +        ulong        in[Maxtype+1];                /* received, indexed by ICMP type */
       +        ulong        out[Maxtype+1];                /* sent, indexed by ICMP type */
       +};
       +
       +static void icmpkick(void *x, Block*);
       +
       +/*
       + * Set up the queues of a new conversation: a read queue opened with
       + * a 64KB limit and the Qmsg flag, and a bypass write queue bound to
       + * icmpkick.
       + */
       +static void
       +icmpcreate(Conv *c)
       +{
       +        enum { Rqlimit = 64*1024 };
       +
       +        c->rq = qopen(Rqlimit, Qmsg, 0, c);
       +        c->wq = qbypass(icmpkick, c);
       +}
       +
       +/*
       + * Connect conversation c using the generic Fsstdconnect; on success
       + * mark it connected.  Returns nil on success or the error string
       + * from Fsstdconnect.
       + */
       +extern char*
       +icmpconnect(Conv *c, char **argv, int argc)
       +{
       +        char *err;
       +
       +        err = Fsstdconnect(c, argv, argc);
       +        if(err == nil)
       +                Fsconnected(c, nil);
       +        return err;
       +}
       +
       +/*
       + * Format the conversation state into state (buffer size n):
       + * "Datagram" followed by the lengths of the read and write queues
       + * (0 for a queue that does not exist).  Returns snprint's result.
       + */
       +extern int
       +icmpstate(Conv *c, char *state, int n)
       +{
       +        int qin, qout;
       +
       +        qin = c->rq ? qlen(c->rq) : 0;
       +        qout = c->wq ? qlen(c->wq) : 0;
       +        return snprint(state, n, "%s qin %d qout %d\n", "Datagram", qin, qout);
       +}
       +
       +/*
       + * Announce conversation c using the generic Fsstdannounce; on success
       + * mark it connected.  Returns nil on success or the error string
       + * from Fsstdannounce.
       + */
       +extern char*
       +icmpannounce(Conv *c, char **argv, int argc)
       +{
       +        char *err;
       +
       +        err = Fsstdannounce(c, argv, argc);
       +        if(err == nil)
       +                Fsconnected(c, nil);
       +        return err;
       +}
       +
       +/*
       + * Tear down conversation c: close its read and write queues and
       + * reset both addresses to IPnoaddr and the local port to 0.
       + */
       +extern void
       +icmpclose(Conv *c)
       +{
       +        /* shut the queues */
       +        qclose(c->rq);
       +        qclose(c->wq);
       +
       +        /* forget the endpoints */
       +        c->lport = 0;
       +        ipmove(c->laddr, IPnoaddr);
       +        ipmove(c->raddr, IPnoaddr);
       +}
       +
       +/*
       + * Output routine bound to the conversation's write queue (see icmpcreate).
       + * x is the Conv, bp the block written by the user.  The IP addresses,
       + * protocol, ICMP id and checksum are filled in from the conversation
       + * and the packet is sent with ipoput4.  A nil bp is ignored; a block
       + * shorter than the IP plus ICMP headers is freed.
       + */
       +static void
       +icmpkick(void *x, Block *bp)
       +{
       +        Conv *c = x;
       +        Icmp *p;
       +        Icmppriv *ipriv;
       +
       +        if(bp == nil)
       +                return;
       +
       +        if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
       +                freeblist(bp);
       +                return;
       +        }
       +        p = (Icmp *)(bp->rp);
       +        p->vihl = IP_VER4;
       +        ipriv = c->p->priv;
       +        /* count by type; types beyond the table are not counted */
       +        if(p->type <= Maxtype)        
       +                ipriv->out[p->type]++;
       +        
       +        v6tov4(p->dst, c->raddr);
       +        v6tov4(p->src, c->laddr);
       +        p->proto = IP_ICMPPROTO;
       +        hnputs(p->icmpid, c->lport);
       +        /* zero the checksum field, then checksum everything after the IP header */
       +        memset(p->cksum, 0, sizeof(p->cksum));
       +        hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
       +        ipriv->stats[OutMsgs]++;
       +        ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
       +}
       +
       +/*
       + * Send a TimeExceed (code 0) message about the packet in bp back to
       + * its source.  ia is the address used as the source of the reply.
       + * The reply carries the first ICMP_IPSIZE+8 bytes of bp as payload.
       + * bp itself is neither modified nor freed here.
       + * NOTE(review): the length of bp is not checked before those bytes
       + * are copied -- presumably the caller guarantees it; confirm.
       + */
       +extern void
       +icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
       +{
       +        Block        *nbp;
       +        Icmp        *p, *np;
       +
       +        p = (Icmp *)bp->rp;
       +
       +        netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
       +        /* IP header + ICMP header + quoted IP header + 8 bytes of quoted data */
       +        nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
       +        nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
       +        np = (Icmp *)nbp->rp;
       +        np->vihl = IP_VER4;
       +        memmove(np->dst, p->src, sizeof(np->dst));
       +        v6tov4(np->src, ia);
       +        memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
       +        np->type = TimeExceed;
       +        np->code = 0;
       +        np->proto = IP_ICMPPROTO;
       +        hnputs(np->icmpid, 0);
       +        hnputs(np->seq, 0);
       +        /* zero the checksum field, then checksum everything after the IP header */
       +        memset(np->cksum, 0, sizeof(np->cksum));
       +        hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
       +        ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
       +
       +}
       +
       +/*
       + * Send an Unreachable message with the given code about the packet in
       + * bp back to its source; seq is stored in the reply's seq field
       + * (icmpcantfrag passes the mtu there).  Nothing is sent unless the
       + * original destination is a unicast address of ours and the original
       + * source is either not ours or unicast.  bp is neither modified nor
       + * freed here.
       + */
       +static void
       +icmpunreachable(Fs *f, Block *bp, int code, int seq)
       +{
       +        Block        *nbp;
       +        Icmp        *p, *np;
       +        int        i;
       +        uchar        addr[IPaddrlen];
       +
       +        p = (Icmp *)bp->rp;
       +
       +        /* only do this for unicast sources and destinations */
       +        v4tov6(addr, p->dst);
       +        i = ipforme(f, addr);
       +        if((i&Runi) == 0)
       +                return;
       +        v4tov6(addr, p->src);
       +        i = ipforme(f, addr);
       +        if(i != 0 && (i&Runi) == 0)
       +                return;
       +
       +        /* the log text says "icmpnoconv" whatever the code is */
       +        netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
       +        /* IP header + ICMP header + quoted IP header + 8 bytes of quoted data */
       +        nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
       +        nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
       +        np = (Icmp *)nbp->rp;
       +        np->vihl = IP_VER4;
       +        memmove(np->dst, p->src, sizeof(np->dst));
       +        memmove(np->src, p->dst, sizeof(np->src));
       +        memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
       +        np->type = Unreachable;
       +        np->code = code;
       +        np->proto = IP_ICMPPROTO;
       +        hnputs(np->icmpid, 0);
       +        hnputs(np->seq, seq);
       +        /* zero the checksum field, then checksum everything after the IP header */
       +        memset(np->cksum, 0, sizeof(np->cksum));
       +        hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
       +        ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
       +}
       +
       +/* Report the packet in bp as undeliverable: Unreachable code 3 ("port unreachable"). */
       +extern void
       +icmpnoconv(Fs *f, Block *bp)
       +{
       +        icmpunreachable(f, bp, 3, 0);
       +}
       +
       +/*
       + * Report that the packet in bp could not be fragmented: Unreachable
       + * code 4 ("fragmentation needed and DF set"), with mtu carried in the
       + * reply's seq field.
       + */
       +extern void
       +icmpcantfrag(Fs *f, Block *bp, int mtu)
       +{
       +        icmpunreachable(f, bp, 4, mtu);
       +}
       +
       +/*
       + * Hand the received packet bp to the first conversation whose local
       + * port equals the packet's ICMP id and whose remote address equals
       + * the packet's source.  The packet is concatenated into one block
       + * before being queued; it is freed when no conversation matches.
       + */
       +static void
       +goticmpkt(Proto *icmp, Block *bp)
       +{
       +        Conv        **cp, *cv;
       +        Icmp        *h;
       +        uchar        from[IPaddrlen];
       +        ushort        id;
       +
       +        h = (Icmp *) bp->rp;
       +        id = nhgets(h->icmpid);
       +        v4tov6(from, h->src);
       +
       +        for(cp = icmp->conv; (cv = *cp) != nil; cp++) {
       +                if(cv->lport != id || ipcmp(cv->raddr, from) != 0)
       +                        continue;
       +                bp = concatblock(bp);
       +                if(bp != nil)
       +                        qpass(cv->rq, bp);
       +                return;
       +        }
       +        freeblist(bp);
       +}
       +
       +/*
       + * Turn the echo request in bp into its reply in place: swap the IP
       + * source and destination, set the type to EchoReply and recompute the
       + * ICMP checksum.  Returns bp.
       + */
       +static Block *
       +mkechoreply(Block *bp)
       +{
       +        Icmp        *h;
       +        uchar        saved[4];
       +
       +        h = (Icmp *)bp->rp;
       +        h->vihl = IP_VER4;
       +
       +        /* exchange src and dst through a temporary */
       +        memmove(saved, h->src, sizeof(h->dst));
       +        memmove(h->src, h->dst, sizeof(h->src));
       +        memmove(h->dst, saved,  sizeof(h->dst));
       +
       +        h->type = EchoReply;
       +        memset(h->cksum, 0, sizeof(h->cksum));
       +        hnputs(h->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
       +
       +        return bp;
       +}
       +
       +/*
       + * Text for the codes of an Unreachable message, indexed by code.
       + * icmpiput uses entry 1 for any code above 5.
       + */
       +static char *unreachcode[] =
       +{
       +[0]        "net unreachable",
       +[1]        "host unreachable",
       +[2]        "protocol unreachable",
       +[3]        "port unreachable",
       +[4]        "fragmentation needed and DF set",
       +[5]        "source route failed",
       +};
       +
       +/*
       + * Receive routine for protocol icmp.  The packet in bp is validated
       + * (header length, IP length field, ICMP checksum) and counted, then
       + * dispatched on its type:
       + *   EchoRequest  - turned into an EchoReply in place and sent back;
       + *   Unreachable, TimeExceed (code 0) - the protocol named in the quoted
       + *                  IP header is advised if it has an advise routine,
       + *                  otherwise the packet goes to a local conversation;
       + *   anything else - delivered to a matching local conversation.
       + * Invalid packets are counted in the error statistics and freed.
       + */
       +static void
       +icmpiput(Proto *icmp, Ipifc* __, Block *bp)
       +{
       +        int        n, iplen;
       +        Icmp        *p;
       +        Block        *r;
       +        Proto        *pr;
       +        char        *msg;
       +        char        m2[128];
       +        Icmppriv *ipriv;
       +
       +        ipriv = icmp->priv;
       +
       +        ipriv->stats[InMsgs]++;
       +
       +        p = (Icmp *)bp->rp;
       +        n = blocklen(bp);
       +        if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
       +                ipriv->stats[InErrors]++;
       +                ipriv->stats[HlenErrs]++;
       +                netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
       +                goto raise;
       +        }
       +        /* type and code are only looked at once the header is known to be there */
       +        netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
       +
       +        /*
       +         * The IP length must fit in what was received and must cover
       +         * the IP and ICMP headers, otherwise the checksum length
       +         * computed below would be negative.
       +         */
       +        iplen = nhgets(p->length);
       +        if(iplen > n || iplen < ICMP_IPSIZE+ICMP_HDRSIZE){
       +                ipriv->stats[LenErrs]++;
       +                ipriv->stats[InErrors]++;
       +                netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
       +                goto raise;
       +        }
       +        if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
       +                ipriv->stats[InErrors]++;
       +                ipriv->stats[CsumErrs]++;
       +                netlog(icmp->f, Logicmp, "icmp checksum error\n");
       +                goto raise;
       +        }
       +        if(p->type <= Maxtype)
       +                ipriv->in[p->type]++;
       +
       +        switch(p->type) {
       +        case EchoRequest:
       +                /* drop anything received beyond the IP length */
       +                if (iplen < n)
       +                        bp = trimblock(bp, 0, iplen);
       +                r = mkechoreply(bp);
       +                ipriv->out[EchoReply]++;
       +                ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
       +                break;
       +        case Unreachable:
       +                /* codes outside the table are reported as "host unreachable" */
       +                if(p->code > 5)
       +                        msg = unreachcode[1];
       +                else
       +                        msg = unreachcode[p->code];
       +
       +                /* step over the outer headers to the quoted packet */
       +                bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
       +                if(blocklen(bp) < MinAdvise){
       +                        ipriv->stats[LenErrs]++;
       +                        goto raise;
       +                }
       +                p = (Icmp *)bp->rp;
       +                pr = Fsrcvpcolx(icmp->f, p->proto);
       +                if(pr != nil && pr->advise != nil) {
       +                        /* the advise routine takes over bp */
       +                        (*pr->advise)(pr, bp, msg);
       +                        return;
       +                }
       +
       +                /* nobody to advise: restore the outer headers and deliver locally */
       +                bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
       +                goticmpkt(icmp, bp);
       +                break;
       +        case TimeExceed:
       +                if(p->code == 0){
       +                        snprint(m2, sizeof m2, "ttl exceeded at %V", p->src);
       +
       +                        /* step over the outer headers to the quoted packet */
       +                        bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
       +                        if(blocklen(bp) < MinAdvise){
       +                                ipriv->stats[LenErrs]++;
       +                                goto raise;
       +                        }
       +                        p = (Icmp *)bp->rp;
       +                        pr = Fsrcvpcolx(icmp->f, p->proto);
       +                        if(pr != nil && pr->advise != nil) {
       +                                /* the advise routine takes over bp */
       +                                (*pr->advise)(pr, bp, m2);
       +                                return;
       +                        }
       +                        bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
       +                }
       +
       +                goticmpkt(icmp, bp);
       +                break;
       +        default:
       +                goticmpkt(icmp, bp);
       +                break;
       +        }
       +        return;
       +
       +raise:
       +        freeblist(bp);
       +}
       +
       +/*
       + * Advise routine of the icmp protocol: bp starts at the IP header of a
       + * packet we sent earlier.  The first conversation whose local port
       + * equals that packet's ICMP id and whose remote address equals its
       + * destination has both queues hung up with msg.  bp is always freed.
       + */
       +void
       +icmpadvise(Proto *icmp, Block *bp, char *msg)
       +{
       +        Conv        **cp, *cv;
       +        Icmp        *h;
       +        uchar        to[IPaddrlen];
       +        ushort        id;
       +
       +        h = (Icmp *) bp->rp;
       +        id = nhgets(h->icmpid);
       +        v4tov6(to, h->dst);
       +
       +        for(cp = icmp->conv; (cv = *cp) != nil; cp++) {
       +                if(cv->lport != id)
       +                        continue;
       +                if(ipcmp(cv->raddr, to) != 0)
       +                        continue;
       +                qhangup(cv->rq, msg);
       +                qhangup(cv->wq, msg);
       +                break;
       +        }
       +        freeblist(bp);
       +}
       +
       +/*
       + * Format the protocol statistics into buf (size len): one line per
       + * general counter, then one "name: in out" line per ICMP type, with
       + * the type number standing in for types that have no name.
       + * Returns the number of bytes written.
       + */
       +int
       +icmpstats(Proto *icmp, char *buf, int len)
       +{
       +        Icmppriv *st;
       +        char *s, *end;
       +        int t;
       +
       +        st = icmp->priv;
       +        s = buf;
       +        end = buf+len;
       +
       +        /* general counters */
       +        for(t = 0; t < Nstats; t++)
       +                s = seprint(s, end, "%s: %lud\n", statnames[t], st->stats[t]);
       +
       +        /* per-type message counts */
       +        for(t = 0; t <= Maxtype; t++){
       +                if(icmpnames[t] == nil)
       +                        s = seprint(s, end, "%d: %lud %lud\n", t, st->in[t], st->out[t]);
       +                else
       +                        s = seprint(s, end, "%s: %lud %lud\n", icmpnames[t], st->in[t], st->out[t]);
       +        }
       +        return s - buf;
       +}
       +        
       +/*
       + * Allocate the "icmp" protocol descriptor and its private data, fill
       + * in its entry points and register it with fs through Fsproto.
       + */
       +void
       +icmpinit(Fs *fs)
       +{
       +        Proto *p;
       +
       +        p = smalloc(sizeof(Proto));
       +        p->priv = smalloc(sizeof(Icmppriv));
       +
       +        /* identity and sizing */
       +        p->name = "icmp";
       +        p->ipproto = IP_ICMPPROTO;
       +        p->nc = 128;
       +        p->ptclsize = 0;
       +
       +        /* entry points */
       +        p->create = icmpcreate;
       +        p->connect = icmpconnect;
       +        p->announce = icmpannounce;
       +        p->close = icmpclose;
       +        p->state = icmpstate;
       +        p->stats = icmpstats;
       +        p->rcv = icmpiput;
       +        p->advise = icmpadvise;
       +        p->ctl = nil;
       +        p->gc = nil;
       +
       +        Fsproto(fs, p);
       +}
 (DIR) diff --git a/src/9vx/a/ip/icmp6.c b/src/9vx/a/ip/icmp6.c
       @@ -0,0 +1,946 @@
       +/*
       + * Internet Control Message Protocol for IPv6
       + */
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +#include "ip.h"
       +#include "ipv6.h"
       +
       +/* Indices into Icmppriv6.stats and statnames6; Nstats6 is the count. */
       +enum
       +{
       +        InMsgs6,
       +        InErrors6,
       +        OutMsgs6,
       +        CsumErrs6,
       +        LenErrs6,
       +        HlenErrs6,
       +        HoplimErrs6,
       +        IcmpCodeErrs6,
       +        TargetErrs6,
       +        OptlenErrs6,
       +        AddrmxpErrs6,
       +        RouterAddrErrs6,
       +
       +        Nstats6,
       +};
       +
       +/*
       + * Bytes pulled up by icmpkick6 when the user supplies addresses
       + * (headers == 6): it skips 8 bytes and then reads a local and a
       + * remote address.
       + */
       +enum {
       +        ICMP_USEAD6        = 40,
       +};
       +
       +/* Single-bit flags (bits 5, 6 and 7); their users are outside the visible part of this file. */
       +enum {
       +        Oflag        = 1<<5,
       +        Sflag        = 1<<6,
       +        Rflag        = 1<<7,
       +};
       +
       +/*
       + * Message type numbers used to index icmpnames6 and the Icmppriv6
       + * counters; Maxtype6 must stay the largest value.
       + * NOTE(review): SrcQuench and ParamProblemV6 are both 4, so the two
       + * cannot be told apart in the name table or the counters.
       + */
       +enum {
       +        /* ICMPv6 types */
       +        EchoReply        = 0,
       +        UnreachableV6        = 1,
       +        PacketTooBigV6        = 2,
       +        TimeExceedV6        = 3,
       +        SrcQuench        = 4,
       +        ParamProblemV6        = 4,
       +        Redirect        = 5,
       +        EchoRequest        = 8,
       +        TimeExceed        = 11,
       +        InParmProblem        = 12,
       +        Timestamp        = 13,
       +        TimestampReply        = 14,
       +        InfoRequest        = 15,
       +        InfoReply        = 16,
       +        AddrMaskRequest = 17,
       +        AddrMaskReply   = 18,
       +        EchoRequestV6        = 128,
       +        EchoReplyV6        = 129,
       +        RouterSolicit        = 133,
       +        RouterAdvert        = 134,
       +        NbrSolicit        = 135,
       +        NbrAdvert        = 136,
       +        RedirectV6        = 137,
       +
       +        Maxtype6        = 137,
       +};
       +
       +typedef struct ICMPpkt ICMPpkt;
       +typedef struct IPICMP IPICMP;
       +typedef struct Ndpkt Ndpkt;
       +typedef struct NdiscC NdiscC;
       +
       +/* The 8-byte ICMP header on its own, without the IP header in front. */
       +struct ICMPpkt {
       +        uchar        type;
       +        uchar        code;
       +        uchar        cksum[2];
       +        uchar        icmpid[2];
       +        uchar        seq[2];
       +};
       +
       +/*
       + * An ICMPv6 message as laid over a block: the IPv6 header followed by
       + * the ICMP header.  The fields of both are spelled out here instead
       + * of embedding Ip6hdr and ICMPpkt.
       + */
       +struct IPICMP {
       +        /* Ip6hdr; */
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40 */
       +        uchar        proto;                /* next header type */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];
       +        uchar        dst[IPaddrlen];
       +
       +        /* ICMPpkt; */
       +        uchar        type;
       +        uchar        code;
       +        uchar        cksum[2];
       +        uchar        icmpid[2];
       +        uchar        seq[2];
       +};
       +
       +/*
       + * IPICMP followed by a target address.
       + * NOTE(review): presumably the common part of the neighbour discovery
       + * messages -- confirm against the users later in the file.
       + */
       +struct NdiscC
       +{
       +        /* IPICMP; */
       +        /* Ip6hdr; */
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40 */
       +        uchar        proto;                /* next header type */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];
       +        uchar        dst[IPaddrlen];
       +
       +        /* ICMPpkt; */
       +        uchar        type;
       +        uchar        code;
       +        uchar        cksum[2];
       +        uchar        icmpid[2];
       +        uchar        seq[2];
       +
       +        uchar        target[IPaddrlen];
       +};
       +
       +/*
       + * NdiscC followed by one option carrying a link-layer address:
       + * option type, option length and a 6-byte address.
       + */
       +struct Ndpkt
       +{
       +        /* NdiscC; */
       +        /* IPICMP; */
       +        /* Ip6hdr; */
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40 */
       +        uchar        proto;                /* next header type */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];
       +        uchar        dst[IPaddrlen];
       +
       +        /* ICMPpkt; */
       +        uchar        type;
       +        uchar        code;
       +        uchar        cksum[2];
       +        uchar        icmpid[2];
       +        uchar        seq[2];
       +
       +        uchar        target[IPaddrlen];
       +
       +        uchar        otype;                /* option type */
       +        uchar        olen;                /* length in units of 8 octets(incl type, code),
       +                                 * 1 for IEEE 802 addresses */
       +        uchar        lnaddr[6];        /* link-layer address */
       +};
       +
       +/* Per-protocol private data of icmpv6: general and per-type counters. */
       +typedef struct Icmppriv6
       +{
       +        ulong        stats[Nstats6];                /* indexed by InMsgs6 .. RouterAddrErrs6 */
       +
       +        /* message counts */
       +        ulong        in[Maxtype6+1];                /* indexed by message type */
       +        ulong        out[Maxtype6+1];        /* indexed by message type */
       +} Icmppriv6;
       +
       +/* Per-conversation control block, reached through Conv.ptcl in icmpkick6. */
       +typedef struct Icmpcb6
       +{
       +        QLock        qlock;
       +        uchar        headers;        /* 6: each written block starts with user-supplied addresses (see icmpkick6) */
       +} Icmpcb6;
       +
       +/*
       + * Printable names of the message types, indexed by type number;
       + * types without an entry stay nil.
       + * NOTE(review): index 4 is labelled "SrcQuench" although
       + * ParamProblemV6 has the same value -- confirm which name is wanted.
       + */
       +char *icmpnames6[Maxtype6+1] =
       +{
       +[EchoReply]                "EchoReply",
       +[UnreachableV6]                "UnreachableV6",
       +[PacketTooBigV6]        "PacketTooBigV6",
       +[TimeExceedV6]                "TimeExceedV6",
       +[SrcQuench]                "SrcQuench",
       +[Redirect]                "Redirect",
       +[EchoRequest]                "EchoRequest",
       +[TimeExceed]                "TimeExceed",
       +[InParmProblem]                "InParmProblem",
       +[Timestamp]                "Timestamp",
       +[TimestampReply]        "TimestampReply",
       +[InfoRequest]                "InfoRequest",
       +[InfoReply]                "InfoReply",
       +[AddrMaskRequest]        "AddrMaskRequest",
       +[AddrMaskReply]                "AddrMaskReply",
       +[EchoRequestV6]                "EchoRequestV6",
       +[EchoReplyV6]                "EchoReplyV6",
       +[RouterSolicit]                "RouterSolicit",
       +[RouterAdvert]                "RouterAdvert",
       +[NbrSolicit]                "NbrSolicit",
       +[NbrAdvert]                "NbrAdvert",
       +[RedirectV6]                "RedirectV6",
       +};
       +
       +/* Labels for the Icmppriv6.stats counters, indexed like the counters. */
       +static char *statnames6[Nstats6] =
       +{
       +[InMsgs6]        "InMsgs",
       +[InErrors6]        "InErrors",
       +[OutMsgs6]        "OutMsgs",
       +[CsumErrs6]        "CsumErrs",
       +[LenErrs6]        "LenErrs",
       +[HlenErrs6]        "HlenErrs",
       +[HoplimErrs6]        "HoplimErrs",
       +[IcmpCodeErrs6]        "IcmpCodeErrs",
       +[TargetErrs6]        "TargetErrs",
       +[OptlenErrs6]        "OptlenErrs",
       +[AddrmxpErrs6]        "AddrmxpErrs",
       +[RouterAddrErrs6]        "RouterAddrErrs",
       +};
       +
       +/*
       + * Text for the codes of an unreachable message, indexed by the
       + * Icmp6_* code constants (declared outside this file, presumably in
       + * ipv6.h).
       + */
       +static char *unreachcode[] =
       +{
       +[Icmp6_no_route]        "no route to destination",
       +[Icmp6_ad_prohib]        "comm with destination administratively prohibited",
       +[Icmp6_out_src_scope]        "beyond scope of source address",
       +[Icmp6_adr_unreach]        "address unreachable",
       +[Icmp6_port_unreach]        "port unreachable",
       +[Icmp6_gress_src_fail]        "source address failed ingress/egress policy",
       +[Icmp6_rej_route]        "reject route to destination",
       +[Icmp6_unknown]                "icmp unreachable: unknown code",
       +};
       +
       +static void icmpkick6(void *x, Block *bp);
       +
       +/*
       + * Set up the queues of a new conversation: a read queue opened with
       + * a 64KB limit and the Qmsg flag, and a bypass write queue bound to
       + * icmpkick6.
       + */
       +static void
       +icmpcreate6(Conv *c)
       +{
       +        enum { Rqlimit = 64*1024 };
       +
       +        c->rq = qopen(Rqlimit, Qmsg, 0, c);
       +        c->wq = qbypass(icmpkick6, c);
       +}
       +
       +/*
       + * Compute and store the ICMP checksum of the packet in bp.
       + * The leading fields of the IP header are temporarily rewritten so
       + * that the header can be summed as the pseudoheader; src and dst are
       + * left alone, so they must already be set.  On return proto is
       + * ICMPv6 while vcf and ttl still hold the pseudoheader values and
       + * have to be filled in by the caller or later in the output path.
       + */
       +static void
       +set_cksum(Block *bp)
       +{
       +        IPICMP *p = (IPICMP *)(bp->rp);
       +
       +        hnputl(p->vcf, 0);          /* borrow IP header as pseudoheader */
       +        hnputs(p->ploadlen, blocklen(bp) - IP6HDR);
       +        p->proto = 0;
       +        p->ttl = ICMPv6;        /* ttl gets set later */
       +        /* zero the checksum field, then sum the whole block, header included */
       +        hnputs(p->cksum, 0);
       +        hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
       +        p->proto = ICMPv6;
       +}
       +
       +/*
       + * Allocate a block holding packetlen zeroed bytes, with the write
       + * pointer already advanced past them.  Returns the new block.
       + */
       +static Block *
       +newIPICMP(int packetlen)
       +{
       +        Block *b;
       +
       +        b = allocb(packetlen);
       +        memset(b->rp, 0, packetlen);
       +        b->wp += packetlen;
       +        return b;
       +}
       +
       +/*
       + * Report an error to the conversation that sent the packet quoted in
       + * bp.  The first conversation whose local port equals the quoted icmp
       + * id and whose remote address equals the quoted destination has both
       + * of its queues hung up with msg.  bp is freed in every case.
       + */
       +void
       +icmpadvise6(Proto *icmp, Block *bp, char *msg)
       +{
       +        IPICMP *h;
       +        Conv **cp;
       +        ushort id;
       +
       +        h = (IPICMP *)bp->rp;
       +        id = nhgets(h->icmpid);
       +
       +        for(cp = icmp->conv; *cp != nil; cp++) {
       +                if((*cp)->lport != id)
       +                        continue;
       +                if(ipcmp((*cp)->raddr, h->dst) != 0)
       +                        continue;
       +                qhangup((*cp)->rq, msg);
       +                qhangup((*cp)->wq, msg);
       +                break;
       +        }
       +        freeblist(bp);
       +}
       +
       +/*
       + * Write-side handler for an ICMPv6 conversation (installed by
       + * icmpcreate6 through qbypass).  x is the Conv, bp the data written
       + * by the user.  The IPv6 addresses are filled in, the checksum is
       + * computed, the per-type output counter is bumped and the packet is
       + * handed to ipoput6.
       + *
       + * If the "headers" control request has been issued (icb->headers == 6)
       + * the user data starts with an ICMP_USEAD6-byte prefix from which the
       + * local and remote addresses are taken; otherwise the conversation's
       + * own addresses and local port are used.
       + */
       +static void
       +icmpkick6(void *x, Block *bp)
       +{
       +        uchar laddr[IPaddrlen], raddr[IPaddrlen];
       +        Conv *c = x;
       +        IPICMP *p;
       +        Icmppriv6 *ipriv = c->p->priv;
       +        Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
       +
       +        if(bp == nil)
       +                return;
       +
       +        if(icb->headers==6) {
       +                /* get user specified addresses */
       +                bp = pullupblock(bp, ICMP_USEAD6);
       +                if(bp == nil)
       +                        return;
       +                bp->rp += 8;        /* skip the first 8 bytes of the prefix */
       +                ipmove(laddr, bp->rp);
       +                bp->rp += IPaddrlen;
       +                ipmove(raddr, bp->rp);
       +                bp->rp += IPaddrlen;
       +                /* make room in front of the data for the IPv6 header */
       +                bp = padblock(bp, sizeof(Ip6hdr));
       +        }
       +
       +        /* too short to hold an IPv6 + ICMPv6 header: drop it */
       +        if(blocklen(bp) < sizeof(IPICMP)){
       +                freeblist(bp);
       +                return;
       +        }
       +        p = (IPICMP *)(bp->rp);
       +        if(icb->headers == 6) {
       +                ipmove(p->dst, raddr);
       +                ipmove(p->src, laddr);
       +        } else {
       +                ipmove(p->dst, c->raddr);
       +                ipmove(p->src, c->laddr);
       +                hnputs(p->icmpid, c->lport);
       +        }
       +
       +        set_cksum(bp);
       +        p->vcf[0] = 0x06 << 4;        /* set_cksum zeroed vcf; restore version 6 */
       +        if(p->type <= Maxtype6)
       +                ipriv->out[p->type]++;
       +        ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
       +}
       +
       +/*
       + * Control requests for an ICMPv6 conversation.  The only request
       + * understood is the single word "headers", which sets the
       + * conversation's headers mode to 6 (see icmpkick6).  Returns nil on
       + * success or an error string.
       + */
       +char*
       +icmpctl6(Conv *c, char **argv, int argc)
       +{
       +        Icmpcb6 *cb;
       +
       +        if(argc != 1 || strcmp(argv[0], "headers") != 0)
       +                return "unknown control request";
       +
       +        cb = (Icmpcb6*) c->ptcl;
       +        cb->headers = 6;
       +        return nil;
       +}
       +
       +/*
       + * Pass a received ICMPv6 packet up to the first matching conversation.
       + * With muxkey == 0 the match is on the packet's icmp id and source
       + * address; otherwise it is on muxkey and the packet's destination
       + * address.  The packet is freed if no conversation matches.
       + */
       +static void
       +goticmpkt6(Proto *icmp, Block *bp, int muxkey)
       +{
       +        IPICMP *h = (IPICMP *)bp->rp;
       +        Conv **cp;
       +        uchar *peer;
       +        ushort key;
       +
       +        key = muxkey;
       +        peer = h->dst;
       +        if(muxkey == 0) {
       +                key = nhgets(h->icmpid);
       +                peer = h->src;
       +        }
       +
       +        for(cp = icmp->conv; *cp != nil; cp++){
       +                if((*cp)->lport != key || ipcmp((*cp)->raddr, peer) != 0)
       +                        continue;
       +                bp = concatblock(bp);
       +                if(bp != nil)
       +                        qpass((*cp)->rq, bp);
       +                return;
       +        }
       +
       +        freeblist(bp);
       +}
       +
       +/*
       + * Turn the echo request in bp into an echo reply, in place.
       + * The reply is addressed to the requester.  Its source is the
       + * request's destination when that was not multicast; otherwise
       + * ipv6anylocal is asked to supply one and nil is returned if it
       + * fails (bp is not freed here; the caller still owns it).
       + */
       +static Block *
       +mkechoreply6(Block *bp, Ipifc *ifc)
       +{
       +        IPICMP *h = (IPICMP *)(bp->rp);
       +        uchar requester[IPaddrlen];
       +
       +        ipmove(requester, h->src);
       +        if(isv6mcast(h->dst)) {
       +                if(!ipv6anylocal(ifc, h->src))
       +                        return nil;
       +        } else
       +                ipmove(h->src, h->dst);
       +        ipmove(h->dst, requester);
       +        h->type = EchoReplyV6;
       +        set_cksum(bp);
       +        return bp;
       +}
       +
       +/*
       + * Send an ICMPv6 neighbor solicitation for targ.
       + *        suni == SRC_UNSPEC: the source is the unspecified address and
       + *                no link-layer address option is sent;
       + *        otherwise (SRC_UNI): the source is src and mac is sent as
       + *                the source link-layer address option.
       + *        tuni == TARG_UNI: sent to targ itself (neighbor reachability);
       + *        otherwise (TARG_MULTI): sent to the multicast address
       + *                produced by ipv62smcast (address resolution).
       + */
       +extern void
       +icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
       +{
       +        Block *nbp;
       +        Ndpkt *np;
       +        Proto *icmp = f->t2p[ICMPv6];
       +        Icmppriv6 *ipriv = icmp->priv;
       +
       +        nbp = newIPICMP(sizeof(Ndpkt));
       +        np = (Ndpkt*) nbp->rp;
       +
       +        if(suni == SRC_UNSPEC)
       +                memmove(np->src, v6Unspecified, IPaddrlen);
       +        else
       +                memmove(np->src, src, IPaddrlen);
       +
       +        if(tuni == TARG_UNI)
       +                memmove(np->dst, targ, IPaddrlen);
       +        else
       +                ipv62smcast(np->dst, targ);
       +
       +        np->type = NbrSolicit;
       +        np->code = 0;
       +        memmove(np->target, targ, IPaddrlen);
       +        if(suni != SRC_UNSPEC) {
       +                np->otype = SRC_LLADDR;
       +                np->olen = 1;                /* 1+1+6 = 8 = 1 8-octet */
       +                memmove(np->lnaddr, mac, sizeof(np->lnaddr));
       +        } else
       +                /* no option: trim the packet back to the NdiscC part */
       +                nbp->wp -= sizeof(Ndpkt) - sizeof(NdiscC);
       +
       +        set_cksum(nbp);
       +        np = (Ndpkt*)nbp->rp;
       +        np->ttl = HOP_LIMIT;        /* set_cksum used ttl and vcf as scratch */
       +        np->vcf[0] = 0x06 << 4;
       +        ipriv->out[NbrSolicit]++;
       +        netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ);
       +        ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
       +}
       +
       +/*
       + * Send an ICMPv6 neighbor advertisement for targ from src to dst.
       + * flags holds the RSO flag bits and is stored in the first byte after
       + * the checksum; mac is sent as the target link-layer address option.
       + */
       +extern void
       +icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
       +{
       +        Block *nbp;
       +        Ndpkt *np;
       +        Proto *icmp = f->t2p[ICMPv6];
       +        Icmppriv6 *ipriv = icmp->priv;
       +
       +        nbp = newIPICMP(sizeof(Ndpkt));
       +        np = (Ndpkt*)nbp->rp;
       +
       +        memmove(np->src, src, IPaddrlen);
       +        memmove(np->dst, dst, IPaddrlen);
       +
       +        np->type = NbrAdvert;
       +        np->code = 0;
       +        np->icmpid[0] = flags;
       +        memmove(np->target, targ, IPaddrlen);
       +
       +        np->otype = TARGET_LLADDR;
       +        np->olen = 1;                /* option length in units of 8 octets */
       +        memmove(np->lnaddr, mac, sizeof(np->lnaddr));
       +
       +        set_cksum(nbp);
       +        np = (Ndpkt*) nbp->rp;
       +        np->ttl = HOP_LIMIT;        /* set_cksum used ttl and vcf as scratch */
       +        np->vcf[0] = 0x06 << 4;
       +        ipriv->out[NbrAdvert]++;
       +        netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
       +        ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
       +}
       +
       +/*
       + * Send an ICMPv6 destination-unreachable message with the given code
       + * to the source of the packet in bp, quoting as much of that packet
       + * as fits in v6MINTU.  Nothing is sent if the source is multicast or
       + * if ipv6anylocal cannot supply a source address on ifc.
       + *
       + * If free is non-zero the reply is fed back in through ipiput6 and bp
       + * is freed before returning.  If free is zero the reply is sent with
       + * ipoput6 and bp is left untouched for the caller.
       + *
       + * The read lock on ifc is taken only after the multicast check and is
       + * released on every path that took it.
       + */
       +extern void
       +icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
       +{
       +        int osz = BLEN(bp);
       +        int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
       +        Block *nbp;
       +        IPICMP *np;
       +        Ip6hdr *p;
       +        Proto *icmp = f->t2p[ICMPv6];
       +        Icmppriv6 *ipriv = icmp->priv;
       +
       +        p = (Ip6hdr *)bp->rp;
       +
       +        /* never answer a multicast source; ifc is not locked yet */
       +        if(isv6mcast(p->src))
       +                goto freebp;
       +
       +        nbp = newIPICMP(sz);
       +        np = (IPICMP *)nbp->rp;
       +
       +        RLOCK(ifc);
       +        if(!ipv6anylocal(ifc, np->src)) {
       +                netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n",
       +                        p->src, p->dst);
       +                RUNLOCK(ifc);
       +                freeblist(nbp);
       +                goto freebp;
       +        }
       +        netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n",
       +                p->src, p->dst);
       +
       +        memmove(np->dst, p->src, IPaddrlen);
       +        np->type = UnreachableV6;
       +        np->code = code;
       +        /* quote the offending packet after the ICMPv6 header */
       +        memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
       +        set_cksum(nbp);
       +        np->ttl = HOP_LIMIT;
       +        np->vcf[0] = 0x06 << 4;
       +        ipriv->out[UnreachableV6]++;
       +
       +        if(free)
       +                ipiput6(f, ifc, nbp);
       +        else
       +                ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
       +        RUNLOCK(ifc);
       +
       +freebp:
       +        /* bp is ours to release only when the caller said so */
       +        if(free)
       +                freeblist(bp);
       +}
       +
       +/*
       + * Send an ICMPv6 time-exceeded message (code 0) to the source of the
       + * packet in bp, quoting as much of that packet as fits in v6MINTU.
       + * Nothing is sent if the source is multicast or if ipv6anylocal
       + * cannot supply a source address on ifc.  bp is not freed here.
       + */
       +extern void
       +icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
       +{
       +        int osz = BLEN(bp);
       +        int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
       +        Block *nbp;
       +        IPICMP *np;
       +        Ip6hdr *p;
       +        Proto *icmp = f->t2p[ICMPv6];
       +        Icmppriv6 *ipriv = icmp->priv;
       +
       +        p = (Ip6hdr *)bp->rp;
       +
       +        if(isv6mcast(p->src))
       +                return;
       +
       +        nbp = newIPICMP(sz);
       +        np = (IPICMP *) nbp->rp;
       +
       +        if(!ipv6anylocal(ifc, np->src)) {
       +                netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n",
       +                        p->src, p->dst);
       +                /* the reply will never be sent: release it */
       +                freeblist(nbp);
       +                return;
       +        }
       +        netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n",
       +                p->src, p->dst);
       +
       +        memmove(np->dst, p->src, IPaddrlen);
       +        np->type = TimeExceedV6;
       +        np->code = 0;
       +        /* quote the offending packet after the ICMPv6 header */
       +        memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
       +        set_cksum(nbp);
       +        np->ttl = HOP_LIMIT;
       +        np->vcf[0] = 0x06 << 4;
       +        ipriv->out[TimeExceedV6]++;
       +        ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
       +}
       +
       +/*
       + * Send an ICMPv6 packet-too-big message to the source of the packet
       + * in bp, quoting as much of that packet as fits in v6MINTU.  The
       + * value reported is ifc->maxtu less the medium's header size.
       + * Nothing is sent if the source is multicast or if ipv6anylocal
       + * cannot supply a source address on ifc.  bp is not freed here.
       + */
       +extern void
       +icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
       +{
       +        int osz = BLEN(bp);
       +        int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
       +        Block *nbp;
       +        IPICMP *np;
       +        Ip6hdr *p;
       +        Proto *icmp = f->t2p[ICMPv6];
       +        Icmppriv6 *ipriv = icmp->priv;
       +
       +        p = (Ip6hdr *)bp->rp;
       +
       +        if(isv6mcast(p->src))
       +                return;
       +
       +        nbp = newIPICMP(sz);
       +        np = (IPICMP *)nbp->rp;
       +
       +        if(!ipv6anylocal(ifc, np->src)) {
       +                netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n",
       +                        p->src, p->dst);
       +                /* the reply will never be sent: release it */
       +                freeblist(nbp);
       +                return;
       +        }
       +        netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n",
       +                p->src, p->dst);
       +
       +        memmove(np->dst, p->src, IPaddrlen);
       +        np->type = PacketTooBigV6;
       +        np->code = 0;
       +        /* 32-bit value written starting at the icmpid field */
       +        hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
       +        /* quote the offending packet after the ICMPv6 header */
       +        memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
       +        set_cksum(nbp);
       +        np->ttl = HOP_LIMIT;
       +        np->vcf[0] = 0x06 << 4;
       +        ipriv->out[PacketTooBigV6]++;
       +        ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
       +}
       +
       +/*
       + * Validate a received ICMPv6 packet: header length, payload length,
       + * checksum, and, for neighbor-discovery types, the extra checks of
       + * RFC 2461, pages 39-40, pages 57-58.
       + *
       + * Returns 1 if the packet is acceptable and 0 otherwise.  Each failed
       + * check that has its own counter bumps it in ipriv->stats, and every
       + * failure also bumps InErrors6.  The packet is never freed here.
       + */
       +static int
       +valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv)
       +{
       +        int sz, osz, unsp, n, ttl, iplen;
       +        int pktsz = BLEN(bp);
       +        uchar *packet = bp->rp;
       +        IPICMP *p = (IPICMP *) packet;
       +        Ndpkt *np;
       +
       +        USED(ifc);
       +        n = blocklen(bp);
       +        if(n < sizeof(IPICMP)) {
       +                ipriv->stats[HlenErrs6]++;
       +                netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
       +                goto err;
       +        }
       +
       +        iplen = nhgets(p->ploadlen);
       +        /* NOTE(review): "% 1" is always 0, so only the first test can fire */
       +        if(iplen > n - IP6HDR || ((uint)iplen % 1) != 0) {
       +                ipriv->stats[LenErrs6]++;
       +                netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
       +                goto err;
       +        }
       +
       +        /* Rather than construct explicit pseudoheader, overwrite IPv6 header */
       +        if(p->proto != ICMPv6) {
       +                /* This code assumes no extension headers!!! */
       +                netlog(icmp->f, Logicmp, "icmp error: extension header\n");
       +                goto err;
       +        }
       +        memset(packet, 0, 4);
       +        ttl = p->ttl;                /* saved so it can be restored below */
       +        p->ttl = p->proto;
       +        p->proto = 0;
       +        if(ptclcsum(bp, 0, iplen + IP6HDR)) {
       +                /* header is left in its rewritten state on this path */
       +                ipriv->stats[CsumErrs6]++;
       +                netlog(icmp->f, Logicmp, "icmp checksum error\n");
       +                goto err;
       +        }
       +        p->proto = p->ttl;
       +        p->ttl = ttl;
       +
       +        /* additional tests for some pkt types */
       +        if (p->type == NbrSolicit   || p->type == NbrAdvert ||
       +            p->type == RouterAdvert || p->type == RouterSolicit ||
       +            p->type == RedirectV6) {
       +                if(p->ttl != HOP_LIMIT) {
       +                        ipriv->stats[HoplimErrs6]++;
       +                        goto err;
       +                }
       +                if(p->code != 0) {
       +                        ipriv->stats[IcmpCodeErrs6]++;
       +                        goto err;
       +                }
       +
       +                switch (p->type) {
       +                case NbrSolicit:
       +                case NbrAdvert:
       +                        np = (Ndpkt*) p;
       +                        /* the target must not be a multicast address */
       +                        if(isv6mcast(np->target)) {
       +                                ipriv->stats[TargetErrs6]++;
       +                                goto err;
       +                        }
       +                        /* an option, if present, must have non-zero length */
       +                        if(optexsts(np) && np->olen == 0) {
       +                                ipriv->stats[OptlenErrs6]++;
       +                                goto err;
       +                        }
       +
       +                        /*
       +                         * a solicitation from the unspecified address must
       +                         * be sent to a solicited-node multicast address
       +                         * and carry no option
       +                         */
       +                        if (p->type == NbrSolicit &&
       +                            ipcmp(np->src, v6Unspecified) == 0)
       +                                if(!issmcast(np->dst) || optexsts(np)) {
       +                                        ipriv->stats[AddrmxpErrs6]++;
       +                                        goto err;
       +                                }
       +
       +                        /*
       +                         * a multicast advertisement must not have Sflag set.
       +                         * NOTE(review): icmpna stores the flags in icmpid[0]
       +                         * only, while this reads both bytes with nhgets;
       +                         * confirm Sflag's value selects the intended bit.
       +                         */
       +                        if(p->type == NbrAdvert)
       +                                if(isv6mcast(np->dst) &&
       +                                    (nhgets(np->icmpid) & Sflag)){
       +                                        ipriv->stats[AddrmxpErrs6]++;
       +                                        goto err;
       +                                }
       +                        break;
       +
       +                case RouterAdvert:
       +                        if(pktsz - sizeof(Ip6hdr) < 16) {
       +                                ipriv->stats[HlenErrs6]++;
       +                                goto err;
       +                        }
       +                        if(!islinklocal(p->src)) {
       +                                ipriv->stats[RouterAddrErrs6]++;
       +                                goto err;
       +                        }
       +                        /* walk the options; each length is in 8-byte units */
       +                        sz = sizeof(IPICMP) + 8;
       +                        while (sz+1 < pktsz) {
       +                                osz = packet[sz+1];
       +                                if(osz <= 0) {
       +                                        ipriv->stats[OptlenErrs6]++;
       +                                        goto err;
       +                                }
       +                                sz += 8*osz;
       +                        }
       +                        break;
       +
       +                case RouterSolicit:
       +                        if(pktsz - sizeof(Ip6hdr) < 8) {
       +                                ipriv->stats[HlenErrs6]++;
       +                                goto err;
       +                        }
       +                        unsp = (ipcmp(p->src, v6Unspecified) == 0);
       +                        /*
       +                         * walk the options; a source link-layer address
       +                         * option is rejected when the source is unspecified
       +                         */
       +                        sz = sizeof(IPICMP) + 8;
       +                        while (sz+1 < pktsz) {
       +                                osz = packet[sz+1];
       +                                if(osz <= 0 ||
       +                                    (unsp && packet[sz] == SRC_LLADDR)) {
       +                                        ipriv->stats[OptlenErrs6]++;
       +                                        goto err;
       +                                }
       +                                sz += 8*osz;
       +                        }
       +                        break;
       +
       +                case RedirectV6:
       +                        /* to be filled in */
       +                        break;
       +
       +                default:
       +                        goto err;
       +                }
       +        }
       +        return 1;
       +err:
       +        ipriv->stats[InErrors6]++;
       +        return 0;
       +}
       +
       +/*
       + * Classify target with respect to ifc, under the interface read lock:
       + *        Tuniproxy if ipproxyifc accepts it;
       + *        Tunitent  if it is a local address still marked tentative;
       + *        Tunirany  if it is any other local address;
       + *        0         if it matches nothing.
       + */
       +static int
       +targettype(Fs *f, Ipifc *ifc, uchar *target)
       +{
       +        Iplifc *l;
       +        int kind;
       +
       +        kind = 0;
       +        RLOCK(ifc);
       +        if(ipproxyifc(f, ifc, target))
       +                kind = Tuniproxy;
       +        else
       +                for(l = ifc->lifc; l != nil; l = l->next) {
       +                        if(ipcmp(l->local, target) != 0)
       +                                continue;
       +                        kind = l->tentative? Tunitent: Tunirany;
       +                        break;
       +                }
       +        RUNLOCK(ifc);
       +        return kind;
       +}
       +
       +/*
       + * Receive routine for ICMPv6 (installed as the protocol's rcv hook).
       + * The packet is checked with valid() and then handled by type:
       + *        echo request:          answered in place with an echo reply;
       + *        unreachable, time exceeded: the quoted packet is passed to the
       + *                advise routine of the protocol it belongs to, if that
       + *                protocol has one, else delivered to a local
       + *                conversation;
       + *        router advert/solicit: delivered to a conversation keyed by
       + *                the message type;
       + *        neighbor solicit:      answered with an advertisement when the
       + *                target is one of ours (or proxied);
       + *        neighbor advert:       entered into the ARP table;
       + *        anything else:         delivered to a local conversation.
       + * bp is consumed on every path.
       + */
       +static void
       +icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
       +{
       +        int refresh = 1;
       +        char *msg, m2[128];
       +        uchar pktflags;
       +        uchar *packet = bp->rp;
       +        uchar lsrc[IPaddrlen];
       +        Block *r;
       +        IPICMP *p = (IPICMP *)packet;
       +        Icmppriv6 *ipriv = icmp->priv;
       +        Iplifc *lifc;
       +        Ndpkt* np;
       +        Proto *pr;
       +
       +        if(!valid(icmp, ipifc, bp, ipriv) || p->type > Maxtype6)
       +                goto raise;
       +
       +        ipriv->in[p->type]++;
       +
       +        switch(p->type) {
       +        case EchoRequestV6:
       +                r = mkechoreply6(bp, ipifc);
       +                if(r == nil)
       +                        goto raise;
       +                /* count under the type actually sent (set by mkechoreply6) */
       +                ipriv->out[EchoReplyV6]++;
       +                ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
       +                break;
       +
       +        case UnreachableV6:
       +                if(p->code >= nelem(unreachcode))
       +                        msg = unreachcode[Icmp6_unknown];
       +                else
       +                        msg = unreachcode[p->code];
       +
       +                /* step over our headers to the quoted packet */
       +                bp->rp += sizeof(IPICMP);
       +                if(blocklen(bp) < 8){
       +                        ipriv->stats[LenErrs6]++;
       +                        goto raise;
       +                }
       +                p = (IPICMP *)bp->rp;
       +                pr = Fsrcvpcolx(icmp->f, p->proto);
       +                if(pr != nil && pr->advise != nil) {
       +                        (*pr->advise)(pr, bp, msg);
       +                        return;
       +                }
       +
       +                bp->rp -= sizeof(IPICMP);
       +                goticmpkt6(icmp, bp, 0);
       +                break;
       +
       +        case TimeExceedV6:
       +                if(p->code == 0){
       +                        sprint(m2, "ttl exceeded at %I", p->src);
       +
       +                        /* step over our headers to the quoted packet */
       +                        bp->rp += sizeof(IPICMP);
       +                        if(blocklen(bp) < 8){
       +                                ipriv->stats[LenErrs6]++;
       +                                goto raise;
       +                        }
       +                        p = (IPICMP *)bp->rp;
       +                        pr = Fsrcvpcolx(icmp->f, p->proto);
       +                        if(pr && pr->advise) {
       +                                (*pr->advise)(pr, bp, m2);
       +                                return;
       +                        }
       +                        bp->rp -= sizeof(IPICMP);
       +                }
       +
       +                goticmpkt6(icmp, bp, 0);
       +                break;
       +
       +        case RouterAdvert:
       +        case RouterSolicit:
       +                /* using lsrc as a temp, munge hdr for goticmp6 (disabled) */
       +                if (0) {
       +                        memmove(lsrc, p->src, IPaddrlen);
       +                        memmove(p->src, p->dst, IPaddrlen);
       +                        memmove(p->dst, lsrc, IPaddrlen);
       +                }
       +                goticmpkt6(icmp, bp, p->type);
       +                break;
       +
       +        case NbrSolicit:
       +                np = (Ndpkt*) p;
       +                pktflags = 0;
       +                switch (targettype(icmp->f, ipifc, np->target)) {
       +                case Tunirany:
       +                        pktflags |= Oflag;
       +                        /* fall through */
       +
       +                case Tuniproxy:
       +                        if(ipcmp(np->src, v6Unspecified) != 0) {
       +                                arpenter(icmp->f, V6, np->src, np->lnaddr,
       +                                        8*np->olen-2, 0);
       +                                pktflags |= Sflag;
       +                        }
       +                        if(ipv6local(ipifc, lsrc))
       +                                icmpna(icmp->f, lsrc,
       +                                        (ipcmp(np->src, v6Unspecified) == 0?
       +                                                v6allnodesL: np->src),
       +                                        np->target, ipifc->mac, pktflags);
       +                        /*
       +                         * icmpna builds its own packet and copies what it
       +                         * needs, so the solicitation is released whether
       +                         * or not a reply was sent.
       +                         */
       +                        freeblist(bp);
       +                        break;
       +
       +                case Tunitent:
       +                        /* not clear what needs to be done. send up
       +                         * an icmp mesg saying don't use this address? */
       +                default:
       +                        freeblist(bp);
       +                }
       +                break;
       +
       +        case NbrAdvert:
       +                np = (Ndpkt*) p;
       +
       +                /*
       +                 * if the target address matches one of the local interface
       +                 * addresses and the local interface address has tentative bit
       +                 * set, insert into ARP table. this is so the duplicate address
       +                 * detection part of ipconfig can discover duplication through
       +                 * the arp table.
       +                 */
       +                lifc = iplocalonifc(ipifc, np->target);
       +                if(lifc && lifc->tentative)
       +                        refresh = 0;
       +                arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2,
       +                        refresh);
       +                freeblist(bp);
       +                break;
       +
       +        case PacketTooBigV6:
       +        default:
       +                goticmpkt6(icmp, bp, 0);
       +                break;
       +        }
       +        return;
       +raise:
       +        freeblist(bp);
       +}
       +
       +/*
       + * Format the ICMPv6 statistics into buf (at most len bytes) and
       + * return the number of bytes written.  The general counters come
       + * first, one "name: value" line each, followed by one
       + * "name: in out" line for every message type that has a name in
       + * icmpnames6; types without a name are not reported.
       + */
       +int
       +icmpstats6(Proto *icmp6, char *buf, int len)
       +{
       +        Icmppriv6 *priv = icmp6->priv;
       +        char *end = buf + len;
       +        char *s = buf;
       +        int t;
       +
       +        for(t = 0; t < Nstats6; t++)
       +                s = seprint(s, end, "%s: %lud\n", statnames6[t], priv->stats[t]);
       +
       +        for(t = 0; t <= Maxtype6; t++) {
       +                if(icmpnames6[t] == nil)
       +                        continue;
       +                s = seprint(s, end, "%s: %lud %lud\n", icmpnames6[t],
       +                        priv->in[t], priv->out[t]);
       +        }
       +        return s - buf;
       +}
       +
       +
       +/* import from icmp.c */
       +extern int        icmpstate(Conv *c, char *state, int n);
       +extern char*        icmpannounce(Conv *c, char **argv, int argc);
       +extern char*        icmpconnect(Conv *c, char **argv, int argc);
       +extern void        icmpclose(Conv *c);
       +
       +/*
       + * Create the "icmpv6" protocol and register it with fs.
       + * connect, announce, state and close are shared with icmp.c; the
       + * remaining hooks are the ICMPv6 routines in this file.
       + */
       +void
       +icmp6init(Fs *fs)
       +{
       +        Proto *icmp6 = smalloc(sizeof(Proto));
       +
       +        icmp6->priv = smalloc(sizeof(Icmppriv6));
       +        icmp6->name = "icmpv6";
       +        icmp6->connect = icmpconnect;
       +        icmp6->announce = icmpannounce;
       +        icmp6->state = icmpstate;
       +        icmp6->create = icmpcreate6;
       +        icmp6->close = icmpclose;
       +        icmp6->rcv = icmpiput6;
       +        icmp6->stats = icmpstats6;
       +        icmp6->ctl = icmpctl6;
       +        icmp6->advise = icmpadvise6;
       +        icmp6->gc = nil;
       +        icmp6->ipproto = ICMPv6;
       +        icmp6->nc = 16;
       +        icmp6->ptclsize = sizeof(Icmpcb6);
       +
       +        Fsproto(fs, icmp6);
       +}
 (DIR) diff --git a/src/9vx/a/ip/igmp.c b/src/9vx/a/ip/igmp.c
       @@ -0,0 +1,294 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +
       +enum
       +{
       +        IGMP_IPHDRSIZE        = 20,                /* size of ip header */
       +        IGMP_HDRSIZE        = 8,                /* size of IGMP header */
       +        IP_IGMPPROTO        = 2,
       +
       +        IGMPquery        = 1,
       +        IGMPreport        = 2,
       +
       +        MSPTICK                = 100,
       +        MAXTIMEOUT        = 10000/MSPTICK,        /* at most 10 secs for a response */
       +};
       +
       +/*
       + * Layout of an IGMP packet as built and parsed in this file: an IP
       + * header followed by the IGMP header.
       + *
       + * NOTE(review): the IP part is 12 + 2*IPaddrlen bytes, which equals
       + * IGMP_IPHDRSIZE (20) only if IPaddrlen is 4 -- confirm against the
       + * definition of IPaddrlen before relying on this layout.
       + */
       +typedef struct IGMPpkt IGMPpkt;
       +typedef char byte;
       +
       +struct IGMPpkt
       +{
       +        /* ip header */
       +        byte        vihl;                /* Version and header length */
       +        byte        tos;                /* Type of service */
       +        byte        len[2];                /* packet length (including headers) */
       +        byte        id[2];                /* Identification */
       +        byte        frag[2];        /* Fragment information */
       +        byte        Unused;        
       +        byte        proto;                /* Protocol */
       +        byte        cksum[2];        /* checksum of ip portion */
       +        byte        src[IPaddrlen];                /* Ip source */
       +        byte        dst[IPaddrlen];                /* Ip destination */
       +
       +        /* igmp header */
       +        byte        vertype;        /* version and type */
       +        byte        unused;
       +        byte        igmpcksum[2];                /* checksum of igmp portion */
       +        byte        group[IPaddrlen];        /* multicast group */
       +};
       +
       +/*
       + *  lists for group reports
       + */
       +typedef struct IGMPrep IGMPrep;
       +struct IGMPrep
       +{
       +        IGMPrep                *next;                /* next entry on igmpalloc.reports */
       +        Media                *m;                /* medium the reports are sent on */
       +        int                ticks;                /* bumped by igmpproc on each pass */
       +        Multicast        *multi;                /* groups still waiting to be reported */
       +};
       +
       +/*
       + * Global IGMP state: the list of pending reports, the lock igmpproc
       + * holds while walking it, and the rendezvous igmpproc sleeps on while
       + * the list is empty.
       + */
       +typedef struct IGMP IGMP;
       +struct IGMP
       +{
       +        Lock lk;
       +
       +        Rendez        r;
       +        IGMPrep        *reports;
       +};
       +
       +IGMP igmpalloc;
       +
       +        Proto        igmp;
       +extern        Fs        fs;
       +
       +static struct Stats
       +{
       +        ulong         inqueries;
       +        ulong        outqueries;
       +        ulong        inreports;
       +        ulong        outreports;
       +} stats;
       +
       +/*
       + * Build and send an IGMP report for the group addr on medium m.
       + * The packet is sent to Ipallsys with a TTL of 1.
       + *
       + * The header is cleared before any field is set; clearing it after
       + * setting vihl would wipe the version field out again.
       + *
       + * NOTE(review): netlog and ipoput4 are called here without the Fs
       + * argument that the ICMPv6 code passes to netlog and ipoput6 --
       + * confirm against the current prototypes.
       + */
       +void
       +igmpsendreport(Media *m, byte *addr)
       +{
       +        IGMPpkt *p;
       +        Block *bp;
       +
       +        bp = allocb(sizeof(IGMPpkt));
       +        if(bp == nil)
       +                return;
       +        p = (IGMPpkt*)bp->wp;
       +        bp->wp += sizeof(IGMPpkt);
       +        memset(p, 0, sizeof(IGMPpkt));
       +        p->vihl = IP_VER4;
       +        hnputl(p->src, Mediagetaddr(m));
       +        hnputl(p->dst, Ipallsys);
       +        p->vertype = (1<<4) | IGMPreport;
       +        p->proto = IP_IGMPPROTO;
       +        memmove(p->group, addr, IPaddrlen);
       +        hnputs(p->igmpcksum, ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE));
       +        netlog(Logigmp, "igmpreport %I\n", p->group);
       +        stats.outreports++;
       +        ipoput4(bp, 0, 1, DFLTTOS, nil);        /* TTL of 1 */
       +}
       +
       +/*
       + * Sleep condition for igmpproc: true once there is at least one
       + * pending report.  The argument is unused.
       + */
       +static int
       +isreport(void *a)
       +{
       +        USED(a);
       +        return igmpalloc.reports != 0;
       +}
       +
       +
       +/*
       + * Kernel process that sends pending IGMP reports.  It sleeps until
       + * the report list is non-empty and then, every MSPTICK milliseconds,
       + * walks the list bumping each entry's tick count.  The first group
       + * whose timeout has been reached is unlinked and reported, and the
       + * walk starts again; entries with no groups left are freed.
       + *
       + * The list is protected by igmpalloc.lk; the lock is passed by its
       + * member name so the argument has the type lock() and unlock() expect.
       + */
       +void
       +igmpproc(void *a)
       +{
       +        IGMPrep *rp, **lrp;
       +        Multicast *mp, **lmp;
       +        byte ip[IPaddrlen];
       +
       +        USED(a);
       +
       +        for(;;){
       +                sleep(&igmpalloc.r, isreport, 0);
       +                for(;;){
       +                        lock(&igmpalloc.lk);
       +
       +                        /* leaves with the lock held; released after the loop */
       +                        if(igmpalloc.reports == nil)
       +                                break;
       +
       +                        /* look for a single report */
       +                        lrp = &igmpalloc.reports;
       +                        mp = nil;
       +                        for(rp = *lrp; rp; rp = *lrp){
       +                                rp->ticks++;
       +                                lmp = &rp->multi;
       +                                for(mp = *lmp; mp; mp = *lmp){
       +                                        if(rp->ticks >= mp->timeout){
       +                                                /* due: unlink it from the entry */
       +                                                *lmp = mp->next;
       +                                                break;
       +                                        }
       +                                        lmp = &mp->next;
       +                                }
       +                                if(mp != nil)
       +                                        break;
       +
       +                                if(rp->multi != nil){
       +                                        lrp = &rp->next;
       +                                        continue;
       +                                } else {
       +                                        /* nothing left to report: drop the entry */
       +                                        *lrp = rp->next;
       +                                        free(rp);
       +                                }
       +                        }
       +                        unlock(&igmpalloc.lk);
       +
       +                        if(mp){
       +                                /* do a single report and try again */
       +                                hnputl(ip, mp->addr);
       +                                igmpsendreport(rp->m, ip);
       +                                free(mp);
       +                                continue;
       +                        }
       +
       +                        tsleep(&up->sleep, return0, 0, MSPTICK);
       +                }
       +                unlock(&igmpalloc.lk);
       +        }
       +
       +}
       +
       +/*
       + *  Input routine for IGMP packets received on medium m.
       + *
       + *  The packet is validated (length, version 1, checksum) and then, under
       + *  the igmpalloc lock:
       + *    - a query starts a report list for m, unless one already exists,
       + *      holding a copy of m's multicast memberships with a random timeout
       + *      per group, and wakes igmpproc;
       + *    - a report from someone else removes the reported group from our
       + *      pending list for m so that we do not report it ourselves.
       + *  bp is always freed before returning.
       + */
       +void
       +igmpiput(Media *m, Ipifc *, Block *bp)
       +{
       +        int n;
       +        IGMPpkt *ghp;
       +        Ipaddr group;
       +        IGMPrep *rp, **lrp;
       +        Multicast *mp, *nmp, **lmp;
       +
       +        /* check the length first: nothing in the packet may be read before this */
       +        n = blocklen(bp);
       +        if(n < IGMP_IPHDRSIZE+IGMP_HDRSIZE){
       +                netlog(Logigmp, "igmpiput: bad len\n");
       +                goto error;
       +        }
       +
       +        ghp = (IGMPpkt*)(bp->rp);
       +        netlog(Logigmp, "igmpiput: %d %I\n", ghp->vertype, ghp->group);
       +
       +        if((ghp->vertype>>4) != 1){
       +                netlog(Logigmp, "igmpiput: bad igmp type\n");
       +                goto error;
       +        }
       +        if(ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE)){
       +                netlog(Logigmp, "igmpiput: checksum error %I\n", ghp->src);
       +                goto error;
       +        }
       +
       +        group = nhgetl(ghp->group);
       +
       +        lock(&igmpalloc);
       +        switch(ghp->vertype & 0xf){
       +        case IGMPquery:
       +                /*
       +                 *  start reporting groups that we're a member of.
       +                 */
       +                stats.inqueries++;
       +                for(rp = igmpalloc.reports; rp; rp = rp->next)
       +                        if(rp->m == m)
       +                                break;
       +                if(rp != nil)
       +                        break;        /* already reporting */
       +
       +                mp = Mediacopymulti(m);
       +                if(mp == nil)
       +                        break;
       +
       +                rp = malloc(sizeof(*rp));
       +                if(rp == nil){
       +                        /* no report head to hang the copy on: free it rather than leak it */
       +                        for(; mp != nil; mp = nmp){
       +                                nmp = mp->next;
       +                                free(mp);
       +                        }
       +                        break;
       +                }
       +
       +                rp->m = m;
       +                rp->multi = mp;
       +                rp->ticks = 0;
       +                for(; mp; mp = mp->next)
       +                        mp->timeout = nrand(MAXTIMEOUT);
       +                rp->next = igmpalloc.reports;
       +                igmpalloc.reports = rp;
       +
       +                wakeup(&igmpalloc.r);
       +
       +                break;
       +        case IGMPreport:
       +                /*
       +                 *  find report list for this medium
       +                 */
       +                stats.inreports++;
       +                lrp = &igmpalloc.reports;
       +                for(rp = *lrp; rp; rp = *lrp){
       +                        if(rp->m == m)
       +                                break;
       +                        lrp = &rp->next;
       +                }
       +                if(rp == nil)
       +                        break;
       +
       +                /*
       +                 *  if someone else has reported a group,
       +                 *  we don't have to.
       +                 */
       +                lmp = &rp->multi;
       +                for(mp = *lmp; mp; mp = *lmp){
       +                        if(mp->addr == group){
       +                                *lmp = mp->next;
       +                                free(mp);
       +                                break;
       +                        }
       +                        lmp = &mp->next;
       +                }
       +
       +                break;
       +        }
       +        unlock(&igmpalloc);
       +
       +error:
       +        freeb(bp);
       +}
       +
       +/*
       + *  format the IGMP query/report counters into buf (at most len bytes);
       + *  returns whatever snprint returns.
       + */
       +int
       +igmpstats(char *buf, int len)
       +{
       +        int n;
       +
       +        n = snprint(buf, len, "\trcvd %d %d\n\tsent %d %d\n",
       +                stats.inqueries, stats.inreports,
       +                stats.outqueries, stats.outreports);
       +        return n;
       +}
       +
       +/*
       + *  fill in the igmp protocol descriptor, publish the report
       + *  function, start the reporting kproc and register with fs.
       + */
       +void
       +igmpinit(Fs *fs)
       +{
       +        /* identity */
       +        igmp.name = "igmp";
       +        igmp.ipproto = IP_IGMPPROTO;
       +        igmp.nc = 0;
       +        igmp.ptclsize = 0;
       +
       +        /* the only entry points igmp provides */
       +        igmp.rcv = igmpiput;
       +        igmp.stats = igmpstats;
       +
       +        /* no conversation operations */
       +        igmp.connect = nil;
       +        igmp.announce = nil;
       +        igmp.ctl = nil;
       +        igmp.state = nil;
       +        igmp.close = nil;
       +
       +        igmpreportfn = igmpsendreport;
       +        kproc("igmpproc", igmpproc, 0);
       +
       +        Fsproto(fs, &igmp);
       +}
 (DIR) diff --git a/src/9vx/a/ip/il.c b/src/9vx/a/ip/il.c
       @@ -0,0 +1,1408 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +
       +#include        "ip.h"
       +
       +/*
       + *  Values of Ilcb.state.  The order matters: ilstates[] below
       + *  is indexed by these values.
       + */
       +enum                                /* Connection state */
       +{
       +        Ilclosed,
       +        Ilsyncer,
       +        Ilsyncee,
       +        Ilestablished,
       +        Illistening,
       +        Ilclosing,
       +        Ilopening,                /* only for file server */
       +};
       +
       +/* printable connection state names, indexed by Ilcb.state */
       +char        *ilstates[] =
       +{
       +[Ilclosed]        "Closed",
       +[Ilsyncer]        "Syncer",
       +[Ilsyncee]        "Syncee",
       +[Ilestablished]        "Established",
       +[Illistening]        "Listen",
       +[Ilclosing]        "Closing",
       +[Ilopening]        "Opening",                /* only for file server */
       +};
       +
       +/*
       + *  Values of Ilhdr.iltype.  The order matters: iltype[] below is
       + *  indexed by these values, and Ilclose is used as the upper bound
       + *  when validating a received type before indexing.
       + */
       +enum                                /* Packet types */
       +{
       +        Ilsync,
       +        Ildata,
       +        Ildataquery,
       +        Ilack,
       +        Ilquery,
       +        Ilstate,
       +        Ilclose,
       +};
       +
       +/* printable packet type names, indexed by Ilhdr.iltype */
       +char        *iltype[] =
       +{
       +[Ilsync]        "sync",
       +[Ildata]        "data",
       +[Ildataquery]        "dataquery",
       +[Ilack]                "ack",
       +[Ilquery]        "query",
       +[Ilstate]        "state",
       +[Ilclose]        "close",
       +};
       +
       +/*
       + *  Timing and tuning constants.
       + *  NOTE(review): times look like milliseconds (Seconds = 1000) -- confirm
       + *  against the definition of NOW.
       + *  delay and rate in Ilcb are kept scaled by AGain and mdev by DGain;
       + *  readers shift right by LogAGain/LogDGain to get the plain value.
       + */
       +enum
       +{
       +        Seconds                = 1000,
       +        Iltickms         = 50,                /* time base */
       +        AckDelay        = 2*Iltickms,        /* max time twixt message rcvd & ack sent */
       +        MaxTimeout         = 30*Seconds,        /* max time between rexmit */
       +        QueryTime        = 10*Seconds,        /* time between subsequent queries */
       +        DeathTime        = 30*QueryTime,
       +
       +        MaxRexmit         = 16,                /* max retransmissions before hangup */
       +        Defaultwin        = 20,
       +
       +        LogAGain        = 3,
       +        AGain                = 1<<LogAGain,
       +        LogDGain        = 2,
       +        DGain                = 1<<LogDGain,
       +
       +        DefByteRate        = 100,                /* assume a megabit link */
       +        DefRtt                = 50,                /* cross country on a great day */
       +
       +        Maxrq                = 64*1024,        /* read queue limit; see ilcreate */
       +};
       +
       +/* Ilcb.qt, the query state table, is declared with Nqt+1 entries */
       +enum
       +{
       +        Nqt=        8,
       +};
       +
       +/*
       + *  Per-conversation IL protocol state; reached through Conv.ptcl.
       + */
       +typedef struct Ilcb Ilcb;
       +struct Ilcb                        /* Control block */
       +{
       +        int        state;                /* Connection state */
       +        Conv        *conv;
       +        QLock        ackq;                /* Unacknowledged queue */
       +        Block        *unacked;
       +        Block        *unackedtail;
       +        ulong        unackedbytes;
       +        QLock        outo;                /* Out of order packet queue */
       +        Block        *outoforder;
       +        ulong        next;                /* Id of next to send */
       +        ulong        recvd;                /* Last packet received */
       +        ulong        acksent;        /* Last packet acked */
       +        ulong        start;                /* Local start id */
       +        ulong        rstart;                /* Remote start id */
       +        int        window;                /* Maximum receive window */
       +        int        rxquery;        /* number of queries on this connection */
       +        int        rxtot;                /* number of retransmits on this connection */
       +        int        rexmit;                /* number of retransmits of *unacked */
       +        ulong        qt[Nqt+1];        /* state table for query messages */
       +        int        qtx;                /* ... index into qt */
       +
       +        /* if set, fasttimeout causes a connection request to terminate after 4*Iltickms */
       +        int        fasttimeout;
       +
       +        /* timers */
       +        ulong        lastxmit;        /* time of last xmit */
       +        ulong        lastrecv;        /* time of last recv */
       +        ulong        timeout;        /* retransmission time for *unacked */
       +        ulong        acktime;        /* time to send next ack */
       +        ulong        querytime;        /* time to send next query */
       +
       +        /* adaptive measurements */
       +        int        delay;                /* Average of the fixed rtt delay */
       +        int        rate;                /* Average byte rate */
       +        int        mdev;                /* Mean deviation of rtt */
       +        int        maxrtt;                /* largest rtt seen */
       +        ulong        rttack;                /* The ack we are waiting for */
       +        int        rttlen;                /* Length of rttack packet */
       +        uvlong        rttstart;        /* Time we issued rttack packet */
       +};
       +
       +/*
       + *  Header sizes and protocol constants.  IL_IPSIZE and IL_HDRSIZE are
       + *  the byte sizes of the IP part and the IL part of struct Ilhdr.
       + */
       +enum
       +{
       +        IL_IPSIZE         = 20,
       +        IL_HDRSIZE        = 18,        
       +        IL_LISTEN        = 0,        /* passed to ilstart by ilannounce */
       +        IL_CONNECT        = 1,        /* passed to ilstart by ilconnect */
       +        IP_ILPROTO        = 40,        /* stored in the IP proto field by ilkick */
       +};
       +
       +/*
       + *  An IL packet as it sits in a Block: the IP header (vihl through dst,
       + *  IL_IPSIZE bytes) followed by the IL header (ilsum through ilack,
       + *  IL_HDRSIZE bytes).  Multi-byte fields are byte arrays accessed with
       + *  hnputs/hnputl and nhgets/nhgetl.
       + */
       +typedef struct Ilhdr Ilhdr;
       +struct Ilhdr
       +{
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        length[2];        /* packet length */
       +        uchar        id[2];                /* Identification */
       +        uchar        frag[2];        /* Fragment information */
       +        uchar        ttl;                /* Time to live */
       +        uchar        proto;                /* Protocol */
       +        uchar        cksum[2];        /* Header checksum */
       +        uchar        src[4];                /* Ip source */
       +        uchar        dst[4];                /* Ip destination */
       +        uchar        ilsum[2];        /* Checksum including header */
       +        uchar        illen[2];        /* Packet length */
       +        uchar        iltype;                /* Packet type */
       +        uchar        ilspec;                /* Special */
       +        uchar        ilsrc[2];        /* Src port */
       +        uchar        ildst[2];        /* Dst port */
       +        uchar        ilid[4];        /* Sequence id */
       +        uchar        ilack[4];        /* Acked sequence */
       +};
       +
       +/*
       + *  Indices into Ilpriv.stats[] and statnames[]; Nstats is the count
       + *  and must stay last.
       + */
       +enum
       +{
       +        InMsgs,
       +        OutMsgs,
       +        CsumErrs,                /* checksum errors */
       +        HlenErrs,                /* header length error */
       +        LenErrs,                /* short packet */
       +        OutOfOrder,                /* out of order */
       +        Retrans,                /* retransmissions */
       +        DupMsg,
       +        DupBytes,
       +        DroppedMsgs,
       +
       +        Nstats,
       +};
       +
       +/*
       + *  Labels printed by ilxstats, indexed by the statistics enum.
       + *  NOTE(review): HlenErrs is labelled "HlenErr" (no trailing s), unlike
       + *  its neighbours; left alone because the label is runtime output.
       + */
       +static char *statnames[] =
       +{
       +[InMsgs]        "InMsgs",
       +[OutMsgs]        "OutMsgs",
       +[CsumErrs]        "CsumErrs",
       +[HlenErrs]        "HlenErr",
       +[LenErrs]        "LenErrs",
       +[OutOfOrder]        "OutOfOrder",
       +[Retrans]        "Retrans",
       +[DupMsg]        "DupMsg",
       +[DupBytes]        "DupBytes",
       +[DroppedMsgs]        "DroppedMsgs",
       +};
       +
       +/*
       + *  Per-protocol-instance private state, reached through Proto.priv.
       + */
       +typedef struct Ilpriv Ilpriv;
       +struct Ilpriv
       +{
       +        Ipht        ht;                        /* conversation hash table (iphtlook/iphtadd/iphtrem) */
       +
       +        ulong        stats[Nstats];                /* counters reported by ilxstats */
       +
       +        /* NOTE(review): the counters below look superseded by stats[] -- confirm they are still used */
       +        ulong        csumerr;                /* checksum errors */
       +        ulong        hlenerr;                /* header length error */
       +        ulong        lenerr;                        /* short packet */
       +        ulong        order;                        /* out of order */
       +        ulong        rexmit;                        /* retransmissions */
       +        ulong        dup;
       +        ulong        dupb;
       +
       +        /* keeping track of the ack kproc */
       +        int        ackprocstarted;
       +        QLock        apl;
       +};
       +
       +/* state for query/dataquery messages */
       +
       +
       +/* forward declarations for routines defined later in this file */
       +void        ilrcvmsg(Conv*, Block*);
       +void        ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
       +void        ilackq(Ilcb*, Block*);
       +void        ilprocess(Conv*, Ilhdr*, Block*);
       +void        ilpullup(Conv*);
       +void        ilhangup(Conv*, char*);
       +void        ilfreeq(Ilcb*);
       +void        ilrexmit(Ilcb*);
       +void        ilbackoff(Ilcb*);
       +void        ilsettimeout(Ilcb*);
       +char*        ilstart(Conv*, int, int);
       +void        ilackproc(void*);
       +void        iloutoforder(Conv*, Ilhdr*, Block*);
       +void        iliput(Proto*, Ipifc*, Block*);
       +void        iladvise(Proto*, Block*, char*);
       +int        ilnextqt(Ilcb*);
       +void        ilcbinit(Ilcb*);
       +int        later(ulong, ulong, char*);
       +void        ilreject(Fs*, Ilhdr*);
       +void        illocalclose(Conv *c);
       +
       +/* when nonzero, ilkick fills in the IL checksum of outgoing data packets */
       +        int         ilcksum = 1;
       +static         int         initseq = 25001;
       +/* ilrttcalc converts a fastticks difference with (ticks*scalemul)/scalediv */
       +static        ulong        scalediv, scalemul;
       +static        char        *etime = "connection timed out";
       +
       +/*
       + *  connect request for an IL conversation.  A "!fasttimeout" suffix
       + *  on argv[1] is cut off (in place) and turned into the fast flag
       + *  handed to ilstart.  Returns nil or an error string.
       + */
       +static char*
       +ilconnect(Conv *c, char **argv, int argc)
       +{
       +        char *err, *opt;
       +        int fast;
       +
       +        /* huge hack to quickly try an il connection */
       +        fast = 0;
       +        opt = nil;
       +        if(argc > 1)
       +                opt = strstr(argv[1], "!fasttimeout");
       +        if(opt != nil){
       +                *opt = 0;
       +                fast = 1;
       +        }
       +
       +        err = Fsstdconnect(c, argv, argc);
       +        if(err == nil)
       +                err = ilstart(c, IL_CONNECT, fast);
       +        return err;
       +}
       +
       +/*
       + *  format a one line status summary of conversation c into state
       + *  (at most n bytes); returns whatever snprint returns.
       + */
       +static int
       +ilstate(Conv *c, char *state, int n)
       +{
       +        Ilcb *ic;
       +        int nin, nout;
       +
       +        ic = (Ilcb*)(c->ptcl);
       +
       +        /* a missing queue counts as empty */
       +        nin = 0;
       +        if(c->rq)
       +                nin = qlen(c->rq);
       +        nout = 0;
       +        if(c->wq)
       +                nout = qlen(c->wq);
       +
       +        return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
       +                ilstates[ic->state],
       +                nin,
       +                nout,
       +                ic->delay>>LogAGain, ic->rate>>LogAGain, ic->mdev>>LogDGain,
       +                ic->unackedbytes, ic->rxtot, ic->rxquery, ic->maxrtt);
       +}
       +
       +/* a conversation is in use unless its control block is in state Ilclosed */
       +static int
       +ilinuse(Conv *c)
       +{
       +        if(((Ilcb*)(c->ptcl))->state == Ilclosed)
       +                return 0;
       +        return 1;
       +}
       +
       +/*
       + *  announce request: do the standard announce, start the conversation
       + *  listening and mark it connected.  Returns nil or an error string.
       + *  called with c locked
       + */
       +static char*
       +ilannounce(Conv *c, char **argv, int argc)
       +{
       +        char *err;
       +
       +        err = Fsstdannounce(c, argv, argc);
       +        if(err == nil)
       +                err = ilstart(c, IL_LISTEN, 0);
       +        if(err == nil)
       +                Fsconnected(c, nil);
       +        return err;
       +}
       +
       +/*
       + *  close the local end only: mark the control block closed, take the
       + *  conversation out of the protocol hash table, then clear its local
       + *  address and port.  Nothing is sent to the remote side.
       + */
       +void
       +illocalclose(Conv *c)
       +{
       +        Ilpriv *priv = c->p->priv;
       +        Ilcb *cb = (Ilcb*)c->ptcl;
       +
       +        cb->state = Ilclosed;
       +        iphtrem(&priv->ht, c);
       +
       +        ipmove(c->laddr, IPnoaddr);
       +        c->lport = 0;
       +}
       +
       +/*
       + *  close request: shut the conversation's queues, then act on the
       + *  connection state.  A listener is closed locally; a syncing or
       + *  established connection moves to Ilclosing and sends an Ilclose.
       + *  Other states are left alone.  Queued blocks are released with
       + *  ilfreeq in every case.
       + */
       +static void
       +ilclose(Conv *c)
       +{
       +        Ilcb *ic;
       +        int st;
       +
       +        ic = (Ilcb*)c->ptcl;
       +
       +        qclose(c->rq);
       +        qclose(c->wq);
       +        qclose(c->eq);
       +
       +        st = ic->state;
       +        if(st == Illistening)
       +                illocalclose(c);
       +        else if(st == Ilsyncer || st == Ilsyncee || st == Ilestablished){
       +                ic->state = Ilclosing;
       +                ilsettimeout(ic);
       +                ilsendctl(c, nil, Ilclose, ic->next, ic->recvd, 0);
       +        }
       +
       +        ilfreeq(ic);
       +}
       +
       +/*
       + *  Output routine for an IL conversation; ilcreate installs it as the
       + *  write queue's bypass function, with x being the Conv.
       + *
       + *  bp holds user data.  If the connection is closed, listening or
       + *  closing, the data is dropped and the read queue hung up.  Otherwise
       + *  an IP+IL header is prepended, the packet is given the next sequence
       + *  id and the current ack, a copy is queued on the unacked list for
       + *  retransmission, and the packet is passed to ipoput4.
       + */
       +void
       +ilkick(void *x, Block *bp)
       +{
       +        Conv *c = x;
       +        Ilhdr *ih;
       +        Ilcb *ic;
       +        int dlen;
       +        ulong id, ack;
       +        Fs *f;
       +        Ilpriv *priv;
       +
       +        f = c->p->f;
       +        priv = c->p->priv;
       +        ic = (Ilcb*)c->ptcl;
       +
       +        if(bp == nil)
       +                return;
       +
       +        /* no data may be sent in these states */
       +        switch(ic->state) {
       +        case Ilclosed:
       +        case Illistening:
       +        case Ilclosing:
       +                freeblist(bp);
       +                qhangup(c->rq, nil);
       +                return;
       +        }
       +
       +        /* payload length, taken before the headers are prepended */
       +        dlen = blocklen(bp);
       +
       +        /* Make space to fit il & ip */
       +        bp = padblock(bp, IL_IPSIZE+IL_HDRSIZE);
       +        ih = (Ilhdr *)(bp->rp);
       +        ih->vihl = IP_VER4;
       +
       +        /* Ip fields */
       +        ih->frag[0] = 0;
       +        ih->frag[1] = 0;
       +        v6tov4(ih->dst, c->raddr);
       +        v6tov4(ih->src, c->laddr);
       +        ih->proto = IP_ILPROTO;
       +
       +        /* Il fields */
       +        hnputs(ih->illen, dlen+IL_HDRSIZE);
       +        hnputs(ih->ilsrc, c->lport);
       +        hnputs(ih->ildst, c->rport);
       +
       +        /* sequence id, ack and the unacked copy are all done under ackq */
       +        qlock(&ic->ackq);
       +        id = ic->next++;
       +        hnputl(ih->ilid, id);
       +        ack = ic->recvd;
       +        hnputl(ih->ilack, ack);
       +        ic->acksent = ack;
       +        ic->acktime = NOW + AckDelay;
       +        ih->iltype = Ildata;
       +        ih->ilspec = 0;
       +        /* the checksum field is zeroed before the checksum is computed */
       +        ih->ilsum[0] = 0;
       +        ih->ilsum[1] = 0;
       +
       +        /* Checksum of ilheader plus data (not ip & no pseudo header) */
       +        if(ilcksum)
       +                hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, dlen+IL_HDRSIZE));
       +
       +        ilackq(ic, bp);
       +        qunlock(&ic->ackq);
       +
       +        /* Start the round trip timer for this packet if the timer is free */
       +        if(ic->rttack == 0) {
       +                ic->rttack = id;
       +                ic->rttstart = fastticks(nil);
       +                ic->rttlen = dlen + IL_IPSIZE + IL_HDRSIZE;
       +        }
       +
       +        /* NOTE(review): presumably re-arms the timer only if it has already expired -- confirm against later() */
       +        if(later(NOW, ic->timeout, nil))
       +                ilsettimeout(ic);
       +        ipoput4(f, bp, 0, c->ttl, c->tos, c);
       +        priv->stats[OutMsgs]++;
       +}
       +
       +/*
       + *  set up the queues of a new conversation: writes bypass
       + *  queueing and go straight to ilkick; reads are queued,
       + *  with a limit of Maxrq.
       + */
       +static void
       +ilcreate(Conv *c)
       +{
       +        c->wq = qbypass(ilkick, c);
       +        c->rq = qopen(Maxrq, 0, 0, c);
       +}
       +
       +/*
       + *  print every counter in priv->stats as a "name: value" line into
       + *  buf (len bytes); returns the number of bytes written.
       + */
       +int
       +ilxstats(Proto *il, char *buf, int len)
       +{
       +        Ilpriv *priv;
       +        char *s, *end;
       +        int i;
       +
       +        priv = il->priv;
       +        s = buf;
       +        end = buf+len;
       +        i = 0;
       +        while(i < Nstats){
       +                s = seprint(s, end, "%s: %lud\n", statnames[i], priv->stats[i]);
       +                i++;
       +        }
       +        return s - buf;
       +}
       +
       +/*
       + *  keep a copy of outgoing packet bp at the tail of the unacked
       + *  queue, in case the original gets lost, and account for its bytes.
       + *  ilkick calls this with ic->ackq held.
       + */
       +void
       +ilackq(Ilcb *ic, Block *bp)
       +{
       +        Block *copy;
       +        int len;
       +
       +        len = blocklen(bp);
       +        copy = copyblock(bp, len);
       +        copy->list = nil;
       +
       +        /* append: the tail pointer is only meaningful while the queue is non-empty */
       +        if(ic->unacked == nil)
       +                ic->unacked = copy;
       +        else
       +                ic->unackedtail->list = copy;
       +        ic->unackedtail = copy;
       +
       +        ic->unackedbytes += len;
       +}
       +
       +/*
       + *  Update the round trip estimates of ic from the packet timed since
       + *  ic->rttstart; bp is the packet that carried the ack (ilackto calls
       + *  this when the ack matches ic->rttack).
       + *
       + *  The elapsed fastticks are scaled by scalemul/scalediv.  Samples that
       + *  are negative or above 120000 are ignored.  Exchanges under 256 bytes
       + *  in total update the fixed delay estimate; larger ones update the byte
       + *  rate estimate.  The mean deviation and maxrtt are then updated.
       + *  delay and rate are stored scaled by AGain, mdev by DGain.
       + */
       +static
       +void
       +ilrttcalc(Ilcb *ic, Block *bp)
       +{
       +        int rtt, tt, pt, delay, rate;
       +
       +        /* NOTE(review): the uvlong tick difference is narrowed to int before scaling */
       +        rtt = fastticks(nil) - ic->rttstart;
       +        rtt = (rtt*scalemul)/scalediv;
       +        delay = ic->delay;
       +        rate = ic->rate;
       +
       +        /* Guard against zero wrap */
       +        if(rtt > 120000 || rtt < 0)
       +                return;
       +
       +        /* this block had to be transmitted after the one acked so count its size */
       +        ic->rttlen += blocklen(bp)  + IL_IPSIZE + IL_HDRSIZE;
       +
       +        if(ic->rttlen < 256){
       +                /* guess fixed delay as rtt of small packets */
       +                delay += rtt - (delay>>LogAGain);
       +                if(delay < AGain)
       +                        delay = AGain;
       +                ic->delay = delay;
       +        } else {
       +                /* if packet took longer than avg rtt delay, recalc rate */
       +                tt = rtt - (delay>>LogAGain);
       +                if(tt > 0){
       +                        rate += ic->rttlen/tt - (rate>>LogAGain);
       +                        if(rate < AGain)
       +                                rate = AGain;
       +                        ic->rate = rate;
       +                }
       +        }
       +
       +        /* mdev */
       +        /* pt is the rtt predicted from the current rate and delay estimates */
       +        /* NOTE(review): divides by rate>>LogAGain; relies on ic->rate never being below AGain -- confirm in ilcbinit */
       +        pt = ic->rttlen/(rate>>LogAGain) + (delay>>LogAGain);
       +        ic->mdev += abs(rtt-pt) - (ic->mdev>>LogDGain);
       +
       +        if(rtt > ic->maxrtt)
       +                ic->maxrtt = rtt;
       +}
       +
       +/*
       + *  process an acknowledgement of everything up to and including ackto.
       + *  bp is the packet that carried the ack; it is only used to feed the
       + *  round trip calculation.  Every block on the unacked queue with an
       + *  id not beyond ackto is released, and each release resets the
       + *  retransmit count and the timeout.
       + */
       +void
       +ilackto(Ilcb *ic, ulong ackto, Block *bp)
       +{
       +        Block *head;
       +        Ilhdr *h;
       +
       +        /* the packet being timed has been acked: take the measurement */
       +        if(ic->rttack == ackto)
       +                ilrttcalc(ic, bp);
       +
       +        /* Cancel if we've passed the packet we were interested in */
       +        if(ic->rttack <= ackto)
       +                ic->rttack = 0;
       +
       +        qlock(&ic->ackq);
       +        for(head = ic->unacked; head != nil; head = ic->unacked){
       +                h = (Ilhdr *)head->rp;
       +                if(ackto < nhgetl(h->ilid))
       +                        break;
       +
       +                ic->unacked = head->list;
       +                head->list = nil;
       +                ic->unackedbytes -= blocklen(head);
       +                freeblist(head);
       +                ic->rexmit = 0;
       +                ilsettimeout(ic);
       +        }
       +        qunlock(&ic->ackq);
       +}
       +
       +/*
       + *  Input routine for IL packets handed up by IP.
       + *
       + *  The packet is validated (header present, IL length consistent with
       + *  the block, checksum) and then matched to a conversation through the
       + *  protocol hash table.  A sync that matches a listener creates a new
       + *  conversation in state Ilsyncee; anything else that matches a listener
       + *  is rejected.  Accepted packets are passed to ilprocess with the
       + *  conversation locked; all others are freed here.
       + *
       + *  Note on names: sp is the packet's destination (our) port and dp the
       + *  packet's source (remote) port.
       + */
       +void
       +iliput(Proto *il, Ipifc *dummy, Block *bp)
       +{
       +        char *st;
       +        Ilcb *ic;
       +        Ilhdr *ih;
       +        uchar raddr[IPaddrlen];
       +        uchar laddr[IPaddrlen];
       +        ushort sp, dp, csum;
       +        int plen, illen;
       +        Conv *new, *s;
       +        Ilpriv *ipriv;
       +
       +        ipriv = il->priv;
       +
       +        ih = (Ilhdr *)bp->rp;
       +        plen = blocklen(bp);
       +        if(plen < IL_IPSIZE+IL_HDRSIZE){
       +                netlog(il->f, Logil, "il: hlenerr\n");
       +                ipriv->stats[HlenErrs]++;
       +                goto raise;
       +        }
       +
       +        /*
       +         * the length claimed by the IL header must cover at least the
       +         * IL header itself and must fit in what actually arrived.
       +         */
       +        illen = nhgets(ih->illen);
       +        if(illen < IL_HDRSIZE || illen+IL_IPSIZE > plen){
       +                netlog(il->f, Logil, "il: lenerr\n");
       +                ipriv->stats[LenErrs]++;
       +                goto raise;
       +        }
       +
       +        sp = nhgets(ih->ildst);
       +        dp = nhgets(ih->ilsrc);
       +        v4tov6(raddr, ih->src);
       +        v4tov6(laddr, ih->dst);
       +
       +        if((csum = ptclcsum(bp, IL_IPSIZE, illen)) != 0) {
       +                if(ih->iltype > Ilclose)
       +                        st = "?";
       +                else
       +                        st = iltype[ih->iltype];
       +                ipriv->stats[CsumErrs]++;
       +                /* one conversion per argument: a surplus %ux here used to hand %s an integer */
       +                netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
       +                        csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
       +                goto raise;
       +        }
       +
       +        QLOCK(il);
       +        s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp);
       +        if(s == nil){
       +                if(ih->iltype == Ilsync)
       +                        ilreject(il->f, ih);                /* no listener */
       +                QUNLOCK(il);
       +                goto raise;
       +        }
       +
       +        ic = (Ilcb*)s->ptcl;
       +        if(ic->state == Illistening){
       +                if(ih->iltype != Ilsync){
       +                        QUNLOCK(il);
       +                        if(ih->iltype > Ilclose)
       +                                st = "?";
       +                        else
       +                                st = iltype[ih->iltype];
       +                        ilreject(il->f, ih);                /* no channel and not sync */
       +                        netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
       +                                st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
       +                        goto raise;
       +                }
       +
       +                /* a sync for a listener: set up a conversation for the new call */
       +                new = Fsnewcall(s, raddr, dp, laddr, sp, V4);
       +                if(new == nil){
       +                        QUNLOCK(il);
       +                        netlog(il->f, Logil, "il: bad newcall %I/%ud->%ud\n", raddr, sp, dp);
       +                        ilsendctl(s, ih, Ilclose, 0, nhgetl(ih->ilid), 0);
       +                        goto raise;
       +                }
       +                s = new;
       +
       +                ic = (Ilcb*)s->ptcl;
       +
       +                ic->conv = s;
       +                ic->state = Ilsyncee;
       +                ilcbinit(ic);
       +                ic->rstart = nhgetl(ih->ilid);
       +                iphtadd(&ipriv->ht, s);
       +        }
       +
       +        /* take the conversation lock before letting go of the protocol lock */
       +        QLOCK(s);
       +        QUNLOCK(il);
       +        if(waserror()){
       +                QUNLOCK(s);
       +                nexterror();
       +        }
       +        ilprocess(s, ih, bp);
       +        QUNLOCK(s);
       +        poperror();
       +        return;
       +raise:
       +        freeblist(bp);
       +}
       +
       +void
       +_ilprocess(Conv *s, Ilhdr *h, Block *bp)
       +{
       +        Ilcb *ic;
       +        ulong id, ack;
       +        Ilpriv *priv;
       +
       +        id = nhgetl(h->ilid);
       +        ack = nhgetl(h->ilack);
       +
       +        ic = (Ilcb*)s->ptcl;
       +
       +        ic->lastrecv = NOW;
       +        ic->querytime = NOW + QueryTime;
       +        priv = s->p->priv;
       +        priv->stats[InMsgs]++;
       +
       +        switch(ic->state) {
       +        default:
       +                netlog(s->p->f, Logil, "il: unknown state %d\n", ic->state);
       +        case Ilclosed:
       +                freeblist(bp);
       +                break;
       +        case Ilsyncer:
       +                switch(h->iltype) {
       +                default:
       +                        break;
       +                case Ilsync:
       +                        if(ack != ic->start)
       +                                ilhangup(s, "connection rejected");
       +                        else {
       +                                ic->recvd = id;
       +                                ic->rstart = id;
       +                                ilsendctl(s, nil, Ilack, ic->next, ic->recvd, 0);
       +                                ic->state = Ilestablished;
       +                                ic->fasttimeout = 0;
       +                                ic->rexmit = 0;
       +                                Fsconnected(s, nil);
       +                                ilpullup(s);
       +                        }
       +                        break;
       +                case Ilclose:
       +                        if(ack == ic->start)
       +                                ilhangup(s, "connection rejected");
       +                        break;
       +                }
       +                freeblist(bp);
       +                break;
       +        case Ilsyncee:
       +                switch(h->iltype) {
       +                default:
       +                        break;
       +                case Ilsync:
       +                        if(id != ic->rstart || ack != 0){
       +                                illocalclose(s);
       +                        } else {
       +                                ic->recvd = id;
       +                                ilsendctl(s, nil, Ilsync, ic->start, ic->recvd, 0);
       +                        }
       +                        break;
       +                case Ilack:
       +                        if(ack == ic->start) {
       +                                ic->state = Ilestablished;
       +                                ic->fasttimeout = 0;
       +                                ic->rexmit = 0;
       +                                ilpullup(s);
       +                        }
       +                        break;
       +                case Ildata:
       +                        if(ack == ic->start) {
       +                                ic->state = Ilestablished;
       +                                ic->fasttimeout = 0;
       +                                ic->rexmit = 0;
       +                                goto established;
       +                        }
       +                        break;
       +                case Ilclose:
       +                        if(ack == ic->start)
       +                                ilhangup(s, "remote close");
       +                        break;
       +                }
       +                freeblist(bp);
       +                break;
       +        case Ilestablished:
       +        established:
       +                switch(h->iltype) {
       +                case Ilsync:
       +                        if(id != ic->rstart)
       +                                ilhangup(s, "remote close");
       +                        else
       +                                ilsendctl(s, nil, Ilack, ic->next, ic->rstart, 0);
       +                        freeblist(bp);        
       +                        break;
       +                case Ildata:
       +                        /*
       +                         * avoid consuming all the mount rpc buffers in the
       +                         * system.  if the input queue is too long, drop this
       +                         * packet.
       +                         */
       +                        if (s->rq && qlen(s->rq) >= Maxrq) {
       +                                priv->stats[DroppedMsgs]++;
       +                                freeblist(bp);
       +                                break;
       +                        }
       +
       +                        ilackto(ic, ack, bp);
       +                        iloutoforder(s, h, bp);
       +                        ilpullup(s);
       +                        break;
       +                case Ildataquery:
       +                        ilackto(ic, ack, bp);
       +                        iloutoforder(s, h, bp);
       +                        ilpullup(s);
       +                        ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
       +                        break;
       +                case Ilack:
       +                        ilackto(ic, ack, bp);
       +                        freeblist(bp);
       +                        break;
       +                case Ilquery:
       +                        ilackto(ic, ack, bp);
       +                        ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
       +                        freeblist(bp);
       +                        break;
       +                case Ilstate:
       +                        if(ack >= ic->rttack)
       +                                ic->rttack = 0;
       +                        ilackto(ic, ack, bp);
       +                        if(h->ilspec > Nqt)
       +                                h->ilspec = 0;
       +                        if(ic->qt[h->ilspec] > ack){
       +                                ilrexmit(ic);
       +                                ilsettimeout(ic);
       +                        }
       +                        freeblist(bp);
       +                        break;
       +                case Ilclose:
       +                        freeblist(bp);
       +                        if(ack < ic->start || ack > ic->next) 
       +                                break;
       +                        ic->recvd = id;
       +                        ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
       +                        ic->state = Ilclosing;
       +                        ilsettimeout(ic);
       +                        ilfreeq(ic);
       +                        break;
       +                }
       +                break;
       +        case Illistening:
       +                freeblist(bp);
       +                break;
       +        case Ilclosing:
       +                switch(h->iltype) {
       +                case Ilclose:
       +                        ic->recvd = id;
       +                        ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
       +                        if(ack == ic->next)
       +                                ilhangup(s, nil);
       +                        break;
       +                default:
       +                        break;
       +                }
       +                freeblist(bp);
       +                break;
       +        }
       +}
       +
       +/*
       + * Retransmit the block at the head of ic->unacked as an Ildataquery.
       + * A copy is sent, so the original stays on the unacked list.
       + * Does nothing if the unacked list is empty.
       + */
       +void
       +ilrexmit(Ilcb *ic)
       +{
       +        Ilhdr *h;
       +        Block *nb;
       +        Conv *c;
       +        ulong id;
       +        Ilpriv *priv;
       +
       +        /* take the copy while holding the ackq lock */
       +        nb = nil;
       +        qlock(&ic->ackq);
       +        if(ic->unacked)
       +                nb = copyblock(ic->unacked, blocklen(ic->unacked));
       +        qunlock(&ic->ackq);
       +
       +        if(nb == nil)
       +                return;
       +
       +        h = (Ilhdr*)nb->rp;
       +        h->vihl = IP_VER4;
       +
       +        /* turn the copy into a query carrying our current receive state */
       +        h->iltype = Ildataquery;
       +        hnputl(h->ilack, ic->recvd);
       +        h->ilspec = ilnextqt(ic);
       +        /*
       +         * the header changed: zero the checksum field, then recompute it.
       +         * note the checksum is computed unconditionally here, whereas
       +         * ilsendctl and ilreject only do so when ilcksum is set.
       +         */
       +        h->ilsum[0] = 0;
       +        h->ilsum[1] = 0;
       +        hnputs(h->ilsum, ptclcsum(nb, IL_IPSIZE, nhgets(h->illen)));
       +
       +        c = ic->conv;
       +        id = nhgetl(h->ilid);
       +        netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
       +                ic->rexmit, ic->timeout,
       +                c->raddr, c->lport, c->rport);
       +
       +        /* reschedule the timeout; ilbackoff also counts this attempt in ic->rexmit */
       +        ilbackoff(ic);
       +
       +        ipoput4(c->p->f, nb, 0, c->ttl, c->tos, c);
       +
       +        /* statistics */
       +        ic->rxtot++;
       +        priv = c->p->priv;
       +        priv->rexmit++;
       +}
       +
       +/*
       + * DEBUG wrapper around _ilprocess: log the connection state and the
       + * header of the incoming packet, process the packet, then log the
       + * resulting state.  The second log line does not touch h or bp,
       + * which _ilprocess may already have freed.
       + */
       +void
       +ilprocess(Conv *s, Ilhdr *h, Block *bp)
       +{
       +        Ilcb *cb;
       +        Fs *fs;
       +
       +        cb = (Ilcb*)s->ptcl;
       +        fs = s->p->f;
       +        USED(cb);
       +
       +        netlog(fs, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
       +                ilstates[cb->state], cb->rstart, cb->recvd,
       +                cb->start, cb->next,
       +                iltype[h->iltype], nhgetl(h->ilid), nhgetl(h->ilack),
       +                nhgets(h->ilsrc), nhgets(h->ildst));
       +
       +        _ilprocess(s, h, bp);
       +
       +        netlog(fs, Logilmsg, "%11s rcv %d snt %d\n",
       +                ilstates[cb->state], cb->recvd, cb->next);
       +}
       +
       +/*
       + * Hang up conversation s: log the reason, close the connection
       + * locally, and hang up both queues with msg.  If the connection was
       + * still in the Ilsyncer state when we were called, the pending
       + * connect is completed with msg through Fsconnected.
       + * msg may be nil; it is logged as "no reason" in that case.
       + */
       +void
       +ilhangup(Conv *s, char *msg)
       +{
       +        Ilcb *cb;
       +        char *why;
       +        int wassyncer;
       +
       +        why = msg;
       +        if(why == nil)
       +                why = "no reason";
       +        netlog(s->p->f, Logil, "il: hangup! %I %d/%d: %s\n", s->raddr,
       +                s->lport, s->rport, why);
       +
       +        /* sample the state before illocalclose runs */
       +        cb = (Ilcb*)s->ptcl;
       +        wassyncer = (cb->state == Ilsyncer);
       +        illocalclose(s);
       +
       +        qhangup(s->rq, msg);
       +        qhangup(s->wq, msg);
       +
       +        if(wassyncer)
       +                Fsconnected(s, msg);
       +}
       +
       +/*
       + * Move in-sequence messages from the out-of-order queue of
       + * conversation s to its read queue.  Only runs when the connection
       + * is established.  iloutoforder keeps the queue sorted by ascending
       + * id, so we take messages from the head until we hit a gap.
       + */
       +void
       +ilpullup(Conv *s)
       +{
       +        Ilcb *ic;
       +        Ilhdr *oh;
       +        Block *bp;
       +        ulong oid, dlen;
       +        Ilpriv *ipriv;
       +
       +        ic = (Ilcb*)s->ptcl;
       +        if(ic->state != Ilestablished)
       +                return;
       +
       +        qlock(&ic->outo);
       +        while(ic->outoforder) {
       +                bp = ic->outoforder;
       +                oh = (Ilhdr*)bp->rp;
       +                oid = nhgetl(oh->ilid);
       +                /* id at or below recvd: already delivered, discard */
       +                if(oid <= ic->recvd) {
       +                        ic->outoforder = bp->list;
       +                        freeblist(bp);
       +                        continue;
       +                }
       +                /* not the next expected id: leave it queued and stop */
       +                if(oid != ic->recvd+1){
       +                        ipriv = s->p->priv;
       +                        ipriv->stats[OutOfOrder]++;
       +                        break;
       +                }
       +
       +                /* next expected message: advance recvd and unlink it */
       +                ic->recvd = oid;
       +                ic->outoforder = bp->list;
       +
       +                bp->list = nil;
       +                /* keep only the dlen bytes that follow the IP and IL headers */
       +                dlen = nhgets(oh->illen)-IL_HDRSIZE;
       +                bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
       +                /*
       +                 * Upper levels don't know about multiple-block
       +                 * messages so copy all into one (yick).
       +                 */
       +                bp = concatblock(bp);
       +                if(bp == 0)
       +                        panic("ilpullup");
       +                bp = packblock(bp);
       +                if(bp == 0)
       +                        panic("ilpullup2");
       +                qpass(s->rq, bp);
       +        }
       +        qunlock(&ic->outo);
       +}
       +
       +/*
       + * Queue the received message bp (with IL header h) on the
       + * out-of-order list of conversation s, keeping the list sorted by
       + * ascending message id.  Messages outside the receive window and
       + * duplicates of an already queued id are freed instead.
       + * bp is always consumed: it is either queued or freed.
       + */
       +void
       +iloutoforder(Conv *s, Ilhdr *h, Block *bp)
       +{
       +        Ilcb *ic;
       +        uchar *lid;
       +        Block *f, **l;
       +        ulong id, newid;
       +        Ilpriv *ipriv;
       +
       +        ipriv = s->p->priv;
       +        ic = (Ilcb*)s->ptcl;
       +        bp->list = nil;
       +
       +        id = nhgetl(h->ilid);
       +        /* Window checks */
       +        if(id <= ic->recvd || id > ic->recvd+ic->window) {
       +                netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
       +                        id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
       +                freeblist(bp);
       +                return;
       +        }
       +
       +        /* Packet is acceptable so sort onto receive queue for pullup */
       +        qlock(&ic->outo);
       +        if(ic->outoforder == nil)
       +                ic->outoforder = bp;
       +        else {
       +                /* l always points at the link that leads to f */
       +                l = &ic->outoforder;
       +                for(f = *l; f; f = f->list) {
       +                        lid = ((Ilhdr*)(f->rp))->ilid;
       +                        newid = nhgetl(lid);
       +                        if(id <= newid) {
       +                                /* same id already queued: count and drop the duplicate */
       +                                if(id == newid) {
       +                                        ipriv->stats[DupMsg]++;
       +                                        ipriv->stats[DupBytes] += blocklen(bp);
       +                                        qunlock(&ic->outo);
       +                                        freeblist(bp);
       +                                        return;
       +                                }
       +                                /* insert bp in front of the first larger id */
       +                                bp->list = f;
       +                                *l = bp;
       +                                qunlock(&ic->outo);
       +                                return;
       +                        }
       +                        l = &f->list;
       +                }
       +                /* larger than everything queued: append at the tail */
       +                *l = bp;
       +        }
       +        qunlock(&ic->outo);
       +}
       +
       +/*
       + * Build and send an IL control message (headers only, no data).
       + *
       + * If inih is non-nil the message is a reply to that header: IP
       + * addresses, ports and id/ack are taken from it with source and
       + * destination swapped, the id and ack arguments are ignored, and
       + * MAXTTL/DFLTTOS are used.  Otherwise addresses, ports, ttl and tos
       + * come from the conversation ipc, id and ack are used as given, and
       + * ack is recorded in the control block (acksent, acktime).
       + *
       + * type and ilspec are stored in the iltype and ilspec header fields.
       + * ipc and ipc->p must be non-nil in both cases; this is checked
       + * before ipc is first dereferenced so a bad caller gets the panic
       + * message rather than a nil dereference.
       + */
       +void
       +ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
       +{
       +        Ilhdr *ih;
       +        Ilcb *ic;
       +        Block *bp;
       +        int ttl, tos;
       +
       +        if(ipc==nil)
       +                panic("ipc is nil caller is %#p", getcallerpc(&ipc));
       +        if(ipc->p==nil)
       +                panic("ipc->p is nil");
       +
       +        bp = allocb(IL_IPSIZE+IL_HDRSIZE);
       +        bp->wp += IL_IPSIZE+IL_HDRSIZE;
       +
       +        ih = (Ilhdr *)(bp->rp);
       +        ih->vihl = IP_VER4;
       +
       +        /* Ip fields */
       +        ih->proto = IP_ILPROTO;
       +        hnputs(ih->illen, IL_HDRSIZE);
       +        ih->frag[0] = 0;
       +        ih->frag[1] = 0;
       +        if(inih) {
       +                /* reply: mirror the incoming header */
       +                hnputl(ih->dst, nhgetl(inih->src));
       +                hnputl(ih->src, nhgetl(inih->dst));
       +                hnputs(ih->ilsrc, nhgets(inih->ildst));
       +                hnputs(ih->ildst, nhgets(inih->ilsrc));
       +                hnputl(ih->ilid, nhgetl(inih->ilack));
       +                hnputl(ih->ilack, nhgetl(inih->ilid));
       +                ttl = MAXTTL;
       +                tos = DFLTTOS;
       +        }
       +        else {
       +                /* use the conversation and remember what we acked and when */
       +                v6tov4(ih->dst, ipc->raddr);
       +                v6tov4(ih->src, ipc->laddr);
       +                hnputs(ih->ilsrc, ipc->lport);
       +                hnputs(ih->ildst, ipc->rport);
       +                hnputl(ih->ilid, id);
       +                hnputl(ih->ilack, ack);
       +                ic = (Ilcb*)ipc->ptcl;
       +                ic->acksent = ack;
       +                ic->acktime = NOW;
       +                ttl = ipc->ttl;
       +                tos = ipc->tos;
       +        }
       +        ih->iltype = type;
       +        ih->ilspec = ilspec;
       +        /* checksum field must be zero while the checksum is computed */
       +        ih->ilsum[0] = 0;
       +        ih->ilsum[1] = 0;
       +
       +        if(ilcksum)
       +                hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
       +
       +        netlog(ipc->p->f, Logilmsg, "ctl(%s id %d ack %d %d->%d)\n",
       +                iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
       +                nhgets(ih->ilsrc), nhgets(ih->ildst));
       +
       +        ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
       +}
       +
       +/*
       + * Answer the packet with header inih with an Ilclose control
       + * message.  The reply mirrors the incoming header (addresses, ports
       + * and id/ack swapped) and is sent through f with no conversation,
       + * using MAXTTL and DFLTTOS.
       + */
       +void
       +ilreject(Fs *f, Ilhdr *inih)
       +{
       +        Block *reply;
       +        Ilhdr *rh;
       +        int hlen;
       +
       +        hlen = IL_IPSIZE+IL_HDRSIZE;
       +        reply = allocb(hlen);
       +        reply->wp += hlen;
       +        rh = (Ilhdr *)(reply->rp);
       +
       +        /* Ip fields */
       +        rh->vihl = IP_VER4;
       +        rh->proto = IP_ILPROTO;
       +        rh->frag[0] = 0;
       +        rh->frag[1] = 0;
       +        hnputs(rh->illen, IL_HDRSIZE);
       +        hnputl(rh->src, nhgetl(inih->dst));
       +        hnputl(rh->dst, nhgetl(inih->src));
       +
       +        /* IL fields: swap the ports and the id/ack pair */
       +        hnputs(rh->ilsrc, nhgets(inih->ildst));
       +        hnputs(rh->ildst, nhgets(inih->ilsrc));
       +        hnputl(rh->ilid, nhgetl(inih->ilack));
       +        hnputl(rh->ilack, nhgetl(inih->ilid));
       +        rh->iltype = Ilclose;
       +        rh->ilspec = 0;
       +
       +        /* checksum last, over a zeroed checksum field */
       +        rh->ilsum[0] = 0;
       +        rh->ilsum[1] = 0;
       +        if(ilcksum)
       +                hnputs(rh->ilsum, ptclcsum(reply, IL_IPSIZE, IL_HDRSIZE));
       +
       +        ipoput4(f, reply, 0, MAXTTL, DFLTTOS, nil);
       +}
       +
       +/*
       + * Set ic->timeout to NOW plus an interval built from the scaled
       + * delay, the unacked bytes divided by the scaled rate, the scaled
       + * deviation and AckDelay.  The interval is capped at MaxTimeout.
       + */
       +void
       +ilsettimeout(Ilcb *ic)
       +{
       +        ulong wait;
       +
       +        wait = ic->delay>>LogAGain;
       +        wait += ic->unackedbytes/(ic->rate>>LogAGain);
       +        wait += ic->mdev>>(LogDGain-1);
       +        wait += AckDelay;
       +
       +        if(wait > MaxTimeout)
       +                wait = MaxTimeout;
       +
       +        ic->timeout = NOW + wait;
       +}
       +
       +/*
       + * Like ilsettimeout, but the interval grows by half for every
       + * retransmission already counted in ic->rexmit before being capped
       + * at MaxTimeout.  When ic->fasttimeout is set the timeout is simply
       + * one Iltickms from now.  Counts this attempt in ic->rexmit.
       + */
       +void
       +ilbackoff(Ilcb *ic)
       +{
       +        ulong wait;
       +        int n;
       +
       +        wait = ic->delay>>LogAGain;
       +        wait += ic->unackedbytes/(ic->rate>>LogAGain);
       +        wait += ic->mdev>>(LogDGain-1);
       +        wait += AckDelay;
       +
       +        /* multiply by 1.5 once per earlier retransmission */
       +        n = 0;
       +        while(n < ic->rexmit){
       +                wait += wait>>1;
       +                n++;
       +        }
       +        if(wait > MaxTimeout)
       +                wait = MaxTimeout;
       +
       +        if(ic->fasttimeout)
       +                ic->timeout = NOW+Iltickms;
       +        else
       +                ic->timeout = NOW + wait;
       +
       +        ic->rexmit++;
       +}
       +
       +/*
       + * Compare two times.  Returns 1 if t1 is after t2, and also if t1
       + * is more than Tfuture (one hour, in the caller's units) before t2;
       + * returns 0 otherwise.  When x is non-nil it names the timer and a
       + * complaint is printed whenever the two times are more than Tfuture
       + * apart.
       + */
       +#define Tfuture (1000*60*60)
       +int
       +later(ulong t1, ulong t2, char *x)
       +{
       +        int delta;
       +
       +        delta = t1 - t2;
       +
       +        /* t1 at or slightly before t2: not later */
       +        if(delta <= 0 && delta >= -Tfuture)
       +                return 0;
       +
       +        if(x != nil){
       +                if(delta > Tfuture)
       +                        print("%s: way future %d\n", x, delta);
       +                else if(delta < -Tfuture)
       +                        print("%s: way past %d\n", x, -delta);
       +        }
       +        return 1;
       +}
       +
       +/*
       + * Body of the kernel process that ilstart launches with kproc.
       + * x is the IL Proto.  Loops forever: sleeps with an Iltickms
       + * timeout, then walks every conversation of the protocol and acts
       + * on any timer that has expired for its current state.
       + */
       +void
       +ilackproc(void *x)
       +{
       +        Ilcb *ic;
       +        Conv **s, *p;
       +        Proto *il;
       +
       +        il = x;
       +
       +loop:
       +        tsleep(&up->sleep, return0, 0, Iltickms);
       +        for(s = il->conv; s && *s; s++) {
       +                p = *s;
       +                ic = (Ilcb*)p->ptcl;
       +
       +                switch(ic->state) {
       +                case Ilclosed:
       +                case Illistening:
       +                        break;
       +                case Ilclosing:
       +                        /* resend the close until MaxRexmit is exceeded, then give up */
       +                        if(later(NOW, ic->timeout, "timeout0")) {
       +                                if(ic->rexmit > MaxRexmit){
       +                                        ilhangup(p, nil);
       +                                        break;
       +                                }
       +                                ilsendctl(p, nil, Ilclose, ic->next, ic->recvd, 0);
       +                                ilbackoff(ic);
       +                        }
       +                        break;
       +
       +                case Ilsyncee:
       +                case Ilsyncer:
       +                        /* resend the sync until MaxRexmit is exceeded, then time out */
       +                        if(later(NOW, ic->timeout, "timeout1")) {
       +                                if(ic->rexmit > MaxRexmit){
       +                                        ilhangup(p, etime);
       +                                        break;
       +                                }
       +                                ilsendctl(p, nil, Ilsync, ic->start, ic->recvd, 0);
       +                                ilbackoff(ic);
       +                        }
       +                        break;
       +
       +                case Ilestablished:
       +                        /* send an ack if we have received more than we last acked */
       +                        if(ic->recvd != ic->acksent)
       +                        if(later(NOW, ic->acktime, "acktime"))
       +                                ilsendctl(p, nil, Ilack, ic->next, ic->recvd, 0);
       +
       +                        /*
       +                         * periodic query; hang up instead if nothing has
       +                         * been received for DeathTime.  the break leaves
       +                         * the switch, skipping the retransmit check below.
       +                         */
       +                        if(later(NOW, ic->querytime, "querytime")){
       +                                if(later(NOW, ic->lastrecv+DeathTime, "deathtime")){
       +                                        netlog(il->f, Logil, "il: hangup: deathtime\n");
       +                                        ilhangup(p, etime);
       +                                        break;
       +                                }
       +                                ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic));
       +                                ic->querytime = NOW + QueryTime;
       +                        }
       +
       +                        /* unacked data and the timeout expired: query the peer and back off */
       +                        if(ic->unacked != nil)
       +                        if(later(NOW, ic->timeout, "timeout2")) {
       +                                if(ic->rexmit > MaxRexmit){
       +                                        netlog(il->f, Logil, "il: hangup: too many rexmits\n");
       +                                        ilhangup(p, etime);
       +                                        break;
       +                                }
       +                                ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic));
       +                                ic->rxquery++;
       +                                ilbackoff(ic);
       +                        }
       +                        break;
       +                }
       +        }
       +        goto loop;
       +}
       +
       +/*
       + * Put control block ic into its initial state: a random start id,
       + * empty queues, zeroed retransmission counters and default timer
       + * estimates, then compute the first timeout.
       + */
       +void
       +ilcbinit(Ilcb *ic)
       +{
       +        /* sequence numbers and window */
       +        ic->start = nrand(0x1000000);
       +        ic->next = ic->start+1;
       +        ic->recvd = 0;
       +        ic->window = Defaultwin;
       +        ic->qtx = 1;
       +
       +        /* queues */
       +        ic->unacked = nil;
       +        ic->unackedbytes = 0;
       +        ic->outoforder = nil;
       +
       +        /* retransmission bookkeeping */
       +        ic->rexmit = 0;
       +        ic->rxtot = 0;
       +        ic->rxquery = 0;
       +        ic->fasttimeout = 0;
       +
       +        /* timers */
       +        ic->delay = DefRtt<<LogAGain;
       +        ic->mdev = DefRtt<<LogDGain;
       +        ic->rate = DefByteRate<<LogAGain;
       +        ic->querytime = NOW + QueryTime;
       +        ic->lastrecv = NOW;        /* or we'll timeout right away */
       +        ilsettimeout(ic);
       +}
       +
       +/*
       + * Start conversation c as a listener (IL_LISTEN) or caller
       + * (IL_CONNECT).  The first call for a protocol instance also starts
       + * the ilackproc kernel process.  Does nothing further if the
       + * control block is not in the Ilclosed state.  With fasttimeout set
       + * the first timeout is one Iltickms away and the retransmit count
       + * starts at MaxRexmit-4.  Always returns nil.
       + */
       +char*
       +ilstart(Conv *c, int type, int fasttimeout)
       +{
       +        Ilcb *cb;
       +        Ilpriv *priv;
       +        char name[KNAMELEN];
       +
       +        priv = c->p->priv;
       +
       +        /* start the ack process once; the flag is rechecked under the lock */
       +        if(priv->ackprocstarted == 0){
       +                qlock(&priv->apl);
       +                if(priv->ackprocstarted == 0){
       +                        sprint(name, "#I%dilack", c->p->f->dev);
       +                        kproc(name, ilackproc, c->p);
       +                        priv->ackprocstarted = 1;
       +                }
       +                qunlock(&priv->apl);
       +        }
       +
       +        cb = (Ilcb*)c->ptcl;
       +        cb->conv = c;
       +
       +        if(cb->state != Ilclosed)
       +                return nil;
       +
       +        ilcbinit(cb);
       +
       +        if(fasttimeout){
       +                /* timeout if we can't connect quickly */
       +                cb->fasttimeout = 1;
       +                cb->timeout = NOW+Iltickms;
       +                cb->rexmit = MaxRexmit - 4;
       +        }
       +
       +        if(type == IL_LISTEN){
       +                cb->state = Illistening;
       +                iphtadd(&priv->ht, c);
       +        }else if(type == IL_CONNECT){
       +                cb->state = Ilsyncer;
       +                iphtadd(&priv->ht, c);
       +                ilsendctl(c, nil, Ilsync, cb->start, cb->recvd, 0);
       +        }else
       +                netlog(c->p->f, Logil, "il: start: type %d\n", type);
       +
       +        return nil;
       +}
       +
       +/*
       + * Free every message on the unacked list (under the ackq lock) and
       + * on the out-of-order list (under the outo lock), leaving both
       + * lists empty.  Messages are chained through Block.list.
       + */
       +void
       +ilfreeq(Ilcb *ic)
       +{
       +        Block *b, *nxt;
       +
       +        qlock(&ic->ackq);
       +        b = ic->unacked;
       +        while(b != nil){
       +                nxt = b->list;
       +                freeblist(b);
       +                b = nxt;
       +        }
       +        ic->unacked = nil;
       +        qunlock(&ic->ackq);
       +
       +        qlock(&ic->outo);
       +        b = ic->outoforder;
       +        while(b != nil){
       +                nxt = b->list;
       +                freeblist(b);
       +                b = nxt;
       +        }
       +        ic->outoforder = nil;
       +        qunlock(&ic->outo);
       +}
       +
       +/*
       + * Advise callback for IL (installed as il->advise by ilinit).  bp
       + * holds an IL header; find the first conversation whose local port
       + * and address match the header's source and whose remote address
       + * matches its destination.  If that conversation is still in the
       + * Ilsyncer state it is hung up with msg.  bp is always freed.
       + */
       +void
       +iladvise(Proto *il, Block *bp, char *msg)
       +{
       +        Ilhdr *hdr;
       +        Ilcb *cb;
       +        uchar src[IPaddrlen], dst[IPaddrlen];
       +        ushort sport;
       +        Conv *cv, **cp;
       +
       +        hdr = (Ilhdr*)(bp->rp);
       +
       +        v4tov6(dst, hdr->dst);
       +        v4tov6(src, hdr->src);
       +        sport = nhgets(hdr->ilsrc);
       +
       +        /* Look for a connection, unfortunately the destination port is missing */
       +        QLOCK(il);
       +        for(cp = il->conv; *cp; cp++) {
       +                cv = *cp;
       +                if(cv->lport != sport
       +                || ipcmp(cv->laddr, src) != 0
       +                || ipcmp(cv->raddr, dst) != 0)
       +                        continue;
       +
       +                /* found it: drop the protocol lock before hanging up */
       +                QUNLOCK(il);
       +                cb = (Ilcb*)cv->ptcl;
       +                if(cb->state == Ilsyncer)
       +                        ilhangup(cv, msg);
       +                freeblist(bp);
       +                return;
       +        }
       +        QUNLOCK(il);
       +        freeblist(bp);
       +}
       +
       +/*
       + * Advance the query index ic->qtx, cycling through 1..Nqt, and
       + * record the id of the highest transmitted packet in that slot of
       + * ic->qt (and in slot 0).  Returns the new index.  Runs under the
       + * ackq lock.
       + */
       +int
       +ilnextqt(Ilcb *ic)
       +{
       +        int slot;
       +
       +        qlock(&ic->ackq);
       +        slot = ic->qtx;
       +        slot++;
       +        if(slot > Nqt)
       +                slot = 1;
       +        ic->qtx = slot;
       +        ic->qt[slot] = ic->next-1;        /* highest xmitted packet */
       +        ic->qt[0] = ic->qt[slot];        /* compatibility with old implementations */
       +        qunlock(&ic->ackq);
       +
       +        return slot;
       +}
       +
       +/*
       + * Derive scalediv and scalemul, the constants used to convert fast
       + * ticks to milliseconds (more or less), from the fast tick frequency
       + * reported by fastticks.
       + */
       +static void
       +inittimescale(void)
       +{
       +        uvlong hz;
       +
       +        fastticks(&hz);
       +        if(hz <= 1000){
       +                /* slow clock: multiply ticks up to ms */
       +                scalediv = 1;
       +                scalemul = 1000/hz;
       +                return;
       +        }
       +        /* fast clock: divide ticks down to ms */
       +        scalediv = hz/1000;
       +        scalemul = 1;
       +}
       +
       +/*
       + * Create the IL protocol description, with its private state and
       + * entry points, and register it with the stack instance f.
       + */
       +void
       +ilinit(Fs *f)
       +{
       +        Proto *p;
       +
       +        inittimescale();
       +
       +        p = smalloc(sizeof(Proto));
       +        p->priv = smalloc(sizeof(Ilpriv));
       +
       +        /* identity and sizing */
       +        p->name = "il";
       +        p->ipproto = IP_ILPROTO;
       +        p->nc = scalednconv();
       +        p->ptclsize = sizeof(Ilcb);
       +
       +        /* conversation life cycle */
       +        p->create = ilcreate;
       +        p->connect = ilconnect;
       +        p->announce = ilannounce;
       +        p->close = ilclose;
       +        p->inuse = ilinuse;
       +
       +        /* input, errors and reporting */
       +        p->rcv = iliput;
       +        p->advise = iladvise;
       +        p->state = ilstate;
       +        p->stats = ilxstats;
       +
       +        /* not provided by il */
       +        p->ctl = nil;
       +        p->gc = nil;
       +
       +        Fsproto(f, p);
       +}
 (DIR) diff --git a/src/9vx/a/ip/inferno.c b/src/9vx/a/ip/inferno.c
       @@ -0,0 +1,46 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +#include        "ip.h"
       +
       +/*
       + *  some hacks for commonality twixt inferno and plan9
       + */
       +
       +/* return the user name of the current process (up->user) */
       +char*
       +commonuser(void)
       +{
       +        char *user;
       +
       +        user = up->user;
       +        return user;
       +}
       +
       +/* thin wrapper: passes all four arguments straight to fdtochan */
       +Chan*
       +commonfdtochan(int fd, int mode, int a, int b)
       +{
       +        Chan *c;
       +
       +        c = fdtochan(fd, mode, a, b);
       +        return c;
       +}
       +
       +char*
       +commonerror(void)
       +{
       +        return up->errstr;
       +}
       +
       +/*
       + * Stub: bootp is not provided here.  The interface argument is
       + * ignored and the fixed string "unimplemented" is returned.
       + */
       +char*
       +bootp(Ipifc* _)
       +{
       +        return "unimplemented";
       +}
       +
       +/* stub: ignores all three arguments and always returns 0 */
       +int
       +bootpread(char* _, ulong __, int ___)
       +{
       +        return 0;
       +}
       +
       +/*
       + * Medium whose first member is "trip"; every other member is left
       + * zero by the initializer.
       + * NOTE(review): presumably a placeholder so references to
       + * tripmedium link -- confirm against the medium table.
       + */
       +Medium tripmedium =
       +{
       +        "trip",
       +};
 (DIR) diff --git a/src/9vx/a/ip/ip.c b/src/9vx/a/ip/ip.c
       @@ -0,0 +1,776 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +
       +#include        "ip.h"
       +
       +/* forward typedefs for the structures defined later in this file */
       +typedef struct Fragment4        Fragment4;
       +typedef struct Fragment6        Fragment6;
       +typedef struct Ipfrag                Ipfrag;
       +
       +/* high nibble of the vihl byte of the IPv4 header at the read pointer of block xp */
       +#define BLKIPVER(xp)        (((Ip4hdr*)((xp)->rp))->vihl&0xF0)
       +
       +/*
       + * MIB II counters.  Each value indexes IP.stats[] and the
       + * statnames[] table; Nstats is the number of counters.
       + */
       +enum
       +{
       +        Forwarding,
       +        DefaultTTL,
       +        InReceives,
       +        InHdrErrors,
       +        InAddrErrors,
       +        ForwDatagrams,
       +        InUnknownProtos,
       +        InDiscards,
       +        InDelivers,
       +        OutRequests,
       +        OutDiscards,
       +        OutNoRoutes,
       +        ReasmTimeout,
       +        ReasmReqds,
       +        ReasmOKs,
       +        ReasmFails,
       +        FragOKs,
       +        FragFails,
       +        FragCreates,
       +
       +        Nstats,
       +};
       +
       +/*
       + * IPv4 fragment-queue entry.  initfrag chains free entries through
       + * next.
       + * NOTE(review): presumably src/dst/id identify the datagram being
       + * reassembled, blist holds its fragments and age is its expiry
       + * time -- confirm in ip4reassemble.
       + */
       +struct Fragment4
       +{
       +        Block*        blist;
       +        Fragment4*        next;
       +        ulong         src;
       +        ulong         dst;
       +        ushort        id;
       +        ulong         age;
       +};
       +
       +/*
       + * IPv6 counterpart of Fragment4: addresses are IPaddrlen-byte
       + * arrays and id is a uint.  initfrag chains free entries through
       + * next.
       + * NOTE(review): field meanings presumably match Fragment4 --
       + * confirm in the v6 reassembly code.
       + */
       +struct Fragment6
       +{
       +        Block*        blist;
       +        Fragment6*        next;
       +        uchar         src[IPaddrlen];
       +        uchar         dst[IPaddrlen];
       +        uint        id;
       +        ulong         age;
       +};
       +
       +/*
       + * Per-block fragment bookkeeping; the BKFG macro overlays this on
       + * the base of a Block.
       + * NOTE(review): presumably foff is the fragment offset and flen
       + * its length -- confirm in the reassembly code.
       + */
       +struct Ipfrag
       +{
       +        ushort        foff;
       +        ushort        flen;
       +};
       +
       +/*
       + * an instance of IP: the MIB counters, separate v4 and v6 fragment
       + * state (each with its own lock, list head, free list and id Ref)
       + * and the routing flag set by iprouting().
       + */
       +struct IP
       +{
       +        ulong                stats[Nstats];
       +
       +        QLock                fraglock4;
       +        Fragment4*        flisthead4;
       +        Fragment4*        fragfree4;
       +        Ref                id4;
       +
       +        QLock                fraglock6;
       +        Fragment6*        flisthead6;
       +        Fragment6*        fragfree6;
       +        Ref                id6;
       +
       +        int                iprouting;        /* true if we route like a gateway */
       +};
       +
       +/* printable names of the MIB II counters, indexed by the counter enum */
       +static char *statnames[] =
       +{
       +[Forwarding]        "Forwarding",
       +[DefaultTTL]        "DefaultTTL",
       +[InReceives]        "InReceives",
       +[InHdrErrors]        "InHdrErrors",
       +[InAddrErrors]        "InAddrErrors",
       +[ForwDatagrams]        "ForwDatagrams",
       +[InUnknownProtos]        "InUnknownProtos",
       +[InDiscards]        "InDiscards",
       +[InDelivers]        "InDelivers",
       +[OutRequests]        "OutRequests",
       +[OutDiscards]        "OutDiscards",
       +[OutNoRoutes]        "OutNoRoutes",
       +[ReasmTimeout]        "ReasmTimeout",
       +[ReasmReqds]        "ReasmReqds",
       +[ReasmOKs]        "ReasmOKs",
       +[ReasmFails]        "ReasmFails",
       +[FragOKs]        "FragOKs",
       +[FragFails]        "FragFails",
       +[FragCreates]        "FragCreates",
       +};
       +
       +/* view the data at the read pointer of block xp as an IPv4 header */
       +#define BLKIP(xp)        ((Ip4hdr*)((xp)->rp))
       +/*
       + * This sleazy macro relies on the media header size being
       + * larger than sizeof(Ipfrag). ipreassemble checks this is true
       + */
       +#define BKFG(xp)        ((Ipfrag*)((xp)->base))
       +
       +/* prototypes for the checksum and v4 fragment routines used in this file */
       +ushort                ipcsum(uchar*);
       +Block*                ip4reassemble(IP*, int, Block*, Ip4hdr*);
       +void                ipfragfree4(IP*, Fragment4*);
       +Fragment4*        ipfragallo4(IP*);
       +
       +/*
       + * Allocate the IPv6 parameter block for f and fill in its default
       + * router (rp) and host (hp) parameters.
       + */
       +void
       +ip_init_6(Fs *f)
       +{
       +        v6params *params;
       +
       +        params = smalloc(sizeof(v6params));
       +
       +        /* router parameters: flags and fields that default to zero */
       +        params->rp.mflag = 0;        /* default not managed */
       +        params->rp.oflag = 0;
       +        params->rp.linkmtu = 0;        /* no mtu sent */
       +        params->rp.reachtime = 0;
       +        params->rp.rxmitra = 0;
       +
       +        /* router parameters: intervals and ttl */
       +        params->rp.maxraint = 600000;        /* millisecs */
       +        params->rp.minraint = 200000;
       +        params->rp.routerlt = 3 * params->rp.maxraint;
       +        params->rp.ttl = MAXTTL;
       +
       +        /* host parameters */
       +        params->hp.rxmithost = 1000;        /* v6 RETRANS_TIMER */
       +
       +        params->cdrouter = -1;
       +
       +        f->v6p = params;
       +}
       +
       +/*
       + * initfrag - allocate the IPv4 and IPv6 reassembly-queue pools for ip
       + * (size entries each) and chain every pool into a nil-terminated
       + * free list.  Panics if either allocation fails.
       + */
       +void
       +initfrag(IP *ip, int size)
       +{
       +        int i;
       +
       +        /* v4 pool: entry i points at entry i+1, the last one at nil */
       +        ip->fragfree4 = (Fragment4*)malloc(sizeof(Fragment4) * size);
       +        if(ip->fragfree4 == nil)
       +                panic("initfrag");
       +        for(i = 0; i < size; i++)
       +                ip->fragfree4[i].next = &ip->fragfree4[i+1];
       +        ip->fragfree4[size-1].next = nil;
       +
       +        /* v6 pool, built the same way */
       +        ip->fragfree6 = (Fragment6*)malloc(sizeof(Fragment6) * size);
       +        if(ip->fragfree6 == nil)
       +                panic("initfrag");
       +        for(i = 0; i < size; i++)
       +                ip->fragfree6[i].next = &ip->fragfree6[i+1];
       +        ip->fragfree6[size-1].next = nil;
       +}
       +
       +/*
       + * ip_init - create the IP state for stack f: allocate it, set up its
       + * fragment pools, attach it to f, then initialise the v6 parameters.
       + */
       +void
       +ip_init(Fs *f)
       +{
       +        IP *state;
       +
       +        state = smalloc(sizeof *state);
       +        initfrag(state, 100);        /* 100 reassembly queues per IP version */
       +        f->ip = state;
       +
       +        ip_init_6(f);
       +}
       +
       +/*
       + * iprouting - turn packet forwarding on or off for stack f and mirror
       + * the setting in the Forwarding statistic (1 = forwarding, 2 = not).
       + */
       +void
       +iprouting(Fs *f, int on)
       +{
       +        IP *ip;
       +
       +        ip = f->ip;
       +        ip->iprouting = on;
       +        ip->stats[Forwarding] = ip->iprouting ? 1 : 2;
       +}
       +/*
       + * ipoput4 - send the IPv4 packet held in block list bp.
       + *
       + *  f       stack the packet belongs to
       + *  bp      packet; the first block starts with the Ip4hdr.  The list is
       + *          either handed to the medium's bwrite or freed before return.
       + *  gating  non-zero when the header is already filled in (ipiput4
       + *          passes 1 when forwarding): vihl, tos, id and frag are kept
       + *          and the length is taken from the header.
       + *  ttl     written into the header
       + *  tos     written into the header only when !gating
       + *  c       passed through to v4lookup
       + *
       + * Returns -1 when no route exists, otherwise 0, even when the packet
       + * is discarded for another reason.
       + */
       +int
       +ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
       +{
       +        Ipifc *ifc;
       +        uchar *gate;
       +        ulong fragoff;
       +        Block *xp, *nb;
       +        Ip4hdr *eh, *feh;
       +        int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
       +        Route *r, *sr;
       +        IP *ip;
       +        int rv = 0;
       +
       +        ip = f->ip;
       +
       +        /* Fill out the ip header */
       +        eh = (Ip4hdr*)(bp->rp);
       +
       +        ip->stats[OutRequests]++;
       +
       +        /* Number of uchars in data and ip header to write */
       +        len = blocklen(bp);
       +
       +        if(gating){
       +                chunk = nhgets(eh->length);        /* length claimed by the existing header */
       +                if(chunk > len){
       +                        ip->stats[OutDiscards]++;
       +                        netlog(f, Logip, "short gated packet\n");
       +                        goto free;
       +                }
       +                if(chunk < len)
       +                        len = chunk;        /* ignore bytes beyond the header's length */
       +        }
       +        if(len >= IP_MAX){
       +                ip->stats[OutDiscards]++;
       +                netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
       +                goto free;
       +        }
       +
       +        r = v4lookup(f, eh->dst, c);
       +        if(r == nil){
       +                ip->stats[OutNoRoutes]++;
       +                netlog(f, Logip, "no interface %V\n", eh->dst);
       +                rv = -1;        /* the only failure reported to the caller */
       +                goto free;
       +        }
       +
       +        ifc = r->ifc;
       +        if(r->type & (Rifc|Runi))
       +                gate = eh->dst;
       +        else
       +        if(r->type & (Rbcast|Rmulti)) {
       +                gate = eh->dst;
       +                sr = v4lookup(f, eh->src, nil);        /* prefer the interface that owns the source address */
       +                if(sr != nil && (sr->type & Runi))
       +                        ifc = sr->ifc;
       +        }
       +        else
       +                gate = r->v4.gate;
       +
       +        if(!gating)
       +                eh->vihl = IP_VER4|IP_HLEN4;
       +        eh->ttl = ttl;
       +        if(!gating)
       +                eh->tos = tos;
       +
       +        if(!CANRLOCK(ifc))
       +                goto free;
       +        if(waserror()){        /* an error raised below must still release the read lock */
       +                RUNLOCK(ifc);
       +                nexterror();
       +        }
       +        if(ifc->m == nil)
       +                goto raise;
       +
       +        /* If we don't need to fragment just send it */
       +        medialen = ifc->maxtu - ifc->m->hsize;
       +        if(len <= medialen) {
       +                if(!gating)
       +                        hnputs(eh->id, incref(&ip->id4));
       +                hnputs(eh->length, len);
       +                if(!gating){
       +                        eh->frag[0] = 0;
       +                        eh->frag[1] = 0;
       +                }
       +                eh->cksum[0] = 0;
       +                eh->cksum[1] = 0;
       +                hnputs(eh->cksum, ipcsum(&eh->vihl));
       +                ifc->m->bwrite(ifc, bp, V4, gate);        /* bp is handed to the medium; it is not freed here */
       +                RUNLOCK(ifc);
       +                poperror();
       +                return 0;
       +        }
       +        /* debugging aid: report locally generated packets that need fragmenting but have DF set */
       +if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
       +
       +        if(eh->frag[0] & (IP_DF>>8)){
       +                ip->stats[FragFails]++;
       +                ip->stats[OutDiscards]++;
       +                icmpcantfrag(f, bp, medialen);
       +                netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
       +                goto raise;
       +        }
       +
       +        seglen = (medialen - IP4HDR) & ~7;        /* payload bytes per fragment, a multiple of 8 */
       +        if(seglen < 8){
       +                ip->stats[FragFails]++;
       +                ip->stats[OutDiscards]++;
       +                netlog(f, Logip, "%V seglen < 8\n", eh->dst);
       +                goto raise;
       +        }
       +
       +        dlen = len - IP4HDR;
       +        xp = bp;
       +        if(gating)
       +                lid = nhgets(eh->id);
       +        else
       +                lid = incref(&ip->id4);
       +
       +        offset = IP4HDR;        /* step xp past the header so it points at payload */
       +        while(xp != nil && offset && offset >= BLEN(xp)) {
       +                offset -= BLEN(xp);
       +                xp = xp->next;
       +        }
       +        xp->rp += offset;
       +        /*
       +         * NOTE(review): when gating, the whole frag field is shifted, so an
       +         * incoming IP_MF bit rides in fragoff above the offset bits and comes
       +         * back out of fragoff>>3 below -- confirm this is intended.
       +         */
       +        if(gating)
       +                fragoff = nhgets(eh->frag)<<3;
       +        else
       +                fragoff = 0;
       +        dlen += fragoff;
       +        for(; fragoff < dlen; fragoff += seglen) {
       +                nb = allocb(IP4HDR+seglen);
       +                feh = (Ip4hdr*)(nb->rp);
       +
       +                memmove(nb->wp, eh, IP4HDR);        /* each fragment starts as a copy of the original header */
       +                nb->wp += IP4HDR;
       +
       +                if((fragoff + seglen) >= dlen) {
       +                        seglen = dlen - fragoff;        /* last fragment: shorter, and IP_MF is not added */
       +                        hnputs(feh->frag, fragoff>>3);
       +                }
       +                else
       +                        hnputs(feh->frag, (fragoff>>3)|IP_MF);
       +
       +                hnputs(feh->length, seglen + IP4HDR);
       +                hnputs(feh->id, lid);
       +
       +                /* Copy up the data area */
       +                chunk = seglen;
       +                while(chunk) {
       +                        if(!xp) {
       +                                ip->stats[OutDiscards]++;
       +                                ip->stats[FragFails]++;
       +                                freeblist(nb);
       +                                netlog(f, Logip, "!xp: chunk %d\n", chunk);
       +                                goto raise;
       +                        }
       +                        blklen = chunk;
       +                        if(BLEN(xp) < chunk)
       +                                blklen = BLEN(xp);
       +                        memmove(nb->wp, xp->rp, blklen);
       +                        nb->wp += blklen;
       +                        xp->rp += blklen;        /* consumes the source block in place */
       +                        chunk -= blklen;
       +                        if(xp->rp == xp->wp)
       +                                xp = xp->next;
       +                }
       +
       +                feh->cksum[0] = 0;
       +                feh->cksum[1] = 0;
       +                hnputs(feh->cksum, ipcsum(&feh->vihl));
       +                ifc->m->bwrite(ifc, nb, V4, gate);
       +                ip->stats[FragCreates]++;
       +        }
       +        ip->stats[FragOKs]++;
       +raise:        /* reached with the read lock held and the error label pushed */
       +        RUNLOCK(ifc);
       +        poperror();
       +free:        /* reached with no lock held; the original packet is released */
       +        freeblist(bp);
       +        return rv;
       +}
       +
       +/*
       + * ipiput4 - handle a packet received on interface ifc.
       + *
       + * Packets that are not IPv4 are passed to ipiput6.  IPv4 packets have
       + * their header pulled into the first block and checksummed, then are
       + * either forwarded through ipoput4 (when not addressed to this stack
       + * and routing is enabled) or reassembled if fragmented and delivered
       + * to the protocol registered for h->proto.
       + *
       + * bp is consumed on every path: it is delivered, forwarded, queued for
       + * reassembly, or freed.
       + */
       +void
       +ipiput4(Fs *f, Ipifc *ifc, Block *bp)
       +{
       +        int hl;
       +        int hop, tos, proto, olen;
       +        Ip4hdr *h;
       +        Proto *p;
       +        ushort frag;
       +        int notforme;
       +        uchar *dp, v6dst[IPaddrlen];
       +        IP *ip;
       +        Route *r;
       +
       +        if(BLKIPVER(bp) != IP_VER4) {
       +                ipiput6(f, ifc, bp);
       +                return;
       +        }
       +
       +        ip = f->ip;
       +        ip->stats[InReceives]++;
       +
       +        /*
       +         *  Ensure we have all the header info in the first
       +         *  block.  Make life easier for other protocols by
       +         *  collecting up to the first 64 bytes in the first block.
       +         */
       +        if(BLEN(bp) < 64) {
       +                hl = blocklen(bp);
       +                if(hl < IP4HDR)
       +                        hl = IP4HDR;
       +                if(hl > 64)
       +                        hl = 64;
       +                bp = pullupblock(bp, hl);
       +                if(bp == nil)
       +                        return;
       +        }
       +
       +        h = (Ip4hdr*)(bp->rp);
       +
       +        /* dump anything whose header doesn't checksum */
       +        if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
       +                ip->stats[InHdrErrors]++;
       +                netlog(f, Logip, "ip: checksum error %V\n", h->src);
       +                freeblist(bp);
       +                return;
       +        }
       +        v4tov6(v6dst, h->dst);
       +        notforme = ipforme(f, v6dst) == 0;
       +
       +        /* Check header length and version */
       +        if((h->vihl&0x0F) != IP_HLEN4) {
       +                hl = (h->vihl&0xF)<<2;
       +                if(hl < (IP_HLEN4<<2)) {
       +                        ip->stats[InHdrErrors]++;
       +                        netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
       +                        freeblist(bp);
       +                        return;
       +                }
       +                /* If this is not routed strip off the options */
       +                if(notforme == 0) {
       +                        olen = nhgets(h->length);
       +                        /* slide the fixed header up against the payload */
       +                        dp = bp->rp + (hl - (IP_HLEN4<<2));
       +                        memmove(dp, h, IP_HLEN4<<2);
       +                        bp->rp = dp;
       +                        h = (Ip4hdr*)(bp->rp);
       +                        h->vihl = (IP_VER4|IP_HLEN4);
       +                        hnputs(h->length, olen-hl+(IP_HLEN4<<2));
       +                }
       +        }
       +
       +        /* route */
       +        if(notforme) {
       +                Conv conv;
       +
       +                if(!ip->iprouting){
       +                        /*
       +                         * Free the whole list, as every other drop path here
       +                         * does: only the first block is guaranteed contiguous
       +                         * after the pullup above, and freeb would leak the rest.
       +                         */
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /* don't forward to source's network */
       +                conv.r = nil;
       +                r = v4lookup(f, h->dst, &conv);
       +                if(r == nil || r->ifc == ifc){
       +                        ip->stats[OutDiscards]++;
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /* don't forward if packet has timed out */
       +                hop = h->ttl;
       +                if(hop < 1) {
       +                        ip->stats[InHdrErrors]++;
       +                        icmpttlexceeded(f, ifc->lifc->local, bp);
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /* reassemble if the interface expects it */
       +                if(r->ifc == nil)
       +                        panic("nil route rfc");
       +                if(r->ifc->reassemble){
       +                        frag = nhgets(h->frag);
       +                        if(frag) {
       +                                /* tos doubles as the "more fragments" marker for ip4reassemble */
       +                                h->tos = 0;
       +                                if(frag & IP_MF)
       +                                        h->tos = 1;
       +                                bp = ip4reassemble(ip, frag, bp, h);
       +                                if(bp == nil)
       +                                        return;
       +                                h = (Ip4hdr*)(bp->rp);
       +                        }
       +                }
       +
       +                ip->stats[ForwDatagrams]++;
       +                tos = h->tos;
       +                hop = h->ttl;
       +                ipoput4(f, bp, 1, hop - 1, tos, &conv);
       +                return;
       +        }
       +
       +        frag = nhgets(h->frag);
       +        if(frag) {
       +                /* tos doubles as the "more fragments" marker for ip4reassemble */
       +                h->tos = 0;
       +                if(frag & IP_MF)
       +                        h->tos = 1;
       +                bp = ip4reassemble(ip, frag, bp, h);
       +                if(bp == nil)
       +                        return;
       +                h = (Ip4hdr*)(bp->rp);
       +        }
       +
       +        /* don't let any frag info go up the stack */
       +        h->frag[0] = 0;
       +        h->frag[1] = 0;
       +
       +        proto = h->proto;
       +        p = Fsrcvpcol(f, proto);
       +        if(p != nil && p->rcv != nil) {
       +                ip->stats[InDelivers]++;
       +                (*p->rcv)(p, ifc, bp);
       +                return;
       +        }
       +        ip->stats[InDiscards]++;
       +        ip->stats[InUnknownProtos]++;
       +        freeblist(bp);
       +}
       +
       +/*
       + * ipstats - format every IP statistic of stack f into buf as
       + * "name: value" lines, writing at most len bytes.
       + * Returns the number of bytes produced.
       + */
       +int
       +ipstats(Fs *f, char *buf, int len)
       +{
       +        IP *ip;
       +        char *cur, *end;
       +        int n;
       +
       +        ip = f->ip;
       +        ip->stats[DefaultTTL] = MAXTTL;
       +
       +        cur = buf;
       +        end = buf + len;
       +        n = 0;
       +        while(n < Nstats){
       +                cur = seprint(cur, end, "%s: %lud\n", statnames[n], ip->stats[n]);
       +                n++;
       +        }
       +        return cur - buf;
       +}
       +/*
       + * ip4reassemble - add fragment bp to the reassembly queue for its
       + * (src, dst, id) and return the whole packet once it is complete.
       + *
       + *  ip      IP state holding the fragment queues and fraglock4
       + *  offset  the header's frag field as read by the caller (flag bits
       + *          and offset together)
       + *  bp      the fragment; ownership passes to this routine
       + *  ih      bp's IPv4 header; callers set ih->tos to 1 when IP_MF was
       + *          set and to 0 otherwise
       + *
       + * Returns bp unchanged if it turns out not to be a fragment, the
       + * reassembled block list when this fragment completes a packet, and
       + * nil when the fragment was queued or dropped.
       + */
       +Block*
       +ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
       +{
       +        int fend;
       +        ushort id;
       +        Fragment4 *f, *fnext;
       +        ulong src, dst;
       +        Block *bl, **l, *last, *prev;
       +        int ovlap, len, fragsize, pktposn;
       +
       +        src = nhgetl(ih->src);
       +        dst = nhgetl(ih->dst);
       +        id = nhgets(ih->id);
       +
       +        /*
       +         *  block lists are too hard, pullupblock into a single block
       +         */
       +        if(bp->next){
       +                bp = pullupblock(bp, blocklen(bp));
       +                ih = (Ip4hdr*)(bp->rp);        /* the header may have moved */
       +        }
       +
       +        qlock(&ip->fraglock4);
       +
       +        /*
       +         *  find a reassembly queue for this fragment,
       +         *  expiring stale queues passed on the way
       +         */
       +        for(f = ip->flisthead4; f; f = fnext){
       +                fnext = f->next;        /* because ipfragfree4 changes the list */
       +                if(f->src == src && f->dst == dst && f->id == id)
       +                        break;
       +                if(f->age < NOW){
       +                        ip->stats[ReasmTimeout]++;
       +                        ipfragfree4(ip, f);
       +                }
       +        }
       +
       +        /*
       +         *  if this isn't a fragmented packet, accept it
       +         *  and get rid of any fragments that might go
       +         *  with it.
       +         */
       +        if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
       +                if(f != nil) {
       +                        ipfragfree4(ip, f);
       +                        ip->stats[ReasmFails]++;
       +                }
       +                qunlock(&ip->fraglock4);
       +                return bp;
       +        }
       +        /* make room below rp for the Ipfrag record that BKFG overlays at base */
       +        if(bp->base+sizeof(Ipfrag) >= bp->rp){
       +                bp = padblock(bp, sizeof(Ipfrag));
       +                bp->rp += sizeof(Ipfrag);
       +        }
       +        /* NOTE(review): offset still carries the flag bits here; presumably foff is narrow enough to drop them -- confirm */
       +        BKFG(bp)->foff = offset<<3;
       +        BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
       +
       +        /* First fragment allocates a reassembly queue */
       +        if(f == nil) {
       +                f = ipfragallo4(ip);
       +                f->id = id;
       +                f->src = src;
       +                f->dst = dst;
       +
       +                f->blist = bp;
       +
       +                qunlock(&ip->fraglock4);
       +                ip->stats[ReasmReqds]++;
       +                return nil;
       +        }
       +
       +        /*
       +         *  find the new fragment's position in the queue
       +         *  (the queue is kept sorted by foff)
       +         */
       +        prev = nil;
       +        l = &f->blist;
       +        bl = f->blist;
       +        while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
       +                prev = bl;
       +                l = &bl->next;
       +                bl = bl->next;
       +        }
       +
       +        /* Check overlap of a previous fragment - trim away as necessary */
       +        if(prev) {
       +                ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
       +                if(ovlap > 0) {
       +                        if(ovlap >= BKFG(bp)->flen) {        /* new fragment is wholly covered: drop it */
       +                                freeblist(bp);
       +                                qunlock(&ip->fraglock4);
       +                                return nil;
       +                        }
       +                        BKFG(prev)->flen -= ovlap;        /* otherwise shorten the earlier fragment */
       +                }
       +        }
       +
       +        /* Link onto assembly queue */
       +        bp->next = *l;
       +        *l = bp;
       +
       +        /* Check to see if succeeding segments overlap */
       +        if(bp->next) {
       +                l = &bp->next;
       +                fend = BKFG(bp)->foff + BKFG(bp)->flen;
       +                /* Take completely covered segments out */
       +                while(*l) {
       +                        ovlap = fend - BKFG(*l)->foff;
       +                        if(ovlap <= 0)
       +                                break;
       +                        if(ovlap < BKFG(*l)->flen) {        /* partial overlap: trim the front of the later fragment */
       +                                BKFG(*l)->flen -= ovlap;
       +                                BKFG(*l)->foff += ovlap;
       +                                /* move up ih hdrs */
       +                                memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
       +                                (*l)->rp += ovlap;
       +                                break;
       +                        }
       +                        last = (*l)->next;
       +                        (*l)->next = nil;
       +                        freeblist(*l);
       +                        *l = last;
       +                }
       +        }
       +
       +        /*
       +         *  look for a complete packet.  if we get to a fragment
       +         *  without IP_MF set, we're done.
       +         */
       +        pktposn = 0;
       +        for(bl = f->blist; bl; bl = bl->next) {
       +                if(BKFG(bl)->foff != pktposn)        /* a gap: not complete yet */
       +                        break;
       +                if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
       +                        bl = f->blist;
       +                        len = nhgets(BLKIP(bl)->length);
       +                        bl->wp = bl->rp + len;
       +
       +                        /* Pullup all the fragment headers and
       +                         * return a complete packet
       +                         */
       +                        for(bl = bl->next; bl; bl = bl->next) {
       +                                fragsize = BKFG(bl)->flen;
       +                                len += fragsize;
       +                                bl->rp += IP4HDR;        /* later fragments keep only their payload */
       +                                bl->wp = bl->rp + fragsize;
       +                        }
       +
       +                        bl = f->blist;
       +                        f->blist = nil;        /* detach so ipfragfree4 does not free the packet */
       +                        ipfragfree4(ip, f);
       +                        ih = BLKIP(bl);
       +                        hnputs(ih->length, len);
       +                        qunlock(&ip->fraglock4);
       +                        ip->stats[ReasmOKs]++;
       +                        return bl;
       +                }
       +                pktposn += BKFG(bl)->flen;
       +        }
       +        qunlock(&ip->fraglock4);
       +        return nil;
       +}
       +
       +/*
       + * ipfragfree4 - release reassembly queue frag: drop any blocks it
       + * still holds, unlink it from the active list and push it onto the
       + * free list.  The caller is assumed to hold fraglock4.
       + */
       +void
       +ipfragfree4(IP *ip, Fragment4 *frag)
       +{
       +        Fragment4 **link;
       +
       +        if(frag->blist != nil)
       +                freeblist(frag->blist);
       +
       +        frag->blist = nil;
       +        frag->id = 0;
       +        frag->src = 0;
       +
       +        /* unlink frag from the active list if it is on it */
       +        for(link = &ip->flisthead4; *link != nil; link = &(*link)->next){
       +                if(*link == frag){
       +                        *link = frag->next;
       +                        break;
       +                }
       +        }
       +
       +        frag->next = ip->fragfree4;
       +        ip->fragfree4 = frag;
       +}
       +
       +/*
       + * ipfragallo4 - hand out a reassembly queue, placed at the head of the
       + * active list with a fresh expiry time.  When the free list is empty
       + * the queue at the tail of the active list is recycled first.
       + * The caller is assumed to hold fraglock4.
       + */
       +Fragment4 *
       +ipfragallo4(IP *ip)
       +{
       +        Fragment4 *q, *tail;
       +
       +        while(ip->fragfree4 == nil){
       +                /* walk to the last entry of the active list and free it */
       +                tail = ip->flisthead4;
       +                while(tail->next != nil)
       +                        tail = tail->next;
       +                ipfragfree4(ip, tail);
       +        }
       +
       +        q = ip->fragfree4;
       +        ip->fragfree4 = q->next;
       +
       +        q->next = ip->flisthead4;
       +        ip->flisthead4 = q;
       +        q->age = NOW + 30000;
       +
       +        return q;
       +}
       +
       +/*
       + * ipcsum - checksum the IPv4 header starting at addr.  The header
       + * length comes from the low nibble of the first byte (in 4-byte
       + * words); the header is summed as big-endian 16-bit words, the
       + * carries are folded back in, and the complement is returned.
       + */
       +ushort
       +ipcsum(uchar *addr)
       +{
       +        ulong acc;
       +        uchar *end;
       +
       +        acc = 0;
       +        end = addr + ((addr[0]&0xf)<<2);
       +        for(; addr < end; addr += 2)
       +                acc += addr[0]<<8 | addr[1];
       +
       +        /* fold twice: the first fold can itself carry */
       +        acc = (acc >> 16) + (acc & 0xffff);
       +        acc = (acc >> 16) + (acc & 0xffff);
       +
       +        return (acc^0xffff);
       +}
 (DIR) diff --git a/src/9vx/a/ip/ip.h b/src/9vx/a/ip/ip.h
       @@ -0,0 +1,677 @@
       +typedef struct        Conv        Conv;        /* forward typedefs: let the declarations below name each other before they are defined */
       +typedef struct        Fs        Fs;
       +typedef union        Hwaddr        Hwaddr;
       +typedef struct        IP        IP;
       +typedef struct        IPaux        IPaux;
       +typedef struct        Ipself        Ipself;
       +typedef struct        Ipselftab        Ipselftab;
       +typedef struct        Iplink        Iplink;
       +typedef struct        Iplifc        Iplifc;
       +typedef struct        Ipmulti        Ipmulti;
       +typedef struct        Ipifc        Ipifc;
       +typedef struct        Iphash        Iphash;
       +typedef struct        Ipht        Ipht;
       +typedef struct        Netlog        Netlog;
       +typedef struct        Medium        Medium;
       +typedef struct        Proto        Proto;
       +typedef struct        Arpent        Arpent;
       +typedef struct        Arp Arp;
       +typedef struct        Route        Route;
       +
       +typedef struct        Routerparams        Routerparams;
       +typedef struct         Hostparams        Hostparams;
       +typedef struct         v6router        v6router;
       +typedef struct        v6params        v6params;
       +/*
       + *  stack-wide sizes and protocol constants
       + */
       +enum
       +{
       +        Addrlen=        64,
       +        Maxproto=        20,
       +        Nhash=                64,
       +        Maxincall=        5,
       +        Nchans=                1024,
       +        MAClen=                16,                /* longest mac address */
       +
       +        MAXTTL=                255,                /* value written as the default ttl */
       +        DFLTTOS=        0,
       +
       +        IPaddrlen=        16,                /* bytes in an address held in v6 form */
       +        IPv4addrlen=        4,                /* bytes in a v4 address */
       +        IPv4off=        12,                /* presumably where the v4 bytes sit inside the v6 form -- TODO confirm */
       +        IPllen=                4,
       +
       +        /* ip versions */
       +        V4=                4,
       +        V6=                6,
       +        IP_VER4=         0x40,                /* version as it appears in the top nibble of vihl */
       +        IP_VER6=        0x60,
       +        IP_HLEN4=        5,                /* v4: Header length in words */
       +        IP_DF=                0x4000,                /* v4: Don't fragment */
       +        IP_MF=                0x2000,                /* v4: More fragments */
       +        IP4HDR=                20,                /* sizeof(Ip4hdr) */
       +        IP_MAX=                64*1024,        /* Max. Internet packet size, v4 & v6 */
       +
       +        /* 2^Lroot trees in the root table */
       +        Lroot=                10,
       +
       +        Maxpath =        64,
       +};
       +/*
       + *  connection states; presumably the values held in Conv.state -- TODO confirm
       + */
       +enum
       +{
       +        Idle=                0,
       +        Announcing=        1,
       +        Announced=        2,
       +        Connecting=        3,
       +        Connected=        4,
       +};
       +
       +/*
       + * on the wire packet header (IPv4, without options).
       + * Every field is a byte or byte array; multi-byte fields are read
       + * and written with nhgets/hnputs rather than accessed as integers.
       + */
       +typedef struct Ip4hdr                Ip4hdr;
       +struct Ip4hdr
       +{
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        length[2];        /* packet length */
       +        uchar        id[2];                /* ip->identification */
       +        uchar        frag[2];        /* Fragment information: IP_DF/IP_MF flags plus offset in 8-byte units */
       +        uchar        ttl;              /* Time to live */
       +        uchar        proto;                /* Protocol */
       +        uchar        cksum[2];        /* Header checksum */
       +        uchar        src[4];                /* IP source */
       +        uchar        dst[4];                /* IP destination */
       +};
       +
       +/*
       + *  one per conversation directory
       + *
       + *  The IPv4 forwarding path also builds a temporary Conv on the
       + *  stack with only r initialised and passes it to v4lookup and
       + *  ipoput4.
       + */
       +struct Conv
       +{
       +        QLock        qlock;
       +
       +        int        x;                        /* conversation index */
       +        Proto*        p;
       +
       +        int        restricted;                /* remote port is restricted */
       +        uint        ttl;                        /* max time to live */
       +        uint        tos;                        /* type of service */
       +        int        ignoreadvice;                /* don't terminate connection on icmp errors */
       +
       +        uchar        ipversion;
       +        uchar        laddr[IPaddrlen];        /* local IP address */
       +        uchar        raddr[IPaddrlen];        /* remote IP address */
       +        ushort        lport;                        /* local port number */
       +        ushort        rport;                        /* remote port number */
       +
       +        char        *owner;                        /* protections */
       +        int        perm;
       +        int        inuse;                        /* opens of listen/data/ctl */
       +        int        length;
       +        int        state;
       +
       +        int        maxfragsize;                /* If set, used for fragmentation */
       +
       +        /* udp specific */
       +        int        headers;                /* data src/dst headers in udp */
       +        int        reliable;                /* true if reliable udp */
       +
       +        Conv*        incall;                        /* calls waiting to be listened for */
       +        Conv*        next;
       +
       +        Queue*        rq;                        /* queued data waiting to be read */
       +        Queue*        wq;                        /* queued data waiting to be written */
       +        Queue*        eq;                        /* returned error packets */
       +        Queue*        sq;                        /* snooping queue */
       +        Ref        snoopers;                /* number of processes with snoop open */
       +
       +        QLock        car;
       +        Rendez        cr;
       +        char        cerr[ERRMAX];
       +
       +        QLock        listenq;
       +        Rendez        listenr;
       +
       +        Ipmulti        *multi;                        /* multicast bindings for this interface */
       +
       +        void*        ptcl;                        /* protocol specific stuff */
       +
       +        Route        *r;                        /* last route used */
       +        ulong        rgen;                        /* routetable generation for *r */
       +};
       +/*
       + *  description of a link type and its table of operations.
       + *  The IPv4 output path reads hsize and calls bwrite through ifc->m.
       + */
       +struct Medium
       +{
       +        char        *name;
       +        int        hsize;                /* medium header size */
       +        int        mintu;                /* default min mtu */
       +        int        maxtu;                /* default max mtu */
       +        int        maclen;                /* mac address length  */
       +        void        (*bind)(Ipifc*, int, char**);
       +        void        (*unbind)(Ipifc*);
       +        void        (*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip);        /* transmit block b; version is V4 or V6 */
       +
       +        /* for arming interfaces to receive multicast */
       +        void        (*addmulti)(Ipifc *ifc, uchar *a, uchar *ia);
       +        void        (*remmulti)(Ipifc *ifc, uchar *a, uchar *ia);
       +
       +        /* process packets written to 'data' */
       +        void        (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
       +
       +        /* routes for router boards */
       +        void        (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
       +        void        (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
       +        void        (*flushroutes)(Ipifc *ifc);
       +
       +        /* for routing multicast groups */
       +        void        (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
       +        void        (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
       +
       +        /* address resolution */
       +        void        (*ares)(Fs*, int, uchar*, uchar*, int, int);        /* resolve */
       +        void        (*areg)(Ipifc*, uchar*);                        /* register */
       +
       +        /* v6 address generation */
       +        void        (*pref2addr)(uchar *pref, uchar *ea);
       +
       +        int        unbindonclose;        /* if non-zero, unbind on last close */
       +};
       +
       +/*
       + *  logical interface associated with a physical one:
       + *  one address configuration on an Ipifc, chained through next.
       + */
       +struct Iplifc
       +{
       +        uchar        local[IPaddrlen];
       +        uchar        mask[IPaddrlen];
       +        uchar        remote[IPaddrlen];
       +        uchar        net[IPaddrlen];                /* NOTE(review): presumably local masked by mask - confirm */
       +        uchar        tentative;        /* =1 => v6 dup disc on, =0 => confirmed unique */
       +        uchar        onlink;                /* =1 => onlink, =0 offlink. */
       +        uchar        autoflag;        /* v6 autonomous flag */
       +        long         validlt;        /* v6 valid lifetime */
       +        long         preflt;                /* v6 preferred lifetime */
       +        long        origint;        /* time when addr was added */
       +        Iplink        *link;                /* addresses linked to this lifc */
       +        Iplifc        *next;                /* next logical interface in the list */
       +};
       +
       +/* binding twixt Ipself and Iplifc */
       +struct Iplink
       +{
       +        Ipself        *self;
       +        Iplifc        *lifc;
       +        Iplink        *selflink;        /* next link for this local address */
       +        Iplink        *lifclink;        /* next link for this ifc */
       +        ulong        expire;
       +        Iplink        *next;                /* free list */
       +        int        ref;
       +};
       +
       +/* rfc 2461, pp.40—43. */
       +
       +/* default values, one per stack */
       +struct Routerparams {
       +        int        mflag;                /* flag: managed address configuration */
       +        int        oflag;                /* flag: other stateful configuration */
       +        int         maxraint;        /* max. router adv interval (ms) */
       +        int        minraint;        /* min. router adv interval (ms) */
       +        int        linkmtu;        /* mtu options */
       +        int        reachtime;        /* reachable time */
       +        int        rxmitra;        /* retransmit interval */
       +        int        ttl;                /* cur hop count limit */
       +        int        routerlt;        /* router lifetime */
       +};
       +
       +struct Hostparams {
       +        int        rxmithost;
       +};
       +
       +/*
       + *  one physical interface: the medium it is bound to, the device
       + *  name, its MAC address, traffic counters and the list of logical
       + *  interfaces (Iplifc) configured on it.
       + */
       +struct Ipifc
       +{
       +        RWlock        rwlock;
       +
       +        Conv        *conv;                /* link to its conversation structure */
       +        char        dev[64];        /* device we're attached to */
       +        Medium        *m;                /* Media pointer */
       +        int        maxtu;                /* Maximum transfer unit */
       +        int        mintu;                /* Minimum transfer unit */
       +        int        mbps;                /* megabits per second */
       +        void        *arg;                /* medium specific */
       +        int        reassemble;        /* reassemble IP packets before forwarding */
       +
       +        /* these are used so that we can unbind on the fly */
       +        Lock        idlock;
       +        uchar        ifcid;                /* incremented each 'bind/unbind/add/remove'; a uchar, so it wraps */
       +        int        ref;                /* number of proc's using this ipifc */
       +        Rendez        wait;                /* where unbinder waits for ref == 0 */
       +        int        unbinding;        /* NOTE(review): presumably non-zero while an unbind is in progress - confirm */
       +
       +        uchar        mac[MAClen];        /* MAC address */
       +
       +        Iplifc        *lifc;                /* logical interfaces on this physical one */
       +
       +        ulong        in, out;        /* message statistics */
       +        ulong        inerr, outerr;        /* ... */
       +
       +        uchar        sendra6;        /* flag: send router advs on this ifc */
       +        uchar        recvra6;        /* flag: recv router advs on this ifc */
       +        Routerparams rp;        /* router parameters as in RFC 2461, pp.40—43.
       +                                        used only if node is router */
       +};
       +
       +/*
       + *  one per multicast-lifc pair used by a Conv
       + */
       +struct Ipmulti
       +{
       +        uchar        ma[IPaddrlen];
       +        uchar        ia[IPaddrlen];
       +        Ipmulti        *next;
       +};
       +
       +/*
       + *  hash table for 2 ip addresses + 2 ports
       + */
       +enum
       +{
       +        Nipht=                521,        /* convenient prime */
       +
       +        IPmatchexact=        0,        /* match on 4 tuple */
       +        IPmatchany,                /* *!* */
       +        IPmatchport,                /* *!port */
       +        IPmatchaddr,                /* addr!* */
       +        IPmatchpa,                /* addr!port */
       +};
       +/* one entry on a bucket chain of an Ipht */
       +struct Iphash
       +{
       +        Iphash        *next;                /* next entry in the same bucket, nil at the end */
       +        Conv        *c;                /* conversation this entry refers to */
       +        int        match;                /* one of the IPmatch* values */
       +};
       +/* chained hash table of conversations; managed by iphtadd, iphtrem and iphtlook */
       +struct Ipht
       +{
       +        Lock        lk;                /* NOTE(review): presumably guards tab - confirm against the LOCK() macro */
       +
       +        Iphash        *tab[Nipht];        /* bucket heads, nil when a bucket is empty */
       +};
       +void iphtadd(Ipht*, Conv*);
       +void iphtrem(Ipht*, Conv*);
       +Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);
       +
       +/*
       + *  one per multiplexed protocol
       + */
       +struct Proto
       +{
       +        QLock                qlock;
       +
       +        char*                name;                /* protocol name */
       +        int                x;                /* protocol index */
       +        int                ipproto;        /* ip protocol type */
       +
       +        char*                (*connect)(Conv*, char**, int);
       +        char*                (*announce)(Conv*, char**, int);
       +        char*                (*bind)(Conv*, char**, int);
       +        int                (*state)(Conv*, char*, int);
       +        void                (*create)(Conv*);
       +        void                (*close)(Conv*);
       +        void                (*rcv)(Proto*, Ipifc*, Block*);
       +        char*                (*ctl)(Conv*, char**, int);
       +        void                (*advise)(Proto*, Block*, char*);
       +        int                (*stats)(Proto*, char*, int);
       +        int                (*local)(Conv*, char*, int);
       +        int                (*remote)(Conv*, char*, int);
       +        int                (*inuse)(Conv*);
       +        int                (*gc)(Proto*);        /* returns true if any conversations are freed */
       +
       +        Fs                *f;                /* file system this proto is part of */
       +        Conv                **conv;                /* array of conversations */
       +        int                ptclsize;        /* size of per protocol ctl block */
       +        int                nc;                /* number of conversations */
       +        int                ac;
       +        Qid                qid;                /* qid for protocol directory */
       +        ushort                nextrport;
       +
       +        void                *priv;
       +};
       +
       +
       +/*
       + *  one per IP protocol stack
       + */
       +struct Fs
       +{
       +        RWlock        rwlock;
       +
       +        Conv        *conv;                /* link to its conversation structure */
       +        int        dev;
       +
       +        int        np;
       +        Proto*        p[Maxproto+1];                /* list of supported protocols */
       +        Proto*        t2p[256];                /* vector of all protocols */
       +        Proto*        ipifc;                        /* kludge for ipifcremroute & ipifcaddroute */
       +        Proto*        ipmux;                        /* kludge for finding an ip multiplexor */
       +
       +        IP        *ip;
       +        Ipselftab        *self;
       +        Arp        *arp;
       +        v6params        *v6p;
       +
       +        Route        *v4root[1<<Lroot];        /* v4 routing forest */
       +        Route        *v6root[1<<Lroot];        /* v6 routing forest */
       +        Route        *queue;                        /* used as temp when reinjecting routes */
       +
       +        Netlog        *alog;
       +
       +        char        ndb[1024];                /* an ndb entry for this interface */
       +        int        ndbvers;
       +        long        ndbmtime;
       +};
       +
       +/* one per default router known to host */
       +struct v6router {
       +        uchar        inuse;
       +        Ipifc        *ifc;
       +        int        ifcid;
       +        uchar        routeraddr[IPaddrlen];
       +        long        ltorigin;
       +        Routerparams        rp;
       +};
       +
       +struct v6params
       +{
       +        Routerparams        rp;                /* v6 params, one copy per node now */
       +        Hostparams        hp;
       +        v6router        v6rlist[3];        /* max 3 default routers, currently */
       +        int                cdrouter;        /* uses only v6rlist[cdrouter] if   */
       +                                        /* cdrouter >= 0. */
       +};
       +
       +
       +int        Fsconnected(Conv*, char*);
       +Conv*        Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar);
       +int        Fspcolstats(char*, int);
       +int        Fsproto(Fs*, Proto*);
       +int        Fsbuiltinproto(Fs*, uchar);
       +Conv*        Fsprotoclone(Proto*, char*);
       +Proto*        Fsrcvpcol(Fs*, uchar);
       +Proto*        Fsrcvpcolx(Fs*, uchar);
       +char*        Fsstdconnect(Conv*, char**, int);
       +char*        Fsstdannounce(Conv*, char**, int);
       +char*        Fsstdbind(Conv*, char**, int);
       +ulong        scalednconv(void);
       +void        closeconv(Conv*);
       +/*
       + *  logging
       + */
       +enum
       +{
       +        Logip=                1<<1,
       +        Logtcp=                1<<2,
       +        Logfs=                1<<3,
       +        Logil=                1<<4,
       +        Logicmp=        1<<5,
       +        Logudp=                1<<6,
       +        Logcompress=        1<<7,
       +        Logilmsg=        1<<8,
       +        Loggre=                1<<9,
       +        Logppp=                1<<10,
       +        Logtcprxmt=        1<<11,
       +        Logigmp=        1<<12,
       +        Logudpmsg=        1<<13,
       +        Logipmsg=        1<<14,
       +        Logrudp=        1<<15,
       +        Logrudpmsg=        1<<16,
       +        Logesp=                1<<17,
       +        Logtcpwin=        1<<18,
       +};
       +
       +void        netloginit(Fs*);
       +void        netlogopen(Fs*);
       +void        netlogclose(Fs*);
       +void        netlogctl(Fs*, char*, int);
       +long        netlogread(Fs*, void*, ulong, long);
       +void        netlog(Fs*, int, char*, ...);
       +void        ifcloginit(Fs*);
       +long        ifclogread(Fs*, Chan *,void*, ulong, long);
       +void        ifclog(Fs*, uchar *, int);
       +void        ifclogopen(Fs*, Chan*);
       +void        ifclogclose(Fs*, Chan*);
       +
       +/*
       + *  iproute.c
       + */
       +typedef        struct RouteTree RouteTree;
       +typedef struct Routewalk Routewalk;
       +typedef struct V4route V4route;
       +typedef struct V6route V6route;
       +
       +enum
       +{
       +
       +        /* type bits */
       +        Rv4=                (1<<0),                /* this is a version 4 route */
       +        Rifc=                (1<<1),                /* this route is a directly connected interface */
       +        Rptpt=                (1<<2),                /* this route is a pt to pt interface */
       +        Runi=                (1<<3),                /* a unicast self address */
       +        Rbcast=                (1<<4),                /* a broadcast self address */
       +        Rmulti=                (1<<5),                /* a multicast self address */
       +        Rproxy=                (1<<6),                /* this route should be proxied */
       +};
       +
       +struct Routewalk
       +{
       +        int        o;
       +        int        h;
       +        char*        p;
       +        char*        e;
       +        void*        state;
       +        void        (*walk)(Route*, Routewalk*);
       +};
       +
       +/* tree linkage and bookkeeping common to v4 and v6 routes */
       +struct        RouteTree
       +{
       +        Route*        right;
       +        Route*        left;
       +        Route*        mid;
       +        uchar        depth;
       +        uchar        type;                /* NOTE(review): presumably the R* type bits (Rv4, Rifc, ...) - confirm */
       +        uchar        ifcid;                /* must match ifc->ifcid */
       +        Ipifc        *ifc;
       +        char        tag[4];
       +        int        ref;
       +};
       +
       +struct V4route
       +{
       +        ulong        address;
       +        ulong        endaddress;
       +        uchar        gate[IPv4addrlen];
       +};
       +
       +struct V6route
       +{
       +        ulong        address[IPllen];
       +        ulong        endaddress[IPllen];
       +        uchar        gate[IPaddrlen];
       +};
       +
       +/*
       + *  one routing table entry.  The leading fields are a hand-expanded
       + *  copy of struct RouteTree (see the commented-out member below);
       + *  keep the two lists identical.
       + */
       +struct Route
       +{
       +/*        RouteTree; */
       +        Route*        right;
       +        Route*        left;
       +        Route*        mid;
       +        uchar        depth;
       +        uchar        type;
       +        uchar        ifcid;                /* must match ifc->ifcid */
       +        Ipifc        *ifc;
       +        char        tag[4];
       +        int        ref;
       +
       +        /* address range and gateway, in v6 or v4 form */
       +        union {
       +                V6route        v6;
       +                V4route v4;
       +        };
       +};
       +extern void        v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
       +extern void        v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
       +extern void        v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
       +extern void        v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
       +extern Route*        v4lookup(Fs *f, uchar *a, Conv *c);
       +extern Route*        v6lookup(Fs *f, uchar *a, Conv *c);
       +extern long        routeread(Fs *f, char*, ulong, int);
       +extern long        routewrite(Fs *f, Chan*, char*, int);
       +extern void        routetype(int, char*);
       +extern void        ipwalkroutes(Fs*, Routewalk*);
       +extern void        convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
       +
       +/*
       + *  devip.c
       + */
       +
       +/*
       + *  Hanging off every ip channel's ->aux is the following structure.
       + *  It maintains the state used by devip and iproute.
       + */
       +struct IPaux
       +{
       +        char        *owner;                /* the user that did the attach */
       +        char        tag[4];
       +};
       +
       +extern IPaux*        newipaux(char*, char*);
       +
       +/*
       + *  arp.c
       + */
       +/* one address resolution entry: maps ip to mac on interface ifc */
       +struct Arpent
       +{
       +        uchar        ip[IPaddrlen];
       +        uchar        mac[MAClen];
       +        Medium        *type;                        /* media type */
       +        Arpent*        hash;                        /* NOTE(review): presumably the next entry on a hash chain - confirm */
       +        Block*        hold;                        /* NOTE(review): hold/last look like head and tail of a Block queue - confirm */
       +        Block*        last;
       +        uint        ctime;                        /* time entry was created or refreshed */
       +        uint        utime;                        /* time entry was last used */
       +        uchar        state;
       +        Arpent        *nextrxt;                /* re-transmit chain */
       +        uint        rtime;                        /* time for next retransmission */
       +        uchar        rxtsrem;
       +        Ipifc        *ifc;
       +        uchar        ifcid;                        /* must match ifc->ifcid */
       +};
       +
       +extern void        arpinit(Fs*);
       +extern int        arpread(Arp*, char*, ulong, int);
       +extern int        arpwrite(Fs*, char*, int);
       +extern Arpent*        arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
       +extern void        arprelease(Arp*, Arpent *a);
       +extern Block*        arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
       +extern void        arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
       +
       +/*
       + * ipaux.c
       + */
       +
       +extern int        myetheraddr(uchar*, char*);
       +extern vlong        parseip(uchar*, char*);
       +extern vlong        parseipmask(uchar*, char*);
       +extern char*        v4parseip(uchar*, char*);
       +extern void        maskip(uchar *from, uchar *mask, uchar *to);
       +extern int        parsemac(uchar *to, char *from, int len);
       +extern uchar*        defmask(uchar*);
       +extern int        isv4(uchar*);
       +extern void        v4tov6(uchar *v6, uchar *v4);
       +extern int        v6tov4(uchar *v4, uchar *v6);
       +extern int        eipfmt(Fmt*);
       +
       +/*
       + *  ipmove copies one IPaddrlen-byte address from y to x.
       + *  ipcmp yields 0 when the two addresses are equal and non-zero when
       + *  they differ; it tests the last byte first and only then calls
       + *  memcmp, so unlike memcmp the result carries no ordering.
       + *  ipcmp expands each argument twice: do not pass expressions
       + *  with side effects.
       + */
       +#define        ipmove(x, y) memmove(x, y, IPaddrlen)
       +#define        ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
       +
       +extern uchar IPv4bcast[IPaddrlen];
       +extern uchar IPv4bcastobs[IPaddrlen];
       +extern uchar IPv4allsys[IPaddrlen];
       +extern uchar IPv4allrouter[IPaddrlen];
       +extern uchar IPnoaddr[IPaddrlen];
       +extern uchar v4prefix[IPaddrlen];
       +extern uchar IPallbits[IPaddrlen];
       +
       +#define        NOW        msec()
       +
       +/*
       + *  media
       + */
       +extern Medium        ethermedium;
       +extern Medium        nullmedium;
       +extern Medium        pktmedium;
       +extern Medium        tripmedium;
       +
       +/*
       + *  ipifc.c
       + */
       +extern Medium*        ipfindmedium(char *name);
       +extern void        addipmedium(Medium *med);
       +extern int        ipforme(Fs*, uchar *addr);
       +extern int        iptentative(Fs*, uchar *addr);
       +extern int        ipisbm(uchar *);
       +extern int        ipismulticast(uchar *ip);        /* declared once; a second identical prototype used to follow ipproxyifc */
       +extern Ipifc*        findipifc(Fs*, uchar *remote, int type);
       +extern void        findlocalip(Fs*, uchar *local, uchar *remote);
       +extern int        ipv4local(Ipifc *ifc, uchar *addr);
       +extern int        ipv6local(Ipifc *ifc, uchar *addr);
       +extern int        ipv6anylocal(Ipifc *ifc, uchar *addr);
       +extern Iplifc*        iplocalonifc(Ipifc *ifc, uchar *ip);
       +extern int        ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
       +extern int        ipisbooting(void);
       +extern int        ipifccheckin(Ipifc *ifc, Medium *med);
       +extern void        ipifccheckout(Ipifc *ifc);
       +extern int        ipifcgrab(Ipifc *ifc);
       +extern void        ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
       +extern void        ipifcremroute(Fs*, int, uchar*, uchar*);
       +extern void        ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
       +extern void        ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
       +extern char*        ipifcrem(Ipifc *ifc, char **argv, int argc);
       +extern char*        ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
       +extern long        ipselftabread(Fs*, char *a, ulong offset, int n);
       +extern char*        ipifcadd6(Ipifc *ifc, char**argv, int argc);
       +/*
       + *  ip.c
       + */
       +extern void        iprouting(Fs*, int);
       +extern void        icmpnoconv(Fs*, Block*);
       +extern void        icmpcantfrag(Fs*, Block*, int);
       +extern void        icmpttlexceeded(Fs*, uchar*, Block*);
       +extern ushort        ipcsum(uchar*);
       +extern void        ipiput4(Fs*, Ipifc*, Block*);
       +extern void        ipiput6(Fs*, Ipifc*, Block*);
       +extern int        ipoput4(Fs*, Block*, int, int, int, Conv*);
       +extern int        ipoput6(Fs*, Block*, int, int, int, Conv*);
       +extern int        ipstats(Fs*, char*, int);
       +extern ushort        ptclbsum(uchar*, int);
       +extern ushort        ptclcsum(Block*, int, int);
       +extern void        ip_init(Fs*);
       +extern void        update_mtucache(uchar*, ulong);
       +extern ulong        restrict_mtu(uchar*, ulong);
       +/*
       + * bootp.c
       + */
       +extern char*        bootp(Ipifc*);
       +extern int        bootpread(char*, ulong, int);
       +
       +/*
       + *  resolving inferno/plan9 differences
       + */
       +Chan*                commonfdtochan(int, int, int, int);
       +char*                commonuser(void);
       +char*                commonerror(void);
       +
       +/*
       + * chandial.c
       + */
       +extern Chan*        chandial(char*, char*, char*, Chan**);
       +
       +/*
       + *  global to all of the stack
       + */
       +extern void        (*igmpreportfn)(Ipifc*, uchar*);
 (DIR) diff --git a/src/9vx/a/ip/ipaux.c b/src/9vx/a/ip/ipaux.c
       @@ -0,0 +1,368 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +#include        "ip.h"
       +#include        "ipv6.h"
       +
       +/*
       + *  printable names for header/protocol types, indexed by the type
       + *  constant.  Slots that are not listed stay nil, so check before
       + *  printing.  Uses standard C99 designators ("[i] = v"); the form
       + *  without '=' is an obsolete compiler extension.
       + */
       +char *v6hdrtypes[Maxhdrtype] =
       +{
       +        [HBH]                = "HopbyHop",
       +        [ICMP]                = "ICMP",
       +        [IGMP]                = "IGMP",
       +        [GGP]                = "GGP",
       +        [IPINIP]        = "IP",
       +        [ST]                = "ST",
       +        [TCP]                = "TCP",
       +        [UDP]                = "UDP",
       +        [ISO_TP4]        = "ISO_TP4",
       +        [RH]                = "Routinghdr",
       +        [FH]                = "Fraghdr",
       +        [IDRP]                = "IDRP",
       +        [RSVP]                = "RSVP",
       +        [AH]                = "Authhdr",
       +        [ESP]                = "ESP",
       +        [ICMPv6]        = "ICMPv6",
       +        [NNH]                = "Nonexthdr",
       +        [ISO_IP]        = "ISO_IP",
       +        [IGRP]                = "IGRP",
       +        [OSPF]                = "OSPF",
       +};
       +
       +/*
       + *  well known IPv6 addresses
       + *
       + *  Every table holds IPaddrlen bytes in the order written.
       + *  Each *preflen variable is a prefix length in bytes (not bits)
       + *  and equals the number of 0xff bytes in the matching *mask table.
       + */
       +/* :: */
       +uchar v6Unspecified[IPaddrlen] = {
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0
       +};
       +/* ::1 */
       +uchar v6loopback[IPaddrlen] = {
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0x01
       +};
       +
       +/* fe80:: with an 8 byte mask */
       +uchar v6linklocal[IPaddrlen] = {
       +        0xfe, 0x80, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0
       +};
       +uchar v6linklocalmask[IPaddrlen] = {
       +        0xff, 0xff, 0xff, 0xff,
       +        0xff, 0xff, 0xff, 0xff,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0
       +};
       +int v6llpreflen = 8;        /* link-local prefix length in bytes */
       +
       +/* ff00:: with a 1 byte mask */
       +uchar v6multicast[IPaddrlen] = {
       +        0xff, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0
       +};
       +uchar v6multicastmask[IPaddrlen] = {
       +        0xff, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0
       +};
       +int v6mcpreflen = 1;        /* multicast prefix length in bytes */
       +
       +/* ff01::1 */
       +uchar v6allnodesN[IPaddrlen] = {
       +        0xff, 0x01, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0x01
       +};
       +/* ff01::2 */
       +uchar v6allroutersN[IPaddrlen] = {
       +        0xff, 0x01, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0x02
       +};
       +uchar v6allnodesNmask[IPaddrlen] = {
       +        0xff, 0xff, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0
       +};
       +int v6aNpreflen = 2;        /* all nodes (N) prefix, in bytes */
       +
       +/* ff02::1 */
       +uchar v6allnodesL[IPaddrlen] = {
       +        0xff, 0x02, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0x01
       +};
       +/* ff02::2 */
       +uchar v6allroutersL[IPaddrlen] = {
       +        0xff, 0x02, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0x02
       +};
       +uchar v6allnodesLmask[IPaddrlen] = {
       +        0xff, 0xff, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0
       +};
       +int v6aLpreflen = 2;        /* all nodes (L) prefix, in bytes */
       +
       +/* ff02::1:ff00:0 with a 13 byte mask; the last 3 bytes are left open */
       +uchar v6solicitednode[IPaddrlen] = {
       +        0xff, 0x02, 0, 0,
       +        0, 0, 0, 0,
       +        0, 0, 0, 0x01,
       +        0xff, 0, 0, 0
       +};
       +uchar v6solicitednodemask[IPaddrlen] = {
       +        0xff, 0xff, 0xff, 0xff,
       +        0xff, 0xff, 0xff, 0xff,
       +        0xff, 0xff, 0xff, 0xff,
       +        0xff, 0x0, 0x0, 0x0
       +};
       +int v6snpreflen = 13;        /* solicited-node prefix length in bytes */
       +
       +/*
       + *  Checksum len bytes of the Block chain bp, starting offset bytes
       + *  into its data.  Per-block partial sums come from ptclbsum
       + *  (defined elsewhere; NOTE(review): presumably a 16-bit one's
       + *  complement sum - confirm).  The result is the complement of the
       + *  folded sum, masked to 16 bits.
       + *
       + *  Returns 0 if the chain ends before offset is reached.  If the
       + *  chain holds fewer than len bytes past offset, only the bytes
       + *  that exist are summed.
       + */
       +ushort
       +ptclcsum(Block *bp, int offset, int len)
       +{
       +        uchar *addr;
       +        ulong losum, hisum;
       +        ushort csum;
       +        int odd, blocklen, x;
       +
       +        /* Correct to front of data area */
       +        while(bp != nil && offset && offset >= BLEN(bp)) {
       +                offset -= BLEN(bp);
       +                bp = bp->next;
       +        }
       +        if(bp == nil)
       +                return 0;
       +
       +        addr = bp->rp + offset;
       +        blocklen = BLEN(bp) - offset;
       +
       +        /* fast path: everything left is in this one block */
       +        if(bp->next == nil) {
       +                if(blocklen < len)
       +                        len = blocklen;
       +                return ~ptclbsum(addr, len) & 0xffff;
       +        }
       +
       +        losum = 0;
       +        hisum = 0;
       +
       +        /*
       +         *  odd is the parity of the byte count consumed so far.  A piece
       +         *  that starts on an odd byte is collected in hisum and
       +         *  byte-swapped into losum after the loop.
       +         */
       +        odd = 0;
       +        while(len) {
       +                x = blocklen;
       +                if(len < x)
       +                        x = len;
       +
       +                csum = ptclbsum(addr, x);
       +                if(odd)
       +                        hisum += csum;
       +                else
       +                        losum += csum;
       +                odd = (odd+x) & 1;
       +                len -= x;
       +
       +                bp = bp->next;
       +                if(bp == nil)
       +                        break;        /* chain shorter than len: sum what we have */
       +                blocklen = BLEN(bp);
       +                addr = bp->rp;
       +        }
       +
       +        /* swap the two bytes of hisum while adding it in */
       +        losum += hisum>>8;
       +        losum += (hisum&0xff)<<8;
       +        /* fold carries above bit 15 back into the low 16 bits */
       +        while((csum = losum>>16) != 0)
       +                losum = csum + (losum & 0xffff);
       +
       +        return ~losum & 0xffff;
       +}
       +
       +enum
       +{
       +        Isprefix= 16,
       +};
       +
       +#define CLASS(p) ((*(uchar*)(p))>>6)
       +
       +/*
       + *  Build in smcast the solicited-node multicast address for a:
       + *  the v6solicitednode template with its last three bytes taken from a.
       + */
       +void
       +ipv62smcast(uchar *smcast, uchar *a)
       +{
       +        int i;
       +
       +        assert(IPaddrlen == 16);
       +        memmove(smcast, v6solicitednode, IPaddrlen);
       +        /* bytes 13..15 are copied one by one, in ascending order */
       +        for(i = 13; i < 16; i++)
       +                smcast[i] = a[i];
       +}
       +
       +
       +/*
       + *  parse a hex mac address
       + *
       + *  from is a string of two-digit hex bytes, each optionally followed
       + *  by ':'.  to is zeroed for len bytes and then filled from the
       + *  front.  Parsing stops after len bytes or when fewer than two
       + *  characters remain.  Returns the number of bytes stored.
       + */
       +int
       +parsemac(uchar *to, char *from, int len)
       +{
       +        char hex[4];
       +        char *s;
       +        int n;
       +
       +        memset(to, 0, len);
       +        s = from;
       +        n = 0;
       +        while(n < len && s[0] != '\0' && s[1] != '\0'){
       +                /* take the next two characters as one hex byte */
       +                hex[0] = *s++;
       +                hex[1] = *s++;
       +                hex[2] = '\0';
       +                to[n++] = strtoul(hex, 0, 16);
       +
       +                if(*s == ':')
       +                        s++;
       +        }
       +        return n;
       +}
       +
       +/*
       + *  hashing tcp, udp, ... connections
       + *
       + *  Mixes the last byte of each address with both ports and reduces
       + *  the result to a bucket index for Ipht.tab.
       + *
       + *  Operands are widened to ulong before shifting: a uchar or ushort
       + *  is promoted to signed int, and shifting one into the sign bit is
       + *  undefined.  The modulus is Nipht, the declared size of Ipht.tab,
       + *  so the index is in range by construction and every bucket is used.
       + */
       +ulong
       +iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
       +{
       +        ulong h;
       +
       +        h = (ulong)sa[IPaddrlen-1]<<24;
       +        h ^= (ulong)sp<<16;
       +        h ^= (ulong)da[IPaddrlen-1]<<8;
       +        h ^= (ulong)dp;
       +        return h % Nipht;
       +}
       +
       +/*
       + *  Enter conversation c into ht.  The entry is classified by which
       + *  parts of the conversation are set:
       + *        remote address set              IPmatchexact
       + *        local address and port set      IPmatchpa
       + *        only local address set          IPmatchaddr
       + *        only local port set             IPmatchport
       + *        nothing set                     IPmatchany
       + *  The new entry goes at the head of its bucket chain.
       + */
       +void
       +iphtadd(Ipht *ht, Conv *c)
       +{
       +        Iphash *ent;
       +        ulong slot;
       +        int anyaddr, anyport;
       +
       +        slot = iphash(c->raddr, c->rport, c->laddr, c->lport);
       +        ent = smalloc(sizeof(*ent));
       +
       +        if(ipcmp(c->raddr, IPnoaddr) != 0)
       +                ent->match = IPmatchexact;
       +        else{
       +                anyaddr = ipcmp(c->laddr, IPnoaddr) == 0;
       +                anyport = c->lport == 0;
       +                if(anyaddr)
       +                        ent->match = anyport ? IPmatchany : IPmatchport;
       +                else
       +                        ent->match = anyport ? IPmatchaddr : IPmatchpa;
       +        }
       +        ent->c = c;
       +
       +        LOCK(ht);
       +        ent->next = ht->tab[slot];
       +        ht->tab[slot] = ent;
       +        UNLOCK(ht);
       +}
       +
       +/*
       + *  Remove the first entry for conversation c from its bucket in ht
       + *  and free it.  Does nothing if c is not in that bucket.
       + */
       +void
       +iphtrem(Ipht *ht, Conv *c)
       +{
       +        ulong slot;
       +        Iphash *cur, *prev;
       +
       +        slot = iphash(c->raddr, c->rport, c->laddr, c->lport);
       +        LOCK(ht);
       +        prev = nil;
       +        for(cur = ht->tab[slot]; cur != nil; prev = cur, cur = cur->next){
       +                if(cur->c != c)
       +                        continue;
       +                /* unlink from the bucket head or from the predecessor */
       +                if(prev == nil)
       +                        ht->tab[slot] = cur->next;
       +                else
       +                        prev->next = cur->next;
       +                free(cur);
       +                break;
       +        }
       +        UNLOCK(ht);
       +}
       +
       +/*
       + *  Find the conversation for a packet from sa!sp to da!dp.
       + *  Candidates are tried in this order and the first hit wins:
       + *        connected && raddr,rport,laddr,lport
       + *        announced && laddr,lport
       + *        announced && *,lport
       + *        announced && laddr,*
       + *        announced && *,*
       + *  Returns nil if nothing matches.  LOCK(ht) is held for the whole
       + *  search and released on the single exit path.
       + */
       +Conv*
       +iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp)
       +{
       +        ulong slot;
       +        Iphash *e;
       +        Conv *found;
       +
       +        found = nil;
       +
       +        /* exact 4 pair match (connection) */
       +        slot = iphash(sa, sp, da, dp);
       +        LOCK(ht);
       +        for(e = ht->tab[slot]; e != nil; e = e->next)
       +                if(e->match == IPmatchexact
       +                && sp == e->c->rport && dp == e->c->lport
       +                && ipcmp(sa, e->c->raddr) == 0 && ipcmp(da, e->c->laddr) == 0){
       +                        found = e->c;
       +                        goto out;
       +                }
       +
       +        /* match local address and port */
       +        slot = iphash(IPnoaddr, 0, da, dp);
       +        for(e = ht->tab[slot]; e != nil; e = e->next)
       +                if(e->match == IPmatchpa
       +                && dp == e->c->lport && ipcmp(da, e->c->laddr) == 0){
       +                        found = e->c;
       +                        goto out;
       +                }
       +
       +        /* match just port */
       +        slot = iphash(IPnoaddr, 0, IPnoaddr, dp);
       +        for(e = ht->tab[slot]; e != nil; e = e->next)
       +                if(e->match == IPmatchport && dp == e->c->lport){
       +                        found = e->c;
       +                        goto out;
       +                }
       +
       +        /* match local address */
       +        slot = iphash(IPnoaddr, 0, da, 0);
       +        for(e = ht->tab[slot]; e != nil; e = e->next)
       +                if(e->match == IPmatchaddr && ipcmp(da, e->c->laddr) == 0){
       +                        found = e->c;
       +                        goto out;
       +                }
       +
       +        /* look for something that matches anything */
       +        slot = iphash(IPnoaddr, 0, IPnoaddr, 0);
       +        for(e = ht->tab[slot]; e != nil; e = e->next)
       +                if(e->match == IPmatchany){
       +                        found = e->c;
       +                        goto out;
       +                }
       +
       +out:
       +        UNLOCK(ht);
       +        return found;
       +}
 (DIR) diff --git a/src/9vx/a/ip/ipifc.c b/src/9vx/a/ip/ipifc.c
       @@ -0,0 +1,1654 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +#include "ipv6.h"
       +
       +#define DPRINT if(0)print
       +
       +enum {
       +        Maxmedia        = 32,        /* slots in media[]; also ipifc->nc in ipifcinit */
       +        Nself                = Maxmedia*5,
       +        NHASH                = 1<<6,        /* chains in Ipselftab.hash; modulus used by hashipa */
       +        NCACHE                = 256,
       +        QMAX                = 64*1024-1,        /* first argument to qopen in ipifccreate (sq gets 2*QMAX) */
       +};
       +
       +Medium *media[Maxmedia] = { 0 };        /* nil-terminated: addipmedium never fills the last slot */
       +
       +/*
       + *  cache of local addresses (addresses we answer to)
       + *
       + *  NOTE(review): addselfcache walks and extends the hash chains
       + *  through `next', not `hnext', which disagrees with the field
       + *  comments below -- confirm which field is meant for which list.
       + */
       +struct Ipself
       +{
       +        uchar        a[IPaddrlen];
       +        Ipself        *hnext;                /* next address in the hash table */
       +        Iplink        *link;                /* binding twixt Ipself and Ipifc */
       +        ulong        expire;
       +        uchar        type;                /* type of address */
       +        int        ref;
       +        Ipself        *next;                /* free list */
       +};
       +
       +struct Ipselftab        /* table of Ipself entries; one is allocated as f->self in ipifcinit */
       +{
       +        QLock        qlock;
       +        int        inited;
       +        int        acceptall;        /* true if an interface has the null address */
       +        Ipself        *hash[NHASH];        /* hash chains, indexed by hashipa(address) */
       +};
       +
       +/*
       + *  Multicast addresses are chained onto a Chan so that
       + *  we can remove them when the Chan is closed.
       + *  Each entry pairs a multicast address with an interface address.
       + */
       +typedef struct Ipmcast Ipmcast;
       +struct Ipmcast
       +{
       +        Ipmcast        *next;                /* next entry in the chain */
       +        uchar        ma[IPaddrlen];        /* multicast address */
       +        uchar        ia[IPaddrlen];        /* interface address */
       +};
       +
       +/* quick hash for ip addresses: the last two bytes of a, as a 16-bit value, modulo NHASH */
       +#define hashipa(a) ( (ulong)(((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1])%NHASH )
       +
       +static char tifc[] = "ifc ";        /* tag string passed to v4addroute/v6addroute by ipifcadd */
       +
       +static void        addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
       +static void        remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
       +static char*        ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
       +static char*        ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
       +static void        ipifcregisterproxy(Fs*, Ipifc*, uchar*);
       +static char*        ipifcremlifc(Ipifc*, Iplifc*);
       +
       +/*
       + *  link in a new medium
       + *
       + *  stores med in the first free (nil) slot of media[].  the last
       + *  slot is never considered, so the table always ends in nil, which
       + *  is the terminator ipfindmedium scans for.  if no slot is free the
       + *  medium is silently not registered.
       + */
       +void
       +addipmedium(Medium *med)
       +{
       +        int i;
       +
       +        for(i = 0; i < nelem(media)-1; i++)
       +                if(media[i] == nil){
       +                        media[i] = med;
       +                        break;
       +                }
       +}
       +
       +/*
       + *  find the medium with this name
       + *
       + *  scans media[] up to its first nil entry and compares each
       + *  medium's name with strcmp.  returns the matching Medium, or nil
       + *  (the terminating entry itself) when no name matches.
       + */
       +Medium*
       +ipfindmedium(char *name)
       +{
       +        Medium **mp;
       +
       +        for(mp = media; *mp != nil; mp++)
       +                if(strcmp((*mp)->name, name) == 0)
       +                        break;
       +        return *mp;
       +}
       +
       +/*
       + *  attach a device (or pkt driver) to the interface.
       + *  called with c locked
       + *
       + *  argv[1] names the medium; argv[2], if present, is the device
       + *  name (otherwise the name is the medium name followed by c->x).
       + *  returns nil on success or an error string.  an error raised by
       + *  the medium's bind routine is passed on with nexterror after the
       + *  write lock has been released.
       + */
       +static char*
       +ipifcbind(Conv *c, char **argv, int argc)
       +{
       +        Ipifc *ifc;
       +        Medium *m;
       +
       +        if(argc < 2)
       +                return Ebadarg;
       +
       +        ifc = (Ipifc*)c->ptcl;
       +
       +        /* bind the device to the interface */
       +        m = ipfindmedium(argv[1]);
       +        if(m == nil)
       +                return "unknown interface type";
       +
       +        WLOCK(ifc);
       +        if(ifc->m != nil){
       +                WUNLOCK(ifc);
       +                return "interface already bound";
       +        }
       +        if(waserror()){
       +                WUNLOCK(ifc);
       +                nexterror();
       +        }
       +
       +        /* do medium specific binding */
       +        (*m->bind)(ifc, argc, argv);
       +
       +        /* set the bound device name */
       +        if(argc > 2)
       +                strncpy(ifc->dev, argv[2], sizeof(ifc->dev));
       +        else
       +                snprint(ifc->dev, sizeof ifc->dev, "%s%d", m->name, c->x);
       +        ifc->dev[sizeof(ifc->dev)-1] = 0;        /* strncpy may leave dev unterminated */
       +
       +        /* set up parameters */
       +        ifc->m = m;
       +        ifc->mintu = ifc->m->mintu;
       +        ifc->maxtu = ifc->m->maxtu;
       +        if(ifc->m->unbindonclose == 0)
       +                ifc->conv->inuse++;        /* undone in ipifcunbind */
       +        ifc->rp.mflag = 0;                /* default not managed */
       +        ifc->rp.oflag = 0;
       +        ifc->rp.maxraint = 600000;        /* millisecs */
       +        ifc->rp.minraint = 200000;
       +        ifc->rp.linkmtu = 0;                /* no mtu sent */
       +        ifc->rp.reachtime = 0;
       +        ifc->rp.rxmitra = 0;
       +        ifc->rp.ttl = MAXTTL;
       +        ifc->rp.routerlt = 3 * ifc->rp.maxraint;
       +
       +        /* any ancillary structures (like routes) no longer pertain */
       +        ifc->ifcid++;
       +
       +        /* reopen all the queues closed by a previous unbind */
       +        qreopen(c->rq);
       +        qreopen(c->eq);        /* NOTE(review): ipifcunbind closes wq, not eq -- confirm which is intended */
       +        qreopen(c->sq);
       +
       +        WUNLOCK(ifc);
       +        poperror();
       +
       +        return nil;
       +}
       +
       +/*
       + *  detach a device from an interface, close the interface
       + *  called with ifc->conv closed
       + *
       + *  removes every logical interface, calls the medium's unbind
       + *  routine if it has one, clears the device name and closes the
       + *  rq, wq and sq queues.  always returns nil; a failure is raised
       + *  with error() and the write lock is released by the waserror
       + *  handler.
       + */
       +static char*
       +ipifcunbind(Ipifc *ifc)
       +{
       +        char *err;
       +
       +        if(waserror()){
       +                WUNLOCK(ifc);
       +                nexterror();
       +        }
       +        WLOCK(ifc);
       +
       +        /* drop the inuse count taken in ipifcbind, then dissociate routes */
       +        if(ifc->m != nil && ifc->m->unbindonclose == 0)
       +                ifc->conv->inuse--;
       +        ifc->ifcid++;
       +
       +        /* disassociate logical interfaces (before zeroing ifc->arg) */
       +        while(ifc->lifc){
       +                err = ipifcremlifc(ifc, ifc->lifc);
       +                /*
       +                 * note: err non-zero means lifc not found,
       +                 * which can't happen in this case.
       +                 */
       +                if(err)
       +                        error(err);
       +        }
       +
       +        /* disassociate device */
       +        if(ifc->m && ifc->m->unbind)
       +                (*ifc->m->unbind)(ifc);
       +        memset(ifc->dev, 0, sizeof(ifc->dev));
       +        ifc->arg = nil;
       +        ifc->reassemble = 0;
       +
       +        /* close queues to stop queuing of packets */
       +        qclose(ifc->conv->rq);
       +        qclose(ifc->conv->wq);
       +        qclose(ifc->conv->sq);
       +
       +        ifc->m = nil;        /* ipifcinuse and ipifcbind treat nil as unbound */
       +        WUNLOCK(ifc);
       +        poperror();
       +        return nil;
       +}
       +
       +char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag"
       +" %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt"
       +" %d pktin %lud pktout %lud errin %lud errout %lud\n";        /* first line written by ipifcstate */
       +
       +char slineformat[] = "        %-40I %-10M %-40I %-12lud %-12lud\n";        /* ipifcstate: local, mask, remote, validlt, preflt */
       +
       +static int
       +ipifcstate(Conv *c, char *state, int n)        /* installed as the Proto state routine in ipifcinit */
       +{
       +        Ipifc *ifc;
       +        Iplifc *lifc;
       +        int m;
       +
       +        ifc = (Ipifc*)c->ptcl;
       +        m = snprint(state, n, sfixedformat,        /* these fields are read before RLOCK is taken */
       +                ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
       +                ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
       +                ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
       +                ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
       +                ifc->in, ifc->out, ifc->inerr, ifc->outerr);
       +
       +        RLOCK(ifc);
       +        for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)        /* one slineformat line per logical interface */
       +                m += snprint(state+m, n - m, slineformat, lifc->local,
       +                        lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
       +        if(ifc->lifc == nil)        /* no logical interfaces: append a lone newline */
       +                m += snprint(state+m, n - m, "\n");
       +        RUNLOCK(ifc);
       +        return m;        /* sum of the snprint results */
       +}
       +
       +static int
       +ipifclocal(Conv *c, char *state, int n)        /* installed as the Proto local routine in ipifcinit */
       +{
       +        Ipifc *ifc;
       +        Iplifc *lifc;
       +        Iplink *link;
       +        int m;
       +
       +        ifc = (Ipifc*)c->ptcl;
       +        m = 0;
       +
       +        RLOCK(ifc);
       +        for(lifc = ifc->lifc; lifc; lifc = lifc->next){        /* one output line per logical interface */
       +                m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
       +                for(link = lifc->link; link; link = link->lifclink)        /* every self address linked to this lifc */
       +                        m += snprint(state+m, n - m, " %-40.40I", link->self->a);
       +                m += snprint(state+m, n - m, "\n");
       +        }
       +        RUNLOCK(ifc);
       +        return m;        /* sum of the snprint results */
       +}
       +
       +static int
       +ipifcinuse(Conv *c)        /* installed as the Proto inuse routine in ipifcinit */
       +{
       +        Ipifc *ifc;
       +
       +        ifc = (Ipifc*)c->ptcl;
       +        return ifc->m != nil;        /* non-zero exactly when a medium is bound */
       +}
       +
       +/*
       + *  called when a process writes to an interface's 'data'
       + *
       + *  takes one block from c->wq and hands it to the bound medium's
       + *  pktin routine.  the block is freed instead when CANRLOCK fails
       + *  or when no medium (or no pktin routine) is present.  an error
       + *  raised by pktin is passed on after the read lock is released.
       + */
       +static void
       +ipifckick(void *x)
       +{
       +        Conv *c = x;
       +        Block *bp;
       +        Ipifc *ifc;
       +
       +        bp = qget(c->wq);
       +        if(bp == nil)
       +                return;
       +
       +        ifc = (Ipifc*)c->ptcl;
       +        if(!CANRLOCK(ifc)){
       +                freeb(bp);
       +                return;
       +        }
       +        if(waserror()){
       +                RUNLOCK(ifc);
       +                nexterror();
       +        }
       +        if(ifc->m == nil || ifc->m->pktin == nil)
       +                freeb(bp);
       +        else
       +                (*ifc->m->pktin)(c->p->f, ifc, bp);
       +        RUNLOCK(ifc);
       +        poperror();
       +}
       +
       +/*
       + *  called when a new ipifc structure is created
       + *
       + *  opens the rq, sq and wq queues (wq is opened with Qkick and
       + *  ipifckick as its kick routine), points the Ipifc in c->ptcl back
       + *  at c and leaves it with no medium bound.
       + */
       +static void
       +ipifccreate(Conv *c)
       +{
       +        Ipifc *ifc;
       +
       +        c->rq = qopen(QMAX, 0, 0, 0);
       +        c->sq = qopen(2*QMAX, 0, 0, 0);
       +        c->wq = qopen(QMAX, Qkick, ipifckick, c);
       +        ifc = (Ipifc*)c->ptcl;
       +        ifc->conv = c;
       +        ifc->unbinding = 0;
       +        ifc->m = nil;
       +        ifc->reassemble = 0;
       +}
       +
       +/*
       + *  called after last close of ipifc data or ctl
       + *  called with c locked, we must unlock
       + *  NOTE(review): nothing in this function unlocks c; the line
       + *  above may be stale -- confirm against the caller.
       + *
       + *  unbinds the interface only if a medium is bound and that medium
       + *  sets unbindonclose; the result of ipifcunbind is discarded.
       + */
       +static void
       +ipifcclose(Conv *c)
       +{
       +        Ipifc *ifc;
       +        Medium *m;
       +
       +        ifc = (Ipifc*)c->ptcl;
       +        m = ifc->m;
       +        if(m && m->unbindonclose)
       +                ipifcunbind(ifc);
       +}
       +
       +/*
       + *  change an interface's mtu
       + *
       + *  argv[1] is the new value, parsed with strtoul (base chosen by
       + *  prefix).  returns Ebadarg if it is missing, if no medium is
       + *  bound, or if it lies outside the medium's mintu..maxtu range;
       + *  otherwise stores it in ifc->maxtu and returns nil.
       + */
       +char*
       +ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
       +{
       +        int mtu;
       +
       +        if(argc < 2 || ifc->m == nil)
       +                return Ebadarg;
       +        mtu = strtoul(argv[1], 0, 0);
       +        if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
       +                return Ebadarg;
       +        ifc->maxtu = mtu;
       +        return nil;
       +}
       +
       +/*
       + *  add an address to an interface.
       + *
       + *  argv[1] is the address; the optional arguments are argv[2] mask,
       + *  argv[3] remote address, argv[4] mtu and argv[5] "proxy".  with no
       + *  mask, defmask(ip) is used; with no remote address, the masked
       + *  local address is used.  tentative is forced to 0 for v4
       + *  addresses.  lifcp, if non-nil, supplies the onlink, autoflag,
       + *  validlt, preflt and origint values for the logical interface.
       + *  if the address is already on the interface, only those fields
       + *  and the tentative flag are refreshed.
       + *  returns nil on success or an error string.
       + */
       +char*
       +ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp)
       +{
       +        int i, type, mtu, sendnbrdisc = 0;
       +        uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
       +        uchar bcast[IPaddrlen], net[IPaddrlen];
       +        Iplifc *lifc, **l;
       +        Fs *f;
       +
       +        if(ifc->m == nil)
       +                return "ipifc not yet bound to device";
       +
       +        f = ifc->conv->p->f;
       +
       +        type = Rifc;
       +        memset(ip, 0, IPaddrlen);
       +        memset(mask, 0, IPaddrlen);
       +        memset(rem, 0, IPaddrlen);
       +        switch(argc){
       +        case 6:
       +                if(strcmp(argv[5], "proxy") == 0)
       +                        type |= Rproxy;
       +                /* fall through */
       +        case 5:
       +                mtu = strtoul(argv[4], 0, 0);
       +                if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)        /* an out-of-range mtu is ignored, not an error */
       +                        ifc->maxtu = mtu;
       +                /* fall through */
       +        case 4:
       +                if (parseip(ip, argv[1]) == -1 || parseip(rem, argv[3]) == -1)
       +                        return Ebadip;
       +                parseipmask(mask, argv[2]);
       +                maskip(rem, mask, net);
       +                break;
       +        case 3:
       +                if (parseip(ip, argv[1]) == -1)
       +                        return Ebadip;
       +                parseipmask(mask, argv[2]);
       +                maskip(ip, mask, rem);
       +                maskip(rem, mask, net);
       +                break;
       +        case 2:
       +                if (parseip(ip, argv[1]) == -1)
       +                        return Ebadip;
       +                memmove(mask, defmask(ip), IPaddrlen);
       +                maskip(ip, mask, rem);
       +                maskip(rem, mask, net);
       +                break;
       +        default:
       +                return Ebadarg;
       +        }
       +        if(isv4(ip))
       +                tentative = 0;
       +        WLOCK(ifc);
       +
       +        /* ignore if this is already a local address for this ifc */
       +        for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
       +                if(ipcmp(lifc->local, ip) == 0) {
       +                        if(lifc->tentative != tentative)
       +                                lifc->tentative = tentative;
       +                        if(lifcp) {
       +                                lifc->onlink = lifcp->onlink;
       +                                lifc->autoflag = lifcp->autoflag;
       +                                lifc->validlt = lifcp->validlt;
       +                                lifc->preflt = lifcp->preflt;
       +                                lifc->origint = lifcp->origint;
       +                        }
       +                        goto out;
       +                }
       +        }
       +
       +        /* add the address to the list of logical ifc's for this ifc */
       +        lifc = smalloc(sizeof(Iplifc));
       +        ipmove(lifc->local, ip);
       +        ipmove(lifc->mask, mask);
       +        ipmove(lifc->remote, rem);
       +        ipmove(lifc->net, net);
       +        lifc->tentative = tentative;
       +        if(lifcp) {
       +                lifc->onlink = lifcp->onlink;
       +                lifc->autoflag = lifcp->autoflag;
       +                lifc->validlt = lifcp->validlt;
       +                lifc->preflt = lifcp->preflt;
       +                lifc->origint = lifcp->origint;
       +        } else {                /* default values */
       +                lifc->onlink = lifc->autoflag = 1;
       +                lifc->validlt = lifc->preflt = ~0L;
       +                lifc->origint = NOW / 1000;
       +        }
       +        lifc->next = nil;
       +
       +        for(l = &ifc->lifc; *l; l = &(*l)->next)        /* find the tail so the new lifc is appended */
       +                ;
       +        *l = lifc;
       +
       +        /* check for point-to-point interface */
       +        if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
       +        if(ipcmp(mask, IPallbits) == 0)
       +                type |= Rptpt;
       +
       +        /* add local routes */
       +        if(isv4(ip))
       +                v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
       +        else
       +                v6addroute(f, tifc, rem, mask, rem, type);
       +
       +        addselfcache(f, ifc, lifc, ip, Runi);
       +
       +        if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){        /* proxy on a point-to-point link: no broadcast or multicast entries */
       +                ipifcregisterproxy(f, ifc, rem);
       +                goto out;
       +        }
       +
       +        if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
       +                /* add subnet directed broadcast address to the self cache */
       +                for(i = 0; i < IPaddrlen; i++)
       +                        bcast[i] = (ip[i] & mask[i]) | ~mask[i];
       +                addselfcache(f, ifc, lifc, bcast, Rbcast);
       +
       +                /* add subnet directed network address to the self cache */
       +                for(i = 0; i < IPaddrlen; i++)
       +                        bcast[i] = (ip[i] & mask[i]) & mask[i];
       +                addselfcache(f, ifc, lifc, bcast, Rbcast);
       +
       +                /* add network directed broadcast address to the self cache */
       +                memmove(mask, defmask(ip), IPaddrlen);        /* local mask now holds defmask(ip); lifc->mask was copied earlier */
       +                for(i = 0; i < IPaddrlen; i++)
       +                        bcast[i] = (ip[i] & mask[i]) | ~mask[i];
       +                addselfcache(f, ifc, lifc, bcast, Rbcast);
       +
       +                /* add network directed network address to the self cache */
       +                memmove(mask, defmask(ip), IPaddrlen);        /* repeats the assignment made just above */
       +                for(i = 0; i < IPaddrlen; i++)
       +                        bcast[i] = (ip[i] & mask[i]) & mask[i];
       +                addselfcache(f, ifc, lifc, bcast, Rbcast);
       +
       +                addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
       +        }
       +        else {
       +                if(ipcmp(ip, v6loopback) == 0) {
       +                        /* add node-local mcast address */
       +                        addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
       +
       +                        /* add route for all node multicast */
       +                        v6addroute(f, tifc, v6allnodesN, v6allnodesNmask,
       +                                v6allnodesN, Rmulti);
       +                }
       +
       +                /* add all nodes multicast address */
       +                addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
       +
       +                /* add route for all nodes multicast */
       +                v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL,
       +                        Rmulti);
       +
       +                /* add solicited-node multicast address */
       +                ipv62smcast(bcast, ip);
       +                addselfcache(f, ifc, lifc, bcast, Rmulti);
       +
       +                sendnbrdisc = 1;
       +        }
       +
       +        /* register the address on this network for address resolution */
       +        if(isv4(ip) && ifc->m->areg != nil)
       +                (*ifc->m->areg)(ifc, ip);
       +
       +out:
       +        WUNLOCK(ifc);
       +        if(tentative && sendnbrdisc)        /* called only after the write lock is dropped */
       +                icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
       +        return nil;
       +}
       +
       +/*
       + *  remove a logical interface from an ifc
       + *  always called with ifc WLOCK'd
       + *
       + *  unlinks lifc from ifc->lifc, removes its self-cache links and
       + *  its routes, then frees it.  returns nil on success, or an error
       + *  string if lifc (which may be nil) is not on ifc's list.
       + */
       +static char*
       +ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
       +{
       +        Iplifc **l;
       +        Fs *f;
       +
       +        f = ifc->conv->p->f;
       +
       +        /*
       +         *  find address on this interface and remove from chain.
       +         *  for pt to pt we actually specify the remote address as the
       +         *  address to remove.
       +         */
       +        for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
       +                ;
       +        if(*l == nil)
       +                return "address not on this interface";
       +        *l = lifc->next;
       +
       +        /* disassociate any addresses */
       +        while(lifc->link)
       +                remselfcache(f, ifc, lifc, lifc->link->self->a);
       +
       +        /* remove the route for this logical interface */
       +        if(isv4(lifc->local))
       +                v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
       +        else {
       +                v6delroute(f, lifc->remote, lifc->mask, 1);
       +                if(ipcmp(lifc->local, v6loopback) == 0)
       +                        /* remove route for all node multicast */
       +                        v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
       +                else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
       +                        /* remove route for all link multicast */
       +                        v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
       +        }
       +
       +        free(lifc);
       +        return nil;
       +}
       +
       +/*
       + *  remove an address from an interface.
       + *  called with c->car locked
       + *
       + *  argv[1] is the address, argv[2] the mask and the optional
       + *  argv[3] the remote address (default: the masked address).  all
       + *  three must equal those of a logical interface for it to be
       + *  removed.  returns nil on success or an error string.
       + */
       +char*
       +ipifcrem(Ipifc *ifc, char **argv, int argc)
       +{
       +        char *rv;
       +        uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
       +        Iplifc *lifc;
       +
       +        if(argc < 3)
       +                return Ebadarg;
       +
       +        if (parseip(ip, argv[1]) == -1)
       +                return Ebadip;
       +        parseipmask(mask, argv[2]);
       +        if(argc < 4)
       +                maskip(ip, mask, rem);
       +        else
       +                if (parseip(rem, argv[3]) == -1)
       +                        return Ebadip;
       +
       +        WLOCK(ifc);
       +
       +        /*
       +         *  find address on this interface and remove from chain.
       +         *  for pt to pt we actually specify the remote address as the
       +         *  address to remove.
       +         */
       +        for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
       +                if (memcmp(ip, lifc->local, IPaddrlen) == 0
       +                && memcmp(mask, lifc->mask, IPaddrlen) == 0
       +                && memcmp(rem, lifc->remote, IPaddrlen) == 0)
       +                        break;
       +        }
       +
       +        rv = ipifcremlifc(ifc, lifc);        /* lifc is nil if nothing matched; ipifcremlifc then returns the error */
       +        WUNLOCK(ifc);
       +        return rv;
       +}
       +
       +/*
       + * distribute routes to active interfaces like the
       + * TRIP linecards
       + *
       + * walks all f->ipifc->nc conversation slots and, for each non-nil
       + * conversation whose bound medium has an addroute routine, calls
       + * it with the given route.  no interface lock is taken here.
       + */
       +void
       +ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
       +{
       +        Medium *m;
       +        Conv **cp, **e;
       +        Ipifc *ifc;
       +
       +        e = &f->ipifc->conv[f->ipifc->nc];
       +        for(cp = f->ipifc->conv; cp < e; cp++){
       +                if(*cp != nil) {
       +                        ifc = (Ipifc*)(*cp)->ptcl;
       +                        m = ifc->m;
       +                        if(m && m->addroute)
       +                                m->addroute(ifc, vers, addr, mask, gate, type);
       +                }
       +        }
       +}
       +
       +void
       +ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)        /* counterpart of ipifcaddroute for the remroute routine */
       +{
       +        Medium *m;
       +        Conv **cp, **e;
       +        Ipifc *ifc;
       +
       +        e = &f->ipifc->conv[f->ipifc->nc];        /* one past the last conversation slot */
       +        for(cp = f->ipifc->conv; cp < e; cp++){
       +                if(*cp != nil) {
       +                        ifc = (Ipifc*)(*cp)->ptcl;
       +                        m = ifc->m;
       +                        if(m && m->remroute)        /* skip unbound interfaces and media without the routine */
       +                                m->remroute(ifc, vers, addr, mask);
       +                }
       +        }
       +}
       +
       +/*
       + *  associate an address with the interface.  This wipes out any previous
       + *  addresses.  It is shorthand for: remove all the old logical
       + *  interfaces, add a new one with ipifcadd (not tentative), then
       + *  call Fsconnected on the conversation.
       + *  returns an error string if no medium is bound or if ipifcadd
       + *  fails, nil otherwise.
       + */
       +static char*
       +ipifcconnect(Conv* c, char **argv, int argc)
       +{
       +        char *err;
       +        Ipifc *ifc;
       +
       +        ifc = (Ipifc*)c->ptcl;
       +
       +        if(ifc->m == nil)
       +                 return "ipifc not yet bound to device";
       +
       +        if(waserror()){
       +                WUNLOCK(ifc);
       +                nexterror();
       +        }
       +        WLOCK(ifc);
       +        while(ifc->lifc){
       +                err = ipifcremlifc(ifc, ifc->lifc);
       +                if(err)
       +                        error(err);
       +        }
       +        WUNLOCK(ifc);        /* released here; ipifcadd takes the write lock itself */
       +        poperror();
       +
       +        err = ipifcadd(ifc, argv, argc, 0, nil);
       +        if(err)
       +                return err;
       +
       +        Fsconnected(c, nil);
       +        return nil;
       +}
       +
       +char*
       +ipifcra6(Ipifc *ifc, char **argv, int argc)        /* set ra6 parameters from keyword/value pairs in argv[1..] */
       +{
       +        int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;        /* saved for the rollback below */
       +
       +        argsleft = argc - 1;
       +        i = 1;
       +
       +        if(argsleft % 2 != 0)        /* every keyword needs a value */
       +                return Ebadarg;
       +
       +        while (argsleft > 1) {
       +                if(strcmp(argv[i], "recvra") == 0)
       +                        ifc->recvra6 = (atoi(argv[i+1]) != 0);
       +                else if(strcmp(argv[i], "sendra") == 0)
       +                        ifc->sendra6 = (atoi(argv[i+1]) != 0);
       +                else if(strcmp(argv[i], "mflag") == 0)
       +                        ifc->rp.mflag = (atoi(argv[i+1]) != 0);
       +                else if(strcmp(argv[i], "oflag") == 0)
       +                        ifc->rp.oflag = (atoi(argv[i+1]) != 0);
       +                else if(strcmp(argv[i], "maxraint") == 0)
       +                        ifc->rp.maxraint = atoi(argv[i+1]);
       +                else if(strcmp(argv[i], "minraint") == 0)
       +                        ifc->rp.minraint = atoi(argv[i+1]);
       +                else if(strcmp(argv[i], "linkmtu") == 0)
       +                        ifc->rp.linkmtu = atoi(argv[i+1]);
       +                else if(strcmp(argv[i], "reachtime") == 0)
       +                        ifc->rp.reachtime = atoi(argv[i+1]);
       +                else if(strcmp(argv[i], "rxmitra") == 0)
       +                        ifc->rp.rxmitra = atoi(argv[i+1]);
       +                else if(strcmp(argv[i], "ttl") == 0)
       +                        ifc->rp.ttl = atoi(argv[i+1]);
       +                else if(strcmp(argv[i], "routerlt") == 0)
       +                        ifc->rp.routerlt = atoi(argv[i+1]);
       +                else
       +                        return Ebadarg;        /* unknown keyword; pairs handled before it stay applied */
       +
       +                argsleft -= 2;
       +                i += 2;
       +        }
       +
       +        /* consistency check */
       +        if(ifc->rp.maxraint < ifc->rp.minraint) {        /* only these two fields are rolled back */
       +                ifc->rp.maxraint = vmax;
       +                ifc->rp.minraint = vmin;
       +                return Ebadarg;
       +        }
       +        return nil;
       +}
       +
       +/*
       + *  non-standard control messages.
       + *  called with c->car locked.
       + *
       + *  dispatches on argv[0] to the matching handler and returns its
       + *  result: nil on success or an error string.  an unrecognised
       + *  request yields "unsupported ctl".
       + */
       +static char*
       +ipifcctl(Conv* c, char**argv, int argc)
       +{
       +        Ipifc *ifc;
       +        int i;
       +
       +        ifc = (Ipifc*)c->ptcl;
       +        if(strcmp(argv[0], "add") == 0)
       +                return ipifcadd(ifc, argv, argc, 0, nil);
       +        else if(strcmp(argv[0], "try") == 0)
       +                return ipifcadd(ifc, argv, argc, 1, nil);        /* same as add, but tentative */
       +        else if(strcmp(argv[0], "remove") == 0)
       +                return ipifcrem(ifc, argv, argc);
       +        else if(strcmp(argv[0], "unbind") == 0)
       +                return ipifcunbind(ifc);
       +        else if(strcmp(argv[0], "joinmulti") == 0)
       +                return ipifcjoinmulti(ifc, argv, argc);
       +        else if(strcmp(argv[0], "leavemulti") == 0)
       +                return ipifcleavemulti(ifc, argv, argc);
       +        else if(strcmp(argv[0], "mtu") == 0)
       +                return ipifcsetmtu(ifc, argv, argc);
       +        else if(strcmp(argv[0], "reassemble") == 0){
       +                ifc->reassemble = 1;
       +                return nil;
       +        }
       +        else if(strcmp(argv[0], "iprouting") == 0){
       +                i = 1;        /* value used when no argument is given */
       +                if(argc > 1)
       +                        i = atoi(argv[1]);
       +                iprouting(c->p->f, i);
       +                return nil;
       +        }
       +        else if(strcmp(argv[0], "add6") == 0)
       +                return ipifcadd6(ifc, argv, argc);
       +        else if(strcmp(argv[0], "ra6") == 0)
       +                return ipifcra6(ifc, argv, argc);
       +        return "unsupported ctl";
       +}
       +
       +int
       +ipifcstats(Proto *ipifc, char *buf, int len)        /* installed as the Proto stats routine in ipifcinit */
       +{
       +        return ipstats(ipifc->f, buf, len);        /* passes straight through to ipstats for this Proto's Fs */
       +}
       +
       +void
       +ipifcinit(Fs *f)        /* build the ipifc Proto, attach it and a self table to f, pass it to Fsproto */
       +{
       +        Proto *ipifc;
       +
       +        ipifc = smalloc(sizeof(Proto));
       +        ipifc->name = "ipifc";
       +        ipifc->connect = ipifcconnect;
       +        ipifc->announce = nil;
       +        ipifc->bind = ipifcbind;
       +        ipifc->state = ipifcstate;
       +        ipifc->create = ipifccreate;
       +        ipifc->close = ipifcclose;
       +        ipifc->rcv = nil;
       +        ipifc->ctl = ipifcctl;
       +        ipifc->advise = nil;
       +        ipifc->stats = ipifcstats;
       +        ipifc->inuse = ipifcinuse;
       +        ipifc->local = ipifclocal;
       +        ipifc->ipproto = -1;
       +        ipifc->nc = Maxmedia;        /* bound used by ipifcaddroute/ipifcremroute when walking conv[] */
       +        ipifc->ptclsize = sizeof(Ipifc);        /* c->ptcl is cast to Ipifc* throughout this file */
       +
       +        f->ipifc = ipifc;        /* hack for ipifcremroute, findipifc, ... */
       +        f->self = smalloc(sizeof(Ipselftab));        /* hack for ipforme */
       +
       +        Fsproto(f, ipifc);
       +}
       +
       +/*
       + *  add to self routing cache
       + *        called with c->car locked
       + *
       + *  Records address a as one of our own in f->self and ties it to
       + *  lifc through a reference-counted Iplink.  The first link for an
       + *  (address, lifc) pair also adds a host route for a and, for Rmulti
       + *  types, calls the medium's addmulti hook when it has one; later
       + *  calls for the same pair only increment the link's ref.  The type
       + *  is stored only when the address entry is first created.
       + *  f->self is locked for the whole operation.
       + */
       +static void
       +addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
       +{
       +        Ipself *p;
       +        Iplink *lp;
       +        int h;
       +
       +        QLOCK(f->self);
       +
       +        /* see if the address already exists */
       +        h = hashipa(a);
       +        for(p = f->self->hash[h]; p; p = p->next)
       +                if(memcmp(a, p->a, IPaddrlen) == 0)
       +                        break;
       +
       +        /* allocate a local address and add to hash chain */
       +        if(p == nil){
       +                p = smalloc(sizeof(*p));
       +                ipmove(p->a, a);
       +                p->type = type;
       +                p->next = f->self->hash[h];
       +                f->self->hash[h] = p;
       +
       +                /* if the null address, accept all packets */
       +                if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
       +                        f->self->acceptall = 1;
       +        }
       +
       +        /* look for a link for this lifc */
       +        for(lp = p->link; lp; lp = lp->selflink)
       +                if(lp->lifc == lifc)
       +                        break;
       +
       +        /* allocate a lifc-to-local link and link to both */
       +        if(lp == nil){
       +                lp = smalloc(sizeof(*lp));
       +                lp->ref = 1;
       +                lp->lifc = lifc;
       +                lp->self = p;
       +                lp->selflink = p->link;        /* push on the address's chain */
       +                p->link = lp;
       +                lp->lifclink = lifc->link;        /* push on the lifc's chain */
       +                lifc->link = lp;
       +
       +                /* add to routing table */
       +                if(isv4(a))
       +                        v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off,
       +                                a+IPv4off, type);
       +                else
       +                        v6addroute(f, tifc, a, IPallbits, a, type);
       +
       +                if((type & Rmulti) && ifc->m->addmulti != nil)
       +                        (*ifc->m->addmulti)(ifc, a, lifc->local);
       +        } else
       +                lp->ref++;
       +
       +        QUNLOCK(f->self);
       +}
       +
       +/*
       + *  These structures are unlinked from their chains while
       + *  other threads may be using them.  To avoid excessive locking,
       + *  just put them aside for a while before freeing them.
       + *        called with f->self locked
       + */
       +static Iplink *freeiplink;
       +static Ipself *freeipself;
       +
       +/*
       + *  Put p on the tail of the deferred-free list with a 5 second
       + *  grace period, freeing on the way any queued entries whose
       + *  grace period has already run out.
       + */
       +static void
       +iplinkfree(Iplink *p)
       +{
       +        Iplink **l, *np;
       +        ulong now = NOW;
       +
       +        l = &freeiplink;
       +        for(np = *l; np; np = *l){
       +                /*
       +                 * free only once the expiry time has been reached;
       +                 * entries still inside their grace period stay queued.
       +                 * the signed difference stays correct if NOW wraps.
       +                 */
       +                if((long)(now - np->expire) >= 0){
       +                        *l = np->next;
       +                        free(np);
       +                        continue;
       +                }
       +                l = &np->next;
       +        }
       +        p->expire = now + 5000;        /* give other threads 5 secs to get out */
       +        p->next = nil;
       +        *l = p;
       +}
       +
       +/*
       + *  Put p on the tail of the deferred-free list of Ipself entries
       + *  with a 5 second grace period, freeing on the way any queued
       + *  entries whose grace period has already run out.
       + */
       +static void
       +ipselffree(Ipself *p)
       +{
       +        Ipself **l, *np;
       +        ulong now = NOW;
       +
       +        l = &freeipself;
       +        for(np = *l; np; np = *l){
       +                /*
       +                 * free only once the expiry time has been reached;
       +                 * entries still inside their grace period stay queued.
       +                 * the signed difference stays correct if NOW wraps.
       +                 */
       +                if((long)(now - np->expire) >= 0){
       +                        *l = np->next;
       +                        free(np);
       +                        continue;
       +                }
       +                l = &np->next;
       +        }
       +        p->expire = now + 5000;        /* give other threads 5 secs to get out */
       +        p->next = nil;
       +        *l = p;
       +}
       +
       +/*
       + *  Decrement reference for this address on this link.
       + *  Unlink from selftab if this is the last ref.
       + *        called with c->car locked
       + *
       + *  Does nothing if a is not in the self table or lifc has no link
       + *  to it.  When the link's ref reaches zero the link is removed
       + *  from both the lifc's and the address's chains and handed to
       + *  iplinkfree; if that was the address's last link, its host route
       + *  is deleted and the entry is removed from the hash and handed to
       + *  ipselffree.  f->self is locked for the whole operation.
       + */
       +static void
       +remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
       +{
       +        Ipself *p, **l;
       +        Iplink *link, **l_self, **l_lifc;
       +
       +        QLOCK(f->self);
       +
       +        /* find the unique selftab entry */
       +        l = &f->self->hash[hashipa(a)];
       +        for(p = *l; p; p = *l){
       +                if(ipcmp(p->a, a) == 0)
       +                        break;
       +                l = &p->next;
       +        }
       +
       +        if(p == nil)
       +                goto out;
       +
       +        /*
       +         *  walk down links from an ifc looking for one
       +         *  that matches the selftab entry
       +         */
       +        l_lifc = &lifc->link;
       +        for(link = *l_lifc; link; link = *l_lifc){
       +                if(link->self == p)
       +                        break;
       +                l_lifc = &link->lifclink;
       +        }
       +
       +        if(link == nil)
       +                goto out;
       +
       +        /*
       +         *  walk down the links from the selftab looking for
       +         *  the one we just found
       +         */
       +        l_self = &p->link;
       +        for(link = *l_self; link; link = *l_self){
       +                if(link == *l_lifc)
       +                        break;
       +                l_self = &link->selflink;
       +        }
       +
       +        if(link == nil)
       +                panic("remselfcache");
       +
       +        if(--(link->ref) != 0)
       +                goto out;
       +
       +        if((p->type & Rmulti) && ifc->m->remmulti != nil)
       +                (*ifc->m->remmulti)(ifc, a, lifc->local);
       +
       +        /* ref == 0, remove from both chains and free the link */
       +        *l_lifc = link->lifclink;
       +        *l_self = link->selflink;
       +        iplinkfree(link);
       +
       +        if(p->link != nil)
       +                goto out;
       +
       +        /* remove from routing table */
       +        if(isv4(a))
       +                v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
       +        else
       +                v6delroute(f, a, IPallbits, 1);
       +
       +        /* no more links, remove from hash and free */
       +        *l = p->next;
       +        ipselffree(p);
       +
       +        /* if IPnoaddr, forget */
       +        if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
       +                f->self->acceptall = 0;
       +
       +out:
       +        QUNLOCK(f->self);
       +}
       +/*
       + *  Format the self-address table into cp (at most n bytes): one
       + *  stformat line per address giving the address, the number of
       + *  links hanging off it and its route type string.  Returns the
       + *  number of bytes left in cp for the caller.  f->self is locked
       + *  while the table is walked.
       + *
       + *  NOTE(review): offset is honoured by discarding whatever has been
       + *  formatted so far while off is still positive; this appears to
       + *  rely on offset falling on a line boundary -- confirm.
       + *  NOTE(review): Nstformat is not used in the code shown here and 41
       + *  does not obviously match the width stformat produces -- confirm.
       + */
       +static char *stformat = "%-44.44I %2.2d %4.4s\n";
       +enum
       +{
       +        Nstformat= 41,
       +};
       +
       +long
       +ipselftabread(Fs *f, char *cp, ulong offset, int n)
       +{
       +        int i, m, nifc, off;
       +        Ipself *p;
       +        Iplink *link;
       +        char state[8];
       +
       +        m = 0;
       +        off = offset;
       +        QLOCK(f->self);
       +        for(i = 0; i < NHASH && m < n; i++){
       +                for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
       +                        nifc = 0;        /* count the links on this address */
       +                        for(link = p->link; link; link = link->selflink)
       +                                nifc++;
       +                        routetype(p->type, state);
       +                        m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
       +                        if(off > 0){        /* still skipping: drop what was formatted */
       +                                off -= m;
       +                                m = 0;
       +                        }
       +                }
       +        }
       +        QUNLOCK(f->self);
       +        return m;
       +}
       +
       +/*
       + *  return the tentative flag of the lifc on the first link of
       + *  addr's self table entry, or 0 if addr is not in the table
       + */
       +int
       +iptentative(Fs *f, uchar *addr)
       +{
       +        Ipself *self;
       +
       +        for(self = f->self->hash[hashipa(addr)]; self != nil; self = self->next){
       +                if(ipcmp(addr, self->a) != 0)
       +                        continue;
       +                return self->link->lifc->tentative;
       +        }
       +        return 0;
       +}
       +
       +/*
       + *  look addr up in the self table.  returns
       + *        0        - no match
       + *        the type recorded for the address when it is one of ours
       + *        Runi        - when no entry matches but acceptall is set
       + */
       +int
       +ipforme(Fs *f, uchar *addr)
       +{
       +        Ipself *self;
       +
       +        self = f->self->hash[hashipa(addr)];
       +        while(self != nil){
       +                if(ipcmp(addr, self->a) == 0)
       +                        return self->type;
       +                self = self->next;
       +        }
       +
       +        /* hack to say accept anything */
       +        return f->self->acceptall ? Runi : 0;
       +}
       +
       +/*
       + *  return the ifc that has a network containing remote, choosing
       + *  the one whose mask compares greatest when several match.  With
       + *  no match, broadcast and multicast types get the first interface
       + *  that has any address; everything else gets nil.
       + */
       +Ipifc*
       +findipifc(Fs *f, uchar *remote, int type)
       +{
       +        Ipifc *ifc, *best;
       +        Iplifc *lifc;
       +        Conv **cp, **end;
       +        uchar net[IPaddrlen], bestmask[IPaddrlen];
       +
       +        best = nil;
       +        memset(bestmask, 0, IPaddrlen);
       +        end = &f->ipifc->conv[f->ipifc->nc];
       +
       +        /* pass 1: most specific network containing remote */
       +        for(cp = f->ipifc->conv; cp < end; cp++){
       +                if(*cp == 0)
       +                        continue;
       +                ifc = (Ipifc*)(*cp)->ptcl;
       +                for(lifc = ifc->lifc; lifc; lifc = lifc->next){
       +                        maskip(remote, lifc->mask, net);
       +                        if(ipcmp(net, lifc->net) != 0)
       +                                continue;
       +                        if(best != nil && ipcmp(lifc->mask, bestmask) <= 0)
       +                                continue;
       +                        best = ifc;
       +                        ipmove(bestmask, lifc->mask);
       +                }
       +        }
       +        if(best != nil)
       +                return best;
       +
       +        /* pass 2: for now for broadcast and multicast, just use first interface */
       +        if((type & (Rbcast|Rmulti)) == 0)
       +                return nil;
       +        for(cp = f->ipifc->conv; cp < end; cp++){
       +                if(*cp == 0)
       +                        continue;
       +                ifc = (Ipifc*)(*cp)->ptcl;
       +                if(ifc->lifc != nil)
       +                        return ifc;
       +        }
       +        return nil;
       +}
       +
       +/* address classes, ordered so that a larger value is preferred */
       +enum {
       +        unknownv6,                /* UGH */
       +//        multicastv6,
       +        unspecifiedv6,
       +        linklocalv6,
       +        globalv6,
       +};
       +
       +/*
       + *  classify addr: link-local unicast, and multicast whose scope
       + *  nibble is at most Link_local_scop, count as linklocalv6;
       + *  everything else is globalv6
       + */
       +int
       +v6addrtype(uchar *addr)
       +{
       +        if(islinklocal(addr))
       +                return linklocalv6;
       +        if(isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)
       +                return linklocalv6;
       +        return globalv6;
       +}
       +/*
       + *  v6addrcurr(lifc) is true when lifc's preflt is ~0L or when
       + *  origint + preflt has not yet fallen behind NOW/1000.
       + *  findlocalip treats a false result as a deprecated address.
       + *  presumably preflt is the preferred lifetime in seconds and
       + *  ~0L means "never expires" -- confirm.
       + */
       +#define v6addrcurr(lifc) ((lifc)->preflt == ~0L || \
       +                        (lifc)->origint + (lifc)->preflt >= NOW/1000)
       +
       +/*
       + *  copy to local the "best" current IPv6 address found on any
       + *  interface (global > link local > unspecified); local is left
       + *  as v6Unspecified when nothing qualifies
       + */
       +static void
       +findprimaryipv6(Fs *f, uchar *local)
       +{
       +        int best, cand;
       +        Conv **cp, **end;
       +        Ipifc *ifc;
       +        Iplifc *lifc;
       +
       +        best = unspecifiedv6;
       +        ipmove(local, v6Unspecified);
       +
       +        end = &f->ipifc->conv[f->ipifc->nc];
       +        for(cp = f->ipifc->conv; cp < end; cp++){
       +                if(*cp == 0)
       +                        continue;
       +                ifc = (Ipifc*)(*cp)->ptcl;
       +                for(lifc = ifc->lifc; lifc; lifc = lifc->next){
       +                        cand = v6addrtype(lifc->local);
       +                        /* must beat what we have and still be current */
       +                        if(cand <= best || !v6addrcurr(lifc))
       +                                continue;
       +                        ipmove(local, lifc->local);
       +                        best = cand;
       +                        /* nothing ranks above global, so stop looking */
       +                        if(best == globalv6)
       +                                return;
       +                }
       +        }
       +}
       +
       +/*
       + *  copy to local the first address of the first interface that
       + *  has any address; local is untouched when there is none
       + */
       +static void
       +findprimaryipv4(Fs *f, uchar *local)
       +{
       +        Conv **cp, **end;
       +        Iplifc *first;
       +
       +        end = &f->ipifc->conv[f->ipifc->nc];
       +        for(cp = f->ipifc->conv; cp < end; cp++){
       +                if(*cp == 0)
       +                        continue;
       +                first = ((Ipifc*)(*cp)->ptcl)->lifc;
       +                if(first == nil)
       +                        continue;
       +                ipmove(local, first->local);
       +                return;
       +        }
       +}
       +
       +/*
       + *  find the local address 'closest' to the remote system and copy
       + *  it to local.  Nothing is returned.
       + *
       + *  The route to remote is looked up with v6lookup while f->ipifc is
       + *  locked.  remote counts as V4 when it starts with v4prefix.
       + *  With a route: for V4 the first address on the route's ifc whose
       + *  network contains the gateway is used; for V6 an address on the
       + *  route's ifc is chosen by scope, avoiding deprecated ones.
       + *  Without a route, or when the V4 search finds no address or the V6
       + *  search ends with a scope below the destination's, the result comes
       + *  from findprimaryipv4 or findprimaryipv6.
       + */
       +void
       +findlocalip(Fs *f, uchar *local, uchar *remote)
       +{
       +        int version, atype = unspecifiedv6, atypel = unknownv6;
       +        int atyper, deprecated;
       +        uchar gate[IPaddrlen], gnet[IPaddrlen];
       +        Ipifc *ifc;
       +        Iplifc *lifc;
       +        Route *r;
       +
       +        QLOCK(f->ipifc);
       +        r = v6lookup(f, remote, nil);
       +         version = (memcmp(remote, v4prefix, IPv4off) == 0)? V4: V6;
       +
       +        if(r != nil){
       +                ifc = r->ifc;
       +                if(r->type & Rv4)
       +                        v4tov6(gate, r->v4.gate);
       +                else {
       +                        ipmove(gate, r->v6.gate);
       +                        ipmove(local, v6Unspecified);
       +                }
       +
       +                switch(version) {
       +                case V4:
       +                        /* find ifc address closest to the gateway to use */
       +                        for(lifc = ifc->lifc; lifc; lifc = lifc->next){
       +                                maskip(gate, lifc->mask, gnet);
       +                                if(ipcmp(gnet, lifc->net) == 0){
       +                                        ipmove(local, lifc->local);
       +                                        goto out;
       +                                }
       +                        }
       +                        break;
       +                case V6:
       +                        /* find ifc address with scope matching the destination */
       +                        atyper = v6addrtype(remote);
       +                        deprecated = 0;
       +                        for(lifc = ifc->lifc; lifc; lifc = lifc->next){
       +                                atypel = v6addrtype(lifc->local);
       +                                /* prefer appropriate scope */
       +                                if((atypel > atype && atype < atyper) ||
       +                                   (atypel < atype && atype > atyper)){
       +                                        ipmove(local, lifc->local);
       +                                        deprecated = !v6addrcurr(lifc);
       +                                        atype = atypel;
       +                                } else if(atypel == atype){
       +                                        /* avoid deprecated addresses */
       +                                        if(deprecated && v6addrcurr(lifc)){
       +                                                ipmove(local, lifc->local);
       +                                                atype = atypel;
       +                                                deprecated = 0;
       +                                        }
       +                                }
       +                                if(atype == atyper && !deprecated)        /* exact scope, current: done */
       +                                        goto out;
       +                        }
       +                        if(atype >= atyper)        /* settle for what was found */
       +                                goto out;
       +                        break;
       +                default:
       +                        panic("findlocalip: version %d", version);
       +                }
       +        }
       +
       +        switch(version){        /* fall back to a primary address */
       +        case V4:
       +                findprimaryipv4(f, local);
       +                break;
       +        case V6:
       +                findprimaryipv6(f, local);
       +                break;
       +        default:
       +                panic("findlocalip2: version %d", version);
       +        }
       +
       +out:
       +        QUNLOCK(f->ipifc);
       +}
       +
       +/*
       + *  copy the first v4 address on the interface into addr
       + *  (IPv4addrlen bytes); returns 1 if there was one, else 0
       + */
       +int
       +ipv4local(Ipifc *ifc, uchar *addr)
       +{
       +        Iplifc *lifc;
       +
       +        lifc = ifc->lifc;
       +        while(lifc != nil && !isv4(lifc->local))
       +                lifc = lifc->next;
       +        if(lifc == nil)
       +                return 0;
       +        memmove(addr, lifc->local+IPv4off, IPv4addrlen);
       +        return 1;
       +}
       +
       +/*
       + *  copy the first non-tentative v6 address on the interface
       + *  into addr; returns 1 if there was one, else 0
       + */
       +int
       +ipv6local(Ipifc *ifc, uchar *addr)
       +{
       +        Iplifc *lifc;
       +
       +        lifc = ifc->lifc;
       +        while(lifc != nil && (isv4(lifc->local) || lifc->tentative))
       +                lifc = lifc->next;
       +        if(lifc == nil)
       +                return 0;
       +        ipmove(addr, lifc->local);
       +        return 1;
       +}
       +
       +/*
       + *  copy the first v6 address on the interface, tentative or not,
       + *  into addr and return SRC_UNI; return SRC_UNSPEC if there is none
       + */
       +int
       +ipv6anylocal(Ipifc *ifc, uchar *addr)
       +{
       +        Iplifc *lifc;
       +
       +        lifc = ifc->lifc;
       +        while(lifc != nil && isv4(lifc->local))
       +                lifc = lifc->next;
       +        if(lifc == nil)
       +                return SRC_UNSPEC;
       +        ipmove(addr, lifc->local);
       +        return SRC_UNI;
       +}
       +
       +/*
       + *  return the lifc on this interface whose local address is ip,
       + *  or nil if the address is not bound here
       + */
       +Iplifc*
       +iplocalonifc(Ipifc *ifc, uchar *ip)
       +{
       +        Iplifc *lifc;
       +
       +        lifc = ifc->lifc;
       +        while(lifc != nil){
       +                if(ipcmp(ip, lifc->local) == 0)
       +                        break;
       +                lifc = lifc->next;
       +        }
       +        return lifc;
       +}
       +
       +
       +/*
       + *  return 1 if we proxy for ip on this interface: its route must
       + *  carry both Rifc and Rproxy, and ip masked by one of the
       + *  interface's masks must equal that lifc's remote; else 0
       + */
       +int
       +ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
       +{
       +        Route *rt;
       +        Iplifc *lifc;
       +        uchar masked[IPaddrlen];
       +
       +        /* see if this is a direct connected pt to pt address */
       +        rt = v6lookup(f, ip, nil);
       +        if(rt == nil)
       +                return 0;
       +        if((rt->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
       +                return 0;
       +
       +        /* see if this is on the right interface */
       +        lifc = ifc->lifc;
       +        while(lifc != nil){
       +                maskip(ip, lifc->mask, masked);
       +                if(ipcmp(masked, lifc->remote) == 0)
       +                        return 1;
       +                lifc = lifc->next;
       +        }
       +        return 0;
       +}
       +
       +/*
       + *  return V4 or V6 if ip is a multicast address of that
       + *  version, 0 if it is not multicast
       + */
       +int
       +ipismulticast(uchar *ip)
       +{
       +        if(!isv4(ip))
       +                return ip[0] == 0xff ? V6 : 0;
       +        if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
       +                return V4;
       +        return 0;
       +}
       +/*
       + *  like ipismulticast, but IPv4bcast also counts: return V4 or V6
       + *  for a multicast address or the v4 broadcast address, else 0
       + */
       +int
       +ipisbm(uchar *ip)
       +{
       +        if(!isv4(ip))
       +                return ip[0] == 0xff ? V6 : 0;
       +        if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
       +                return V4;
       +        if(ipcmp(ip, IPv4bcast) == 0)
       +                return V4;
       +        return 0;
       +}
       +
       +
       +/*
       + *  add a multicast address to an interface, called with c->car locked
       + *
       + *  Appends (ma, ia) to c's multi list unless the pair is already
       + *  there, then, with each in-use interface write-locked in turn,
       + *  calls addselfcache(..., ma, Rmulti) for every lifc whose local
       + *  address is ia.
       + *
       + *  NOTE(review): this loop stops at the first nil entry of conv
       + *  rather than at conv[nc] as other loops in this file do --
       + *  confirm the array is nil-terminated.
       + *  NOTE(review): waserror() is set up before WLOCK(ifc), so the
       + *  error path unlocks ifc whether or not the lock was taken --
       + *  confirm that is intended.
       + */
       +void
       +ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
       +{
       +        Ipifc *ifc;
       +        Iplifc *lifc;
       +        Conv **p;
       +        Ipmulti *multi, **l;
       +        Fs *f;
       +
       +        f = c->p->f;
       +
       +        for(l = &c->multi; *l; l = &(*l)->next)
       +                if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
       +                        return;                /* it's already there */
       +
       +        multi = *l = smalloc(sizeof(*multi));        /* l is at the tail: append */
       +        ipmove(multi->ma, ma);
       +        ipmove(multi->ia, ia);
       +        multi->next = nil;
       +
       +        for(p = f->ipifc->conv; *p; p++){
       +                if((*p)->inuse == 0)
       +                        continue;
       +                ifc = (Ipifc*)(*p)->ptcl;
       +                if(waserror()){
       +                        WUNLOCK(ifc);
       +                        nexterror();
       +                }
       +                WLOCK(ifc);
       +                for(lifc = ifc->lifc; lifc; lifc = lifc->next)
       +                        if(ipcmp(ia, lifc->local) == 0)
       +                                addselfcache(f, ifc, lifc, ma, Rmulti);
       +                WUNLOCK(ifc);
       +                poperror();
       +        }
       +}
       +
       +
       +/*
       + *  remove a multicast address from an interface, called with c->car locked
       + *
       + *  Unlinks (ma, ia) from c's multi list, returning at once if the
       + *  pair is not there.  Then, with each in-use interface write-locked
       + *  in turn, calls remselfcache(..., ma) for every lifc whose local
       + *  address is ia, and finally frees the list entry.
       + *
       + *  NOTE(review): this loop stops at the first nil entry of conv
       + *  rather than at conv[nc] as other loops in this file do --
       + *  confirm the array is nil-terminated.
       + *  NOTE(review): if an error is raised inside the loop, nexterror()
       + *  is taken after multi has been unlinked but before free(multi) --
       + *  confirm whether that can leak the entry.
       + */
       +void
       +ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
       +{
       +        Ipmulti *multi, **l;
       +        Iplifc *lifc;
       +        Conv **p;
       +        Ipifc *ifc;
       +        Fs *f;
       +
       +        f = c->p->f;
       +
       +        for(l = &c->multi; *l; l = &(*l)->next)
       +                if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
       +                        break;
       +
       +        multi = *l;
       +        if(multi == nil)
       +                return;         /* we don't have it open */
       +
       +        *l = multi->next;
       +
       +        for(p = f->ipifc->conv; *p; p++){
       +                if((*p)->inuse == 0)
       +                        continue;
       +
       +                ifc = (Ipifc*)(*p)->ptcl;
       +                if(waserror()){
       +                        WUNLOCK(ifc);
       +                        nexterror();
       +                }
       +                WLOCK(ifc);
       +                for(lifc = ifc->lifc; lifc; lifc = lifc->next)
       +                        if(ipcmp(ia, lifc->local) == 0)
       +                                remselfcache(f, ifc, lifc, ma);
       +                WUNLOCK(ifc);
       +                poperror();
       +        }
       +
       +        free(multi);
       +}
       +
       +/*
       + *  make lifc's join and leave multicast groups
       + *
       + *  Both handlers are stubs: they ignore their arguments and
       + *  return nil, so the "joinmulti" and "leavemulti" ctl messages
       + *  are accepted without doing anything.
       + */
       +static char*
       +ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
       +{
       +        return nil;
       +}
       +
       +static char*
       +ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
       +{
       +        return nil;
       +}
       +
       +static void
       +ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
       +{
       +        Conv **cp, **e;
       +        Ipifc *nifc;
       +        Iplifc *lifc;
       +        Medium *m;
       +        uchar net[IPaddrlen];
       +
       +        /*
       +         * register the address on any network that will proxy for us
       +         *
       +         * Every interface other than ifc is read-locked in turn and
       +         * searched for a lifc whose remote equals ip masked by that
       +         * lifc's mask; only the first such lifc per interface is used.
       +         * Interfaces with no medium, or whose medium lacks the hook
       +         * needed (addmulti for v6, areg for v4), are skipped.
       +         */
       +        e = &f->ipifc->conv[f->ipifc->nc];
       +
       +        if(!isv4(ip)) {                                /* V6 */
       +                for(cp = f->ipifc->conv; cp < e; cp++){
       +                        if(*cp == nil || (nifc = (Ipifc*)(*cp)->ptcl) == ifc)
       +                                continue;
       +                        RLOCK(nifc);
       +                        m = nifc->m;
       +                        if(m == nil || m->addmulti == nil) {
       +                                RUNLOCK(nifc);
       +                                continue;
       +                        }
       +                        for(lifc = nifc->lifc; lifc; lifc = lifc->next){
       +                                maskip(ip, lifc->mask, net);
       +                                if(ipcmp(net, lifc->remote) == 0) {
       +                                        /* add solicited-node multicast addr */
       +                                        ipv62smcast(net, ip);        /* net is reused to hold that address */
       +                                        addselfcache(f, nifc, lifc, net, Rmulti);
       +                                        arpenter(f, V6, ip, nifc->mac, 6, 0);
       +                                        // (*m->addmulti)(nifc, net, ip);
       +                                        break;
       +                                }
       +                        }
       +                        RUNLOCK(nifc);
       +                }
       +        }
       +        else {                                        /* V4 */
       +                for(cp = f->ipifc->conv; cp < e; cp++){
       +                        if(*cp == nil || (nifc = (Ipifc*)(*cp)->ptcl) == ifc)
       +                                continue;
       +                        RLOCK(nifc);
       +                        m = nifc->m;
       +                        if(m == nil || m->areg == nil){
       +                                RUNLOCK(nifc);
       +                                continue;
       +                        }
       +                        for(lifc = nifc->lifc; lifc; lifc = lifc->next){
       +                                maskip(ip, lifc->mask, net);
       +                                if(ipcmp(net, lifc->remote) == 0){
       +                                        (*m->areg)(nifc, ip);
       +                                        break;
       +                                }
       +                        }
       +                        RUNLOCK(nifc);
       +                }
       +        }
       +}
       +
       +
       +/*
       + *  added for new v6 mesg types
       + *
       + *  install gate as the v6 default route, tagged "ra".  unless
       + *  force is set, an existing default route with any other tag
       + *  is left alone.
       + */
       +static void
       +adddefroute6(Fs *f, uchar *gate, int force)
       +{
       +        Route *cur;
       +
       +        cur = v6lookup(f, v6Unspecified, nil);
       +
       +        /*
       +         * route entries generated by all other means take precedence
       +         * over router announcements.
       +         */
       +        if(cur == nil || force || strcmp(cur->tag, "ra") == 0){
       +                v6delroute(f, v6Unspecified, v6Unspecified, 1);
       +                v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
       +        }
       +}
       +
       +enum {
       +        Ngates = 3,
       +};
       +
       +/*
       + *  "add6" ctl message: add a v6 address formed from a prefix.
       + *        argv[1]        prefix (must parse as v6 and not be link-local)
       + *        argv[2]        prefix length, 0..64 (default 64)
       + *        argv[3]        onlink flag (default 1)
       + *        argv[4]        autoflag (default 1)
       + *        argv[5]        valid lifetime (default ~0L)
       + *        argv[6]        preferred lifetime (default ~0L)
       + *  The medium's pref2addr hook combines the prefix with ifc->mac
       + *  and the result is passed to ipifcadd as an "add" request along
       + *  with a new Iplifc carrying the flags and lifetimes.
       + *  Returns Ebadarg for bad arguments or when the interface has no
       + *  medium or the medium has no pref2addr hook; otherwise returns
       + *  whatever ipifcadd returns.
       + */
       +char*
       +ipifcadd6(Ipifc *ifc, char**argv, int argc)
       +{
       +        int plen = 64;
       +        int autoflag = 1, onlink = 1;        /* int, so atoi results are not truncated */
       +        long origint = NOW / 1000, preflt = ~0L, validlt = ~0L;
       +        char addr[40], preflen[6];
       +        char *params[3];
       +        uchar prefix[IPaddrlen];
       +        Iplifc *lifc;
       +
       +        switch(argc) {
       +        case 7:
       +                preflt = atoi(argv[6]);
       +                /* fall through */
       +        case 6:
       +                validlt = atoi(argv[5]);
       +                /* fall through */
       +        case 5:
       +                autoflag = atoi(argv[4]);
       +                /* fall through */
       +        case 4:
       +                onlink = atoi(argv[3]);
       +                /* fall through */
       +        case 3:
       +                plen = atoi(argv[2]);
       +                /* fall through */
       +        case 2:
       +                break;
       +        default:
       +                return Ebadarg;
       +        }
       +
       +        if (parseip(prefix, argv[1]) != 6 || validlt < preflt || plen < 0 ||
       +            plen > 64 || islinklocal(prefix))
       +                return Ebadarg;
       +
       +        /*
       +         * check the medium before allocating anything, so that this
       +         * error return cannot leak the Iplifc
       +         */
       +        if(ifc->m == nil || ifc->m->pref2addr == nil)
       +                return Ebadarg;
       +
       +        lifc = smalloc(sizeof(Iplifc));
       +        lifc->onlink = (onlink != 0);
       +        lifc->autoflag = (autoflag != 0);
       +        lifc->validlt = validlt;
       +        lifc->preflt = preflt;
       +        lifc->origint = origint;
       +
       +        /* issue "add" ctl msg for v6 link-local addr and prefix len */
       +        ifc->m->pref2addr(prefix, ifc->mac);        /* mac → v6 link-local addr */
       +        snprint(addr, sizeof addr, "%I", prefix);
       +        snprint(preflen, sizeof preflen, "/%d", plen);
       +        params[0] = "add";
       +        params[1] = addr;
       +        params[2] = preflen;
       +
       +        return ipifcadd(ifc, params, 3, 0, lifc);
       +}
 (DIR) diff --git a/src/9vx/a/ip/ipmux.c b/src/9vx/a/ip/ipmux.c
       @@ -0,0 +1,842 @@
       +/*
       + * IP packet filter
       + */
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +#include "ipv6.h"
       +
       +typedef struct Ipmuxrock  Ipmuxrock;
       +typedef struct Ipmux      Ipmux;
       +
       +typedef struct Myip4hdr Myip4hdr;
       +struct Myip4hdr        /* IPv4 header laid out as bytes, followed by the packet data */
       +{
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        length[2];        /* packet length */
       +        uchar        id[2];                /* ip->identification */
       +        uchar        frag[2];        /* Fragment information */
       +        uchar        ttl;                /* Time to live */
       +        uchar        proto;                /* Protocol */
       +        uchar        cksum[2];        /* Header checksum */
       +        uchar        src[4];                /* IP source */
       +        uchar        dst[4];                /* IP destination */
       +
       +        uchar        data[1];        /* start of data */
       +};
       +Myip4hdr *ipoff = 0;        /* null header pointer; presumably a base for computing field offsets -- confirm against its uses */
       +
       +enum
       +{
       +        Tproto,                /* T*: field types (Ipmux.type); names are in ftname[] */
       +        Tdata,
       +        Tiph,
       +        Tdst,
       +        Tsrc,
       +        Tifc,
       +
       +        Cother = 0,        /* C*: comparison types (Ipmux.ctype); numbering restarts at 0 */
       +        Cbyte,                /* single byte */
       +        Cmbyte,                /* single byte with mask */
       +        Cshort,                /* single short */
       +        Cmshort,        /* single short with mask */
       +        Clong,                /* single long */
       +        Cmlong,                /* single long with mask */
       +        Cifc,
       +        Cmifc,
       +};
       +
       +/*
       + *  printable names of the field types, indexed by Txxxx.
       + *  Not referenced anywhere else in this file.
       + */
       +char *ftname[] = 
       +{
       +[Tproto]        "proto",
       +[Tdata]                "data",
       +[Tiph]                 "iph",
       +[Tdst]                "dst",
       +[Tsrc]                "src",
       +[Tifc]                "ifc",
       +};
       +
       +/*
       + *  a node in the decision tree
       + */
       +struct Ipmux
       +{
       +        Ipmux        *yes;                /* followed when this node matches */
       +        Ipmux        *no;                /* followed when this node does not match */
       +        uchar        type;                /* type of field(Txxxx) */
       +        uchar        ctype;                /* type of comparison(Cxxxx) */
       +        uchar        len;                /* length in bytes of item to compare */
       +        uchar        n;                /* number of items val points to */
       +        short        off;                /* offset of comparison */
       +        short        eoff;                /* end offset of comparison */
       +        uchar        skiphdr;        /* should offset start after ipheader */
       +        uchar        *val;                /* n items of len bytes each */
       +        uchar        *mask;                /* len bytes, and'ed with the packet bytes before comparing */
       +        uchar        *e;                /* val+n*len*/
       +
       +        int        ref;                /* so we can garbage collect */
       +        Conv        *conv;                /* conversation to deliver to when this node matches, or nil */
       +};
       +
       +/*
       + *  someplace to hold per conversation data
       + */
       +struct Ipmuxrock
       +{
       +        Ipmux        *chain;                /* this conversation's own filter chain; nil until connect */
       +};
       +
       +/* defined further down, but used by ipmuxstate and ipmuxcreate before that */
       +static int        ipmuxsprint(Ipmux*, int, char*, int);
       +static void        ipmuxkick(void *x);
       +
       +/*
       + *  return a pointer to the first character of p that is
       + *  neither a blank nor a tab
       + */
       +static char*
       +skipwhite(char *p)
       +{
       +        for(;;){
       +                if(*p != ' ' && *p != '\t')
       +                        return p;
       +                p++;
       +        }
       +}
       +
       +/*
       + *  split p at the first occurrence of c: the separator is
       + *  overwritten with a NUL and a pointer to the first non-white
       + *  character after it is returned.  Returns nil if c does not
       + *  occur, or if nothing but white space follows it (p has
       + *  still been cut in that case).
       + */
       +static char*
       +follows(char *p, char c)
       +{
       +        char *rest;
       +
       +        rest = strchr(p, c);
       +        if(rest == nil)
       +                return nil;
       +        *rest = 0;
       +        rest = skipwhite(rest + 1);
       +        return *rest != 0 ? rest : nil;
       +}
       +
       +/*
       + *  Parse the operand (left hand side) of one filter.  Accepted
       + *  operands are dst, src, ifc, proto, data[off], data[off:end],
       + *  iph[off] and iph[off:end].
       + *
       + *  On success returns a newly smalloc'd node with type, len, off,
       + *  skiphdr, n and ref filled in (val and mask are nil) and
       + *  advances *pp past the operand.  Returns nil, leaving *pp
       + *  untouched, if the operand is unknown or its range is invalid.
       + */
       +static Ipmux*
       +parseop(char **pp)
       +{
       +        char *p = *pp;
       +        int type, off, end, len;
       +        Ipmux *f;
       +
       +        p = skipwhite(p);
       +        if(strncmp(p, "dst", 3) == 0){
       +                type = Tdst;
       +                off = (ulong)(ipoff->dst);
       +                len = IPv4addrlen;
       +                p += 3;
       +        }
       +        else if(strncmp(p, "src", 3) == 0){
       +                type = Tsrc;
       +                off = (ulong)(ipoff->src);
       +                len = IPv4addrlen;
       +                p += 3;
       +        }
       +        else if(strncmp(p, "ifc", 3) == 0){
       +                type = Tifc;
       +                /* not a packet field; ipmuxiput compares the interface address instead */
       +                off = -IPv4addrlen;
       +                len = IPv4addrlen;
       +                p += 3;
       +        }
       +        else if(strncmp(p, "proto", 5) == 0){
       +                type = Tproto;
       +                off = (ulong)&(ipoff->proto);
       +                len = 1;
       +                p += 5;
       +        }
       +        else if(strncmp(p, "data", 4) == 0 || strncmp(p, "iph", 3) == 0){
       +                if(strncmp(p, "data", 4) == 0) {
       +                        type = Tdata;
       +                        p += 4;
       +                }
       +                else {
       +                        type = Tiph;
       +                        p += 3;
       +                }
       +                p = skipwhite(p);
       +                if(*p != '[')
       +                        return nil;
       +                p++;
       +                off = strtoul(p, &p, 0);
       +                if(off < 0 || off > (64-IP4HDR))
       +                        return nil;
       +                p = skipwhite(p);
       +                if(*p != ':')
       +                        end = off;
       +                else {
       +                        p++;
       +                        p = skipwhite(p);
       +                        end = strtoul(p, &p, 0);
       +                        if(end < off)
       +                                return nil;
       +                        p = skipwhite(p);
       +                }
       +                if(*p != ']')
       +                        return nil;
       +                p++;
       +                /*
       +                 *  the length is stored in a uchar (Ipmux.len); refuse
       +                 *  ranges it cannot hold instead of truncating them.
       +                 */
       +                if(end - off >= 0xFF)
       +                        return nil;
       +                len = end - off + 1;
       +        }
       +        else
       +                return nil;
       +
       +        f = smalloc(sizeof(*f));
       +        f->type = type;
       +        f->len = len;
       +        f->off = off;
       +        f->val = nil;
       +        f->mask = nil;
       +        f->n = 1;
       +        f->ref = 1;
       +        /* only data[] offsets are relative to the end of the ip header */
       +        if(type == Tdata)
       +                f->skiphdr = 1;
       +        else
       +                f->skiphdr = 0;
       +
       +        /* tell the caller where the operand ended */
       +        *pp = p;
       +        return f;
       +}
       +
       +/*
       + *  value of a single hex digit; any other character counts as 0
       + */
       +static int
       +htoi(char x)
       +{
       +        if('0' <= x && x <= '9')
       +                return x - '0';
       +        if('a' <= x && x <= 'f')
       +                return x - 'a' + 10;
       +        if('A' <= x && x <= 'F')
       +                return x - 'A' + 10;
       +        return 0;
       +}
       +
       +/*
       + *  value of the byte written as the two hex digits p[0] p[1]
       + */
       +static int
       +hextoi(char *p)
       +{
       +        int hi, lo;
       +
       +        hi = htoi(p[0]);
       +        lo = htoi(p[1]);
       +        return (hi<<4) | lo;
       +}
       +
       +/*
       + *  Convert the hex string p to bytes, two digits per byte, storing
       + *  at most len bytes at v.  Conversion stops at the end of the
       + *  string; bytes of v beyond that are left untouched.  A trailing
       + *  single digit becomes the high nibble of the last byte.
       + */
       +static void
       +parseval(uchar *v, char *p, int len)
       +{
       +        while(*p && len-- > 0){
       +                *v++ = hextoi(p);
       +                /*
       +                 *  odd number of digits: p[1] is the terminator, so
       +                 *  stepping by 2 would run off the end of the string.
       +                 */
       +                if(p[1] == 0)
       +                        break;
       +                p += 2;
       +        }
       +}
       +
       +/*
       + *  Parse one filter, op=val[|val...], into a single Ipmux node.
       + *  An optional mask may be written either before the '=' as
       + *  op&mask=val (the form ipmuxsprint prints) or after the values
       + *  as op=val&mask (the form shown in the comment above
       + *  ipmuxconnect).  Without a mask an all-ones mask is used.
       + *  Masks are not accepted for proto.
       + *
       + *  The string p is cut up in place.  Returns nil on a parse error.
       + */
       +static Ipmux*
       +parsemux(char *p)
       +{
       +        int n, nomask;
       +        Ipmux *f;
       +        char *val;
       +        char *mask;
       +        char *vals[20];
       +        uchar *v;
       +
       +        /* parse operand */
       +        f = parseop(&p);
       +        if(f == nil)
       +                return nil;
       +
       +        /* find value */
       +        val = follows(p, '=');
       +        if(val == nil)
       +                goto parseerror;
       +
       +        /*
       +         *  parse mask.  p now ends at the '=', so the first search
       +         *  only sees a mask written before it; fall back to one
       +         *  written after the values, which also cuts it off val.
       +         */
       +        mask = follows(p, '&');
       +        if(mask == nil)
       +                mask = follows(val, '&');
       +        if(mask != nil){
       +                switch(f->type){
       +                case Tsrc:
       +                case Tdst:
       +                case Tifc:
       +                        f->mask = smalloc(f->len);
       +                        v4parseip(f->mask, mask);
       +                        break;
       +                case Tdata:
       +                case Tiph:
       +                        f->mask = smalloc(f->len);
       +                        parseval(f->mask, mask, f->len);
       +                        break;
       +                default:
       +                        goto parseerror;
       +                }
       +                nomask = 0;
       +        } else {
       +                nomask = 1;
       +                f->mask = smalloc(f->len);
       +                memset(f->mask, 0xff, f->len);
       +        }
       +
       +        /* parse vals */
       +        f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
       +        if(f->n == 0)
       +                goto parseerror;
       +        f->val = smalloc(f->n*f->len);
       +        v = f->val;
       +        for(n = 0; n < f->n; n++){
       +                switch(f->type){
       +                case Tsrc:
       +                case Tdst:
       +                case Tifc:
       +                        v4parseip(v, vals[n]);
       +                        break;
       +                case Tproto:
       +                case Tdata:
       +                case Tiph:
       +                        parseval(v, vals[n], f->len);
       +                        break;
       +                }
       +                v += f->len;
       +        }
       +
       +        f->eoff = f->off + f->len;
       +        f->e = f->val + f->n*f->len;
       +        /* a single 1, 2 or 4 byte value gets a specialised comparison */
       +        f->ctype = Cother;
       +        if(f->n == 1){
       +                switch(f->len){
       +                case 1:
       +                        f->ctype = nomask ? Cbyte : Cmbyte;
       +                        break;
       +                case 2:
       +                        f->ctype = nomask ? Cshort : Cmshort;
       +                        break;
       +                case 4:
       +                        if(f->type == Tifc)
       +                                f->ctype = nomask ? Cifc : Cmifc;
       +                        else
       +                                f->ctype = nomask ? Clong : Cmlong;
       +                        break;
       +                }
       +        }
       +        return f;
       +
       +parseerror:
       +        if(f->mask)
       +                free(f->mask);
       +        if(f->val)
       +                free(f->val);
       +        free(f);
       +        return nil;
       +}
       +
       +/*
       + *  Compare relative ordering of two ipmuxs.  This doesn't compare the
       + *  values, just the fields being looked at.  
       + *
       + *  returns:        <0 if a is a more specific match
       + *                 0 if a and b are matching on the same fields
       + *                >0 if b is a more specific match
       + */
       +static int
       +ipmuxcmp(Ipmux *a, Ipmux *b)
       +{
       +        int d;
       +
       +        /* field type first (lesser is more important) ... */
       +        d = a->type - b->type;
       +        /* ... then absolute offset (earlier is more specific) ... */
       +        if(d == 0)
       +                d = (a->off+((int)a->skiphdr)*(ulong)ipoff->data) - 
       +                        (b->off+((int)b->skiphdr)*(ulong)ipoff->data);
       +        /* ... then match length (longer is more specific) */
       +        if(d == 0)
       +                d = b->len - a->len;
       +        if(d != 0)
       +                return d;
       +
       +        /*
       +         *  both nodes look at exactly the same bytes; order them
       +         *  by mask.  A node with a mask sorts before one without.
       +         */
       +        if(a->mask == nil)
       +                return b->mask == nil ? 0 : 1;
       +        if(b->mask == nil)
       +                return -1;
       +        return memcmp(b->mask, a->mask, a->len);
       +}
       +
       +/*
       + *  Compare the values of two ipmuxs.  We're assuming that ipmuxcmp
       + *  returned 0 comparing them.
       + */
       +static int
       +ipmuxvalcmp(Ipmux *a, Ipmux *b)
       +{
       +        int asize, bsize;
       +
       +        /* value lists of different total size can never be equal */
       +        asize = a->len*a->n;
       +        bsize = b->len*b->n;
       +        if(asize != bsize)
       +                return bsize - asize;
       +        return memcmp(a->val, b->val, asize);
       +}
       +
       +/*
       + *  add onto an existing ipmux chain in the canonical comparison
       + *  order
       + */
       +static void
       +ipmuxchain(Ipmux **l, Ipmux *f)
       +{
       +        Ipmux **slot;
       +
       +        /* walk the yes links to the first node that f sorts strictly before */
       +        slot = l;
       +        while(*slot != nil && ipmuxcmp(f, *slot) >= 0)
       +                slot = &(*slot)->yes;
       +
       +        /* splice f in ahead of it (or at the end) */
       +        f->yes = *slot;
       +        *slot = f;
       +}
       +
       +/*
       + *  copy a tree
       + */
       +static Ipmux*
       +ipmuxcopy(Ipmux *f)
       +{
       +        Ipmux *dup;
       +        int nbytes;
       +
       +        if(f == nil)
       +                return nil;
       +
       +        nbytes = f->n*f->len;
       +        dup = smalloc(sizeof *dup);
       +        /* the struct copy leaves dup->mask pointing at f's mask; it is shared, not duplicated */
       +        *dup = *f;
       +        dup->no = ipmuxcopy(f->no);
       +        dup->yes = ipmuxcopy(f->yes);
       +
       +        /* the values get private storage */
       +        dup->val = smalloc(nbytes);
       +        memmove(dup->val, f->val, nbytes);
       +        dup->e = dup->val + nbytes;
       +        return dup;
       +}
       +
       +/*
       + *  free a single node and its value array.  f->mask is not
       + *  released here, and the yes/no subtrees are not followed.
       + */
       +static void
       +ipmuxfree(Ipmux *f)
       +{
       +        uchar *v;
       +
       +        v = f->val;
       +        if(v != nil)
       +                free(v);
       +        free(f);
       +}
       +
       +/*
       + *  free a whole tree: f and everything reachable from it through
       + *  the yes and no links.  A nil tree is a no-op.
       + */
       +static void
       +ipmuxtreefree(Ipmux *f)
       +{
       +        if(f == nil)
       +                return;
       +        /*
       +         *  recurse; freeing only the immediate children would
       +         *  leak every node more than one level down.
       +         */
       +        ipmuxtreefree(f->no);
       +        ipmuxtreefree(f->yes);
       +        ipmuxfree(f);
       +}
       +
       +/*
       + *  merge two trees
       + */
       +static Ipmux*
       +ipmuxmerge(Ipmux *a, Ipmux *b)
       +{
       +        int n;
       +        Ipmux *f;
       +
       +        /* merging with an empty tree gives the other tree */
       +        if(a == nil)
       +                return b;
       +        if(b == nil)
       +                return a;
       +        n = ipmuxcmp(a, b);
       +        if(n < 0){
       +                /*
       +                 *  a sorts first, so it stays on top: b is merged
       +                 *  under a's yes side and a copy of b under its no side.
       +                 */
       +                f = ipmuxcopy(b);
       +                a->yes = ipmuxmerge(a->yes, b);
       +                a->no = ipmuxmerge(a->no, f);
       +                return a;
       +        }
       +        if(n > 0){
       +                /* the mirror image: b stays on top */
       +                f = ipmuxcopy(a);
       +                b->yes = ipmuxmerge(b->yes, a);
       +                b->no = ipmuxmerge(b->no, f);
       +                return b;
       +        }
       +        if(ipmuxvalcmp(a, b) == 0){
       +                /*
       +                 *  same field, same values: fold b into a, count the
       +                 *  extra user in a->ref and discard b itself.
       +                 */
       +                a->yes = ipmuxmerge(a->yes, b->yes);
       +                a->no = ipmuxmerge(a->no, b->no);
       +                a->ref++;
       +                ipmuxfree(b);
       +                return a;
       +        }
       +        /* same field, different values: b goes down a's no side */
       +        a->no = ipmuxmerge(a->no, b);
       +        return a;
       +}
       +
       +/*
       + *  remove a chain from a demux tree.  This is like merging except that
       + *  we remove instead of insert.
       + */
       +/*
       + *  l points at the link holding the (sub)tree to remove from and is
       + *  updated when the node it refers to is unlinked; f is the chain
       + *  being removed.  Returns 0 once the chain has been fully removed
       + *  and a negative value if it could not be found.
       + */
       +static int
       +ipmuxremove(Ipmux **l, Ipmux *f)
       +{
       +        int n, rv;
       +        Ipmux *ft;
       +
       +        if(f == nil)
       +                return 0;                /* we've removed it all */
       +        if(*l == nil)
       +                return -1;
       +
       +        ft = *l;
       +        n = ipmuxcmp(ft, f);
       +        if(n < 0){
       +                /* *l is matching an earlier field, descend both paths */
       +                rv = ipmuxremove(&ft->yes, f);
       +                rv += ipmuxremove(&ft->no, f);
       +                return rv;
       +        }
       +        if(n > 0){
       +                /* f represents an earlier field than *l, this should be impossible */
       +                return -1;
       +        }
       +
       +        /* if we get here f and *l are comparing the same fields */
       +        if(ipmuxvalcmp(ft, f) != 0){
       +                /* different values mean mutually exclusive */
       +                return ipmuxremove(&ft->no, f);
       +        }
       +
       +        /* we found a match */
       +        if(--(ft->ref) == 0){
       +                /*
       +                 *  a dead node implies the whole yes side is also dead.
       +                 *  since our chain is constrained to be on that side,
       +                 *  we're done.
       +                 */
       +                ipmuxtreefree(ft->yes);
       +                *l = ft->no;                /* the no side takes the dead node's place */
       +                ipmuxfree(ft);
       +                return 0;
       +        }
       +
       +        /*
       +         *  free the rest of the chain.  it is constrained to match the
       +         *  yes side.
       +         */
       +        return ipmuxremove(&ft->yes, f->yes);
       +}
       +
       +/*
       + *  connection request is a semicolon-separated list of filters
       + *  e.g. proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
       + *
       + *  there's no protection against overlapping specs.
       + */
       +static char*
       +ipmuxconnect(Conv *c, char **argv, int argc)
       +{
       +        int i, n;
       +        char *field[10];
       +        Ipmux *mux, *chain;
       +        Ipmuxrock *r;
       +        Fs *f;
       +
       +        f = c->p->f;
       +
       +        if(argc != 2)
       +                return Ebadarg;
       +
       +        n = getfields(argv[1], field, nelem(field), 1, ";");
       +        if(n <= 0)
       +                return Ebadarg;
       +
       +        /* build the chain, kept in canonical order by ipmuxchain */
       +        chain = nil;
       +        for(i = 0; i < n; i++){
       +                mux = parsemux(field[i]);
       +                if(mux == nil){
       +                        /* the chain is a plain list along yes; free every node of it */
       +                        while(chain != nil){
       +                                mux = chain->yes;
       +                                ipmuxfree(chain);
       +                                chain = mux;
       +                        }
       +                        return Ebadarg;
       +                }
       +                ipmuxchain(&chain, mux);
       +        }
       +        if(chain == nil)
       +                return Ebadarg;
       +
       +        /*
       +         *  the conversation belongs on the last node of the chain,
       +         *  so that it is only selected once every filter has
       +         *  matched.  ipmuxchain sorts, so that is not necessarily
       +         *  the filter that was parsed last.
       +         */
       +        for(mux = chain; mux->yes != nil; mux = mux->yes)
       +                ;
       +        mux->conv = c;
       +
       +        /* save a copy of the chain so we can later remove it */
       +        mux = ipmuxcopy(chain);
       +        r = (Ipmuxrock*)(c->ptcl);
       +        r->chain = chain;
       +
       +        /* add the chain to the protocol demultiplexor tree */
       +        WLOCK(f);
       +        f->ipmux->priv = ipmuxmerge(f->ipmux->priv, mux);
       +        WUNLOCK(f);
       +
       +        Fsconnected(c, nil);
       +        return nil;
       +}
       +
       +/*
       + *  format this conversation's own filter chain into state
       + *  (at most n bytes); returns what ipmuxsprint returns.
       + */
       +static int
       +ipmuxstate(Conv *c, char *state, int n)
       +{
       +        Ipmuxrock *rock = (Ipmuxrock*)(c->ptcl);
       +
       +        return ipmuxsprint(rock->chain, 0, state, n);
       +}
       +
       +/*
       + *  set up a new conversation: a read queue, a write queue that
       + *  is kicked through ipmuxkick, and an empty filter chain.
       + */
       +static void
       +ipmuxcreate(Conv *c)
       +{
       +        Ipmuxrock *rock;
       +
       +        c->rq = qopen(64*1024, Qmsg, 0, c);
       +        c->wq = qopen(64*1024, Qkick, ipmuxkick, c);
       +
       +        rock = (Ipmuxrock*)(c->ptcl);
       +        rock->chain = nil;
       +}
       +
       +/*
       + *  announce is not implemented; all arguments are ignored and
       + *  a fixed error string is returned.
       + */
       +static char*
       +ipmuxannounce(Conv* _, char** __, int ___)
       +{
       +        char *err;
       +
       +        err = "ipmux does not support announce";
       +        return err;
       +}
       +
       +/*
       + *  tear down a conversation: close its queues, clear its
       + *  addresses and ports, take its filter chain out of the
       + *  protocol's demux tree and free the saved chain.
       + */
       +static void
       +ipmuxclose(Conv *c)
       +{
       +        Ipmux *i;
       +        Ipmuxrock *r;
       +        Fs *f = c->p->f;
       +
       +        r = (Ipmuxrock*)(c->ptcl);
       +
       +        qclose(c->rq);
       +        qclose(c->wq);
       +        qclose(c->eq);
       +        ipmove(c->laddr, IPnoaddr);
       +        ipmove(c->raddr, IPnoaddr);
       +        c->lport = 0;
       +        c->rport = 0;
       +
       +        WLOCK(f);
       +        i = (Ipmux *)c->p->priv;
       +        ipmuxremove(&i, r->chain);
       +        /*
       +         *  ipmuxremove may have unlinked and freed the root node;
       +         *  store the new root back or priv is left dangling.
       +         */
       +        c->p->priv = i;
       +        WUNLOCK(f);
       +        ipmuxtreefree(r->chain);
       +        r->chain = nil;
       +}
       +
       +/*
       + *  takes a fully formed ip packet and just passes it down
       + *  the stack
       + */
       +static void
       +ipmuxkick(void *x)
       +{
       +        Conv *c;
       +        Block *bp;
       +        Myip4hdr *ih4;
       +
       +        c = x;
       +        bp = qget(c->wq);
       +        if(bp == nil)
       +                return;
       +
       +        /* the version nibble picks the output routine; anything that isn't v6 goes out as v4 */
       +        ih4 = (Myip4hdr*)(bp->rp);
       +        if((ih4->vihl & 0xF0) == IP_VER6)
       +                ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
       +        else
       +                ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
       +}
       +
       +/*
       + *  Run an incoming packet through the demux tree.  If the walk
       + *  passes through a matching node that has a conversation, the
       + *  packet is queued on the last such conversation's read queue
       + *  with the receiving interface's address prepended; otherwise
       + *  it is handed to the handler registered for its protocol, or
       + *  freed if there is none.
       + */
       +static void
       +ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
       +{
       +        int len, hl, skip;
       +        Fs *f = p->f;
       +        uchar *m, *h, *v, *e, *ve, *hp;
       +        Conv *c;
       +        Ipmux *mux;
       +        Myip4hdr *ip;
       +        Ip6hdr *ip6;
       +
       +        /* NOTE(review): hl is taken from the v4 header layout even for v6 packets */
       +        ip = (Myip4hdr*)bp->rp;
       +        hl = (ip->vihl&0x0F)<<2;
       +
       +        if(p->priv == nil)
       +                goto nomatch;
       +
       +        h = bp->rp;
       +        len = BLEN(bp);
       +
       +        /* run the v4 filter */
       +        RLOCK(f);
       +        c = nil;
       +        mux = f->ipmux->priv;
       +        while(mux != nil){
       +                /* data[] offsets are relative to the end of the ip header */
       +                skip = ((int)mux->skiphdr)*hl;
       +                /* the bytes compared must lie inside the packet, header skip included */
       +                if(mux->eoff + skip > len){
       +                        mux = mux->no;
       +                        continue;
       +                }
       +                hp = h + mux->off + skip;
       +                switch(mux->ctype){
       +                case Cbyte:
       +                        if(*mux->val == *hp)
       +                                goto yes;
       +                        break;
       +                case Cmbyte:
       +                        if((*hp & *mux->mask) == *mux->val)
       +                                goto yes;
       +                        break;
       +                case Cshort:
       +                        if(*((ushort*)mux->val) == *(ushort*)hp)
       +                                goto yes;
       +                        break;
       +                case Cmshort:
       +                        if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
       +                                goto yes;
       +                        break;
       +                case Clong:
       +                        if(*((ulong*)mux->val) == *(ulong*)hp)
       +                                goto yes;
       +                        break;
       +                case Cmlong:
       +                        if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
       +                                goto yes;
       +                        break;
       +                case Cifc:
       +                        if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
       +                                goto yes;
       +                        break;
       +                case Cmifc:
       +                        if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
       +                                goto yes;
       +                        break;
       +                default:
       +                        /* generic case: any of n values, compared byte by byte under the mask */
       +                        v = mux->val;
       +                        for(e = mux->e; v < e; v = ve){
       +                                m = mux->mask;
       +                                /*
       +                                 *  ifc filters compare the interface address,
       +                                 *  as Cifc does; their off is not a packet
       +                                 *  offset.  Everything else must honour the
       +                                 *  header skip like the fast cases above.
       +                                 */
       +                                if(mux->type == Tifc)
       +                                        hp = ifc->lifc->local + IPv4off;
       +                                else
       +                                        hp = h + mux->off + skip;
       +                                for(ve = v + mux->len; v < ve; v++){
       +                                        if((*hp++ & *m++) != *v)
       +                                                break;
       +                                }
       +                                if(v == ve)
       +                                        goto yes;
       +                        }
       +                }
       +                mux = mux->no;
       +                continue;
       +yes:
       +                /* remember the deepest conversation seen on the matching path */
       +                if(mux->conv != nil)
       +                        c = mux->conv;
       +                mux = mux->yes;
       +        }
       +        RUNLOCK(f);
       +
       +        if(c != nil){
       +                /* tack on interface address */
       +                bp = padblock(bp, IPaddrlen);
       +                ipmove(bp->rp, ifc->lifc->local);
       +                bp = concatblock(bp);
       +                if(bp != nil)
       +                        if(qpass(c->rq, bp) < 0)
       +                                print("Q");
       +                return;
       +        }
       +
       +nomatch:
       +        /* doesn't match any filter, hand it to the specific protocol handler */
       +        ip = (Myip4hdr*)bp->rp;
       +        if((ip->vihl & 0xF0) == IP_VER4) {
       +                p = f->t2p[ip->proto];
       +        } else {
       +                ip6 = (Ip6hdr*)bp->rp;
       +                p = f->t2p[ip6->proto];
       +        }
       +        if(p && p->rcv)
       +                (*p->rcv)(p, ifc, bp);
       +        else
       +                freeblist(bp);
       +        return;
       +}
       +
       +/*
       + *  Format the tree rooted at mux into buf (len bytes), one node
       + *  per line, indented by one blank per level:
       + *        h[first:last]&mask=val|val|...
       + *  A nil subtree prints as an empty line.  The no side is
       + *  printed before the yes side.  Returns the sum of the
       + *  snprint results.
       + */
       +static int
       +ipmuxsprint(Ipmux *mux, int level, char *buf, int len)
       +{
       +        int i, j, n, first;
       +        uchar *v;
       +
       +        n = 0;
       +        for(i = level; i > 0; i--)
       +                n += snprint(buf+n, len-n, " ");
       +        if(mux == nil)
       +                return n + snprint(buf+n, len-n, "\n");
       +
       +        /* offsets are shown relative to the start of the ip header */
       +        first = mux->off+((int)mux->skiphdr)*((int)ipoff->data);
       +        n += snprint(buf+n, len-n, "h[%d:%d]&", first, first+mux->len-1);
       +        for(i = 0; i < mux->len; i++)
       +                n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
       +        n += snprint(buf+n, len-n, "=");
       +
       +        v = mux->val;
       +        for(j = 0; j < mux->n; j++){
       +                for(i = 0; i < mux->len; i++, v++)
       +                        n += snprint(buf+n, len - n, "%2.2ux", *v);
       +                n += snprint(buf+n, len-n, "|");
       +        }
       +        n += snprint(buf+n, len-n, "\n");
       +
       +        n += ipmuxsprint(mux->no, level+1, buf+n, len-n);
       +        n += ipmuxsprint(mux->yes, level+1, buf+n, len-n);
       +        return n;
       +}
       +
       +/*
       + *  format the protocol's whole demux tree into buf while
       + *  holding the read lock
       + */
       +static int
       +ipmuxstats(Proto *p, char *buf, int len)
       +{
       +        Fs *f;
       +        int nwritten;
       +
       +        f = p->f;
       +        RLOCK(f);
       +        nwritten = ipmuxsprint(p->priv, 0, buf, len);
       +        RUNLOCK(f);
       +        return nwritten;
       +}
       +
       +/*
       + *  allocate the ipmux protocol, fill in its entry points and
       + *  register it with f
       + */
       +void
       +ipmuxinit(Fs *f)
       +{
       +        Proto *pr;
       +
       +        pr = smalloc(sizeof(Proto));
       +
       +        /* identity and sizing */
       +        pr->name = "ipmux";
       +        pr->ipproto = -1;
       +        pr->nc = 64;
       +        pr->ptclsize = sizeof(Ipmuxrock);
       +        pr->priv = nil;                /* demux tree root; empty until the first connect */
       +
       +        /* entry points */
       +        pr->create = ipmuxcreate;
       +        pr->connect = ipmuxconnect;
       +        pr->announce = ipmuxannounce;
       +        pr->close = ipmuxclose;
       +        pr->state = ipmuxstate;
       +        pr->stats = ipmuxstats;
       +        pr->rcv = ipmuxiput;
       +        pr->ctl = nil;
       +        pr->advise = nil;
       +
       +        f->ipmux = pr;                        /* hack for Fsrcvpcol */
       +
       +        Fsproto(f, pr);
       +}
 (DIR) diff --git a/src/9vx/a/ip/iproute.c b/src/9vx/a/ip/iproute.c
       @@ -0,0 +1,854 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +
       +#include        "ip.h"
       +
       +/* forward declarations: walkadd and calcd are defined below; addnode is used by walkadd */
       +static void        walkadd(Fs*, Route**, Route*);
       +static void        addnode(Fs*, Route**, Route*);
       +static void        calcd(Route*);
       +
       +/* these are used for all instances of IP */
       +static Route*        v4freelist;        /* recycled v4 nodes, linked through mid */
       +static Route*        v6freelist;        /* recycled v6 nodes, linked through mid */
       +static RWlock        routelock;
       +static ulong        v4routegeneration, v6routegeneration;
       +
       +/*
       + *  put r on the free list for its address family; the list
       + *  is linked through the mid pointers
       + */
       +static void
       +freeroute(Route *r)
       +{
       +        Route **head;
       +
       +        head = (r->type & Rv4) ? &v4freelist : &v6freelist;
       +        r->left = nil;
       +        r->right = nil;
       +        r->mid = *head;
       +        *head = r;
       +}
       +
       +/*
       + *  get a zeroed route node of the given type, taken from the
       + *  matching free list when possible and malloc'd otherwise.
       + *  Panics if malloc fails.  The node starts with ref 1.
       + */
       +static Route*
       +allocroute(int type)
       +{
       +        Route *r, **head;
       +        int size;
       +
       +        size = sizeof(RouteTree);
       +        if(type & Rv4){
       +                size += sizeof(V4route);
       +                head = &v4freelist;
       +        } else {
       +                size += sizeof(V6route);
       +                head = &v6freelist;
       +        }
       +
       +        r = *head;
       +        if(r == nil){
       +                r = malloc(size);
       +                if(r == nil)
       +                        panic("out of routing nodes");
       +        } else
       +                *head = r->mid;
       +
       +        memset(r, 0, size);
       +        r->type = type;
       +        r->ifc = nil;
       +        r->ref = 1;
       +        return r;
       +}
       +
       +/*
       + *  push r on the front of the queue *q.  The queue is made of
       + *  freshly allocated cells linked through mid, each holding
       + *  its route in left.  A nil r is ignored.
       + */
       +static void
       +addqueue(Route **q, Route *r)
       +{
       +        Route *cell;
       +
       +        if(r != nil){
       +                cell = allocroute(r->type);
       +                cell->left = r;
       +                cell->mid = *q;
       +                *q = cell;
       +        }
       +}
       +
       +/*
       + *   compare 2 v6 addresses
       + */
       +static int
       +lcmp(ulong *a, ulong *b)
       +{
       +        int i;
       +
       +        /* the first differing word, starting from index 0, decides */
       +        for(i = 0; i < IPllen; i++)
       +                if(a[i] != b[i])
       +                        return a[i] > b[i] ? 1 : -1;
       +        return 0;
       +}
       +
       +/*
       + *  compare 2 v4 or v6 ranges
       + */
       +/* results of rangecompare(a, b), describing a relative to b */
       +enum
       +{
       +        Rpreceeds,        /* a ends before b starts */
       +        Rfollows,        /* a starts after b ends */
       +        Requals,        /* same start and end */
       +        Rcontains,        /* a covers all of b and more */
       +        Rcontained,        /* everything else that overlaps */
       +};
       +
       +/*
       + *  classify the address range of a relative to that of b;
       + *  a's type selects the v4 or v6 comparison.
       + */
       +static int
       +rangecompare(Route *a, Route *b)
       +{
       +        int lo, hi;
       +
       +        if(a->type & Rv4){
       +                if(a->v4.endaddress < b->v4.address)
       +                        return Rpreceeds;
       +                if(a->v4.address > b->v4.endaddress)
       +                        return Rfollows;
       +                /* the ranges overlap; does a cover all of b? */
       +                if(a->v4.address > b->v4.address
       +                || a->v4.endaddress < b->v4.endaddress)
       +                        return Rcontained;
       +                if(a->v4.address == b->v4.address
       +                && a->v4.endaddress == b->v4.endaddress)
       +                        return Requals;
       +                return Rcontains;
       +        }
       +
       +        if(lcmp(a->v6.endaddress, b->v6.address) < 0)
       +                return Rpreceeds;
       +        if(lcmp(a->v6.address, b->v6.endaddress) > 0)
       +                return Rfollows;
       +
       +        /* the ranges overlap; compare the two starts and the two ends */
       +        lo = lcmp(a->v6.address, b->v6.address);
       +        hi = lcmp(a->v6.endaddress, b->v6.endaddress);
       +        if(lo > 0 || hi < 0)
       +                return Rcontained;
       +        if(lo == 0 && hi == 0)
       +                return Requals;
       +        return Rcontains;
       +}
       +
       +/*
       + *  overwrite old's gateway with new's; the address family
       + *  (and so the number of bytes copied) comes from new->type.
       + */
       +static void
       +copygate(Route *old, Route *new)
       +{
       +        if((new->type & Rv4) == 0){
       +                memmove(old->v6.gate, new->v6.gate, IPaddrlen);
       +                return;
       +        }
       +        memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
       +}
       +
       +/*
       + *  re-insert every node of the subtree at p into the tree at root.
       + *  p is detached from its left and right children (its mid link is
       + *  left alone) and added first, then the two children are walked
       + *  the same way, left before right.
       + */
       +static void
       +walkadd(Fs *f, Route **root, Route *p)
       +{
       +        Route *lsub, *rsub;
       +
       +        /* remember the children before addnode relinks p */
       +        lsub = p->left;
       +        rsub = p->right;
       +        p->left = nil;
       +        p->right = nil;
       +
       +        addnode(f, root, p);
       +
       +        if(lsub != nil)
       +                walkadd(f, root, lsub);
       +        if(rsub != nil)
       +                walkadd(f, root, rsub);
       +}
       +
       +/*
       + *  recompute p->depth as one more than the deepest of its
       + *  left, right and mid children; a nil p is ignored.
       + */
       +static void
       +calcd(Route *p)
       +{
       +        Route *kid;
       +        int deepest;
       +
       +        if(p == nil)
       +                return;
       +
       +        deepest = 0;
       +
       +        kid = p->left;
       +        if(kid != nil)
       +                deepest = kid->depth;
       +
       +        kid = p->right;
       +        if(kid != nil && kid->depth > deepest)
       +                deepest = kid->depth;
       +
       +        kid = p->mid;
       +        if(kid != nil && kid->depth > deepest)
       +                deepest = kid->depth;
       +
       +        p->depth = deepest+1;
       +}
       +
       +/*
       + *  balance the tree at the current node: when the left and
       + *  right depths differ by more than one, rotate the deeper
       + *  child up into *cur; depths of the nodes involved are
       + *  recomputed either way.
       + */
       +static void
       +balancetree(Route **cur)
       +{
       +        Route *top, *lsub, *rsub;
       +        int ldepth, rdepth;
       +
       +        top = *cur;
       +        lsub = top->left;
       +        rsub = top->right;
       +        ldepth = lsub != nil ? lsub->depth : 0;
       +        rdepth = rsub != nil ? rsub->depth : 0;
       +
       +        if(ldepth > rdepth+1) {
       +                /* left side too deep: left child becomes the new top */
       +                top->left = lsub->right;
       +                lsub->right = top;
       +                *cur = lsub;
       +                calcd(top);
       +                calcd(lsub);
       +                return;
       +        }
       +
       +        if(rdepth > ldepth+1) {
       +                /* right side too deep: right child becomes the new top */
       +                top->right = rsub->left;
       +                rsub->left = top;
       +                *cur = rsub;
       +                calcd(top);
       +                calcd(rsub);
       +                return;
       +        }
       +
       +        /* close enough: just refresh this node's depth */
       +        calcd(top);
       +}
       +
       +/*
       + *  add a new node to the tree rooted at *cur.
       + *
       + *  each node has three links: left for ranges that precede it,
       + *  right for ranges that follow it, and mid for ranges that
       + *  rangecompare reports as Rcontained.  a node displaced by a
       + *  superset is handed to addqueue on f->queue; the caller is
       + *  expected to re-add it.  when new duplicates an existing
       + *  range it is freed here rather than linked in.
       + */
       +static void
       +addnode(Fs *f, Route **cur, Route *new)
       +{
       +        Route *p;
       +
       +        p = *cur;
       +        if(p == 0) {
       +                /* empty slot: new becomes a leaf here */
       +                *cur = new;
       +                new->depth = 1;
       +                return;
       +        }
       +
       +        switch(rangecompare(new, p)){
       +        case Rpreceeds:
       +                addnode(f, &p->left, new);
       +                break;
       +        case Rfollows:
       +                addnode(f, &p->right, new);
       +                break;
       +        case Rcontains:
       +                /*
       +                 *  if new node is superset
       +                 *  of tree node,
       +                 *  replace tree node and
       +                 *  queue tree node to be
       +                 *  merged into root.
       +                 */
       +                *cur = new;
       +                new->depth = 1;
       +                addqueue(&f->queue, p);
       +                break;
       +        case Requals:
       +                /*
       +                 *  supercede the old entry if the old one isn't
       +                 *  a local interface.
       +                 */
       +                if((p->type & Rifc) == 0){
       +                        p->type = new->type;
       +                        p->ifcid = -1;
       +                        copygate(p, new);
       +                } else if(new->type & Rifc)
       +                        p->ref++;        /* both are interface routes: count the extra reference */
       +                freeroute(new);
       +                break;
       +        case Rcontained:
       +                addnode(f, &p->mid, new);
       +                break;
       +        }
       +        
       +        /* *cur may now be new (Rcontains) or still p */
       +        balancetree(cur);
       +}
       +
       +/* map a v4 address held in a ulong to its index in the v4root[] table */
       +#define        V4H(a)        (((a)&0x07ffffff)>>(32-Lroot-5))
       +
       +/*
       + *  add a v4 route covering the range selected by a and mask.
       + *
       + *  a and mask are read as 4-byte values with nhgetl; gate supplies
       + *  sizeof(p->v4.gate) bytes and tag supplies sizeof(p->tag) bytes,
       + *  so both buffers must be at least that long.  type is or'ed
       + *  with Rv4.  the range may span several v4root[] buckets; a
       + *  separate Route is allocated and inserted for each bucket from
       + *  V4H(sa) through V4H(ea), taking routelock for writing around
       + *  each insert.  afterwards v4routegeneration is bumped and
       + *  ipifcaddroute is called with the original arguments.
       + */
       +void
       +v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
       +{
       +        Route *p;
       +        ulong sa;
       +        ulong m;
       +        ulong ea;
       +        int h, eh;
       +
       +        m = nhgetl(mask);
       +        sa = nhgetl(a) & m;        /* first address of the range */
       +        ea = sa | ~m;                /* last address of the range */
       +
       +        eh = V4H(ea);
       +        for(h=V4H(sa); h<=eh; h++) {
       +                p = allocroute(Rv4 | type);
       +                p->v4.address = sa;
       +                p->v4.endaddress = ea;
       +                memmove(p->v4.gate, gate, sizeof(p->v4.gate));
       +                memmove(p->tag, tag, sizeof(p->tag));
       +
       +                wlock(&routelock);
       +                addnode(f, &f->v4root[h], p);
       +                /*
       +                 *  drain whatever addnode queued: entries are chained
       +                 *  through mid, the subtree to re-add hangs off left,
       +                 *  and the entry itself is freed.
       +                 */
       +                while((p = f->queue) != nil) {
       +                        f->queue = p->mid;
       +                        walkadd(f, &f->v4root[h], p->left);
       +                        freeroute(p);
       +                }
       +                wunlock(&routelock);
       +        }
       +        v4routegeneration++;
       +
       +        ipifcaddroute(f, Rv4, a, mask, gate, type);
       +}
       +
       +/* V6H: index into the v6root[] table, taken from the last word of a v6 address array */
       +#define        V6H(a)        (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
       +/* ISDFLT: address and mask both unspecified and tag is not "ra" */
       +#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
       +
       +/*
       + *  add a v6 route covering the range selected by a and mask.
       + *
       + *  a and mask are read as IPllen 4-byte words with nhgetl; gate
       + *  supplies IPaddrlen bytes and tag supplies sizeof(p->tag) bytes.
       + *  type is passed to allocroute unchanged (no Rv4 bit is added).
       + *  one Route is allocated and inserted for each v6root[] bucket
       + *  from V6H(sa) through V6H(ea), taking routelock for writing
       + *  around each insert.  afterwards v6routegeneration is bumped
       + *  and ipifcaddroute is called with the original arguments.
       + */
       +void
       +v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
       +{
       +        Route *p;
       +        ulong sa[IPllen], ea[IPllen];
       +        ulong x, y;
       +        int h, eh;
       +
       +        /*
       +        if(ISDFLT(a, mask, tag))
       +                f->v6p->cdrouter = -1;
       +        */
       +
       +
       +        /* word by word: sa is the first address of the range, ea the last */
       +        for(h = 0; h < IPllen; h++){
       +                x = nhgetl(a+4*h);
       +                y = nhgetl(mask+4*h);
       +                sa[h] = x & y;
       +                ea[h] = x | ~y;
       +        }
       +
       +        eh = V6H(ea);
       +        for(h = V6H(sa); h <= eh; h++) {
       +                p = allocroute(type);
       +                memmove(p->v6.address, sa, IPaddrlen);
       +                memmove(p->v6.endaddress, ea, IPaddrlen);
       +                memmove(p->v6.gate, gate, IPaddrlen);
       +                memmove(p->tag, tag, sizeof(p->tag));
       +
       +                wlock(&routelock);
       +                addnode(f, &f->v6root[h], p);
       +                /*
       +                 *  drain whatever addnode queued: entries are chained
       +                 *  through mid, the subtree to re-add hangs off left,
       +                 *  and the entry itself is freed.
       +                 */
       +                while((p = f->queue) != nil) {
       +                        f->queue = p->mid;
       +                        walkadd(f, &f->v6root[h], p->left);
       +                        freeroute(p);
       +                }
       +                wunlock(&routelock);
       +        }
       +        v6routegeneration++;
       +
       +        ipifcaddroute(f, 0, a, mask, gate, type);
       +}
       +
       +/*
       + *  find the link that points at the node whose range equals r's.
       + *  returns the address of that link (so the caller can unlink
       + *  the node), or nil when there is no exact match.
       + */
       +Route**
       +looknode(Route **cur, Route *r)
       +{
       +        Route *p;
       +
       +        while((p = *cur) != nil){
       +                switch(rangecompare(r, p)){
       +                case Requals:
       +                        return cur;
       +                case Rcontains:
       +                        /* r is wider than this node: it cannot be below it */
       +                        return nil;
       +                case Rpreceeds:
       +                        cur = &p->left;
       +                        break;
       +                case Rfollows:
       +                        cur = &p->right;
       +                        break;
       +                case Rcontained:
       +                        cur = &p->mid;
       +                        break;
       +                }
       +        }
       +        return nil;
       +}
       +
       +/*
       + *  drop one reference to the v4 route whose range is exactly
       + *  a/mask, in every v4root[] bucket the range spans.  when the
       + *  reference count reaches zero the node is unlinked and freed
       + *  and its three subtrees are re-added to the bucket.
       + *
       + *  dolock non-zero makes this routine take routelock for writing
       + *  around each bucket; with dolock zero no locking is done here.
       + *  v4routegeneration is bumped and ipifcremroute is called
       + *  whether or not a matching route was found.
       + */
       +void
       +v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
       +{
       +        Route **r, *p;
       +        Route rt;
       +        int h, eh;
       +        ulong m;
       +
       +        /* rt is only a search key: just the fields rangecompare reads are set */
       +        m = nhgetl(mask);
       +        rt.v4.address = nhgetl(a) & m;
       +        rt.v4.endaddress = rt.v4.address | ~m;
       +        rt.type = Rv4;
       +
       +        eh = V4H(rt.v4.endaddress);
       +        for(h=V4H(rt.v4.address); h<=eh; h++) {
       +                if(dolock)
       +                        wlock(&routelock);
       +                r = looknode(&f->v4root[h], &rt);
       +                if(r) {
       +                        p = *r;
       +                        if(--(p->ref) == 0){
       +                                /* unlink p, then queue its children for re-insertion */
       +                                *r = 0;
       +                                addqueue(&f->queue, p->left);
       +                                addqueue(&f->queue, p->mid);
       +                                addqueue(&f->queue, p->right);
       +                                freeroute(p);
       +                                /* queue entries chain through mid and carry a subtree in left */
       +                                while((p = f->queue) != nil) {
       +                                        f->queue = p->mid;
       +                                        walkadd(f, &f->v4root[h], p->left);
       +                                        freeroute(p);
       +                                }
       +                        }
       +                }
       +                if(dolock)
       +                        wunlock(&routelock);
       +        }
       +        v4routegeneration++;
       +
       +        ipifcremroute(f, Rv4, a, mask);
       +}
       +
       +/*
       + *  drop one reference to the v6 route whose range is exactly
       + *  a/mask, in every v6root[] bucket the range spans.  when the
       + *  reference count reaches zero the node is unlinked and freed
       + *  and its three subtrees are re-added to the bucket.
       + *
       + *  dolock non-zero makes this routine take routelock for writing
       + *  around each bucket; with dolock zero no locking is done here.
       + *  v6routegeneration is bumped and ipifcremroute is called
       + *  whether or not a matching route was found.
       + */
       +void
       +v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
       +{
       +        Route **r, *p;
       +        Route rt;
       +        int h, eh;
       +        ulong x, y;
       +
       +        /* rt is only a search key: just the fields rangecompare reads are set */
       +        for(h = 0; h < IPllen; h++){
       +                x = nhgetl(a+4*h);
       +                y = nhgetl(mask+4*h);
       +                rt.v6.address[h] = x & y;
       +                rt.v6.endaddress[h] = x | ~y;
       +        }
       +        rt.type = 0;        /* Rv4 clear, so rangecompare takes its v6 path */
       +
       +        eh = V6H(rt.v6.endaddress);
       +        for(h=V6H(rt.v6.address); h<=eh; h++) {
       +                if(dolock)
       +                        wlock(&routelock);
       +                r = looknode(&f->v6root[h], &rt);
       +                if(r) {
       +                        p = *r;
       +                        if(--(p->ref) == 0){
       +                                /* unlink p, then queue its children for re-insertion */
       +                                *r = 0;
       +                                addqueue(&f->queue, p->left);
       +                                addqueue(&f->queue, p->mid);
       +                                addqueue(&f->queue, p->right);
       +                                freeroute(p);
       +                                /* queue entries chain through mid and carry a subtree in left */
       +                                while((p = f->queue) != nil) {
       +                                        f->queue = p->mid;
       +                                        walkadd(f, &f->v6root[h], p->left);
       +                                        freeroute(p);
       +                                }
       +                        }
       +                }
       +                if(dolock)
       +                        wunlock(&routelock);
       +        }
       +        v6routegeneration++;
       +
       +        ipifcremroute(f, 0, a, mask);
       +}
       +
       +/*
       + *  find the route for the 4-byte v4 address a.
       + *
       + *  c may be nil.  when c holds a cached route that has an
       + *  interface and was stored at the current v4routegeneration,
       + *  that route is returned without searching.  otherwise the
       + *  bucket for a is walked; every node whose range holds a is
       + *  remembered and the walk continues down its mid link, so the
       + *  last node remembered is the one returned.
       + *
       + *  returns nil when no route matches, or when the matching
       + *  route needs an interface and findipifc cannot supply one.
       + */
       +Route*
       +v4lookup(Fs *f, uchar *a, Conv *c)
       +{
       +        Route *p, *q;
       +        ulong la;
       +        uchar gate[IPaddrlen];
       +        Ipifc *ifc;
       +
       +        if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
       +                return c->r;
       +
       +        la = nhgetl(a);
       +        q = nil;
       +        for(p=f->v4root[V4H(la)]; p;)
       +                if(la >= p->v4.address) {
       +                        if(la <= p->v4.endaddress) {
       +                                /* a is inside this node: keep it, look for a nested match */
       +                                q = p;
       +                                p = p->mid;
       +                        } else
       +                                p = p->right;
       +                } else
       +                        p = p->left;
       +
       +        /* (re)bind the route to an interface if it has none or its ifcid no longer matches */
       +        if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
       +                if(q->type & Rifc) {
       +                        /* interface route: look up by the route's own address, v4-in-v6 form */
       +                        hnputl(gate+IPv4off, q->v4.address);
       +                        memmove(gate, v4prefix, IPv4off);
       +                } else
       +                        v4tov6(gate, q->v4.gate);
       +                ifc = findipifc(f, gate, q->type);
       +                if(ifc == nil)
       +                        return nil;
       +                q->ifc = ifc;
       +                q->ifcid = ifc->ifcid;
       +        }
       +
       +        if(c != nil){
       +                c->r = q;
       +                c->rgen = v4routegeneration;
       +        }
       +
       +        return q;
       +}
       +
       +/*
       + *  find the route for the IPaddrlen-byte address a.
       + *
       + *  an address that starts with v4prefix is first tried with
       + *  v4lookup on its last 4 bytes; only if that fails does the v6
       + *  search run.  c may be nil; a cached route in c is returned
       + *  when it has an interface and was stored at the current
       + *  v6routegeneration.  the tree walk remembers every node whose
       + *  range holds a and continues down its mid link, so the last
       + *  node remembered is the one returned.
       + *
       + *  returns nil when no route matches, or when the matching
       + *  route needs an interface and findipifc cannot supply one.
       + */
       +Route*
       +v6lookup(Fs *f, uchar *a, Conv *c)
       +{
       +        Route *p, *q;
       +        ulong la[IPllen];
       +        int h;
       +        ulong x, y;
       +        uchar gate[IPaddrlen];
       +        Ipifc *ifc;
       +
       +        if(memcmp(a, v4prefix, IPv4off) == 0){
       +                q = v4lookup(f, a+IPv4off, c);
       +                if(q != nil)
       +                        return q;
       +        }
       +
       +        if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
       +                return c->r;
       +
       +        for(h = 0; h < IPllen; h++)
       +                la[h] = nhgetl(a+4*h);
       +
       +        q = 0;
       +        for(p=f->v6root[V6H(la)]; p;){
       +                /* la below the node's first address: go left */
       +                for(h = 0; h < IPllen; h++){
       +                        x = la[h];
       +                        y = p->v6.address[h];
       +                        if(x == y)
       +                                continue;
       +                        if(x < y){
       +                                p = p->left;
       +                                goto next;
       +                        }
       +                        break;
       +                }
       +                /* la above the node's last address: go right */
       +                for(h = 0; h < IPllen; h++){
       +                        x = la[h];
       +                        y = p->v6.endaddress[h];
       +                        if(x == y)
       +                                continue;
       +                        if(x > y){
       +                                p = p->right;
       +                                goto next;
       +                        }
       +                        break;
       +                }
       +                /* la is inside this node: keep it, look for a nested match */
       +                q = p;
       +                p = p->mid;
       +next:                ;
       +        }
       +
       +        /* (re)bind the route to an interface if it has none or its ifcid no longer matches */
       +        if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
       +                if(q->type & Rifc) {
       +                        /* interface route: look up by the route's own first address */
       +                        for(h = 0; h < IPllen; h++)
       +                                hnputl(gate+4*h, q->v6.address[h]);
       +                        ifc = findipifc(f, gate, q->type);
       +                } else
       +                        ifc = findipifc(f, q->v6.gate, q->type);
       +                if(ifc == nil)
       +                        return nil;
       +                q->ifc = ifc;
       +                q->ifcid = ifc->ifcid;
       +        }
       +        if(c != nil){
       +                c->r = q;
       +                c->rgen = v6routegeneration;
       +        }
       +        
       +        return q;
       +}
       +
       +/*
       + *  render the route type bits as a 4-character, blank-padded,
       + *  NUL-terminated string in p (which must hold 5 bytes):
       + *  '4' or '6', then 'i' for Rifc, then one of 'u', 'b', 'm'
       + *  for Runi, Rbcast, Rmulti, then 'p' for Rptpt.
       + */
       +void
       +routetype(int type, char *p)
       +{
       +        int i;
       +
       +        memset(p, ' ', 4);
       +        p[4] = 0;
       +
       +        i = 0;
       +        p[i++] = (type & Rv4) ? '4' : '6';
       +        if(type & Rifc)
       +                p[i++] = 'i';
       +
       +        /* at most one of these, in this order of preference */
       +        if(type & Runi)
       +                p[i++] = 'u';
       +        else if(type & Rbcast)
       +                p[i++] = 'b';
       +        else if(type & Rmulti)
       +                p[i++] = 'm';
       +
       +        if(type & Rptpt)
       +                p[i] = 'p';
       +}
       +
       +/* one line per route; sprintroute supplies address, mask, gate, type string, tag, interface name */
       +static char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
       +
       +/*
       + *  unpack route r into byte-array form.
       + *
       + *  addr, mask and gate each receive IPaddrlen bytes; v4 routes
       + *  are written with v4prefix in front of the address and gate
       + *  and 0xff bytes in front of the mask.  t receives the string
       + *  built by routetype.  *nifc is set to r->ifc->conv->x, or to
       + *  -1 when the route has no interface.
       + */
       +void
       +convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
       +{
       +        int w;
       +
       +        if((r->type & Rv4) == 0){
       +                /* v6: the mask has a 1 wherever first and last address agree */
       +                for(w = 0; w < IPllen; w++){
       +                        hnputl(addr + 4*w, r->v6.address[w]);
       +                        hnputl(mask + 4*w, ~(r->v6.endaddress[w] ^ r->v6.address[w]));
       +                }
       +                memmove(gate, r->v6.gate, IPaddrlen);
       +        } else {
       +                memmove(addr, v4prefix, IPv4off);
       +                hnputl(addr+IPv4off, r->v4.address);
       +                memset(mask, 0xff, IPv4off);
       +                hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
       +                memmove(gate, v4prefix, IPv4off);
       +                memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
       +        }
       +
       +        routetype(r->type, t);
       +
       +        *nifc = r->ifc != nil ? r->ifc->conv->x : -1;
       +}
       +
       +/*
       + *  format route r as one rformat line at rw->p, bounded by rw->e.
       + *
       + *  a negative rw->o is the number of bytes that must still be
       + *  skipped before output is kept.  while it is negative the line
       + *  is written at rw->p anyway; if the line is longer than the
       + *  remaining skip, the part past the skip is slid down to rw->p
       + *  and rw->p advances past it, otherwise rw->p stays put and the
       + *  next line overwrites this one.  once rw->o is non-negative
       + *  rw->p simply advances past each line.
       + *
       + *  this code is not in rr to reduce stack size
       + */
       +static void
       +sprintroute(Route *r, Routewalk *rw)
       +{
       +        int nifc, n;
       +        char t[5], *iname, ifbuf[5];
       +        uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
       +        char *p;
       +
       +        convroute(r, addr, mask, gate, t, &nifc);
       +        iname = "-";        /* shown when the route has no interface */
       +        if(nifc != -1) {
       +                iname = ifbuf;
       +                snprint(ifbuf, sizeof ifbuf, "%d", nifc);
       +        }
       +        p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
       +        if(rw->o < 0){
       +                n = p - rw->p;        /* bytes just formatted */
       +                if(n > -rw->o){
       +                        /* keep the n - skip bytes that lie past the offset */
       +                        memmove(rw->p, rw->p-rw->o, n+rw->o);
       +                        rw->p = p + rw->o;
       +                }
       +                rw->o += n;
       +        } else
       +                rw->p = p;
       +}
       +
       +/*
       + *  walk the tree at r in left, node, mid, right order, calling
       + *  rw->walk on each node whose own hash equals rw->h.
       + *  returns 0 as soon as the output buffer is full (rw->e <= rw->p),
       + *  which stops the walk; returns 1 otherwise.
       + */
       +static int
       +rr(Route *r, Routewalk *rw)
       +{
       +        int bucket;
       +
       +        if(rw->e <= rw->p)
       +                return 0;
       +        if(r == nil)
       +                return 1;
       +
       +        if(rr(r->left, rw) == 0)
       +                return 0;
       +
       +        /* visit the node only in the bucket its first address hashes to */
       +        bucket = (r->type & Rv4) ? V4H(r->v4.address) : V6H(r->v6.address);
       +        if(bucket == rw->h)
       +                rw->walk(r, rw);
       +
       +        if(rr(r->mid, rw) == 0)
       +                return 0;
       +
       +        return rr(r->right, rw);
       +}
       +
       +/*
       + *  apply rw->walk to the routes in every v4 bucket and then
       + *  every v6 bucket, holding routelock for reading throughout.
       + *  each table is skipped if the buffer is already full, and a
       + *  table's scan stops at the first bucket for which rr returns 0.
       + */
       +void
       +ipwalkroutes(Fs *f, Routewalk *rw)
       +{
       +        rlock(&routelock);
       +
       +        if(rw->e > rw->p) {
       +                rw->h = 0;
       +                while(rw->h < nelem(f->v4root)){
       +                        if(rr(f->v4root[rw->h], rw) == 0)
       +                                break;
       +                        rw->h++;
       +                }
       +        }
       +
       +        if(rw->e > rw->p) {
       +                rw->h = 0;
       +                while(rw->h < nelem(f->v6root)){
       +                        if(rr(f->v6root[rw->h], rw) == 0)
       +                                break;
       +                        rw->h++;
       +                }
       +        }
       +
       +        runlock(&routelock);
       +}
       +
       +/*
       + *  fill p with up to n bytes of the formatted routing table,
       + *  starting offset bytes into the text; returns the number of
       + *  bytes placed in p.
       + */
       +long
       +routeread(Fs *f, char *p, ulong offset, int n)
       +{
       +        Routewalk walker;
       +
       +        walker.walk = sprintroute;
       +        walker.o = -offset;        /* negative: bytes still to skip */
       +        walker.p = p;
       +        walker.e = p+n;
       +
       +        ipwalkroutes(f, &walker);
       +
       +        return walker.p - p;
       +}
       +
       +/*
       + *  delete route r by converting it back to address/mask form
       + *  and calling v4delroute or v6delroute; dolock is passed through.
       + *
       + *  this code is not in routeflush to reduce stack size
       + */
       +void
       +delroute(Fs *f, Route *r, int dolock)
       +{
       +        uchar addr[IPaddrlen];
       +        uchar mask[IPaddrlen];
       +        uchar gate[IPaddrlen];
       +        char t[5];
       +        int nifc;
       +
       +        /* only addr and mask are used; the rest is filled in by convroute */
       +        convroute(r, addr, mask, gate, t, &nifc);
       +
       +        if((r->type & Rv4) == 0){
       +                v6delroute(f, addr, mask, dolock);
       +                return;
       +        }
       +        v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
       +}
       +
       +/*
       + *  recurse until one route is deleted
       + *    returns 0 if nothing is deleted, 1 otherwise
       + *
       + *  subtrees are tried in mid, left, right order before the node
       + *  itself.  routes with Rifc set are never deleted; a nil tag
       + *  matches every other route, otherwise the first
       + *  sizeof(r->tag) bytes of tag must match r->tag.
       + */
       +int
       +routeflush(Fs *f, Route *r, char *tag)
       +{
       +        if(r == nil)
       +                return 0;
       +
       +        if(routeflush(f, r->mid, tag)
       +        || routeflush(f, r->left, tag)
       +        || routeflush(f, r->right, tag))
       +                return 1;
       +
       +        if(r->type & Rifc)
       +                return 0;
       +        if(tag != nil && strncmp(tag, r->tag, sizeof(r->tag)) != 0)
       +                return 0;
       +
       +        /* delroute is told not to lock */
       +        delroute(f, r, 0);
       +        return 1;
       +}
       +
       +/*
       + *  parse and carry out one route control request of n bytes at p.
       + *
       + *  requests:
       + *        flush [tag]                delete non-interface routes, all of them
       + *                                or only those carrying tag
       + *        remove addr mask        delete the route addr/mask
       + *        add addr mask gate        add a route; its tag comes from c->aux,
       + *                                or is "none" when c is nil
       + *        tag name                replace c->aux with a new IPaux for name
       + *
       + *  any other first word is ignored.  an empty request, too few
       + *  arguments, or "tag" without a channel raise Ebadarg; an
       + *  unparsable address raises Ebadip.  returns n on success.
       + */
       +long
       +routewrite(Fs *f, Chan *c, char *p, int n)
       +{
       +        int h, changed;
       +        char *tag;
       +        Cmdbuf *cb;
       +        uchar addr[IPaddrlen];
       +        uchar mask[IPaddrlen];
       +        uchar gate[IPaddrlen];
       +        IPaux *a, *na;
       +
       +        cb = parsecmd(p, n);
       +        if(waserror()){
       +                free(cb);
       +                nexterror();
       +        }
       +
       +        /* with no fields there is no cb->f[0] to compare against */
       +        if(cb->nf < 1)
       +                error(Ebadarg);
       +
       +        if(strcmp(cb->f[0], "flush") == 0){
       +                /* the tag is optional; nil makes routeflush match every route */
       +                tag = nil;
       +                if(cb->nf > 1)
       +                        tag = cb->f[1];
       +                /* routeflush deletes one route per call: repeat until none is left */
       +                for(h = 0; h < nelem(f->v4root); h++)
       +                        for(changed = 1; changed;){
       +                                wlock(&routelock);
       +                                changed = routeflush(f, f->v4root[h], tag);
       +                                wunlock(&routelock);
       +                        }
       +                for(h = 0; h < nelem(f->v6root); h++)
       +                        for(changed = 1; changed;){
       +                                wlock(&routelock);
       +                                changed = routeflush(f, f->v6root[h], tag);
       +                                wunlock(&routelock);
       +                        }
       +        } else if(strcmp(cb->f[0], "remove") == 0){
       +                if(cb->nf < 3)
       +                        error(Ebadarg);
       +                if (parseip(addr, cb->f[1]) == -1)
       +                        error(Ebadip);
       +                parseipmask(mask, cb->f[2]);
       +                if(memcmp(addr, v4prefix, IPv4off) == 0)
       +                        v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
       +                else
       +                        v6delroute(f, addr, mask, 1);
       +        } else if(strcmp(cb->f[0], "add") == 0){
       +                if(cb->nf < 4)
       +                        error(Ebadarg);
       +                if(parseip(addr, cb->f[1]) == -1 ||
       +                    parseip(gate, cb->f[3]) == -1)
       +                        error(Ebadip);
       +                parseipmask(mask, cb->f[2]);
       +                tag = "none";
       +                if(c != nil){
       +                        a = c->aux;
       +                        tag = a->tag;
       +                }
       +                if(memcmp(addr, v4prefix, IPv4off) == 0)
       +                        v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
       +                else
       +                        v6addroute(f, tag, addr, mask, gate, 0);
       +        } else if(strcmp(cb->f[0], "tag") == 0) {
       +                /* the tag lives in c->aux, so a channel is required */
       +                if(cb->nf < 2 || c == nil)
       +                        error(Ebadarg);
       +
       +                a = c->aux;
       +                na = newipaux(a->owner, cb->f[1]);
       +                c->aux = na;
       +                free(a);
       +        }
       +
       +        poperror();
       +        free(cb);
       +        return n;
       +}
 (DIR) diff --git a/src/9vx/a/ip/ipv6.c b/src/9vx/a/ip/ipv6.c
       @@ -0,0 +1,718 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +
       +#include        "ip.h"
       +#include        "ipv6.h"
       +
       +enum
       +{
       +        IP6FHDR                = 8,                 /* sizeof(Fraghdr6) */
       +};
       +
       +/*
       + * IPV6CLASS: the 8-bit class spread over the low nibble of vcf[0]
       + * (its high half) and the high nibble of vcf[1] (its low half).
       + * This is the inverse of the "tos >> 4" / "tos << 4" packing that
       + * ipoput6 uses when it fills in vcf[0] and vcf[1].
       + * BLKIPVER: the version nibble of the header at the block's read pointer.
       + */
       +#define IPV6CLASS(hdr)        (((hdr)->vcf[0]&0x0F)<<4 | ((hdr)->vcf[1]&0xF0)>>4)
       +#define BLKIPVER(xp)        (((Ip6hdr*)((xp)->rp))->vcf[0] & 0xF0)
       +/*
       + * This sleazy macro is stolen shamelessly from ip.c, see comment there.
       + */
       +#define BKFG(xp)        ((Ipfrag*)((xp)->base))
       +
       +/* structure tags used before their definitions further down */
       +typedef struct        Fragment4        Fragment4;
       +typedef struct        Fragment6        Fragment6;
       +typedef struct        Ipfrag        Ipfrag;
       +
       +/* routines declared ahead of their use in this file */
       +Block*                ip6reassemble(IP*, int, Block*, Ip6hdr*);
       +Fragment6*        ipfragallo6(IP*);
       +void                ipfragfree6(IP*, Fragment6*);
       +Block*                procopts(Block *bp);
       +static Block*        procxtns(IP *ip, Block *bp, int doreasm);
       +int                unfraglen(Block *bp, uchar *nexthdr, int setfh);
       +
       +/*
       + * MIB II counters: each constant is an index into the stats[]
       + * array of struct IP; Nstats is the number of counters.
       + */
       +enum
       +{
       +        Forwarding,
       +        DefaultTTL,
       +        InReceives,
       +        InHdrErrors,
       +        InAddrErrors,
       +        ForwDatagrams,
       +        InUnknownProtos,
       +        InDiscards,
       +        InDelivers,
       +        OutRequests,
       +        OutDiscards,
       +        OutNoRoutes,
       +        ReasmTimeout,
       +        ReasmReqds,
       +        ReasmOKs,
       +        ReasmFails,
       +        FragOKs,
       +        FragFails,
       +        FragCreates,
       +
       +        Nstats,
       +};
       +
       +/* printable name of each counter, indexed by the constants above */
       +static char *statnames[] =
       +{
       +[Forwarding]        "Forwarding",
       +[DefaultTTL]        "DefaultTTL",
       +[InReceives]        "InReceives",
       +[InHdrErrors]        "InHdrErrors",
       +[InAddrErrors]        "InAddrErrors",
       +[ForwDatagrams]        "ForwDatagrams",
       +[InUnknownProtos]        "InUnknownProtos",
       +[InDiscards]        "InDiscards",
       +[InDelivers]        "InDelivers",
       +[OutRequests]        "OutRequests",
       +[OutDiscards]        "OutDiscards",
       +[OutNoRoutes]        "OutNoRoutes",
       +[ReasmTimeout]        "ReasmTimeout",
       +[ReasmReqds]        "ReasmReqds",
       +[ReasmOKs]        "ReasmOKs",
       +[ReasmFails]        "ReasmFails",
       +[FragOKs]        "FragOKs",
       +[FragFails]        "FragFails",
       +[FragCreates]        "FragCreates",
       +};
       +
       +/*
       + * one entry of the v4 fragment lists kept in struct IP
       + * (flisthead4, fragfree4).
       + * NOTE(review): field meanings below are read off the names only;
       + * confirm against the v4 reassembly code.
       + */
       +struct Fragment4
       +{
       +        Block*        blist;                /* presumably the blocks collected so far */
       +        Fragment4*        next;        /* list link */
       +        ulong         src;
       +        ulong         dst;
       +        ushort        id;
       +        ulong         age;
       +};
       +
       +/*
       + * one entry of the v6 fragment lists kept in struct IP
       + * (flisthead6, fragfree6); addresses are IPaddrlen bytes.
       + * NOTE(review): field meanings below are read off the names only;
       + * confirm against ip6reassemble.
       + */
       +struct Fragment6
       +{
       +        Block*        blist;                /* presumably the blocks collected so far */
       +        Fragment6*        next;        /* list link */
       +        uchar         src[IPaddrlen];
       +        uchar         dst[IPaddrlen];
       +        uint        id;
       +        ulong         age;
       +};
       +
       +/*
       + * overlay that the BKFG macro applies to the base of a Block.
       + * NOTE(review): foff/flen look like fragment offset and length;
       + * confirm against the reassembly code.
       + */
       +struct Ipfrag
       +{
       +        ushort        foff;
       +        ushort        flen;
       +};
       +
       +/* an instance of IP; reached as f->ip from an Fs */
       +struct IP
       +{
       +        ulong                stats[Nstats];        /* MIB II counters, indexed by the enum above */
       +
       +        /* v4 fragment state; NOTE(review): presumably guarded by fraglock4 */
       +        QLock                fraglock4;
       +        Fragment4*        flisthead4;
       +        Fragment4*        fragfree4;
       +        Ref                id4;
       +
       +        /* v6 fragment state; NOTE(review): presumably guarded by fraglock6 */
       +        QLock                fraglock6;
       +        Fragment6*        flisthead6;
       +        Fragment6*        fragfree6;
       +        Ref                id6;
       +
       +        int                iprouting;        /* true if we route like a gateway */
       +};
       +
       +int
       +ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
       +{
       +        int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
       +        int morefrags, blklen, rv = 0, tentative;
       +        uchar *gate, nexthdr;
       +        Block *xp, *nb;
       +        Fraghdr6 fraghdr;
       +        IP *ip;
       +        Ip6hdr *eh;
       +        Ipifc *ifc;
       +        Route *r, *sr;
       +
       +        ip = f->ip;
       +
       +        /* Fill out the ip header */
       +        eh = (Ip6hdr*)(bp->rp);
       +
       +        ip->stats[OutRequests]++;
       +
       +        /* Number of uchars in data and ip header to write */
       +        len = blocklen(bp);
       +
       +        tentative = iptentative(f, eh->src);
       +        if(tentative){
       +                netlog(f, Logip, "reject tx of packet with tentative src address %I\n",
       +                        eh->src);
       +                goto free;
       +        }
       +
       +        if(gating){
       +                chunk = nhgets(eh->ploadlen);
       +                if(chunk > len){
       +                        ip->stats[OutDiscards]++;
       +                        netlog(f, Logip, "short gated packet\n");
       +                        goto free;
       +                }
       +                if(chunk + IP6HDR < len)
       +                        len = chunk + IP6HDR;
       +        }
       +
       +        if(len >= IP_MAX){
       +                ip->stats[OutDiscards]++;
       +                netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
       +                goto free;
       +        }
       +
       +        r = v6lookup(f, eh->dst, c);
       +        if(r == nil){
       +//                print("no route for %I, src %I free\n", eh->dst, eh->src);
       +                ip->stats[OutNoRoutes]++;
       +                netlog(f, Logip, "no interface %I\n", eh->dst);
       +                rv = -1;
       +                goto free;
       +        }
       +
       +        ifc = r->ifc;
       +        if(r->type & (Rifc|Runi))
       +                gate = eh->dst;
       +        else if(r->type & (Rbcast|Rmulti)) {
       +                gate = eh->dst;
       +                sr = v6lookup(f, eh->src, nil);
       +                if(sr && (sr->type & Runi))
       +                        ifc = sr->ifc;
       +        }
       +        else
       +                gate = r->v6.gate;
       +
       +        if(!gating)
       +                eh->vcf[0] = IP_VER6;
       +        eh->ttl = ttl;
       +        if(!gating) {
       +                eh->vcf[0] |= tos >> 4;
       +                eh->vcf[1]  = tos << 4;
       +        }
       +
       +        if(!CANRLOCK(ifc))
       +                goto free;
       +
       +        if(waserror()){
       +                RUNLOCK(ifc);
       +                nexterror();
       +        }
       +
       +        if(ifc->m == nil)
       +                goto raise;
       +
       +        /* If we dont need to fragment just send it */
       +        medialen = ifc->maxtu - ifc->m->hsize;
       +        if(len <= medialen) {
       +                hnputs(eh->ploadlen, len - IP6HDR);
       +                ifc->m->bwrite(ifc, bp, V6, gate);
       +                RUNLOCK(ifc);
       +                poperror();
       +                return 0;
       +        }
       +
       +        if(gating && ifc->reassemble <= 0) {
       +                /*
       +                 * v6 intermediate nodes are not supposed to fragment pkts;
       +                 * we fragment if ifc->reassemble is turned on; an exception
       +                 * needed for nat.
       +                 */
       +                ip->stats[OutDiscards]++;
       +                icmppkttoobig6(f, ifc, bp);
       +                netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
       +                goto raise;
       +        }
       +
       +        /* start v6 fragmentation */
       +        uflen = unfraglen(bp, &nexthdr, 1);
       +        if(uflen > medialen) {
       +                ip->stats[FragFails]++;
       +                ip->stats[OutDiscards]++;
       +                netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
       +                goto raise;
       +        }
       +
       +        flen = len - uflen;
       +        seglen = (medialen - (uflen + IP6FHDR)) & ~7;
       +        if(seglen < 8) {
       +                ip->stats[FragFails]++;
       +                ip->stats[OutDiscards]++;
       +                netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
       +                goto raise;
       +        }
       +
       +        lid = incref(&ip->id6);
       +        fraghdr.nexthdr = nexthdr;
       +        fraghdr.res = 0;
       +        hnputl(fraghdr.id, lid);
       +
       +        xp = bp;
       +        offset = uflen;
       +        while (xp && offset && offset >= BLEN(xp)) {
       +                offset -= BLEN(xp);
       +                xp = xp->next;
       +        }
       +        xp->rp += offset;
       +
       +        fragoff = 0;
       +        morefrags = 1;
       +
       +        for(; fragoff < flen; fragoff += seglen) {
       +                nb = allocb(uflen + IP6FHDR + seglen);
       +
       +                if(fragoff + seglen >= flen) {
       +                        seglen = flen - fragoff;
       +                        morefrags = 0;
       +                }
       +
       +                hnputs(eh->ploadlen, seglen+IP6FHDR);
       +                memmove(nb->wp, eh, uflen);
       +                nb->wp += uflen;
       +
       +                hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
       +                fraghdr.offsetRM[1] |= morefrags;
       +                memmove(nb->wp, &fraghdr, IP6FHDR);
       +                nb->wp += IP6FHDR;
       +
       +                /* Copy data */
       +                chunk = seglen;
       +                while (chunk) {
       +                        if(!xp) {
       +                                ip->stats[OutDiscards]++;
       +                                ip->stats[FragFails]++;
       +                                freeblist(nb);
       +                                netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
       +                                goto raise;
       +                        }
       +                        blklen = chunk;
       +                        if(BLEN(xp) < chunk)
       +                                blklen = BLEN(xp);
       +                        memmove(nb->wp, xp->rp, blklen);
       +
       +                        nb->wp += blklen;
       +                        xp->rp += blklen;
       +                        chunk -= blklen;
       +                        if(xp->rp == xp->wp)
       +                                xp = xp->next;
       +                }
       +
       +                ifc->m->bwrite(ifc, nb, V6, gate);
       +                ip->stats[FragCreates]++;
       +        }
       +        ip->stats[FragOKs]++;
       +
       +raise:
       +        RUNLOCK(ifc);
       +        poperror();
       +free:
       +        freeblist(bp);
       +        return rv;
       +}
       +
       +/*
       + * ipiput6 - handle a v6 packet received on ifc.
       + *
       + * bp is consumed on every path: it is freed here, given to ipoput6
       + * when the packet is forwarded, given to the rcv routine of the
       + * protocol returned by Fsrcvpcol, or swallowed by procxtns (which
       + * then returns nil).
       + */
       +void
       +ipiput6(Fs *f, Ipifc *ifc, Block *bp)
       +{
       +        int hl, hop, tos, notforme, tentative;
       +        uchar proto;
       +        uchar v6dst[IPaddrlen];
       +        IP *ip;
       +        Ip6hdr *h;
       +        Proto *p;
       +        Route *r, *sr;
       +
       +        ip = f->ip;
       +        ip->stats[InReceives]++;
       +
       +        /*
       +         *  Ensure we have all the header info in the first
       +         *  block.  Make life easier for other protocols by
       +         *  collecting up to the first 64 bytes in the first block.
       +         */
       +        if(BLEN(bp) < 64) {
       +                hl = blocklen(bp);
       +                if(hl < IP6HDR)
       +                        hl = IP6HDR;
       +                if(hl > 64)
       +                        hl = 64;
       +                bp = pullupblock(bp, hl);
       +                if(bp == nil)
       +                        return;
       +        }
       +
       +        h = (Ip6hdr *)bp->rp;
       +
       +        memmove(&v6dst[0], &h->dst[0], IPaddrlen);
       +        notforme = ipforme(f, v6dst) == 0;
       +        tentative = iptentative(f, v6dst);
       +
       +        /* only ICMPv6 is accepted for an address that is still tentative */
       +        if(tentative && h->proto != ICMPv6) {
       +                print("tentative addr, drop\n");
       +                freeblist(bp);
       +                return;
       +        }
       +
       +        /* Check header version */
       +        if(BLKIPVER(bp) != IP_VER6) {
       +                ip->stats[InHdrErrors]++;
       +                /* the version is the top nibble of vcf[0]: shift by 4 to log it */
       +                netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>4);
       +                freeblist(bp);
       +                return;
       +        }
       +
       +        /* route */
       +        if(notforme) {
       +                if(!ip->iprouting){
       +                        /*
       +                         * bp may still be a chain (only the first 64 bytes
       +                         * were pulled up), so free the whole list like
       +                         * every other drop path in this function.
       +                         */
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /* don't forward to link-local destinations */
       +                if(islinklocal(h->dst) ||
       +                   (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
       +                        ip->stats[OutDiscards]++;
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /* don't forward to source's network */
       +                sr = v6lookup(f, h->src, nil);
       +                r  = v6lookup(f, h->dst, nil);
       +
       +                if(r == nil || sr == r){
       +                        ip->stats[OutDiscards]++;
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /* don't forward if packet has timed out */
       +                hop = h->ttl;
       +                if(hop < 1) {
       +                        ip->stats[InHdrErrors]++;
       +                        icmpttlexceeded6(f, ifc, bp);
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /* process headers & reassemble if the interface expects it */
       +                bp = procxtns(ip, bp, r->ifc->reassemble);
       +                if(bp == nil)
       +                        return;
       +
       +                /* procxtns may have returned a different block: reload h */
       +                ip->stats[ForwDatagrams]++;
       +                h = (Ip6hdr *)bp->rp;
       +                tos = IPV6CLASS(h);
       +                hop = h->ttl;
       +                ipoput6(f, bp, 1, hop-1, tos, nil);
       +                return;
       +        }
       +
       +        /* reassemble & process headers if needed */
       +        bp = procxtns(ip, bp, 1);
       +        if(bp == nil)
       +                return;
       +
       +        /* deliver to the protocol named by the (possibly rewritten) header */
       +        h = (Ip6hdr *) (bp->rp);
       +        proto = h->proto;
       +        p = Fsrcvpcol(f, proto);
       +        if(p && p->rcv) {
       +                ip->stats[InDelivers]++;
       +                (*p->rcv)(p, ifc, bp);
       +                return;
       +        }
       +
       +        ip->stats[InDiscards]++;
       +        ip->stats[InUnknownProtos]++;
       +        freeblist(bp);
       +}
       +
       +/*
       + * ipfragfree6 - retire a v6 reassembly queue: drop any blocks still
       + * queued on it, clear its key, unlink it from ip->flisthead6 and push
       + * it on ip->fragfree6.  Modelled on ipfragfree4; the caller is assumed
       + * to hold fraglock6.
       + */
       +void
       +ipfragfree6(IP *ip, Fragment6 *frag)
       +{
       +        Fragment6 **link;
       +
       +        if(frag->blist != nil)
       +                freeblist(frag->blist);
       +        frag->blist = nil;
       +        frag->id = 0;
       +        memset(frag->src, 0, IPaddrlen);
       +
       +        /* unlink frag from the active list, if it is on it */
       +        for(link = &ip->flisthead6; *link != nil; link = &(*link)->next) {
       +                if(*link == frag) {
       +                        *link = frag->next;
       +                        break;
       +                }
       +        }
       +
       +        /* hand the entry back to the free list */
       +        frag->next = ip->fragfree6;
       +        ip->fragfree6 = frag;
       +}
       +
       +/*
       + * ipfragallo6 - obtain a v6 reassembly queue entry (modelled on
       + * ipfragalloc4).  When the free list is empty the entry at the tail
       + * of the active list is released with ipfragfree6 and reused.  The
       + * entry returned is at the head of ip->flisthead6 with its age set to
       + * NOW + 30000; ip6reassemble treats age < NOW as expired.
       + */
       +Fragment6*
       +ipfragallo6(IP *ip)
       +{
       +        Fragment6 *tail, *fresh;
       +
       +        while(ip->fragfree6 == nil) {
       +                /* walk to the last entry on the active list and free it */
       +                tail = ip->flisthead6;
       +                while(tail->next != nil)
       +                        tail = tail->next;
       +                ipfragfree6(ip, tail);
       +        }
       +
       +        /* pop from the free list, push on the head of the active list */
       +        fresh = ip->fragfree6;
       +        ip->fragfree6 = fresh->next;
       +        fresh->next = ip->flisthead6;
       +        ip->flisthead6 = fresh;
       +        fresh->age = NOW + 30000;
       +
       +        return fresh;
       +}
       +
       +/*
       + * procxtns - walk the extension headers of bp.  If the header after
       + * the unfragmentable part is a fragment header and doreasm is non-zero,
       + * the packet goes through ip6reassemble; nil is returned when that
       + * call keeps or drops the block.  Packets with a destination options
       + * header, or with an unfragmentable part longer than IP6HDR, are
       + * passed through procopts.
       + */
       +static Block*
       +procxtns(IP *ip, Block *bp, int doreasm)
       +{
       +        uchar nxt;
       +        int uflen;
       +        Ip6hdr *hdr;
       +
       +        hdr = (Ip6hdr *)bp->rp;
       +        uflen = unfraglen(bp, &nxt, 0);
       +
       +        if(doreasm != 0 && nxt == FH) {
       +                bp = ip6reassemble(ip, uflen, bp, hdr);
       +                if(bp == nil)
       +                        return nil;
       +                /* measure again on the block that came back */
       +                uflen = unfraglen(bp, &nxt, 0);
       +        }
       +
       +        if(uflen > IP6HDR || nxt == DOH)
       +                return procopts(bp);
       +        return bp;
       +}
       +
       +/*
       + * unfraglen - length of the "Unfragmentable part" of the packet at
       + * bp->rp: the ipv6 header plus any hop-by-hop and routing headers that
       + * follow it.  *nexthdr receives the next-header value found in the last
       + * of those headers.  When that value is FH, the next-header byte of the
       + * fragment header is copied into that last header's next-header field.
       + * When setfh != 0 the field is then set to FH.
       + */
       +int
       +unfraglen(Block *bp, uchar *nexthdr, int setfh)
       +{
       +        uchar *cur, *nhfield;
       +        int total, extlen;
       +
       +        nhfield = bp->rp + 6;        /* proto: follows vcf[4] and ploadlen[2] */
       +        cur = bp->rp + IP6HDR;        /* first byte after the ipv6 header */
       +        total = IP6HDR;
       +        *nexthdr = *nhfield;
       +
       +        /* step over each hop-by-hop / routing header */
       +        for(; *nexthdr == HBH || *nexthdr == RH; cur += extlen) {
       +                *nexthdr = cur[0];
       +                extlen = ((int)cur[1] + 1) * 8;        /* len field counts 8-byte units beyond the first */
       +                total += extlen;
       +                nhfield = cur;
       +        }
       +
       +        /* cur now points at whatever follows the unfragmentable part */
       +        if(*nexthdr == FH)
       +                *nhfield = *cur;
       +        if(setfh)
       +                *nhfield = FH;
       +        return total;
       +}
       +
       +Block*
       +procopts(Block *bp)        /* option-processing hook called by procxtns; currently does nothing */
       +{
       +        return bp;        /* block is handed back untouched */
       +}
       +
       +/*
       + * ip6reassemble - add fragment bp to the reassembly queue selected by
       + * its (src, dst, id) and return the rebuilt packet once the queue holds
       + * a complete one.
       + *
       + * uflen is the length of the unfragmentable part, so the fragment
       + * header sits at bp->rp + uflen; ih points at the v6 header in bp.
       + * Returns bp itself when the fragment header's offset/M field is zero,
       + * the head of the assembled block list on completion, and nil when the
       + * fragment was queued or dropped.  Takes and releases ip->fraglock6.
       + */
       +Block*
       +ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
       +{
       +        int fend, offset, ovlap, len, fragsize, pktposn, fraglen;
       +        uint id;
       +        uchar src[IPaddrlen], dst[IPaddrlen];
       +        Block *bl, **l, *last, *prev;
       +        Fraghdr6 *fraghdr;
       +        Fragment6 *f, *fnext;
       +
       +        /*
       +         *  block lists are too hard, pullupblock into a single block.
       +         *  do it before taking any pointer into the packet, so that
       +         *  fraghdr and ih refer to the block used from here on, and
       +         *  treat a nil result as ipiput6 does.
       +         */
       +        if(bp->next){
       +                bp = pullupblock(bp, blocklen(bp));
       +                if(bp == nil)
       +                        return nil;
       +                ih = (Ip6hdr *)bp->rp;
       +        }
       +
       +        fraghdr = (Fraghdr6 *)(bp->rp + uflen);
       +        memmove(src, ih->src, IPaddrlen);
       +        memmove(dst, ih->dst, IPaddrlen);
       +        id = nhgetl(fraghdr->id);
       +        offset = nhgets(fraghdr->offsetRM) & ~7;
       +
       +        qlock(&ip->fraglock6);
       +
       +        /*
       +         *  find a reassembly queue for this fragment
       +         *  (expired queues met on the way are released)
       +         */
       +        for(f = ip->flisthead6; f; f = fnext){
       +                fnext = f->next;
       +                if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
       +                        break;
       +                if(f->age < NOW){
       +                        ip->stats[ReasmTimeout]++;
       +                        ipfragfree6(ip, f);
       +                }
       +        }
       +
       +        /*
       +         *  if this isn't a fragmented packet, accept it
       +         *  and get rid of any fragments that might go
       +         *  with it.
       +         */
       +        if(nhgets(fraghdr->offsetRM) == 0) {        /* 1st frag is also last */
       +                if(f) {
       +                        ipfragfree6(ip, f);
       +                        ip->stats[ReasmFails]++;
       +                }
       +                qunlock(&ip->fraglock6);
       +                return bp;
       +        }
       +
       +        /*
       +         *  read the payload length through ih before padding: the
       +         *  result of padblock is assigned back to bp, so afterwards
       +         *  ih may no longer point into the block we hold.
       +         */
       +        fraglen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
       +
       +        /* make room at the base of the block for the Ipfrag bookkeeping */
       +        if(bp->base+sizeof(Ipfrag) >= bp->rp){
       +                bp = padblock(bp, sizeof(Ipfrag));
       +                bp->rp += sizeof(Ipfrag);
       +        }
       +
       +        BKFG(bp)->foff = offset;
       +        BKFG(bp)->flen = fraglen;
       +
       +        /* First fragment allocates a reassembly queue */
       +        if(f == nil) {
       +                f = ipfragallo6(ip);
       +                f->id = id;
       +                memmove(f->src, src, IPaddrlen);
       +                memmove(f->dst, dst, IPaddrlen);
       +
       +                f->blist = bp;
       +
       +                qunlock(&ip->fraglock6);
       +                ip->stats[ReasmReqds]++;
       +                return nil;
       +        }
       +
       +        /*
       +         *  find the new fragment's position in the queue
       +         */
       +        prev = nil;
       +        l = &f->blist;
       +        bl = f->blist;
       +        while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
       +                prev = bl;
       +                l = &bl->next;
       +                bl = bl->next;
       +        }
       +
       +        /* Check overlap of a previous fragment - trim away as necessary */
       +        if(prev) {
       +                ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
       +                if(ovlap > 0) {
       +                        if(ovlap >= BKFG(bp)->flen) {
       +                                freeblist(bp);
       +                                qunlock(&ip->fraglock6);
       +                                return nil;
       +                        }
       +                        BKFG(prev)->flen -= ovlap;
       +                }
       +        }
       +
       +        /* Link onto assembly queue */
       +        bp->next = *l;
       +        *l = bp;
       +
       +        /* Check to see if succeeding segments overlap */
       +        if(bp->next) {
       +                l = &bp->next;
       +                fend = BKFG(bp)->foff + BKFG(bp)->flen;
       +
       +                /* Take completely covered segments out */
       +                while(*l) {
       +                        ovlap = fend - BKFG(*l)->foff;
       +                        if(ovlap <= 0)
       +                                break;
       +                        if(ovlap < BKFG(*l)->flen) {
       +                                BKFG(*l)->flen -= ovlap;
       +                                BKFG(*l)->foff += ovlap;
       +                                /*
       +                                 * move up ih hdrs together with the
       +                                 * fragment header: the completion scan
       +                                 * below reads the M flag at rp + uflen.
       +                                 */
       +                                memmove((*l)->rp + ovlap, (*l)->rp, uflen + IP6FHDR);
       +                                (*l)->rp += ovlap;
       +                                break;
       +                        }
       +                        last = (*l)->next;
       +                        (*l)->next = nil;
       +                        freeblist(*l);
       +                        *l = last;
       +                }
       +        }
       +
       +        /*
       +         *  look for a complete packet.  if we get to a fragment
       +         *  with the trailing bit of fraghdr->offsetRM[1] clear
       +         *  (no more fragments follow), we're done.
       +         */
       +        pktposn = 0;
       +        for(bl = f->blist; bl && BKFG(bl)->foff == pktposn; bl = bl->next) {
       +                fraghdr = (Fraghdr6 *)(bl->rp + uflen);
       +                if((fraghdr->offsetRM[1] & 1) == 0) {
       +                        bl = f->blist;
       +
       +                        /* get rid of frag header in first fragment */
       +                        memmove(bl->rp + IP6FHDR, bl->rp, uflen);
       +                        bl->rp += IP6FHDR;
       +                        len = nhgets(((Ip6hdr*)bl->rp)->ploadlen) - IP6FHDR;
       +                        bl->wp = bl->rp + len + IP6HDR;
       +                        /*
       +                         * Pullup all the fragment headers and
       +                         * return a complete packet
       +                         */
       +                        for(bl = bl->next; bl; bl = bl->next) {
       +                                fragsize = BKFG(bl)->flen;
       +                                len += fragsize;
       +                                bl->rp += uflen + IP6FHDR;
       +                                bl->wp = bl->rp + fragsize;
       +                        }
       +
       +                        /* detach the list before the queue entry is recycled */
       +                        bl = f->blist;
       +                        f->blist = nil;
       +                        ipfragfree6(ip, f);
       +                        ih = (Ip6hdr*)bl->rp;
       +                        hnputs(ih->ploadlen, len);
       +                        qunlock(&ip->fraglock6);
       +                        ip->stats[ReasmOKs]++;
       +                        return bl;
       +                }
       +                pktposn += BKFG(bl)->flen;
       +        }
       +        qunlock(&ip->fraglock6);
       +        return nil;
       +}
 (DIR) diff --git a/src/9vx/a/ip/ipv6.h b/src/9vx/a/ip/ipv6.h
       @@ -0,0 +1,185 @@
       +/*
       + * Internet Protocol Version 6
       + *
       + * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
       + * rfc2462 address autoconfiguration.  rfc4443 defines ICMP; was rfc2463.
       + * rfc4291 defines the address architecture (including prefices), was rfc3513.
       + * rfc4007 defines the scoped address architecture.
       + *
       + * global unicast is anything but unspecified (::), loopback (::1),
       + * multicast (ff00::/8), and link-local unicast (fe80::/10).
       + *
       + * site-local (fec0::/10) is now deprecated, originally by rfc3879.
       + *
       + * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
       + * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
       + */
       +#define isv6mcast(addr)          ((addr)[0] == 0xff)        /* first byte 0xff: multicast, ff00::/8 */
       +#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)        /* top ten bits match fe80::/10 */
       +
       +#define optexsts(np)        (nhgets((np)->ploadlen) > 24)        /* payload length field exceeds 24; NOTE(review): what 24 stands for is not shown here */
       +#define issmcast(addr)        (memcmp((addr), v6solicitednode, 13) == 0)        /* first 13 bytes equal those of v6solicitednode */
       +
       +#ifndef MIN
       +#define MIN(a, b) ((a) <= (b)? (a): (b))
       +#endif
       +
       +#undef ESP
       +
       +/*
       + * Values carried in the next-header field of the v6 header and of
       + * extension headers.  Maxhdrtype is one more than the largest value
       + * the one-byte field can hold.
       + */
       +enum {                                /* Header Types */
       +        HBH                = 0,        /* hop-by-hop options header */
       +        ICMP                = 1,
       +        IGMP                = 2,
       +        GGP                = 3,
       +        IPINIP                = 4,
       +        ST                = 5,
       +        TCP                = 6,
       +        UDP                = 17,
       +        ISO_TP4                = 29,
       +        RH                = 43,        /* routing header */
       +        FH                = 44,        /* fragment header */
       +        IDRP                = 45,
       +        RSVP                = 46,
       +        ESP                = 50,        /* was 52; the assigned protocol number for ESP is 50 */
       +        AH                = 51,
       +        ICMPv6                = 58,
       +        NNH                = 59,        /* no next header */
       +        DOH                = 60,        /* destination options header */
       +        ISO_IP                = 80,
       +        IGRP                = 88,
       +        OSPF                = 89,
       +
       +        Maxhdrtype        = 256,
       +};
       +
       +enum {
       +        /* multicast flags and scopes */
       +
       +//        Well_known_flg        = 0,
       +//        Transient_flg        = 1,
       +
       +//        Interface_local_scop = 1,
       +        Link_local_scop        = 2,
       +//        Site_local_scop        = 5,
       +//        Org_local_scop        = 8,
       +        Global_scop        = 14,
       +
       +        /* various prefix lengths */
       +        SOLN_PREF_LEN        = 13,
       +
       +        /* icmpv6 unreachability codes */
       +        Icmp6_no_route                = 0,
       +        Icmp6_ad_prohib                = 1,
       +        Icmp6_out_src_scope        = 2,
       +        Icmp6_adr_unreach        = 3,
       +        Icmp6_port_unreach        = 4,
       +        Icmp6_gress_src_fail        = 5,
       +        Icmp6_rej_route                = 6,
       +        Icmp6_unknown                = 7,  /* our own invention for internal use */
       +
       +        /* various flags & constants */
       +        v6MINTU                = 1280,
       +        HOP_LIMIT        = 255,
       +        /*
       +         * sizeof(Ip6hdr): vcf[4] + ploadlen[2] + proto + ttl = 8 bytes,
       +         * plus the source and destination addresses.  Was 20, which
       +         * disagrees with the struct and with the "packet length - 40"
       +         * rule for ploadlen.
       +         */
       +        IP6HDR                = 40,
       +
       +        /* option types */
       +
       +        /* neighbour discovery */
       +        SRC_LLADDR        = 1,
       +        TARGET_LLADDR        = 2,
       +        PREFIX_INFO        = 3,
       +        REDIR_HEADER        = 4,
       +        MTU_OPTION        = 5,
       +        /* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
       +        V6nd_home        = 8,
       +        V6nd_srcaddrs        = 9,                /* rfc3122 */
       +        V6nd_ip                = 17,
       +        /* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
       +        V6nd_rdns        = 25,
       +        /* plan 9 extensions */
       +        V6nd_9fs        = 250,
       +        V6nd_9auth        = 251,
       +
       +        SRC_UNSPEC        = 0,
       +        SRC_UNI                = 1,
       +        TARG_UNI        = 2,
       +        TARG_MULTI        = 3,
       +
       +        Tunitent        = 1,
       +        Tuniproxy        = 2,
       +        Tunirany        = 3,
       +
       +        /* Node constants */
       +        MAX_MULTICAST_SOLICIT        = 3,
       +        RETRANS_TIMER                = 1000,
       +};
       +
       +typedef struct Ip6hdr        Ip6hdr;
       +typedef struct Opthdr        Opthdr;
       +typedef struct Routinghdr Routinghdr;
       +typedef struct Fraghdr6        Fraghdr6;
       +
       +struct        Ip6hdr {                /* fixed v6 header as laid out at the start of a packet */
       +        uchar        vcf[4];                /* version:4, traffic class:8, flow label:20 */
       +        uchar        ploadlen[2];        /* payload length: packet length - 40; accessed with nhgets/hnputs */
       +        uchar        proto;                /* next header type (see Header Types enum) */
       +        uchar        ttl;                /* hop limit */
       +        uchar        src[IPaddrlen];        /* source address */
       +        uchar        dst[IPaddrlen];        /* destination address */
       +};
       +
       +struct        Opthdr {                /* leading two bytes of an options extension header */
       +        uchar        nexthdr;        /* type of the header that follows this one */
       +        uchar        len;                /* unfraglen sizes HBH/RH headers as (len+1)*8 bytes */
       +};
       +
       +/*
       + * Beware routing header type 0 (loose source routing); see
       + * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
       + * Type 1 is unused.  Type 2 is for MIPv6 (mobile IPv6) filtering
       + * against type 0 header.
       + */
       +struct        Routinghdr {                /* leading four bytes of a routing extension header (RH) */
       +        uchar        nexthdr;        /* type of the header that follows this one */
       +        uchar        len;                /* unfraglen sizes the header as (len+1)*8 bytes */
       +        uchar        rtetype;        /* routing header type; see the warning above about type 0 */
       +        uchar        segrem;                /* presumably the "segments left" count; confirm against users */
       +};
       +
       +struct        Fraghdr6 {                /* fragment extension header (FH) */
       +        uchar        nexthdr;        /* type of the header that follows the fragment header */
       +        uchar        res;                /* reserved; ipoput6 writes 0 */
       +        uchar        offsetRM[2];        /* Offset, Res, M flag: offset is the value with the low 3 bits masked, M is bit 0 of offsetRM[1] */
       +        uchar        id[4];                /* fragment identification; accessed with nhgetl/hnputl */
       +};
       +
       +extern uchar v6allnodesN[IPaddrlen];
       +extern uchar v6allnodesL[IPaddrlen];
       +extern uchar v6allroutersN[IPaddrlen];
       +extern uchar v6allroutersL[IPaddrlen];
       +extern uchar v6allnodesNmask[IPaddrlen];
       +extern uchar v6allnodesLmask[IPaddrlen];
       +extern uchar v6solicitednode[IPaddrlen];
       +extern uchar v6solicitednodemask[IPaddrlen];
       +extern uchar v6Unspecified[IPaddrlen];
       +extern uchar v6loopback[IPaddrlen];
       +extern uchar v6loopbackmask[IPaddrlen];
       +extern uchar v6linklocal[IPaddrlen];
       +extern uchar v6linklocalmask[IPaddrlen];
       +extern uchar v6multicast[IPaddrlen];
       +extern uchar v6multicastmask[IPaddrlen];
       +
       +extern int v6llpreflen;
       +extern int v6mcpreflen;
       +extern int v6snpreflen;
       +extern int v6aNpreflen;
       +extern int v6aLpreflen;
       +
       +extern int ReTransTimer;
       +
       +void ipv62smcast(uchar *, uchar *);
       +void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
       +void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
       +void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
       +void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
       +void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
 (DIR) diff --git a/src/9vx/a/ip/loopbackmedium.c b/src/9vx/a/ip/loopbackmedium.c
       @@ -0,0 +1,120 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +
       +enum
       +{
       +        Maxtu=        16*1024,        /* used as loopbackmedium.maxtu and as the length passed to qbread */
       +};
       +
       +typedef struct LB LB;
       +struct LB                /* per-interface loopback state, stored in ifc->arg by loopbackbind */
       +{
       +        Proc        *readp;                /* the loopbackread kproc; reset to 0 by that kproc when it exits */
       +        Queue        *q;                /* blocks queued by loopbackbwrite, drained by loopbackread */
       +        Fs        *f;                /* taken from ifc->conv->p->f at bind time */
       +};
       +
       +static void loopbackread(void *a);
       +
       +/*
       + * loopbackbind - attach the loopback medium to ifc: allocate the LB
       + * state with its message queue, record it in ifc->arg and start the
       + * "loopbackread" kproc.  The two trailing arguments are ignored.
       + */
       +static void
       +loopbackbind(Ipifc *ifc, int _, char** __)
       +{
       +        LB *state;
       +
       +        state = smalloc(sizeof(LB));
       +        state->f = ifc->conv->p->f;
       +        state->q = qopen(1024*1024, Qmsg, nil, nil);
       +
       +        ifc->mbps = 1000;
       +        ifc->arg = state;
       +
       +        /* the reader picks its state up from ifc->arg */
       +        kproc("loopbackread", loopbackread, ifc);
       +}
       +
       +/*
       + * loopbackunbind - stop the reader kproc started by loopbackbind and
       + * release the LB state hanging off ifc->arg.
       + */
       +static void
       +loopbackunbind(Ipifc *ifc)
       +{
       +        LB *state;
       +
       +        state = ifc->arg;
       +
       +        /* ask the reader to quit */
       +        if(state->readp != 0)
       +                postnote(state->readp, 1, "unbind", 0);
       +
       +        /* poll every 300 ticks of tsleep until loopbackread has cleared readp */
       +        for(;;) {
       +                if(state->readp == 0)
       +                        break;
       +                tsleep(&up->sleep, return0, 0, 300);
       +        }
       +
       +        /* nobody reads the queue any more: tear everything down */
       +        qfree(state->q);
       +        free(state);
       +}
       +
       +/*
       + * loopbackbwrite - medium write routine: pass bp to the loopback queue
       + * for loopbackread to pick up.  ifc->out counts every call and
       + * ifc->outerr the calls where qpass returned a negative value.  The
       + * two trailing arguments are ignored.
       + */
       +static void
       +loopbackbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
       +{
       +        LB *state = ifc->arg;
       +
       +        if(qpass(state->q, bp) < 0)
       +                ifc->outerr += 1;
       +        ifc->out += 1;
       +}
       +
       +/*
       + * loopbackread - body of the "loopbackread" kproc.  Loops forever
       + * taking blocks off the loopback queue and feeding them to ipiput4
       + * under the interface read lock.  On an error raised out of the loop
       + * it clears lb->readp (which loopbackunbind waits for) and exits.
       + */
       +static void
       +loopbackread(void *a)
       +{
       +        Ipifc *ifc;
       +        LB *state;
       +        Block *pkt;
       +
       +        ifc = a;
       +        state = ifc->arg;
       +        state->readp = up;        /* hide identity under a rock for unbind */
       +        if(waserror()){
       +                state->readp = 0;
       +                pexit("hangup", 1);
       +        }
       +
       +        for(;;){
       +                pkt = qbread(state->q, Maxtu);
       +                if(pkt == nil)
       +                        continue;
       +                ifc->in++;
       +
       +                if(CANRLOCK(ifc)){
       +                        /* make sure the read lock is dropped if input raises an error */
       +                        if(waserror()){
       +                                RUNLOCK(ifc);
       +                                nexterror();
       +                        }
       +                        if(ifc->lifc != nil)
       +                                ipiput4(state->f, ifc, pkt);
       +                        else
       +                                freeb(pkt);        /* no lifc on the interface: drop */
       +                        RUNLOCK(ifc);
       +                        poperror();
       +                } else {
       +                        /* could not get the read lock: drop the block */
       +                        freeb(pkt);
       +                }
       +        }
       +}
       +
       +Medium loopbackmedium =                /* medium description registered by loopbackmediumlink */
       +{
       +.hsize=                0,                /* ipoput6 subtracts hsize from the interface maxtu before sending */
       +.mintu=                0,
       +.maxtu=                Maxtu,                /* 16*1024 */
       +.maclen=        0,
       +.name=                "loopback",
       +.bind=                loopbackbind,        /* allocates LB state and starts the reader kproc */
       +.unbind=        loopbackunbind,        /* stops the reader and frees the LB state */
       +.bwrite=        loopbackbwrite,        /* queues outgoing blocks for the reader */
       +};
       +
       +void
       +loopbackmediumlink(void)        /* registers loopbackmedium through addipmedium */
       +{
       +        addipmedium(&loopbackmedium);
       +}
 (DIR) diff --git a/src/9vx/a/ip/netdevmedium.c b/src/9vx/a/ip/netdevmedium.c
       @@ -0,0 +1,153 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +
       +static void        netdevbind(Ipifc *ifc, int argc, char **argv);
       +static void        netdevunbind(Ipifc *ifc);
       +static void        netdevbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
       +static void        netdevread(void *a);
       +
       +typedef struct        Netdevrock Netdevrock;
       +struct Netdevrock                /* per-interface netdev state, stored in ifc->arg by netdevbind */
       +{
       +        Fs        *f;                /* file system we belong to */
       +        Proc        *readp;                /* reading process; netdevunbind notes it and waits for this to become nil */
       +        Chan        *mchan;                /* Data channel, opened ORDWR from argv[2] in netdevbind */
       +};
       +
       +/*
       + *  medium descriptor for a generic network device: no header,
       + *  no MAC address, no minimum transfer unit, 64000 byte maximum.
       + *  registered by netdevmediumlink().
       + */
       +Medium netdevmedium =
       +{
       +.name=                "netdev",
       +.hsize=                0,
       +.mintu=        0,
       +.maxtu=        64000,
       +.maclen=        0,
       +.bind=                netdevbind,
       +.unbind=        netdevunbind,
       +.bwrite=        netdevbwrite,
       +.unbindonclose=        0,
       +};
       +
       +/*
       + *  called to bind an IP ifc to a generic network device
       + *  called with ifc qlock'd
       + *
       + *  argv[2] names the file to open as the data channel, so at
       + *  least three arguments are required; fewer raise Ebadarg.
       + *  the opened channel and the owning Fs are stored in a freshly
       + *  allocated Netdevrock on ifc->arg, and a reader kproc is started.
       + */
       +static void
       +netdevbind(Ipifc *ifc, int argc, char **argv)
       +{
       +        Chan *mchan;
       +        Netdevrock *er;
       +
       +        /* argv[2] is dereferenced below, so argc must be at least 3 */
       +        if(argc < 3)
       +                error(Ebadarg);
       +
       +        mchan = namec(argv[2], Aopen, ORDWR, 0);
       +
       +        er = smalloc(sizeof(*er));
       +        er->mchan = mchan;
       +        er->f = ifc->conv->p->f;
       +
       +        ifc->arg = er;
       +
       +        kproc("netdevread", netdevread, ifc);
       +}
       +
       +/*
       + *  called with ifc wlock'd
       + *
       + *  tear down what netdevbind set up: stop the reader process,
       + *  close the data channel and free the Netdevrock.
       + */
       +static void
       +netdevunbind(Ipifc *ifc)
       +{
       +        Netdevrock *er = ifc->arg;
       +
       +        /* ask the reader (if one is still registered) to quit */
       +        if(er->readp != nil)
       +                postnote(er->readp, 1, "unbind", 0);
       +
       +        /* wait for readers to die; netdevread clears readp on its way out */
       +        while(er->readp != nil)
       +                tsleep(&up->sleep, return0, 0, 300);
       +
       +        if(er->mchan != nil)
       +                cclose(er->mchan);
       +
       +        free(er);
       +}
       +
       +/*
       + *  output routine for the netdev medium: ipoput hands us one
       + *  packet, which is flattened to a single block, padded up to
       + *  the interface's mintu if short, and written to the data channel.
       + */
       +static void
       +netdevbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
       +{
       +        Netdevrock *rock;
       +        Chan *data;
       +
       +        rock = ifc->arg;
       +
       +        /* the device write takes one block, not a chain */
       +        if(bp->next != nil)
       +                bp = concatblock(bp);
       +        if(BLEN(bp) < ifc->mintu)
       +                bp = adjustblock(bp, ifc->mintu);
       +
       +        data = rock->mchan;
       +        devtab[data->type]->bwrite(data, bp, 0);
       +        ifc->out++;
       +}
       +
       +/*
       + *  process to read from the device
       + *
       + *  runs as the kproc started by netdevbind; a is the Ipifc.
       + *  each block read from the data channel is passed to ipiput4()
       + *  under the interface read lock.  the process exits when an
       + *  error is raised (the outer waserror) or when the read returns nil.
       + */
       +static void
       +netdevread(void *a)
       +{
       +        Ipifc *ifc;
       +        Block *bp;
       +        Netdevrock *er;
       +        char *argv[1];
       +
       +        ifc = a;
       +        er = ifc->arg;
       +        er->readp = up;        /* hide identity under a rock for unbind */
       +        if(waserror()){
       +                /* clearing readp is what lets netdevunbind stop waiting */
       +                er->readp = nil;
       +                pexit("hangup", 1);
       +        }
       +        for(;;){
       +                bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
       +                if(bp == nil){
       +                        /*
       +                         * get here if mchan is a pipe and other side hangs up
       +                         * clean up this interface & get out
       +                         * XXX is unbinding from here a good idea?
       +                         */
       +                        poperror();
       +                        er->readp = nil;
       +                        argv[0] = "unbind";
       +                        /* an error raised by the ctl call is ignored; we exit either way */
       +                        if(!waserror())
       +                                ifc->conv->p->ctl(ifc->conv, argv, 1);
       +                        pexit("hangup", 1);
       +                }
       +                /* interface busy: drop the packet rather than block */
       +                if(!CANRLOCK(ifc)){
       +                        freeb(bp);
       +                        continue;
       +                }
       +                if(waserror()){
       +                        RUNLOCK(ifc);
       +                        nexterror();
       +                }
       +                ifc->in++;
       +                /* no local address configured: nothing to deliver to */
       +                if(ifc->lifc == nil)
       +                        freeb(bp);
       +                else
       +                        ipiput4(er->f, ifc, bp);
       +                RUNLOCK(ifc);
       +                poperror();
       +        }
       +}
       +
       +/*
       + *  register the netdev medium with the IP stack
       + */
       +void
       +netdevmediumlink(void)
       +{
       +        addipmedium(&netdevmedium);
       +}
 (DIR) diff --git a/src/9vx/a/ip/netlog.c b/src/9vx/a/ip/netlog.c
       @@ -0,0 +1,261 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +#include        "ip/ip.h"
       +
       +enum {
       +        Nlog                = 16*1024,        /* size in bytes of the log buffer */
       +};
       +
       +/*
       + *  action log
       + *
       + *  buf..end is used as a circular buffer: rptr is the oldest
       + *  unread byte and len the number of unread bytes after it.
       + */
       +struct Netlog {
       +        Lock        lk;                /* presumably taken by LOCK(); guards the buffer fields */
       +        int        opens;                /* opens outstanding (netlogopen/netlogclose) */
       +        char*        buf;                /* Nlog bytes, allocated on first open, freed on last close */
       +        char        *end;                /* buf + Nlog */
       +        char        *rptr;                /* next byte to hand to a reader */
       +        int        len;                /* unread bytes in the buffer */
       +
       +        int        logmask;                        /* mask of things to debug */
       +        uchar        iponly[IPaddrlen];                /* ip address to print debugging for */
       +        int        iponlyset;                /* non-zero when iponly holds an address other than IPnoaddr */
       +
       +        QLock        qlock;                /* presumably taken by QLOCK(); held across netlogread */
       +        Rendez        rendez;                /* readers sleep here until netlog() adds data */
       +};
       +
       +/*
       + *  maps a flag name accepted by the "set" and "clear" control
       + *  messages to the bits it toggles in Netlog.logmask
       + */
       +typedef struct Netlogflag {
       +        char*        name;
       +        int        mask;
       +} Netlogflag;
       +
       +/* searched linearly by netlogctl; terminated by the nil name */
       +static Netlogflag flags[] =
       +{
       +        { "ppp",        Logppp, },
       +        { "ip",                Logip, },
       +        { "fs",                Logfs, },
       +        { "tcp",        Logtcp, },
       +        { "icmp",        Logicmp, },
       +        { "udp",        Logudp, },
       +        { "compress",        Logcompress, },
       +        { "gre",        Loggre, },
       +        { "tcpwin",        Logtcp|Logtcpwin, },
       +        { "tcprxmt",        Logtcp|Logtcprxmt, },
       +        { "udpmsg",        Logudp|Logudpmsg, },
       +        { "ipmsg",        Logip|Logipmsg, },
       +        { "esp",        Logesp, },
       +        { nil,                0, },
       +};
       +
       +/* raised by netlogctl when the message has fewer than two fields */
       +char Ebadnetctl[] = "too few arguments for netlog control message";
       +
       +/* indices of the control messages in routecmd[] */
       +enum
       +{
       +        CMset,
       +        CMclear,
       +        CMonly,
       +};
       +
       +/*
       + *  control messages understood by netlogctl; each row is
       + *  index, command word, and a third field of 0
       + *  (presumably "any number of arguments" -- confirm against Cmdtab)
       + */
       +static
       +Cmdtab routecmd[] = {
       +        CMset,                "set",                0,
       +        CMclear,        "clear",        0,
       +        CMonly,                "only",                0,
       +};
       +
       +/*
       + *  allocate the Netlog state for f; the buffer itself is
       + *  allocated later, by the first netlogopen
       + */
       +void
       +netloginit(Fs *f)
       +{
       +        f->alog = smalloc(sizeof(Netlog));
       +}
       +
       +/*
       + *  note one more open of the log.  the first open allocates the
       + *  Nlog byte buffer if it is not already there and points rptr
       + *  at its start.  raises Enomem (with the lock released by the
       + *  error handler) if the buffer cannot be allocated.
       + */
       +void
       +netlogopen(Fs *f)
       +{
       +        LOCK(f->alog);
       +        if(waserror()){
       +                UNLOCK(f->alog);
       +                nexterror();
       +        }
       +        if(f->alog->opens == 0){
       +                if(f->alog->buf == nil){
       +                        f->alog->buf = malloc(Nlog);
       +                        /*
       +                         * without this check rptr and end would be derived
       +                         * from nil and netlog() would write through them
       +                         */
       +                        if(f->alog->buf == nil)
       +                                error(Enomem);
       +                }
       +                f->alog->rptr = f->alog->buf;
       +                f->alog->end = f->alog->buf + Nlog;
       +        }
       +        f->alog->opens++;
       +        UNLOCK(f->alog);
       +        poperror();
       +}
       +
       +/*
       + *  note one fewer open of the log; the last close frees the buffer
       + */
       +void
       +netlogclose(Fs *f)
       +{
       +        LOCK(f->alog);
       +        if(waserror()){
       +                UNLOCK(f->alog);
       +                nexterror();
       +        }
       +        f->alog->opens--;
       +        if(f->alog->opens == 0){
       +                free(f->alog->buf);
       +                /* nil tells the next netlogopen to allocate again */
       +                f->alog->buf = nil;
       +        }
       +        UNLOCK(f->alog);
       +        poperror();
       +}
       +
       +/*
       + *  sleep condition for netlogread: non-zero once the log of
       + *  the Fs passed in a holds unread bytes
       + */
       +static int
       +netlogready(void *a)
       +{
       +        Fs *fs;
       +
       +        fs = a;
       +        return fs->alog->len;
       +}
       +
       +/*
       + *  copy up to n unread bytes of the log into a, sleeping until
       + *  there is something to read.  the third argument is unused.
       + *  returns the number of bytes copied.
       + */
       +long
       +netlogread(Fs *f, void *a, ulong _, long n)
       +{
       +        int i, d;
       +        char *p, *rptr;
       +
       +        QLOCK(f->alog);
       +        if(waserror()){
       +                QUNLOCK(f->alog);
       +                nexterror();
       +        }
       +
       +        for(;;){
       +                LOCK(f->alog);
       +                if(f->alog->len){
       +                        /* never hand out more than is buffered */
       +                        if(n > f->alog->len)
       +                                n = f->alog->len;
       +                        d = 0;
       +                        rptr = f->alog->rptr;
       +                        f->alog->rptr += n;
       +                        if(f->alog->rptr >= f->alog->end){
       +                                /* d = bytes that wrapped round to the start of buf */
       +                                d = f->alog->rptr - f->alog->end;
       +                                f->alog->rptr = f->alog->buf + d;
       +                        }
       +                        f->alog->len -= n;
       +                        UNLOCK(f->alog);
       +
       +                        /* copy is done after the lock is dropped: i bytes up to end, then d from buf */
       +                        i = n-d;
       +                        p = a;
       +                        memmove(p, rptr, i);
       +                        memmove(p+i, f->alog->buf, d);
       +                        break;
       +                }
       +                else
       +                        UNLOCK(f->alog);
       +
       +                /* nothing buffered: wait for netlog() to wake us */
       +                sleep(&f->alog->rendez, netlogready, f);
       +        }
       +
       +        QUNLOCK(f->alog);
       +        poperror();
       +
       +        return n;
       +}
       +
       +/*
       + *  parse and apply the netlog control message in the n bytes at s:
       + *        set name ...        turn on the named flags in logmask
       + *        clear name ...        turn off the named flags in logmask
       + *        only addr        log only for ip address addr; IPnoaddr
       + *                        turns the restriction off
       + *  names not found in flags[] are silently skipped.  raises
       + *  Ebadnetctl when there are fewer than two fields, and an error
       + *  via cmderror for an unrecognised command index.
       + */
       +void
       +netlogctl(Fs *f, char* s, int n)
       +{
       +        int i, set;
       +        Netlogflag *fp;
       +        Cmdbuf *cb;
       +        Cmdtab *ct;
       +
       +        cb = parsecmd(s, n);
       +        if(waserror()){
       +                free(cb);
       +                nexterror();
       +        }
       +
       +        if(cb->nf < 2)
       +                error(Ebadnetctl);
       +
       +        ct = lookupcmd(cb, routecmd, nelem(routecmd));
       +
       +        set = 1;
       +
       +        switch(ct->index){
       +        case CMset:
       +                set = 1;
       +                break;
       +
       +        case CMclear:
       +                set = 0;
       +                break;
       +
       +        case CMonly:
       +                parseip(f->alog->iponly, cb->f[1]);
       +                if(ipcmp(f->alog->iponly, IPnoaddr) == 0)
       +                        f->alog->iponlyset = 0;
       +                else
       +                        f->alog->iponlyset = 1;
       +                /* every return must pop the waserror() label pushed above */
       +                poperror();
       +                free(cb);
       +                return;
       +
       +        default:
       +                cmderror(cb, "unknown ip control message");
       +        }
       +
       +        /* set/clear: fields 1..nf-1 are flag names */
       +        for(i = 1; i < cb->nf; i++){
       +                for(fp = flags; fp->name; fp++)
       +                        if(strcmp(fp->name, cb->f[i]) == 0)
       +                                break;
       +                if(fp->name == nil)
       +                        continue;
       +                if(set)
       +                        f->alog->logmask |= fp->mask;
       +                else
       +                        f->alog->logmask &= ~fp->mask;
       +        }
       +
       +        free(cb);
       +        poperror();
       +}
       +
       +/*
       + *  append a formatted message to the log if any bit of mask is
       + *  enabled in logmask and the log is open.  the message is
       + *  formatted into a 128 byte local buffer first.  when the
       + *  buffer is full the oldest unread bytes are discarded.
       + *  wakes any reader sleeping in netlogread.
       + */
       +void
       +netlog(Fs *f, int mask, char *fmt, ...)
       +{
       +        char buf[128], *t, *fp;
       +        int i, n;
       +        va_list arg;
       +
       +        /* both checks are made without taking the lock */
       +        if(!(f->alog->logmask & mask))
       +                return;
       +
       +        if(f->alog->opens == 0)
       +                return;
       +
       +        va_start(arg, fmt);
       +        n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
       +        va_end(arg);
       +
       +        LOCK(f->alog);
       +        /* i = bytes by which the new message would overflow the buffer */
       +        i = f->alog->len + n - Nlog;
       +        if(i > 0){
       +                /* make room by advancing the read pointer past the oldest i bytes */
       +                f->alog->len -= i;
       +                f->alog->rptr += i;
       +                if(f->alog->rptr >= f->alog->end)
       +                        f->alog->rptr = f->alog->buf + (f->alog->rptr - f->alog->end);
       +        }
       +        /* t = write position; may start past end and is wrapped in the loop */
       +        t = f->alog->rptr + f->alog->len;
       +        fp = buf;
       +        f->alog->len += n;
       +        while(n-- > 0){
       +                if(t >= f->alog->end)
       +                        t = f->alog->buf + (t - f->alog->end);
       +                *t++ = *fp++;
       +        }
       +        UNLOCK(f->alog);
       +
       +        wakeup(&f->alog->rendez);
       +}
 (DIR) diff --git a/src/9vx/a/ip/nullmedium.c b/src/9vx/a/ip/nullmedium.c
       @@ -0,0 +1,39 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +
       +/*
       + *  the null medium cannot be bound: always raises an error
       + */
       +static void
       +nullbind(Ipifc* _, int __, char** ___)
       +{
       +        error("cannot bind null device");
       +}
       +
       +/*
       + *  nothing to undo
       + */
       +static void
       +nullunbind(Ipifc* _)
       +{
       +}
       +
       +/*
       + *  writing to the null medium always raises an error;
       + *  the block is not freed here
       + */
       +static void
       +nullbwrite(Ipifc* _, Block* __, int ___, uchar* ____)
       +{
       +        error("nullbwrite");
       +}
       +
       +/*
       + *  medium descriptor for the null medium; fields not named
       + *  here are left zero by the initializer
       + */
       +Medium nullmedium =
       +{
       +.name=                "null",
       +.bind=                nullbind,
       +.unbind=        nullunbind,
       +.bwrite=        nullbwrite,
       +};
       +
       +/*
       + *  register the null medium with the IP stack
       + */
       +void
       +nullmediumlink(void)
       +{
       +        addipmedium(&nullmedium);
       +}
 (DIR) diff --git a/src/9vx/a/ip/pktmedium.c b/src/9vx/a/ip/pktmedium.c
       @@ -0,0 +1,78 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +
       +
       +/* medium operations, installed in pktmedium below */
       +static void        pktbind(Ipifc*, int, char**);
       +static void        pktunbind(Ipifc*);
       +static void        pktbwrite(Ipifc*, Block*, int, uchar*);
       +static void        pktin(Fs*, Ipifc*, Block*);
       +
       +/*
       + *  medium descriptor for the pkt medium: output packets are
       + *  queued on the conversation's read queue (pktbwrite) and
       + *  input arrives through pktin.  registered by pktmediumlink().
       + */
       +Medium pktmedium =
       +{
       +.name=                "pkt",
       +.hsize=                14,
       +.mintu=                40,
       +.maxtu=                4*1024,
       +.maclen=        6,
       +.bind=                pktbind,
       +.unbind=        pktunbind,
       +.bwrite=        pktbwrite,
       +.pktin=                pktin,
       +};
       +
       +/*
       + *  called to bind an IP ifc to the pkt medium
       + *  called with ifc wlock'd
       + *
       + *  nothing to set up; the arguments are ignored
       + */
       +static void
       +pktbind(Ipifc* _, int argc, char **argv)
       +{
       +}
       +
       +/*
       + *  called with ifc wlock'd
       + *
       + *  nothing to undo, since pktbind sets nothing up
       + */
       +static void
       +pktunbind(Ipifc* _)
       +{
       +}
       +
       +/*
       + *  output routine for the pkt medium: ipoput hands us one
       + *  packet, which is flattened and passed up the conversation's
       + *  read queue; snoopers, if any, get their own copy first.
       + */
       +static void
       +pktbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
       +{
       +        Conv *cv;
       +        Block *snoop;
       +
       +        cv = ifc->conv;
       +        bp = concatblock(bp);
       +
       +        if(cv->snoopers.ref > 0){
       +                snoop = copyblock(bp, BLEN(bp));
       +                qpass(cv->sq, snoop);
       +        }
       +
       +        /* enqueue onto the conversation's rq */
       +        qpass(cv->rq, bp);
       +}
       +
       +/*
       + *  input routine for the pkt medium
       + *  called with ifc rlocked when someone writes to 'data'
       + *
       + *  the block is dropped if the interface has no local address;
       + *  otherwise snoopers get a copy and the block goes to ipiput4.
       + */
       +static void
       +pktin(Fs *f, Ipifc *ifc, Block *bp)
       +{
       +        if(ifc->lifc == nil){
       +                freeb(bp);
       +                return;
       +        }
       +
       +        if(ifc->conv->snoopers.ref > 0)
       +                qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
       +        ipiput4(f, ifc, bp);
       +}
       +
       +/*
       + *  register the pkt medium with the IP stack
       + */
       +void
       +pktmediumlink(void)
       +{
       +        addipmedium(&pktmedium);
       +}
 (DIR) diff --git a/src/9vx/a/ip/ptclbsum.c b/src/9vx/a/ip/ptclbsum.c
       @@ -0,0 +1,72 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +#include        "ip.h"
       +
       +/*
       + *  run-time byte order probe: the first byte of the short 1
       + *  is non-zero only on a little-endian machine
       + */
       +static        short        endian        = 1;
       +static        uchar*        aendian        = (uchar*)&endian;
       +#define        LITTLE        *aendian
       +
       +/*
       + *  sum the len bytes at addr as 16-bit quantities with
       + *  end-around carry and return the folded 16-bit result.
       + *  the sum is not complemented here.
       + *
       + *  the bulk of the data is summed with native 16-bit loads into
       + *  mdsum; hisum collects what must be byte swapped before it is
       + *  added to losum, which is already in result order.
       + */
       +ushort
       +ptclbsum(uchar *addr, int len)
       +{
       +        ulong losum, hisum, mdsum, x;
       +        ulong t1, t2;
       +
       +        losum = 0;
       +        hisum = 0;
       +        mdsum = 0;
       +
       +        /* x records an odd start address; the 16-bit loads below then begin one byte in */
       +        x = 0;
       +        if((ulong)addr & 1) {
       +                if(len) {
       +                        hisum += addr[0];
       +                        len--;
       +                        addr++;
       +                }
       +                x = 1;
       +        }
       +        /* main loop: eight 16-bit words per iteration */
       +        while(len >= 16) {
       +                t1 = *(ushort*)(addr+0);
       +                t2 = *(ushort*)(addr+2);        mdsum += t1;
       +                t1 = *(ushort*)(addr+4);        mdsum += t2;
       +                t2 = *(ushort*)(addr+6);        mdsum += t1;
       +                t1 = *(ushort*)(addr+8);        mdsum += t2;
       +                t2 = *(ushort*)(addr+10);        mdsum += t1;
       +                t1 = *(ushort*)(addr+12);        mdsum += t2;
       +                t2 = *(ushort*)(addr+14);        mdsum += t1;
       +                mdsum += t2;
       +                len -= 16;
       +                addr += 16;
       +        }
       +        /* remaining whole words */
       +        while(len >= 2) {
       +                mdsum += *(ushort*)addr;
       +                len -= 2;
       +                addr += 2;
       +        }
       +        /*
       +         * fold in the trailing byte, if any, and mdsum.  which of
       +         * losum/hisum each goes to depends on the machine's byte
       +         * order and on whether the data started on an odd address.
       +         */
       +        if(x) {
       +                if(len)
       +                        losum += addr[0];
       +                if(LITTLE)
       +                        losum += mdsum;
       +                else
       +                        hisum += mdsum;
       +        } else {
       +                if(len)
       +                        hisum += addr[0];
       +                if(LITTLE)
       +                        hisum += mdsum;
       +                else
       +                        losum += mdsum;
       +        }
       +
       +        /* add hisum to losum with its bytes swapped */
       +        losum += hisum >> 8;
       +        losum += (hisum & 0xff) << 8;
       +        /* fold carries above bit 15 back in until none remain */
       +        while((hisum = losum>>16))
       +                losum = hisum + (losum & 0xffff);
       +
       +        return losum & 0xffff;
       +}
 (DIR) diff --git a/src/9vx/a/ip/rudp.c b/src/9vx/a/ip/rudp.c
       @@ -0,0 +1,1055 @@
       +/*
       + *  Reliable User Datagram Protocol, currently only for IPv4.
       + *  This protocol is compatible with UDP's packet format.
       + *  It could be done over UDP if need be.
       + */
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +
       +#include        "ip.h"
       +
       +/*
       + *  debugging prints, compiled in but disabled while DEBUG is 0.
       + *  DPRINT expands to a bare if, so beware of a following else.
       + */
       +#define DEBUG        0
       +#define DPRINT if(DEBUG)print
       +
       +/*
       + *  distance from sequence number b forward to a.  when a < b the
       + *  result is 0xffffffff-(b-a), i.e. the space is treated as having
       + *  0xffffffff values (presumably because NEXTSEQ never yields 0).
       + *  arguments are evaluated more than once.
       + */
       +#define SEQDIFF(a,b) ( (a)>=(b)?\
       +                        (a)-(b):\
       +                        0xffffffffUL-((b)-(a)) )
       +/*
       + *  true if a lies in the window (start, end], allowing for the
       + *  window wrapping round when start > end
       + */
       +#define INSEQ(a,start,end) ( (start)<=(end)?\
       +                                ((a)>(start)&&(a)<=(end)):\
       +                                ((a)>(start)||(a)<=(end)) )
       +/* messages sent to Reliable *r but not yet acknowledged */
       +#define UNACKED(r) SEQDIFF(r->sndseq, r->ackrcvd)
       +/* successor of sequence number a, skipping 0 on wrap */
       +#define NEXTSEQ(a) ( (a)+1 == 0 ? 1 : (a)+1 )
       +
       +/*
       + *  header sizes refer to the Udphdr/Rudphdr layouts declared in this file
       + */
       +enum
       +{
       +        UDP_PHDRSIZE        = 12,        /* pseudo header */
       +//        UDP_HDRSIZE        = 20,        /* pseudo header + udp header */
       +        UDP_RHDRSIZE        = 36,        /* pseudo header + udp header + rudp header */
       +        UDP_IPHDR        = 8,        /* ip header fields before the pseudo header (vihl..frag) */
       +        IP_UDPPROTO        = 254,        /* protocol number rudpkick stores in udpproto */
       +        UDP_USEAD7        = 52,        /* size of new ipv6 headers struct */
       +
       +        /* NOTE(review): uses of the next three are outside this view; meanings inferred from names */
       +        Rudprxms        = 200,        /* presumably the retransmit interval, in ms */
       +        Rudptickms        = 50,        /* presumably the ack process tick, in ms */
       +        Rudpmaxxmit        = 10,        /* presumably the transmit limit per message */
       +        Maxunacked        = 100,        /* flow() is true while UNACKED(r) <= this */
       +};
       +
       +#define Hangupgen        0xffffffff        /* used only in hangup messages */
       +
       +/*
       + *  ip + udp header as laid out at the front of a packet.
       + *  all fields are byte arrays, so there is no padding and
       + *  multi-byte values are stored with hnputs/hnputl.
       + */
       +typedef struct Udphdr Udphdr;
       +struct Udphdr
       +{
       +        /* ip header */
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        length[2];        /* packet length */
       +        uchar        id[2];                /* Identification */
       +        uchar        frag[2];        /* Fragment information */
       +
       +        /* pseudo header starts here */
       +        uchar        Unused;
       +        uchar        udpproto;        /* Protocol */
       +        uchar        udpplen[2];        /* Header plus data length */
       +        uchar        udpsrc[4];        /* Ip source */
       +        uchar        udpdst[4];        /* Ip destination */
       +
       +        /* udp header */
       +        uchar        udpsport[2];        /* Source port */
       +        uchar        udpdport[2];        /* Destination port */
       +        uchar        udplen[2];        /* data length */
       +        uchar        udpcksum[2];        /* Checksum */
       +};
       +
       +/*
       + *  Udphdr followed by the 16 byte rudp header; the common
       + *  prefix is field-for-field identical, which is what lets
       + *  rudpkick view one packet through both types.
       + */
       +typedef struct Rudphdr Rudphdr;
       +struct Rudphdr
       +{
       +        /* ip header */
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        length[2];        /* packet length */
       +        uchar        id[2];                /* Identification */
       +        uchar        frag[2];        /* Fragment information */
       +
       +        /* pseudo header starts here */
       +        uchar        Unused;
       +        uchar        udpproto;        /* Protocol */
       +        uchar        udpplen[2];        /* Header plus data length */
       +        uchar        udpsrc[4];        /* Ip source */
       +        uchar        udpdst[4];        /* Ip destination */
       +
       +        /* udp header */
       +        uchar        udpsport[2];        /* Source port */
       +        uchar        udpdport[2];        /* Destination port */
       +        uchar        udplen[2];        /* data length (includes rudp header) */
       +        uchar        udpcksum[2];        /* Checksum */
       +
       +        /* rudp header */
       +        uchar        relseq[4];        /* id of this packet (or 0) */
       +        uchar        relsgen[4];        /* generation/time stamp */
       +        uchar        relack[4];        /* packet being acked (or 0) */
       +        uchar        relagen[4];        /* generation/time stamp */
       +};
       +
       +
       +/*
       + *  one state structure per destination
       + *
       + *  kept on a singly linked list headed by Rudpcb.r and looked
       + *  up by relstate() from a remote address and port
       + */
       +typedef struct Reliable Reliable;
       +struct Reliable
       +{
       +        Ref;                        /* unnamed member: reference count */
       +
       +        Reliable *next;                /* next destination of the same conversation */
       +
       +        uchar        addr[IPaddrlen];        /* always V6 when put here */
       +        ushort        port;
       +
       +        Block        *unacked;        /* unacked msg list */
       +        Block        *unackedtail;        /*  and its tail */
       +
       +        int        timeout;        /* time since first unacked msg sent */
       +        int        xmits;                /* number of times first unacked msg sent */
       +
       +        ulong        sndseq;                /* next packet to be sent */
       +        ulong        sndgen;                /*  and its generation */
       +
       +        ulong        rcvseq;                /* last packet received */
       +        ulong        rcvgen;                /*  and its generation */
       +
       +        ulong        acksent;        /* last ack sent */
       +        ulong        ackrcvd;        /* last msg for which ack was rcvd */
       +
       +        /* flow control */
       +        QLock        lock;
       +        Rendez        vous;
       +        int        blocked;
       +};
       +
       +
       +
       +/* MIB II counters, kept per protocol instance in Rudppriv.ustats */
       +typedef struct Rudpstats Rudpstats;
       +struct Rudpstats
       +{
       +        ulong        rudpInDatagrams;
       +        ulong        rudpNoPorts;
       +        ulong        rudpInErrors;
       +        ulong        rudpOutDatagrams;
       +};
       +
       +/*
       + *  per protocol instance state, reached through Proto.priv
       + */
       +typedef struct Rudppriv Rudppriv;
       +struct Rudppriv
       +{
       +        Ipht        ht;                /* hash table of conversations (iphtadd/iphtrem) */
       +
       +        /* MIB counters */
       +        Rudpstats        ustats;
       +
       +        /* non-MIB stats */
       +        ulong        csumerr;                /* checksum errors */
       +        ulong        lenerr;                        /* short packet */
       +        ulong        rxmits;                        /* # of retransmissions */
       +        ulong        orders;                        /* # of out of order pkts */
       +
       +        /* keeping track of the ack kproc */
       +        int        ackprocstarted;        /* set once rudpstartackproc has started it */
       +        QLock        apl;                /* serializes starting the ack kproc */
       +};
       +
       +
       +/* NOTE(review): both are used outside this view; purpose to be confirmed there */
       +static ulong generation = 0;
       +static Rendez rend;
       +
       +/*
       + *  protocol specific part of Conv
       + */
       +typedef struct Rudpcb Rudpcb;
       +struct Rudpcb
       +{
       +        QLock;                        /* unnamed member: taken with qlock(ucb); guards the r list */
       +        uchar        headers;        /* header format; 7 means the writer supplies addresses and ports */
       +        uchar        randdrop;        /* if non-zero, doipoput drops a packet when nrand(100) < randdrop */
       +        Reliable *r;                /* list of per-destination state */
       +};
       +
       +/*
       + * forward declarations of functions defined later in this file
       + */
       +void        relsendack(Conv*, Reliable*, int);
       +int        reliput(Conv*, Block*, uchar*, ushort);
       +Reliable *relstate(Rudpcb*, uchar*, ushort, char*);
       +void        relput(Reliable*);
       +void        relforget(Conv *, uchar*, int, int);
       +void        relackproc(void *);
       +void        relackq(Reliable *, Block*);
       +void        relhangup(Conv *, Reliable*);
       +void        relrexmit(Conv *, Reliable*);
       +void        rudpkick(void *x);
       +
       +/*
       + *  start the ack kproc (relackproc) for this protocol instance
       + *  if it has not been started yet.  the flag is tested once
       + *  without the lock to keep the common case cheap and again
       + *  under apl so that only one caller starts the process.
       + */
       +static void
       +rudpstartackproc(Proto *rudp)
       +{
       +        Rudppriv *rpriv;
       +        char kpname[KNAMELEN];
       +
       +        rpriv = rudp->priv;
       +        if(rpriv->ackprocstarted == 0){
       +                qlock(&rpriv->apl);
       +                if(rpriv->ackprocstarted == 0){
       +                        /* process name includes the device number of the owning Fs */
       +                        sprint(kpname, "#I%drudpack", rudp->f->dev);
       +                        kproc(kpname, relackproc, rudp);
       +                        rpriv->ackprocstarted = 1;
       +                }
       +                qunlock(&rpriv->apl);
       +        }
       +}
       +
       +/*
       + *  connect control message: make sure the ack process is
       + *  running, let Fsstdconnect parse the arguments, and report
       + *  the outcome with Fsconnected.  the conversation is entered
       + *  in the hash table only when the connect succeeded, matching
       + *  rudpannounce.  returns nil on success or the error string.
       + */
       +static char*
       +rudpconnect(Conv *c, char **argv, int argc)
       +{
       +        char *e;
       +        Rudppriv *upriv;
       +
       +        upriv = c->p->priv;
       +        rudpstartackproc(c->p);
       +        e = Fsstdconnect(c, argv, argc);
       +        Fsconnected(c, e);
       +        /* don't hash a conversation whose connect failed */
       +        if(e != nil)
       +                return e;
       +        iphtadd(&upriv->ht, c);
       +
       +        return nil;
       +}
       +
       +
       +/*
       + *  format the conversation's status into the n bytes at state:
       + *  "Open" or "Closed", then " addr/unacked" for every known
       + *  destination, then a newline.  returns the sum of the
       + *  snprint results.
       + */
       +static int
       +rudpstate(Conv *c, char *state, int n)
       +{
       +        Rudpcb *cb;
       +        Reliable *peer;
       +        char *status;
       +        int used;
       +
       +        if(c->inuse)
       +                status = "Open";
       +        else
       +                status = "Closed";
       +        used = snprint(state, n, "%s", status);
       +
       +        cb = (Rudpcb*)c->ptcl;
       +        qlock(cb);
       +        peer = cb->r;
       +        while(peer != nil){
       +                used += snprint(state+used, n-used, " %I/%ld", peer->addr, UNACKED(peer));
       +                peer = peer->next;
       +        }
       +        used += snprint(state+used, n-used, "\n");
       +        qunlock(cb);
       +
       +        return used;
       +}
       +
       +/*
       + *  announce control message: make sure the ack process is
       + *  running and let Fsstdannounce parse the arguments.  on
       + *  success the conversation is marked connected and entered in
       + *  the hash table.  returns nil on success or the error string.
       + */
       +static char*
       +rudpannounce(Conv *c, char** argv, int argc)
       +{
       +        Rudppriv *priv;
       +        char *err;
       +
       +        priv = c->p->priv;
       +        rudpstartackproc(c->p);
       +
       +        err = Fsstdannounce(c, argv, argc);
       +        if(err == nil){
       +                Fsconnected(c, nil);
       +                iphtadd(&priv->ht, c);
       +        }
       +        return err;
       +}
       +
       +/*
       + *  set up the queues of a new conversation: a 64k message
       + *  read queue, and a 64k write queue whose kick routine is rudpkick
       + */
       +static void
       +rudpcreate(Conv *c)
       +{
       +        c->rq = qopen(64*1024, Qmsg, 0, 0);
       +        c->wq = qopen(64*1024, Qkick, rudpkick, c);
       +}
       +
       +/*
       + *  close a conversation: remove it from the hash table, send
       + *  any acks still owed, close its queues, clear its addresses
       + *  and ports, then hang up on and release every destination.
       + */
       +static void
       +rudpclose(Conv *c)
       +{
       +        Rudpcb *ucb;
       +        Reliable *r, *nr;
       +        Rudppriv *upriv;
       +
       +        upriv = c->p->priv;
       +        iphtrem(&upriv->ht, c);
       +
       +        /* force out any delayed acks */
       +        ucb = (Rudpcb*)c->ptcl;
       +        qlock(ucb);
       +        for(r = ucb->r; r; r = r->next){
       +                if(r->acksent != r->rcvseq)
       +                        relsendack(c, r, 0);
       +        }
       +        qunlock(ucb);
       +
       +        qclose(c->rq);
       +        qclose(c->wq);
       +        qclose(c->eq);
       +        ipmove(c->laddr, IPnoaddr);
       +        ipmove(c->raddr, IPnoaddr);
       +        c->lport = 0;
       +        c->rport = 0;
       +
       +        /* reset the per-conversation options; done before the lock is retaken */
       +        ucb->headers = 0;
       +        ucb->randdrop = 0;
       +        qlock(ucb);
       +        for(r = ucb->r; r; r = nr){
       +                if(r->acksent != r->rcvseq)
       +                        relsendack(c, r, 0);
       +                /* fetch the link before r is released below */
       +                nr = r->next;
       +                relhangup(c, r);
       +                relput(r);
       +        }
       +        ucb->r = 0;
       +
       +        qunlock(ucb);
       +}
       +
       +/*
       + *  send bp with ipoput4 unless random dropping is enabled for
       + *  the conversation (randdrop non-zero) and nrand(100) comes up
       + *  below randdrop, in which case the packet is freed instead
       + */
       +static void
       +doipoput(Conv *c, Fs *f, Block *bp, int x, int ttl, int tos)
       +{
       +        Rudpcb *cb;
       +
       +        cb = (Rudpcb*)c->ptcl;
       +        if(cb->randdrop == 0 || nrand(100) >= cb->randdrop){
       +                ipoput4(f, bp, x, ttl, tos, nil);
       +                return;
       +        }
       +        freeblist(bp);
       +}
       +
       +/*
       + *  flow control condition: returns 1 while the destination v
       + *  (a Reliable*) has at most Maxunacked messages awaiting
       + *  acknowledgement, 0 otherwise
       + */
       +int
       +flow(void *v)
       +{
       +        Reliable *rel;
       +
       +        rel = v;
       +        if(UNACKED(rel) > Maxunacked)
       +                return 0;
       +        return 1;
       +}
       +
       +/*
       + *  Kick routine for a conversation's write queue; x is the Conv.
       + *
       + *  Takes one block from c->wq, prepends the ip/udp/rudp headers,
       + *  stamps it with the next send sequence number and an ack for the
       + *  last packet received, queues a copy for retransmission (relackq)
       + *  and sends it.  Then sleeps while more than Maxunacked messages to
       + *  that destination are outstanding.
       + *
       + *  With ucb->headers == 7 the message starts with UDP_USEAD7 bytes
       + *  of addressing: remote address, local address, interface address
       + *  (ignored), remote port, local port (ignored).  Otherwise the
       + *  conversation's own addresses and port are used.
       + */
       +void
       +rudpkick(void *x)
       +{
       +        Conv *c = x;
       +        Udphdr *uh;
       +        ushort rport;
       +        uchar laddr[IPaddrlen], raddr[IPaddrlen];
       +        Block *bp;
       +        Rudpcb *ucb;
       +        Rudphdr *rh;
       +        Reliable *r;
       +        int dlen, ptcllen;
       +        Rudppriv *upriv;
       +        Fs *f;
       +
       +        upriv = c->p->priv;
       +        f = c->p->f;
       +
       +        netlog(c->p->f, Logrudp, "rudp: kick\n");
       +        bp = qget(c->wq);
       +        if(bp == nil)
       +                return;
       +
       +        ucb = (Rudpcb*)c->ptcl;
       +        switch(ucb->headers) {
       +        case 7:
       +                /* get user specified addresses */
       +                bp = pullupblock(bp, UDP_USEAD7);
       +                if(bp == nil)
       +                        return;
       +                ipmove(raddr, bp->rp);
       +                bp->rp += IPaddrlen;
       +                ipmove(laddr, bp->rp);
       +                bp->rp += IPaddrlen;
       +                /* pick interface closest to dest */
       +                if(ipforme(f, laddr) != Runi)
       +                        findlocalip(f, laddr, raddr);
       +                bp->rp += IPaddrlen;                /* Ignore ifc address */
       +                rport = nhgets(bp->rp);
       +                bp->rp += 2+2;                        /* Ignore local port */
       +                break;
       +        default:
       +                ipmove(raddr, c->raddr);
       +                ipmove(laddr, c->laddr);
       +                rport = c->rport;
       +                break;
       +        }
       +
       +        dlen = blocklen(bp);
       +
       +        /* Make space to fit rudp & ip header */
       +        bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
       +        if(bp == nil)
       +                return;
       +
       +        uh = (Udphdr *)(bp->rp);
       +        uh->vihl = IP_VER4;
       +
       +        rh = (Rudphdr*)uh;
       +
       +        ptcllen = dlen + (UDP_RHDRSIZE-UDP_PHDRSIZE);
       +        uh->Unused = 0;
       +        uh->udpproto = IP_UDPPROTO;
       +        uh->frag[0] = 0;
       +        uh->frag[1] = 0;
       +        hnputs(uh->udpplen, ptcllen);
       +        switch(ucb->headers){
       +        case 7:
       +                v6tov4(uh->udpdst, raddr);
       +                hnputs(uh->udpdport, rport);
       +                v6tov4(uh->udpsrc, laddr);
       +                break;
       +        default:
       +                v6tov4(uh->udpdst, c->raddr);
       +                hnputs(uh->udpdport, c->rport);
       +                if(ipcmp(c->laddr, IPnoaddr) == 0)
       +                        findlocalip(f, c->laddr, c->raddr);
       +                v6tov4(uh->udpsrc, c->laddr);
       +                break;
       +        }
       +        hnputs(uh->udpsport, c->lport);
       +        hnputs(uh->udplen, ptcllen);
       +        uh->udpcksum[0] = 0;
       +        uh->udpcksum[1] = 0;
       +
       +        /* sequence numbers and the retransmit queue are guarded by ucb */
       +        qlock(ucb);
       +        r = relstate(ucb, raddr, rport, "kick");
       +        r->sndseq = NEXTSEQ(r->sndseq);
       +        hnputl(rh->relseq, r->sndseq);
       +        hnputl(rh->relsgen, r->sndgen);
       +
       +        hnputl(rh->relack, r->rcvseq);  /* ACK last rcvd packet */
       +        hnputl(rh->relagen, r->rcvgen);
       +
       +        if(r->rcvseq != r->acksent)
       +                r->acksent = r->rcvseq;
       +
       +        hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, dlen+UDP_RHDRSIZE));
       +
       +        relackq(r, bp);
       +        qunlock(ucb);
       +
       +        upriv->ustats.rudpOutDatagrams++;
       +
       +        DPRINT("sent: %lud/%lud, %lud/%lud\n",
       +                r->sndseq, r->sndgen, r->rcvseq, r->rcvgen);
       +
       +        doipoput(c, f, bp, 0, c->ttl, c->tos);
       +
       +        if(waserror()) {
       +                /*
       +                 *  Release the lock before dropping our reference:
       +                 *  relput() may free r, and r->lock lives inside it.
       +                 */
       +                qunlock(&r->lock);
       +                relput(r);
       +                nexterror();
       +        }
       +
       +        /* flow control of sorts */
       +        qlock(&r->lock);
       +        if(UNACKED(r) > Maxunacked){
       +                r->blocked = 1;
       +                sleep(&r->vous, flow, r);
       +                r->blocked = 0;
       +        }
       +
       +        qunlock(&r->lock);
       +        relput(r);
       +        poperror();
       +}
       +
       +/*
       + *  Receive routine for rudp.  bp starts with the ip/udp/rudp headers
       + *  and arrived on interface ifc.
       + *
       + *  Steps: rebuild the pseudo header and verify the checksum (when one
       + *  is present), find the conversation, let reliput() process the
       + *  sequence/ack fields, trim the headers, then either prepend the
       + *  UDP_USEAD7 address header (ucb->headers == 7) or record the peer in
       + *  the conversation, and finally pass the data to c->rq.
       + *
       + *  The block is consumed on every path.
       + */
       +void
       +rudpiput(Proto *rudp, Ipifc *ifc, Block *bp)
       +{
       +        int len, olen, ottl;
       +        Udphdr *uh;
       +        Conv *c;
       +        Rudpcb *ucb;
       +        uchar raddr[IPaddrlen], laddr[IPaddrlen];
       +        ushort rport, lport;
       +        Rudppriv *upriv;
       +        Fs *f;
       +        uchar *p;
       +
       +        upriv = rudp->priv;
       +        f = rudp->f;
       +
       +        upriv->ustats.rudpInDatagrams++;
       +
       +        uh = (Udphdr*)(bp->rp);
       +
       +        /* Put back pseudo header for checksum
       +         * (remember old values for icmpnoconv())
       +         */
       +        ottl = uh->Unused;
       +        uh->Unused = 0;
       +        len = nhgets(uh->udplen);
       +        olen = nhgets(uh->udpplen);
       +        hnputs(uh->udpplen, len);
       +
       +        v4tov6(raddr, uh->udpsrc);
       +        v4tov6(laddr, uh->udpdst);
       +        lport = nhgets(uh->udpdport);
       +        rport = nhgets(uh->udpsport);
       +
       +        /* a zero checksum field means the sender did not compute one */
       +        if(nhgets(uh->udpcksum)) {
       +                if(ptclcsum(bp, UDP_IPHDR, len+UDP_PHDRSIZE)) {
       +                        upriv->ustats.rudpInErrors++;
       +                        upriv->csumerr++;
       +                        netlog(f, Logrudp, "rudp: checksum error %I\n", raddr);
       +                        DPRINT("rudp: checksum error %I\n", raddr);
       +                        freeblist(bp);
       +                        return;
       +                }
       +        }
       +
       +        qlock(rudp);
       +
       +        c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
       +        if(c == nil){
       +                /* no conversation found */
       +                upriv->ustats.rudpNoPorts++;
       +                qunlock(rudp);
       +                netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
       +                        laddr, lport);
       +                uh->Unused = ottl;
       +                hnputs(uh->udpplen, olen);
       +                icmpnoconv(f, bp);
       +                freeblist(bp);
       +                return;
       +        }
       +        ucb = (Rudpcb*)c->ptcl;
       +        qlock(ucb);
       +        qunlock(rudp);
       +
       +        if(reliput(c, bp, raddr, rport) < 0){
       +                qunlock(ucb);
       +                /* bp may be a chain; free all of it, as the other paths do */
       +                freeblist(bp);
       +                return;
       +        }
       +
       +        /*
       +         * Trim the packet down to data size
       +         */
       +
       +        len -= (UDP_RHDRSIZE-UDP_PHDRSIZE);
       +        bp = trimblock(bp, UDP_IPHDR+UDP_RHDRSIZE, len);
       +        if(bp == nil) {
       +                netlog(f, Logrudp, "rudp: len err %I.%d -> %I.%d\n",
       +                        raddr, rport, laddr, lport);
       +                DPRINT("rudp: len err %I.%d -> %I.%d\n",
       +                        raddr, rport, laddr, lport);
       +                upriv->lenerr++;
       +                /* ucb is still held here; do not return with it locked */
       +                qunlock(ucb);
       +                return;
       +        }
       +
       +        netlog(f, Logrudpmsg, "rudp: %I.%d -> %I.%d l %d\n",
       +                raddr, rport, laddr, lport, len);
       +
       +        switch(ucb->headers){
       +        case 7:
       +                /* pass the src address */
       +                bp = padblock(bp, UDP_USEAD7);
       +                p = bp->rp;
       +                ipmove(p, raddr); p += IPaddrlen;
       +                ipmove(p, laddr); p += IPaddrlen;
       +                ipmove(p, ifc->lifc->local); p += IPaddrlen;
       +                hnputs(p, rport); p += 2;
       +                hnputs(p, lport);
       +                break;
       +        default:
       +                /* connection oriented rudp */
       +                if(ipcmp(c->raddr, IPnoaddr) == 0){
       +                        /* save the src address in the conversation */
       +                        ipmove(c->raddr, raddr);
       +                        c->rport = rport;
       +
       +                        /* reply with the same ip address (if not broadcast) */
       +                        if(ipforme(f, laddr) == Runi)
       +                                ipmove(c->laddr, laddr);
       +                        else {
       +                                /*
       +                                 *  lifc->local is copied as a full
       +                                 *  IPaddrlen address in the headers
       +                                 *  case above, so copy it the same
       +                                 *  way here rather than expanding it
       +                                 *  as a 4-byte address.
       +                                 */
       +                                ipmove(c->laddr, ifc->lifc->local);
       +                        }
       +                }
       +                break;
       +        }
       +        if(bp->next)
       +                bp = concatblock(bp);
       +
       +        if(qfull(c->rq)) {
       +                netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
       +                        laddr, lport);
       +                freeblist(bp);
       +        }
       +        else
       +                qpass(c->rq, bp);
       +
       +        qunlock(ucb);
       +}
       +
       +static char *rudpunknown = "unknown rudp ctl request";
       +
       +/*
       + *  Handle a ctl request on an rudp conversation; f[0..n-1] are the
       + *  request's words.  Recognized requests:
       + *      headers                 set ucb->headers to 7
       + *      hangup addr port        forget the Reliable state for addr/port
       + *      randdrop [percent]      set the drop rate, 0..100 (default 10)
       + *  Returns nil on success, otherwise an error string.
       + */
       +char*
       +rudpctl(Conv *c, char **f, int n)
       +{
       +        Rudpcb *ucb;
       +        uchar addr[IPaddrlen];
       +        int val;
       +        char *cmd;
       +
       +        ucb = (Rudpcb*)c->ptcl;
       +        if(n < 1)
       +                return rudpunknown;
       +        cmd = f[0];
       +
       +        if(strcmp(cmd, "headers") == 0){
       +                /* new headers format */
       +                ucb->headers = 7;
       +                return nil;
       +        }
       +
       +        if(strcmp(cmd, "hangup") == 0){
       +                if(n < 3)
       +                        return "bad syntax";
       +                if(parseip(addr, f[1]) == -1)
       +                        return Ebadip;
       +                val = atoi(f[2]);
       +                qlock(ucb);
       +                relforget(c, addr, val, 1);
       +                qunlock(ucb);
       +                return nil;
       +        }
       +
       +        if(strcmp(cmd, "randdrop") == 0){
       +                /* default is 10% */
       +                val = 10;
       +                if(n > 1)
       +                        val = atoi(f[1]);
       +                if(val < 0 || val > 100)
       +                        return "illegal rudp drop rate";
       +                ucb->randdrop = val;
       +                return nil;
       +        }
       +
       +        return rudpunknown;
       +}
       +
       +/*
       + *  Advise callback.  The addresses and ports are read from the header
       + *  at bp->rp; the first conversation whose remote side equals the
       + *  header's destination and whose local side equals the header's
       + *  source has both of its queues hung up with msg.  bp is always freed.
       + */
       +void
       +rudpadvise(Proto *rudp, Block *bp, char *msg)
       +{
       +        Udphdr *h;
       +        uchar source[IPaddrlen], dest[IPaddrlen];
       +        ushort psource, pdest;
       +        Conv **p, *cv;
       +
       +        h = (Udphdr*)(bp->rp);
       +
       +        v4tov6(source, h->udpsrc);
       +        psource = nhgets(h->udpsport);
       +        v4tov6(dest, h->udpdst);
       +        pdest = nhgets(h->udpdport);
       +
       +        /* Look for a connection */
       +        for(p = rudp->conv; *p != nil; p++) {
       +                cv = *p;
       +                if(cv->rport != pdest || cv->lport != psource)
       +                        continue;
       +                if(ipcmp(cv->raddr, dest) != 0 || ipcmp(cv->laddr, source) != 0)
       +                        continue;
       +                qhangup(cv->rq, msg);
       +                qhangup(cv->wq, msg);
       +                break;
       +        }
       +        freeblist(bp);
       +}
       +
       +/*
       + *  Format the protocol counters into buf (at most len bytes) as one
       + *  line: InDatagrams NoPorts InErrors OutDatagrams rxmits orders.
       + *  Returns snprint's result.
       + */
       +int
       +rudpstats(Proto *rudp, char *buf, int len)
       +{
       +        Rudppriv *priv;
       +
       +        priv = rudp->priv;
       +        return snprint(buf, len, "%lud %lud %lud %lud %lud %lud\n",
       +                priv->ustats.rudpInDatagrams,
       +                priv->ustats.rudpNoPorts,
       +                priv->ustats.rudpInErrors,
       +                priv->ustats.rudpOutDatagrams,
       +                priv->rxmits,
       +                priv->orders);
       +}
       +
       +/*
       + *  Allocate the rudp Proto and its private state, fill in the
       + *  operation table and register it with the stack via Fsproto().
       + */
       +void
       +rudpinit(Fs *fs)
       +{
       +        Proto *p;
       +
       +        p = smalloc(sizeof(Proto));
       +        p->priv = smalloc(sizeof(Rudppriv));
       +
       +        /* identity and sizing */
       +        p->name = "rudp";
       +        p->ipproto = IP_UDPPROTO;
       +        p->nc = 16;
       +        p->ptclsize = sizeof(Rudpcb);
       +
       +        /* conversation operations */
       +        p->connect = rudpconnect;
       +        p->announce = rudpannounce;
       +        p->ctl = rudpctl;
       +        p->state = rudpstate;
       +        p->create = rudpcreate;
       +        p->close = rudpclose;
       +
       +        /* protocol-wide operations */
       +        p->rcv = rudpiput;
       +        p->advise = rudpadvise;
       +        p->stats = rudpstats;
       +
       +        Fsproto(fs, p);
       +}
       +
       +/*********************************************/
       +/* Here starts the reliable helper functions */
       +/*********************************************/
       +/*
       + *  Append a copy of bp to r's list of unacknowledged blocks so it can
       + *  be retransmitted later.  When the list was empty, the retransmit
       + *  state (timeout, xmits) is restarted.
       + */
       +void
       +relackq(Reliable *r, Block *bp)
       +{
       +        Block *cp;
       +
       +        cp = copyblock(bp, blocklen(bp));
       +        cp->list = nil;
       +        if(r->unacked == nil){
       +                /* first outstanding block: restart timer */
       +                r->timeout = 0;
       +                r->xmits = 1;
       +                r->unacked = cp;
       +        } else
       +                r->unackedtail->list = cp;
       +        r->unackedtail = cp;
       +}
       +
       +/*
       + *  Retransmission process; a is the rudp Proto.  Never returns.
       + *  Every Rudptickms it walks all conversations and, for each Reliable
       + *  record, retransmits once the accumulated timeout exceeds
       + *  Rudprxms*xmits and sends an ack if the last one sent is stale.
       + */
       +void
       +relackproc(void *a)
       +{
       +        Proto *rudp;
       +        Conv **cp, *c;
       +        Rudpcb *ucb;
       +        Reliable *r;
       +
       +        rudp = (Proto *)a;
       +
       +        for(;;){
       +                tsleep(&up->sleep, return0, 0, Rudptickms);
       +
       +                for(cp = rudp->conv; *cp; cp++) {
       +                        c = *cp;
       +                        ucb = (Rudpcb*)c->ptcl;
       +                        qlock(ucb);
       +
       +                        for(r = ucb->r; r != nil; r = r->next) {
       +                                if(r->unacked != nil){
       +                                        r->timeout += Rudptickms;
       +                                        if(r->timeout > Rudprxms*r->xmits)
       +                                                relrexmit(c, r);
       +                                }
       +                                if(r->acksent != r->rcvseq)
       +                                        relsendack(c, r, 0);
       +                        }
       +                        qunlock(ucb);
       +                }
       +        }
       +}
       +
       +/*
       + *  Find the Reliable record for addr/port on ucb's list, creating and
       + *  appending a fresh one (with a new send generation) if none exists.
       + *  The record is returned with an extra reference that the caller
       + *  releases with relput().  from only labels the debug print.
       + */
       +Reliable*
       +relstate(Rudpcb *ucb, uchar *addr, ushort port, char *from)
       +{
       +        Reliable *r, **link;
       +
       +        for(link = &ucb->r; (r = *link) != nil; link = &r->next)
       +                if(memcmp(addr, r->addr, IPaddrlen) == 0 && port == r->port)
       +                        break;
       +
       +        if(r != nil){
       +                incref(r);
       +                return r;
       +        }
       +
       +        /* no state for this addr/port, create some */
       +        while(generation == 0)
       +                generation = rand();
       +
       +        DPRINT("from %s new state %lud for %I!%ud\n", 
       +                from, generation, addr, port);
       +
       +        r = smalloc(sizeof(Reliable));
       +        memmove(r->addr, addr, IPaddrlen);
       +        r->port = port;
       +        r->unacked = 0;
       +
       +        /* Hangupgen is never used as a send generation */
       +        if(generation == Hangupgen)
       +                generation++;
       +        r->sndgen = generation++;
       +        r->sndseq = 0;
       +        r->ackrcvd = 0;
       +
       +        r->rcvgen = 0;
       +        r->rcvseq = 0;
       +        r->acksent = 0;
       +
       +        r->xmits = 0;
       +        r->timeout = 0;
       +
       +        r->ref = 0;
       +        incref(r);        /* one reference for being in the list */
       +        *link = r;
       +
       +        incref(r);        /* and one for the caller */
       +        return r;
       +}
       +
       +/*
       + *  Drop one reference to r; free it when the count reaches zero.
       + */
       +void
       +relput(Reliable *r)
       +{
       +        if(decref(r) != 0)
       +                return;
       +        free(r);
       +}
       +
       +/*
       + *  Forget the Reliable state for ip/port: unlink the first matching
       + *  record from the conversation's list, send a hangup ack if we are
       + *  the originator, reset it with relhangup() and drop the list's
       + *  reference.  Does nothing if there is no match.
       + */
       +void
       +relforget(Conv *c, uchar *ip, int port, int originator)
       +{
       +        Reliable *r, **link;
       +
       +        link = &((Rudpcb*)c->ptcl)->r;
       +        while((r = *link) != nil){
       +                if(ipcmp(ip, r->addr) != 0 || port != r->port){
       +                        link = &r->next;
       +                        continue;
       +                }
       +                *link = r->next;
       +                if(originator)
       +                        relsendack(c, r, 1);
       +                relhangup(c, r);
       +                relput(r);        /* remove from the list */
       +                return;
       +        }
       +}
       +
       +/* 
       + *  process a rcvd reliable packet. return -1 if not to be passed to user process,
       + *  0 otherwise.
       + *
       + *  addr/port identify the sender; the sequence, generation and ack
       + *  fields are read from the header at bp->rp.  bp itself is neither
       + *  freed nor modified here.
       + *
       + *  called with ucb locked.
       + */
       +int
       +reliput(Conv *c, Block *bp, uchar *addr, ushort port)
       +{
       +        Block *nbp;
       +        Rudpcb *ucb;
       +        Rudppriv *upriv;
       +        Udphdr *uh;
       +        Reliable *r;
       +        Rudphdr *rh;
       +        ulong seq, ack, sgen, agen, ackreal;
       +        int rv = -1;
       +
       +        /* get fields */
       +        uh = (Udphdr*)(bp->rp);
       +        rh = (Rudphdr*)uh;
       +        seq = nhgetl(rh->relseq);
       +        sgen = nhgetl(rh->relsgen);
       +        ack = nhgetl(rh->relack);
       +        agen = nhgetl(rh->relagen);
       +
       +        upriv = c->p->priv;
       +        ucb = (Rudpcb*)c->ptcl;
       +        /* takes a reference on r; released at out: */
       +        r = relstate(ucb, addr, port, "input");
       +
       +        DPRINT("rcvd %lud/%lud, %lud/%lud, r->sndgen = %lud\n", 
       +                seq, sgen, ack, agen, r->sndgen);
       +
       +        /* if acking an incorrect generation, ignore */
       +        if(ack && agen != r->sndgen)
       +                goto out;
       +
       +        /* Look for a hangup */
       +        if(sgen == Hangupgen) {
       +                if(agen == r->sndgen)
       +                        relforget(c, addr, port, 0);
       +                goto out;
       +        }
       +
       +        /* make sure we're not talking to a new remote side */
       +        if(r->rcvgen != sgen){
       +                /* a new generation is only accepted on seq 0 or 1 */
       +                if(seq != 0 && seq != 1)
       +                        goto out;
       +
       +                /* new connection */
       +                if(r->rcvgen != 0){
       +                        DPRINT("new con r->rcvgen = %lud, sgen = %lud\n", r->rcvgen, sgen);
       +                        relhangup(c, r);
       +                }
       +                r->rcvgen = sgen;
       +        }
       +
       +        /* dequeue acked packets */
       +        if(ack && agen == r->sndgen){
       +                ackreal = 0;
       +                while(r->unacked != nil && INSEQ(ack, r->ackrcvd, r->sndseq)){
       +                        nbp = r->unacked;
       +                        r->unacked = nbp->list;
       +                        DPRINT("%lud/%lud acked, r->sndgen = %lud\n", 
       +                               ack, agen, r->sndgen);
       +                        freeb(nbp);
       +                        r->ackrcvd = NEXTSEQ(r->ackrcvd);
       +                        ackreal = 1;
       +                }
       +
       +                /* flow control: wake a writer sleeping in rudpkick */
       +                if(UNACKED(r) < Maxunacked/8 && r->blocked)
       +                        wakeup(&r->vous);
       +
       +                /*
       +                 *  retransmit next packet if the acked packet
       +                 *  was transmitted more than once
       +                 */
       +                if(ackreal && r->unacked != nil){
       +                        r->timeout = 0;
       +                        if(r->xmits > 1){
       +                                r->xmits = 1;
       +                                relrexmit(c, r);
       +                        }
       +                }
       +                
       +        }
       +
       +        /* no message or input queue full */
       +        if(seq == 0 || qfull(c->rq))
       +                goto out;
       +
       +        /* refuse out of order delivery */
       +        if(seq != NEXTSEQ(r->rcvseq)){
       +                relsendack(c, r, 0);        /* tell him we got it already */
       +                upriv->orders++;
       +                DPRINT("out of sequence %lud not %lud\n", seq, NEXTSEQ(r->rcvseq));
       +                goto out;
       +        }
       +        r->rcvseq = seq;
       +
       +        /* in-sequence data: let the caller deliver it */
       +        rv = 0;
       +out:
       +        relput(r);
       +        return rv;
       +}
       +
       +/*
       + *  Build and send a header-only rudp packet (relseq 0) to r's peer
       + *  acknowledging r->rcvseq/r->rcvgen.  When hangup is set, the send
       + *  generation field carries Hangupgen instead of r->sndgen.
       + *  Returns without sending if allocb() yields nil.
       + */
       +void
       +relsendack(Conv *c, Reliable *r, int hangup)
       +{
       +        Fs *f;
       +        Block *ackb;
       +        Udphdr *uh;
       +        Rudphdr *rh;
       +        int plen;
       +
       +        ackb = allocb(UDP_IPHDR + UDP_RHDRSIZE);
       +        if(ackb == nil)
       +                return;
       +        ackb->wp += UDP_IPHDR + UDP_RHDRSIZE;
       +
       +        f = c->p->f;
       +        uh = (Udphdr *)(ackb->rp);
       +        rh = (Rudphdr*)uh;
       +        plen = (UDP_RHDRSIZE-UDP_PHDRSIZE);
       +
       +        /* ip pseudo header */
       +        uh->vihl = IP_VER4;
       +        uh->Unused = 0;
       +        uh->udpproto = IP_UDPPROTO;
       +        uh->frag[0] = 0;
       +        uh->frag[1] = 0;
       +        hnputs(uh->udpplen, plen);
       +
       +        /* udp header; choose a local address if none is set yet */
       +        if(ipcmp(c->laddr, IPnoaddr) == 0)
       +                findlocalip(f, c->laddr, c->raddr);
       +        v6tov4(uh->udpsrc, c->laddr);
       +        v6tov4(uh->udpdst, r->addr);
       +        hnputs(uh->udpsport, c->lport);
       +        hnputs(uh->udpdport, r->port);
       +        hnputs(uh->udplen, plen);
       +        uh->udpcksum[0] = 0;
       +        uh->udpcksum[1] = 0;
       +
       +        /* rudp header */
       +        hnputl(rh->relseq, 0);
       +        if(hangup)
       +                hnputl(rh->relsgen, Hangupgen);
       +        else
       +                hnputl(rh->relsgen, r->sndgen);
       +        hnputl(rh->relack, r->rcvseq);
       +        hnputl(rh->relagen, r->rcvgen);
       +
       +        if(r->acksent < r->rcvseq)
       +                r->acksent = r->rcvseq;
       +
       +        /* checksum goes in last, over the completed header */
       +        hnputs(uh->udpcksum, ptclcsum(ackb, UDP_IPHDR, UDP_RHDRSIZE));
       +
       +        DPRINT("sendack: %lud/%lud, %lud/%lud\n", 0L, r->sndgen, r->rcvseq, r->rcvgen);
       +        doipoput(c, f, ackb, 0, c->ttl, c->tos);
       +}
       +
       +
       +/*
       + *  Report "hangup addr!port" on the conversation's event queue,
       + *  discard all unacknowledged blocks and reset r to a fresh state
       + *  with a new send generation, then wake any sleeper on r->vous.
       + *
       + *  called with ucb locked (and c locked if user initiated close)
       + */
       +void
       +relhangup(Conv *c, Reliable *r)
       +{
       +        char hup[ERRMAX];
       +        Block *b;
       +        int len;
       +
       +        len = snprint(hup, sizeof(hup), "hangup %I!%d", r->addr, r->port);
       +        qproduce(c->eq, hup, len);
       +
       +        /* dump any unacked outgoing messages */
       +        while((b = r->unacked) != nil){
       +                r->unacked = b->list;
       +                b->list = nil;
       +                freeb(b);
       +        }
       +
       +        /* receive side */
       +        r->rcvgen = 0;
       +        r->rcvseq = 0;
       +        r->acksent = 0;
       +
       +        /* send side; Hangupgen is never used as a send generation */
       +        if(generation == Hangupgen)
       +                generation++;
       +        r->sndgen = generation++;
       +        r->sndseq = 0;
       +        r->ackrcvd = 0;
       +
       +        /* retransmit timer */
       +        r->xmits = 0;
       +        r->timeout = 0;
       +
       +        wakeup(&r->vous);
       +}
       +
       +/*
       + *  Retransmit the oldest unacknowledged block of r, or hang up the
       + *  peer once xmits has exceeded Rudpmaxxmit.  Resets r->timeout.
       + *
       + *  called with ucb locked
       + */
       +void
       +relrexmit(Conv *c, Reliable *r)
       +{
       +        Fs *f;
       +        Rudppriv *priv;
       +        Block *cp;
       +
       +        f = c->p->f;
       +        priv = c->p->priv;
       +
       +        r->timeout = 0;
       +        if(r->xmits++ > Rudpmaxxmit){
       +                relhangup(c, r);
       +                return;
       +        }
       +
       +        priv->rxmits++;
       +        /* send a copy; the original stays queued until it is acked */
       +        cp = copyblock(r->unacked, blocklen(r->unacked));
       +        DPRINT("rxmit r->ackrvcd+1 = %lud\n", r->ackrcvd+1);
       +        doipoput(c, f, cp, 0, c->ttl, c->tos);
       +}
 (DIR) diff --git a/src/9vx/a/ip/tcp.c b/src/9vx/a/ip/tcp.c
       @@ -0,0 +1,3209 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +
       +#include        "ip.h"
       +
       +/*
       + *  TCP constants: header sizes, timer states, header flag bits,
       + *  option codes, defaults, output flags and connection states.
       + */
       +enum
       +{
       +        QMAX                = 64*1024-1,
       +        IP_TCPPROTO        = 6,
       +
       +        TCP4_IPLEN        = 8,
       +        TCP4_PHDRSIZE        = 12,
       +        TCP4_HDRSIZE        = 20,
       +        TCP4_TCBPHDRSZ        = 40,
       +        TCP4_PKT        = TCP4_IPLEN+TCP4_PHDRSIZE,
       +
       +        TCP6_IPLEN        = 0,
       +        TCP6_PHDRSIZE        = 40,
       +        TCP6_HDRSIZE        = 20,
       +        TCP6_TCBPHDRSZ        = 60,
       +        TCP6_PKT        = TCP6_IPLEN+TCP6_PHDRSIZE,
       +
       +        TcptimerOFF        = 0,
       +        TcptimerON        = 1,
       +        TcptimerDONE        = 2,
       +        MAX_TIME         = (1<<20),        /* Forever */
       +        TCP_ACK                = 50,                /* Timed ack sequence in ms */
       +        MAXBACKMS        = 9*60*1000,        /* longest backoff time (ms) before hangup */
       +
       +        URG                = 0x20,                /* Data marked urgent */
       +        ACK                = 0x10,                /* Acknowledge is valid */
       +        PSH                = 0x08,                /* Whole data pipe is pushed */
       +        RST                = 0x04,                /* Reset connection */
       +        SYN                = 0x02,                /* Pkt. is synchronise */
       +        FIN                = 0x01,                /* Start close down */
       +
       +        EOLOPT                = 0,
       +        NOOPOPT                = 1,
       +        MSSOPT                = 2,
       +        MSS_LENGTH        = 4,                /* Length of the max segment size option (RFC 793: 4 bytes) */
       +        WSOPT                = 3,
       +        WS_LENGTH        = 3,                /* presumably the window scale option length (RFC 1323: 3 bytes) - confirm against use */
       +        MSL2                = 10,
       +        MSPTICK                = 50,                /* Milliseconds per timer tick */
       +        DEF_MSS                = 1460,                /* Default max segment size */
       +        DEF_MSS6        = 1280,                /* Default max segment size (min) for v6 */
       +        DEF_RTT                = 500,                /* Default round trip */
       +        DEF_KAT                = 120000,        /* Default time (ms) between keep alives */
       +        TCP_LISTEN        = 0,                /* Listen connection */
       +        TCP_CONNECT        = 1,                /* Outgoing connection */
       +        SYNACK_RXTIMER        = 250,                /* ms between SYNACK retransmits */
       +
       +        TCPREXMTTHRESH        = 3,                /* dupack threshold for rxt */
       +
       +        FORCE                = 1,
       +        CLONE                = 2,
       +        RETRAN                = 4,
       +        ACTIVE                = 8,
       +        SYNACK                = 16,
       +
       +        LOGAGAIN        = 3,
       +        LOGDGAIN        = 2,
       +
       +        Closed                = 0,                /* Connection states */
       +        Listen,
       +        Syn_sent,
       +        Syn_received,
       +        Established,
       +        Finwait1,
       +        Finwait2,
       +        Close_wait,
       +        Closing,
       +        Last_ack,
       +        Time_wait,
       +
       +        Maxlimbo        = 1000,                /* maximum procs waiting for response to SYN ACK */
       +        NLHT                = 256,                /* hash table size, must be a power of 2 */
       +        LHTMASK                = NLHT-1,
       +
       +        HaveWS                = 1<<8,
       +};
       +
       +/*
       + *  Printable names of the connection states, in the order of the
       + *  state constants Closed .. Time_wait.
       + *  Must correspond to the enumeration above.
       + */
       +char *tcpstates[] =
       +{
       +        "Closed",         "Listen",         "Syn_sent", "Syn_received",
       +        "Established",         "Finwait1",        "Finwait2", "Close_wait",
       +        "Closing",         "Last_ack",         "Time_wait"
       +};
       +
       +/*
       + *  A timer with a callback.  next/prev link it into a doubly linked
       + *  list and readynext into a second, singly linked one.
       + *  NOTE(review): field roles below are inferred from their names and
       + *  from the TcptimerOFF/ON/DONE constants - confirm against the timer
       + *  code.
       + */
       +typedef struct Tcptimer Tcptimer;
       +struct Tcptimer
       +{
       +        Tcptimer        *next;
       +        Tcptimer        *prev;
       +        Tcptimer        *readynext;
       +        int        state;                /* presumably TcptimerOFF, TcptimerON or TcptimerDONE */
       +        int        start;
       +        int        count;
       +        void        (*func)(void*);        /* callback; takes arg */
       +        void        *arg;
       +};
       +
       +/*
       + *  v4 and v6 pseudo headers used for
       + *  checksumming tcp
       + *
       + *  All multi-byte fields are declared as byte arrays.
       + */
       +typedef struct Tcp4hdr Tcp4hdr;
       +struct Tcp4hdr
       +{
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        length[2];        /* packet length */
       +        uchar        id[2];                /* Identification */
       +        uchar        frag[2];        /* Fragment information */
       +        uchar        Unused;
       +        uchar        proto;
       +        uchar        tcplen[2];
       +        uchar        tcpsrc[4];
       +        uchar        tcpdst[4];
       +        uchar        tcpsport[2];
       +        uchar        tcpdport[2];
       +        uchar        tcpseq[4];
       +        uchar        tcpack[4];
       +        uchar        tcpflag[2];
       +        uchar        tcpwin[2];
       +        uchar        tcpcksum[2];
       +        uchar        tcpurg[2];
       +        /* Options segment */
       +        uchar        tcpopt[1];
       +};
       +
       +/*
       + *  v6 counterpart of Tcp4hdr: v6 header fields with IPaddrlen-byte
       + *  addresses, followed by the same tcp fields.
       + */
       +typedef struct Tcp6hdr Tcp6hdr;
       +struct Tcp6hdr
       +{
       +        uchar        vcf[4];
       +        uchar        ploadlen[2];
       +        uchar        proto;
       +        uchar        ttl;
       +        uchar        tcpsrc[IPaddrlen];
       +        uchar        tcpdst[IPaddrlen];
       +        uchar        tcpsport[2];
       +        uchar        tcpdport[2];
       +        uchar        tcpseq[4];
       +        uchar        tcpack[4];
       +        uchar        tcpflag[2];
       +        uchar        tcpwin[2];
       +        uchar        tcpcksum[2];
       +        uchar        tcpurg[2];
       +        /* Options segment */
       +        uchar        tcpopt[1];
       +};
       +
       +/*
       + *  this represents the control info
       + *  for a single packet.  It is derived from
       + *  a packet in ntohtcp{4,6}() and stuck into
       + *  a packet in htontcp{4,6}().
       + *
       + *  Unlike Tcp4hdr/Tcp6hdr, the fields here are ordinary integers
       + *  rather than byte arrays.
       + */
       +typedef struct Tcp Tcp;
       +struct        Tcp
       +{
       +        ushort        source;
       +        ushort        dest;
       +        ulong        seq;
       +        ulong        ack;
       +        uchar        flags;
       +        ushort        ws;        /* window scale option (if not zero) */
       +        ulong        wnd;
       +        ushort        urg;
       +        ushort        mss;        /* max segment size option (if not zero) */
       +        ushort        len;        /* size of data */
       +};
       +
       +/*
       + *  this header is malloc'd to thread together fragments
       + *  waiting to be coalesced
       + *
       + *  Each entry carries a Tcp control record, a Block pointer and a
       + *  length, and links to the following entry through next.
       + */
       +typedef struct Reseq Reseq;
       +struct Reseq
       +{
       +        Reseq        *next;
       +        Tcp        seg;
       +        Block        *bp;
       +        ushort        length;
       +};
       +
       +/*
       + *  the QLOCK in the Conv locks this structure
       + *
       + *  Per-conversation TCP control block, reached through Conv.ptcl.
       + *  inittcpctl() zeroes it and fills in the defaults.
       + */
       +typedef struct Tcpctl Tcpctl;
       +struct Tcpctl
       +{
       +        uchar        state;                        /* Connection state */
       +        uchar        type;                        /* Listening or active connection */
       +        uchar        code;                        /* Icmp code */
       +        struct {
       +                ulong        una;                /* Unacked data pointer */
       +                ulong        nxt;                /* Next sequence expected */
       +                ulong        ptr;                /* Data pointer */
       +                ulong        wnd;                /* Tcp send window */
       +                ulong        urg;                /* Urgent data pointer */
       +                ulong        wl2;
       +                int        scale;                /* how much to right shift window in xmitted packets */
       +                /* to implement tahoe and reno TCP */
       +                ulong        dupacks;        /* number of duplicate acks rcvd */
       +                int        recovery;        /* loss recovery flag */
       +                ulong        rxt;                /* right window marker for recovery */
       +        } snd;
       +        struct {
       +                ulong        nxt;                /* Receive pointer to next uchar slot */
       +                ulong        wnd;                /* Receive window incoming */
       +                ulong        urg;                /* Urgent pointer */
       +                int        blocked;        /* set by tcprcvwin() when the window closes to zero */
       +                int        una;                /* unacked data segs */
       +                int        scale;                /* how much to left shift window in rcved packets */
       +        } rcv;
       +        ulong        iss;                        /* Initial sequence number */
       +        int        sawwsopt;                /* true if we saw a wsopt on the incoming SYN */
       +        ulong        cwind;                        /* Congestion window */
       +        int        scale;                        /* desired snd.scale */
       +        ushort        ssthresh;                /* Slow start threshold */
       +        int        resent;                        /* Bytes just resent */
       +        /* NOTE(review): irs is int here but ulong in Limbo -- confirm the signedness is intended */
       +        int        irs;                        /* Initial received sequence */
       +        ushort        mss;                        /* Maximum segment size */
       +        int        rerecv;                        /* Overlap of data re-received */
       +        ulong        window;                        /* Receive window */
       +        uchar        backoff;                /* Exponential backoff counter */
       +        int        backedoff;                /* ms we've backed off for rexmits */
       +        uchar        flags;                        /* State flags */
       +        Reseq        *reseq;                        /* Resequencing queue */
       +        Tcptimer        timer;                        /* Activity timer */
       +        Tcptimer        acktimer;                /* Acknowledge timer */
       +        Tcptimer        rtt_timer;                /* Round trip timer */
       +        Tcptimer        katimer;                /* keep alive timer */
       +        ulong        rttseq;                        /* Round trip sequence */
       +        int        srtt;                        /* Shortened round trip */
       +        int        mdev;                        /* Mean deviation of round trip */
       +        int        kacounter;                /* count down for keep alive */
       +        uint        sndsyntime;                /* time syn sent */
       +        ulong        time;                        /* time Finwait2 or Syn_received was sent */
       +        int        nochecksum;                /* non-zero means don't send checksums */
       +        int        flgcnt;                        /* number of flags in the sequence (FIN,SEQ) */
       +
       +        union {
       +                Tcp4hdr        tcp4hdr;
       +                Tcp6hdr        tcp6hdr;
       +        } protohdr;                /* prototype header */
       +};
       +
       +/*
       + *  New calls are put in limbo rather than having a conversation structure
       + *  allocated.  Thus, a SYN attack results in lots of limbo'd calls but not
       + *  any real Conv structures mucking things up.  Calls in limbo rexmit their
       + *  SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
       + *
       + *  In particular they aren't on a listener's queue so that they don't figure
       + *  in the input queue limit.
       + *
       + *  If 1/2 of a T3 was attacking SYN packets, we'd have a permanent queue
       + *  of 70000 limbo'd calls.  Not great for a linear list but doable.  Therefore
       + *  there is no hashing of this list.
       + */
       +typedef struct Limbo Limbo;
       +struct Limbo
       +{
       +        Limbo        *next;                /* next limbo'd call on the same Tcppriv.lht[] chain */
       +
       +        uchar        laddr[IPaddrlen];
       +        uchar        raddr[IPaddrlen];
       +        ushort        lport;
       +        ushort        rport;
       +        ulong        irs;                /* initial received sequence */
       +        ulong        iss;                /* initial sent sequence */
       +        ushort        mss;                /* mss from the other end */
       +        ushort        rcvscale;        /* how much to scale rcvd windows */
       +        ushort        sndscale;        /* how much to scale sent windows */
       +        ulong        lastsend;        /* last time we sent a synack */
       +        uchar        version;        /* v4 or v6 */
       +        uchar        rexmits;        /* number of retransmissions */
       +};
       +
       +/* tunables; inittcpctl() seeds srtt and the retransmit timer from tcp_irtt */
       +int        tcp_irtt = DEF_RTT;        /* Initial guess at round trip time */
       +ushort        tcp_mss = DEF_MSS;        /* Maximum segment size to be sent */
       +
       +/*
       + *  Indices into Tcppriv.stats[] and statnames[].
       + *  Nstats must stay last: it is the size of the stats array.
       + */
       +enum {
       +        /* MIB stats */
       +        MaxConn,
       +        ActiveOpens,
       +        PassiveOpens,
       +        EstabResets,
       +        CurrEstab,
       +        InSegs,
       +        OutSegs,
       +        RetransSegs,
       +        RetransTimeouts,
       +        InErrs,
       +        OutRsts,
       +
       +        /* non-MIB stats */
       +        CsumErrs,
       +        HlenErrs,
       +        LenErrs,
       +        OutOfOrder,
       +
       +        Nstats
       +};
       +
       +/*
       + *  Printable name for each statistic, indexed by the enum above.
       + *  The "[index] value" form is the Plan 9 designated-initializer syntax.
       + */
       +static char *statnames[] =
       +{
       +[MaxConn]        "MaxConn",
       +[ActiveOpens]        "ActiveOpens",
       +[PassiveOpens]        "PassiveOpens",
       +[EstabResets]        "EstabResets",
       +[CurrEstab]        "CurrEstab",
       +[InSegs]        "InSegs",
       +[OutSegs]        "OutSegs",
       +[RetransSegs]        "RetransSegs",
       +[RetransTimeouts]        "RetransTimeouts",
       +[InErrs]        "InErrs",
       +[OutRsts]        "OutRsts",
       +[CsumErrs]        "CsumErrs",
       +[HlenErrs]        "HlenErrs",
       +[LenErrs]        "LenErrs",
       +[OutOfOrder]        "OutOfOrder",
       +};
       +
       +/*
       + *  Per-protocol-instance private state, reached through Proto.priv
       + *  (e.g. tpriv = s->p->priv in tcpsetstate()).
       + */
       +typedef struct Tcppriv Tcppriv;
       +struct Tcppriv
       +{
       +        /* List of active timers */
       +        QLock         tl;                /* held by tcpgo(), tcphalt() and tcpackproc() around list changes */
       +        Tcptimer *timers;
       +
       +        /* hash table for matching conversations */
       +        Ipht        ht;
       +
       +        /* calls in limbo waiting for an ACK to our SYN ACK */
       +        int        nlimbo;
       +        Limbo        *lht[NLHT];
       +
       +        /* for keeping track of tcpackproc */
       +        QLock        apl;                /* serializes the one-time kproc start in tcpstart() */
       +        int        ackprocstarted;
       +
       +        ulong        stats[Nstats];        /* counters indexed by the stats enum */
       +};
       +
       +/*
       + *  Setting tcpporthogdefense to non-zero enables Dong Lin's
       + *  solution to hijacked systems staking out ports as a form
       + *  of DoS attack.
       + *
       + *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
       + *  that number gets acked by the other end, we shut down the connection.
       + *  Look for tcpporthogdefense in the code.
       + */
       +int tcpporthogdefense = 0;
       +
       +/*
       + *  Forward declarations.  Several of these are called before their
       + *  definitions appear (e.g. tcpstart() from tcpconnect(), localclose()
       + *  from tcpclose()).
       + */
       +int        addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
       +void        getreseq(Tcpctl*, Tcp*, Block**, ushort*);
       +void        localclose(Conv*, char*);
       +void        procsyn(Conv*, Tcp*);
       +void        tcpiput(Proto*, Ipifc*, Block*);
       +void        tcpoutput(Conv*);
       +int        tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
       +void        tcpstart(Conv*, int);
       +void        tcptimeout(void*);
       +void        tcpsndsyn(Conv*, Tcpctl*);
       +void        tcprcvwin(Conv*);
       +void        tcpacktimer(void*);
       +void        tcpkeepalive(void*);
       +void        tcpsetkacounter(Tcpctl*);
       +void        tcprxmit(Conv*);
       +void        tcpsettimer(Tcpctl*);
       +void        tcpsynackrtt(Conv*);
       +void        tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
       +
       +static void limborexmit(Proto*);
       +static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
       +
       +/*
       + *  Move conversation s to newstate; does nothing if the state is
       + *  unchanged.  Keeps the CurrEstab counter in step with transitions
       + *  into and out of Established, closes all three queues on Closed,
       + *  hangs up the read queue on Close_wait, and calls Fsconnected(s, nil)
       + *  when leaving Syn_sent for any state other than Closed.
       + */
       +void
       +tcpsetstate(Conv *s, uchar newstate)
       +{
       +        Tcpctl *tcb;
       +        uchar oldstate;
       +        Tcppriv *tpriv;
       +
       +        tpriv = s->p->priv;
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        oldstate = tcb->state;
       +        if(oldstate == newstate)
       +                return;
       +
       +        if(oldstate == Established)
       +                tpriv->stats[CurrEstab]--;
       +        if(newstate == Established)
       +                tpriv->stats[CurrEstab]++;
       +
       +        /**
       +        Disabled debugging print.
       +        NOTE(review): Tcppriv as declared in this file has no tstats member;
       +        the last argument would need to be tpriv->stats[CurrEstab].
       +        print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
       +                tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
       +        **/
       +
       +        switch(newstate) {
       +        case Closed:
       +                qclose(s->rq);
       +                qclose(s->wq);
       +                qclose(s->eq);
       +                break;
       +
       +        case Close_wait:                /* Remote closes */
       +                qhangup(s->rq, nil);
       +                break;
       +        }
       +
       +        /* the new state is recorded only after the queues have been dealt with */
       +        tcb->state = newstate;
       +
       +        if(oldstate == Syn_sent && newstate != Closed)
       +                Fsconnected(s, nil);
       +}
       +
       +/*
       + *  Begin an active open on c.  Fails with Econinuse unless the control
       + *  block is Closed; otherwise hands the arguments to Fsstdconnect() and,
       + *  if that succeeds, starts the connection with tcpstart().
       + *  Returns nil on success or the error string.
       + */
       +static char*
       +tcpconnect(Conv *c, char **argv, int argc)
       +{
       +        char *err;
       +
       +        if(((Tcpctl*)c->ptcl)->state != Closed)
       +                return Econinuse;
       +
       +        err = Fsstdconnect(c, argv, argc);
       +        if(err == nil)
       +                tcpstart(c, TCP_CONNECT);
       +        return err;
       +}
       +
       +/*
       + *  Format a one-line status summary of conversation c into state
       + *  (at most n bytes) and return snprint's result.  A missing read
       + *  or write queue is reported with length 0.
       + */
       +static int
       +tcpstate(Conv *c, char *state, int n)
       +{
       +        Tcpctl *tcb;
       +        int qin, qout;
       +
       +        tcb = (Tcpctl*)c->ptcl;
       +
       +        qin = 0;
       +        if(c->rq)
       +                qin = qlen(c->rq);
       +        qout = 0;
       +        if(c->wq)
       +                qout = qlen(c->wq);
       +
       +        return snprint(state, n,
       +                "%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
       +                tcpstates[tcb->state],
       +                qin,
       +                qout,
       +                tcb->srtt, tcb->mdev,
       +                tcb->cwind, tcb->snd.wnd, tcb->rcv.scale, tcb->rcv.wnd, tcb->snd.scale,
       +                tcb->timer.start, tcb->timer.count, tcb->rerecv,
       +                tcb->katimer.start, tcb->katimer.count);
       +}
       +
       +/*
       + *  Non-zero if conversation c is in any state other than Closed.
       + */
       +static int
       +tcpinuse(Conv *c)
       +{
       +        return ((Tcpctl*)c->ptcl)->state != Closed;
       +}
       +
       +/*
       + *  Put c into the listening state.  Fails with Econinuse unless the
       + *  control block is Closed; otherwise hands the arguments to
       + *  Fsstdannounce(), starts the listener and reports it connected.
       + *  Returns nil on success or the error string.
       + */
       +static char*
       +tcpannounce(Conv *c, char **argv, int argc)
       +{
       +        char *err;
       +
       +        if(((Tcpctl*)c->ptcl)->state != Closed)
       +                return Econinuse;
       +
       +        err = Fsstdannounce(c, argv, argc);
       +        if(err != nil)
       +                return err;
       +
       +        tcpstart(c, TCP_LISTEN);
       +        Fsconnected(c, nil);
       +        return nil;
       +}
       +
       +/*
       + *  tcpclose is always called with the q locked
       + *
       + *  Hang up all three queues, discard unread input, then either close
       + *  locally (Listen, Closed, Syn_sent) or account for a FIN and move to
       + *  the matching closing state.  Other states are left alone.
       + */
       +static void
       +tcpclose(Conv *c)
       +{
       +        Tcpctl *tcb;
       +        uchar next;
       +
       +        tcb = (Tcpctl*)c->ptcl;
       +
       +        qhangup(c->rq, nil);
       +        qhangup(c->wq, nil);
       +        qhangup(c->eq, nil);
       +        qflush(c->rq);
       +
       +        switch(tcb->state) {
       +        case Listen:
       +                /* reset any incoming calls to this listener */
       +                Fsconnected(c, "Hangup");
       +                localclose(c, nil);
       +                return;
       +        case Closed:
       +        case Syn_sent:
       +                localclose(c, nil);
       +                return;
       +        case Syn_received:
       +        case Established:
       +                next = Finwait1;
       +                break;
       +        case Close_wait:
       +                next = Last_ack;
       +                break;
       +        default:
       +                return;
       +        }
       +
       +        /* the FIN takes one flag slot and one sequence number */
       +        tcb->flgcnt++;
       +        tcb->snd.nxt++;
       +        tcpsetstate(c, next);
       +        tcpoutput(c);
       +}
       +
       +/*
       + *  Kick routine handed to qopen() for the write queue in tcpcreate();
       + *  x is the Conv.  Under the conversation lock it recomputes the
       + *  receive window and pushes output for states that can still send,
       + *  and closes the conversation locally with "Hangup" for any other state.
       + */
       +void
       +tcpkick(void *x)
       +{
       +        Conv *s = x;
       +        Tcpctl *tcb;
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        /* the error handler is armed before the lock is taken; it unlocks and re-raises */
       +        if(waserror()){
       +                QUNLOCK(s);
       +                nexterror();
       +        }
       +        QLOCK(s);
       +
       +        switch(tcb->state) {
       +        case Syn_sent:
       +        case Syn_received:
       +        case Established:
       +        case Close_wait:
       +                /*
       +                 * Push data
       +                 */
       +                tcprcvwin(s);
       +                tcpoutput(s);
       +                break;
       +        default:
       +                localclose(s, "Hangup");
       +                break;
       +        }
       +
       +        QUNLOCK(s);
       +        poperror();
       +}
       +
       +/*
       + *  Call with tcb locked.
       + *
       + *  Recompute the advertised receive window as the configured window
       + *  less what is already sitting in the read queue.  When nothing is
       + *  left the window is set to zero and rcv.blocked is raised.
       + */
       +void
       +tcprcvwin(Conv *s)
       +{
       +        Tcpctl *tcb;
       +        int avail;
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +        avail = tcb->window - qlen(s->rq);
       +        if(avail <= 0){
       +                tcb->rcv.wnd = 0;
       +                tcb->rcv.blocked = 1;
       +                return;
       +        }
       +        tcb->rcv.wnd = avail;
       +}
       +
       +/*
       + *  Force an acknowledgement out; v is the Conv.  Installed both as
       + *  acktimer.func in inittcpctl() and as the read queue's callback in
       + *  tcpcreate().  Does nothing if the connection is already Closed.
       + */
       +void
       +tcpacktimer(void *v)
       +{
       +        Tcpctl *tcb;
       +        Conv *s;
       +
       +        s = v;
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        /* the error handler is armed before the lock is taken; it unlocks and re-raises */
       +        if(waserror()){
       +                QUNLOCK(s);
       +                nexterror();
       +        }
       +        QLOCK(s);
       +        if(tcb->state != Closed){
       +                tcb->flags |= FORCE;
       +                tcprcvwin(s);
       +                tcpoutput(s);
       +        }
       +        QUNLOCK(s);
       +        poperror();
       +}
       +
       +/*
       + *  Open the read and write queues of a new conversation.  The read
       + *  queue is limited to QMAX and calls tcpacktimer(); the write queue
       + *  is limited to 1.5*QMAX and calls tcpkick().
       + */
       +static void
       +tcpcreate(Conv *c)
       +{
       +        c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
       +        c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
       +}
       +
       +/*
       + *  Set timer t to newstate, keeping priv->timers in step: a timer is
       + *  linked at the head of the list when it is turned ON and unlinked
       + *  when it leaves ON.  The callers in this file (tcpgo, tcphalt,
       + *  tcpackproc) all hold priv->tl.  Panics if the list links are
       + *  inconsistent with the timer's recorded state.
       + */
       +static void
       +timerstate(Tcppriv *priv, Tcptimer *t, int newstate)
       +{
       +        if(newstate != TcptimerON){
       +                if(t->state == TcptimerON){
       +                        /* unchain */
       +                        if(priv->timers == t){
       +                                priv->timers = t->next;
       +                                /* the list head must not have a predecessor */
       +                                if(t->prev != nil)
       +                                        panic("timerstate1");
       +                        }
       +                        if(t->next)
       +                                t->next->prev = t->prev;
       +                        if(t->prev)
       +                                t->prev->next = t->next;
       +                        t->next = t->prev = nil;
       +                }
       +        } else {
       +                if(t->state != TcptimerON){
       +                        /* chain */
       +                        /* a timer that is not ON must not still be linked */
       +                        if(t->prev != nil || t->next != nil)
       +                                panic("timerstate2");
       +                        t->prev = nil;
       +                        t->next = priv->timers;
       +                        if(t->next)
       +                                t->next->prev = t;
       +                        priv->timers = t;
       +                }
       +        }
       +        t->state = newstate;
       +}
       +
       +/*
       + *  Timer kproc, started once per protocol instance by tcpstart();
       + *  a is the Proto.  Each pass sleeps MSPTICK, counts down every ON
       + *  timer under priv->tl, collects the ones that reach zero on a
       + *  private readynext list, then calls their functions after the
       + *  lock is released.  Finishes each pass with limborexmit().
       + *  Never returns.
       + */
       +void
       +tcpackproc(void *a)
       +{
       +        Tcptimer *t, *tp, *timeo;
       +        Proto *tcp;
       +        Tcppriv *priv;
       +        int loop;
       +
       +        tcp = a;
       +        priv = tcp->priv;
       +
       +        for(;;) {
       +                tsleep(&up->sleep, return0, 0, MSPTICK);
       +
       +                qlock(&priv->tl);
       +                timeo = nil;
       +                loop = 0;
       +                for(t = priv->timers; t != nil; t = tp) {
       +                        /* guard against a corrupted (cyclic) list */
       +                        if(loop++ > 10000)
       +                                panic("tcpackproc1");
       +                        /* fetch the successor first: timerstate() below clears t->next */
       +                        tp = t->next;
       +                         if(t->state == TcptimerON) {
       +                                t->count--;
       +                                if(t->count == 0) {
       +                                        timerstate(priv, t, TcptimerDONE);
       +                                        t->readynext = timeo;
       +                                        timeo = t;
       +                                }
       +                        }
       +                }
       +                qunlock(&priv->tl);
       +
       +                loop = 0;
       +                for(t = timeo; t != nil; t = t->readynext) {
       +                        if(loop++ > 10000)
       +                                panic("tcpackproc2");
       +                        /* skip timers whose state changed since they were collected; an error in func just ends that call */
       +                        if(t->state == TcptimerDONE && t->func != nil && !waserror()){
       +                                (*t->func)(t->arg);
       +                                poperror();
       +                        }
       +                }
       +
       +                limborexmit(tcp);
       +        }
       +}
       +
       +/*
       + *  Arm timer t: reload its count from t->start and mark it ON,
       + *  under the timer-list lock.  A nil timer or one whose start
       + *  value is zero is left untouched.
       + */
       +void
       +tcpgo(Tcppriv *priv, Tcptimer *t)
       +{
       +        if(t != nil && t->start != 0){
       +                qlock(&priv->tl);
       +                t->count = t->start;
       +                timerstate(priv, t, TcptimerON);
       +                qunlock(&priv->tl);
       +        }
       +}
       +
       +/*
       + *  Stop timer t by marking it OFF under the timer-list lock.
       + *  A nil timer is ignored.
       + */
       +void
       +tcphalt(Tcppriv *priv, Tcptimer *t)
       +{
       +        if(t != nil){
       +                qlock(&priv->tl);
       +                timerstate(priv, t, TcptimerOFF);
       +                qunlock(&priv->tl);
       +        }
       +}
       +
       +/*
       + *  Exponential backoff multiplier: 2 to the power n.
       + *
       + *  The shift count is clamped to 0..30 because shifting an int by a
       + *  negative amount, or far enough to reach the sign bit of a 32-bit
       + *  int, is undefined.  Results for 0 <= n <= 30 are unchanged.
       + */
       +int
       +backoff(int n)
       +{
       +        if(n < 0)
       +                n = 0;
       +        else if(n > 30)
       +                n = 30;
       +        return 1 << n;
       +}
       +
       +/*
       + *  Tear down conversation s locally: remove it from the conversation
       + *  hash table, stop all four timers, free the resequencing queue,
       + *  report reason to a pending connect (Syn_sent) or wake a listener
       + *  (Announced), hang up the read and write queues with reason, and
       + *  finally set the state to Closed.
       + */
       +void
       +localclose(Conv *s, char *reason)        /* called with tcb locked */
       +{
       +        Tcpctl *tcb;
       +        Reseq *rp,*rp1;
       +        Tcppriv *tpriv;
       +
       +        tpriv = s->p->priv;
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        iphtrem(&tpriv->ht, s);
       +
       +        tcphalt(tpriv, &tcb->timer);
       +        tcphalt(tpriv, &tcb->rtt_timer);
       +        tcphalt(tpriv, &tcb->acktimer);
       +        tcphalt(tpriv, &tcb->katimer);
       +
       +        /* Flush reassembly queue; nothing more can arrive */
       +        for(rp = tcb->reseq; rp != nil; rp = rp1) {
       +                /* save the link before the entry is freed */
       +                rp1 = rp->next;
       +                freeblist(rp->bp);
       +                free(rp);
       +        }
       +        tcb->reseq = nil;
       +
       +        /* must be tested before tcpsetstate() below overwrites tcb->state */
       +        if(tcb->state == Syn_sent)
       +                Fsconnected(s, reason);
       +        if(s->state == Announced)
       +                wakeup(&s->listenr);
       +
       +        qhangup(s->rq, reason);
       +        qhangup(s->wq, reason);
       +
       +        tcpsetstate(s, Closed);
       +}
       +
       +/*
       + *  mtu (- TCP + IP hdr len) of 1st hop
       + *
       + *  Looks up the interface for addr.  Without one, returns the
       + *  default MSS for the IP version (anything other than V6 is
       + *  treated as V4) and a zero window scale.  With one, returns
       + *  the interface mtu less the media and TCP/IP headers and picks
       + *  a window scale from the interface speed.  *scale always has
       + *  HaveWS set.
       + */
       +int
       +tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
       +{
       +        Ipifc *ifc;
       +        int mtu, shift;
       +
       +        ifc = findipifc(tcp->f, addr, 0);
       +        if(ifc == nil){
       +                *scale = HaveWS | 0;
       +                if(version == V6)
       +                        return DEF_MSS6;
       +                return DEF_MSS;
       +        }
       +
       +        if(version == V6)
       +                mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
       +        else
       +                mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
       +
       +        /* faster links get a larger window scale */
       +        shift = 0;
       +        if(ifc->mbps > 1000)
       +                shift = 4;
       +        else if(ifc->mbps > 100)
       +                shift = 3;
       +        else if(ifc->mbps > 10)
       +                shift = 1;
       +        *scale = HaveWS | shift;
       +
       +        return mtu;
       +}
       +
       +/*
       + *  Reset the control block of s to its defaults: zero it, seed the
       + *  round-trip estimate from tcp_irtt, set up the four timers, and
       + *  start with no window scaling and a QMAX receive window.  Unless
       + *  mode is TCP_LISTEN, also picks a local address if none is set and
       + *  fills in the prototype header for the conversation's IP version.
       + *  Panics on an unknown IP version.
       + */
       +void
       +inittcpctl(Conv *s, int mode)
       +{
       +        Tcpctl *tcb;
       +        Tcp4hdr* h4;
       +        Tcp6hdr* h6;
       +        int mss;
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        memset(tcb, 0, sizeof(Tcpctl));
       +
       +        tcb->ssthresh = 65535;
       +        tcb->srtt = tcp_irtt<<LOGAGAIN;
       +        tcb->mdev = 0;
       +
       +        /* setup timers */
       +        /* start values are divided by MSPTICK, the period of the tcpackproc() countdown */
       +        tcb->timer.start = tcp_irtt / MSPTICK;
       +        tcb->timer.func = tcptimeout;
       +        tcb->timer.arg = s;
       +        /* rtt_timer is given no func: tcpackproc() only calls timers whose func is set */
       +        tcb->rtt_timer.start = MAX_TIME;
       +        tcb->acktimer.start = TCP_ACK / MSPTICK;
       +        tcb->acktimer.func = tcpacktimer;
       +        tcb->acktimer.arg = s;
       +        tcb->katimer.start = DEF_KAT / MSPTICK;
       +        tcb->katimer.func = tcpkeepalive;
       +        tcb->katimer.arg = s;
       +
       +        mss = DEF_MSS;
       +
       +        /* create a prototype(pseudo) header */
       +        if(mode != TCP_LISTEN){
       +                if(ipcmp(s->laddr, IPnoaddr) == 0)
       +                        findlocalip(s->p->f, s->laddr, s->raddr);
       +
       +                switch(s->ipversion){
       +                case V4:
       +                        h4 = &tcb->protohdr.tcp4hdr;
       +                        memset(h4, 0, sizeof(*h4));
       +                        h4->proto = IP_TCPPROTO;
       +                        hnputs(h4->tcpsport, s->lport);
       +                        hnputs(h4->tcpdport, s->rport);
       +                        v6tov4(h4->tcpsrc, s->laddr);
       +                        v6tov4(h4->tcpdst, s->raddr);
       +                        break;
       +                case V6:
       +                        h6 = &tcb->protohdr.tcp6hdr;
       +                        memset(h6, 0, sizeof(*h6));
       +                        h6->proto = IP_TCPPROTO;
       +                        hnputs(h6->tcpsport, s->lport);
       +                        hnputs(h6->tcpdport, s->rport);
       +                        ipmove(h6->tcpsrc, s->laddr);
       +                        ipmove(h6->tcpdst, s->raddr);
       +                        mss = DEF_MSS6;
       +                        break;
       +                default:
       +                        panic("inittcpctl: version %d", s->ipversion);
       +                }
       +        }
       +
       +        /* the congestion window starts at one segment */
       +        tcb->mss = tcb->cwind = mss;
       +
       +        /* default is no window scaling */
       +        tcb->window = QMAX;
       +        tcb->rcv.wnd = QMAX;
       +        tcb->rcv.scale = 0;
       +        tcb->snd.scale = 0;
       +        qsetlimit(s->rq, QMAX);
       +}
       +
       +/*
       + *  called with s QLOCKed
       + *
       + *  Start conversation s as a listener (TCP_LISTEN) or as an active
       + *  connection (TCP_CONNECT).  The first call for a protocol instance
       + *  also launches the tcpackproc() timer kproc; ackprocstarted is
       + *  re-tested under apl so only one is ever created.  The control
       + *  block is reinitialized and the conversation entered in the hash
       + *  table for any mode; only the two modes above change the state.
       + */
       +void
       +tcpstart(Conv *s, int mode)
       +{
       +        Tcpctl *tcb;
       +        Tcppriv *tpriv;
       +        char kpname[KNAMELEN];
       +
       +        tpriv = s->p->priv;
       +
       +        if(tpriv->ackprocstarted == 0){
       +                qlock(&tpriv->apl);
       +                if(tpriv->ackprocstarted == 0){
       +                        /* bounded format: never write past kpname */
       +                        snprint(kpname, sizeof kpname, "#I%dtcpack", s->p->f->dev);
       +                        kproc(kpname, tcpackproc, s->p);
       +                        tpriv->ackprocstarted = 1;
       +                }
       +                qunlock(&tpriv->apl);
       +        }
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        inittcpctl(s, mode);
       +
       +        iphtadd(&tpriv->ht, s);
       +        switch(mode) {
       +        case TCP_LISTEN:
       +                tpriv->stats[PassiveOpens]++;
       +                tcb->flags |= CLONE;
       +                tcpsetstate(s, Listen);
       +                break;
       +
       +        case TCP_CONNECT:
       +                tpriv->stats[ActiveOpens]++;
       +                tcb->flags |= ACTIVE;
       +                tcpsndsyn(s, tcb);
       +                tcpsetstate(s, Syn_sent);
       +                tcpoutput(s);
       +                break;
       +        }
       +}
       +
       +/*
       + *  Render a TCP flag word as text: flag>>10 (the header length as
       + *  packed by htontcp{4,6}()) followed by the names of the flag bits
       + *  that are set, in the order URG ACK PSH RST SYN FIN.
       + *  The result lives in a static buffer that the next call overwrites.
       + */
       +static char*
       +tcpflag(ushort flag)
       +{
       +        static char buf[128];
       +        static struct {
       +                int        bit;
       +                char        *name;
       +        } names[] = {
       +                { URG,        " URG" },
       +                { ACK,        " ACK" },
       +                { PSH,        " PSH" },
       +                { RST,        " RST" },
       +                { SYN,        " SYN" },
       +                { FIN,        " FIN" },
       +        };
       +        int i;
       +
       +        sprint(buf, "%d", flag>>10);        /* Head len */
       +        for(i = 0; i < sizeof names / sizeof names[0]; i++)
       +                if(flag & names[i].bit)
       +                        strcat(buf, names[i].name);
       +
       +        return buf;
       +}
       +
       +/*
       + *  Build the IPv6 and TCP headers for an outgoing segment.
       + *
       + *  tcph  control info for the segment
       + *  data  payload block list, or nil for a segment with no data
       + *  ph    prototype header; its first TCP6_TCBPHDRSZ bytes are copied in
       + *  tcb   if non-nil, supplies snd.scale and the nochecksum setting
       + *
       + *  Returns the block with the headers in front of the payload, or nil
       + *  if padblock() or allocb() returns nil.  MSS and window scale options
       + *  are emitted only on SYN segments, padded with NOOPs to a multiple
       + *  of 4 bytes.
       + */
       +Block *
       +htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
       +{
       +        int dlen;
       +        Tcp6hdr *h;
       +        ushort csum;
       +        ushort hdrlen, optpad = 0;
       +        uchar *opt;
       +
       +        /* work out the TCP header length including any SYN options */
       +        hdrlen = TCP6_HDRSIZE;
       +        if(tcph->flags & SYN){
       +                if(tcph->mss)
       +                        hdrlen += MSS_LENGTH;
       +                if(tcph->ws)
       +                        hdrlen += WS_LENGTH;
       +                optpad = hdrlen & 3;
       +                if(optpad)
       +                        optpad = 4 - optpad;
       +                hdrlen += optpad;
       +        }
       +
       +        if(data) {
       +                dlen = blocklen(data);
       +                data = padblock(data, hdrlen + TCP6_PKT);
       +                if(data == nil)
       +                        return nil;
       +        }
       +        else {
       +                dlen = 0;
       +                data = allocb(hdrlen + TCP6_PKT + 64);        /* the 64 pad is to meet mintu's */
       +                if(data == nil)
       +                        return nil;
       +                data->wp += hdrlen + TCP6_PKT;
       +        }
       +
       +        /* copy in pseudo ip header plus port numbers */
       +        h = (Tcp6hdr *)(data->rp);
       +        memmove(h, ph, TCP6_TCBPHDRSZ);
       +
       +        /* compose pseudo tcp header, do cksum calculation */
       +        hnputl(h->vcf, hdrlen + dlen);
       +        h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
       +        h->ttl = ph->proto;
       +
       +        /* copy in variable bits */
       +        hnputl(h->tcpseq, tcph->seq);
       +        hnputl(h->tcpack, tcph->ack);
       +        hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
       +        /* the advertised window is scaled down by snd.scale when a tcb is given */
       +        hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
       +        hnputs(h->tcpurg, tcph->urg);
       +
       +        if(tcph->flags & SYN){
       +                opt = h->tcpopt;
       +                if(tcph->mss != 0){
       +                        *opt++ = MSSOPT;
       +                        *opt++ = MSS_LENGTH;
       +                        hnputs(opt, tcph->mss);
       +                        opt += 2;
       +                }
       +                if(tcph->ws != 0){
       +                        *opt++ = WSOPT;
       +                        *opt++ = WS_LENGTH;
       +                        *opt++ = tcph->ws;
       +                }
       +                while(optpad-- > 0)
       +                        *opt++ = NOOPOPT;
       +        }
       +
       +        if(tcb != nil && tcb->nochecksum){
       +                h->tcpcksum[0] = h->tcpcksum[1] = 0;
       +        } else {
       +                /* checksum is taken while the header still holds the pseudo-header values */
       +                csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE);
       +                hnputs(h->tcpcksum, csum);
       +        }
       +
       +        /* move from pseudo header back to normal ip header */
       +        memset(h->vcf, 0, 4);
       +        h->vcf[0] = IP_VER6;
       +        hnputs(h->ploadlen, hdrlen+dlen);
       +        h->proto = ph->proto;
       +
       +        return data;
       +}
       +
       +/*
       + *  Build the wire-format IPv4 TCP header for tcph in front of data.
       + *
       + *  data is the payload, or nil for a header-only segment, in which
       + *  case a fresh block is allocated.  ph is a template holding the
       + *  pseudo IP header and port numbers; TCP4_TCBPHDRSZ bytes of it are
       + *  copied in.  tcb may be nil: the window is then sent unscaled and
       + *  the checksum is always computed.
       + *
       + *  Returns the block holding header plus payload, or nil when
       + *  padblock/allocb hand back nil.
       + */
       +Block *
       +htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
       +{
       +        Tcp4hdr *hdr;
       +        uchar *p;
       +        ushort cksum;
       +        ushort hlen, npad;
       +        int paylen;
       +
       +        /* options (mss, window scale) only travel on SYN segments */
       +        hlen = TCP4_HDRSIZE;
       +        npad = 0;
       +        if(tcph->flags & SYN){
       +                if(tcph->mss)
       +                        hlen += MSS_LENGTH;
       +                if(tcph->ws)
       +                        hlen += WS_LENGTH;
       +                /* round the header up to a multiple of 4 bytes */
       +                npad = (4 - (hlen & 3)) & 3;
       +                hlen += npad;
       +        }
       +
       +        if(data == nil){
       +                /* header-only segment: make a block to hold it */
       +                paylen = 0;
       +                data = allocb(hlen + TCP4_PKT + 64);        /* the 64 pad is to meet mintu's */
       +                if(data == nil)
       +                        return nil;
       +                data->wp += hlen + TCP4_PKT;
       +        }else{
       +                /* make room for the headers in front of the payload */
       +                paylen = blocklen(data);
       +                data = padblock(data, hlen + TCP4_PKT);
       +                if(data == nil)
       +                        return nil;
       +        }
       +
       +        /* start from the template: pseudo ip header plus port numbers */
       +        hdr = (Tcp4hdr *)(data->rp);
       +        memmove(hdr, ph, TCP4_TCBPHDRSZ);
       +
       +        /* per-segment fields; hlen<<10 puts hlen/4 in the top 4 bits */
       +        hnputs(hdr->tcplen, hlen + paylen);
       +        hnputl(hdr->tcpseq, tcph->seq);
       +        hnputl(hdr->tcpack, tcph->ack);
       +        hnputs(hdr->tcpflag, (hlen<<10) | tcph->flags);
       +        hnputs(hdr->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
       +        hnputs(hdr->tcpurg, tcph->urg);
       +
       +        if(tcph->flags & SYN){
       +                p = hdr->tcpopt;
       +                if(tcph->mss != 0){
       +                        p[0] = MSSOPT;
       +                        p[1] = MSS_LENGTH;
       +                        hnputs(p+2, tcph->mss);
       +                        p += 4;
       +                }
       +                if(tcph->ws != 0){
       +                        p[0] = WSOPT;
       +                        p[1] = WS_LENGTH;
       +                        p[2] = tcph->ws;
       +                        p += 3;
       +                }
       +                /* fill the alignment padding with no-ops */
       +                for(; npad > 0; npad--)
       +                        *p++ = NOOPOPT;
       +        }
       +
       +        if(tcb == nil || !tcb->nochecksum){
       +                cksum = ptclcsum(data, TCP4_IPLEN, hlen+paylen+TCP4_PHDRSIZE);
       +                hnputs(hdr->tcpcksum, cksum);
       +        } else {
       +                hdr->tcpcksum[0] = 0;
       +                hdr->tcpcksum[1] = 0;
       +        }
       +
       +        return data;
       +}
       +
       +/*
       + *  Decode the TCP header of the IPv6 packet at the head of *bpp
       + *  into tcph (ports, seq, ack, flags, window, urgent pointer,
       + *  payload length, and the mss / window scale options if present).
       + *
       + *  Returns the TCP header length in bytes, or -1 on failure.
       + *  On failure the block list is gone: either pullupblock returned
       + *  nil (stored in *bpp) or the list was freed here because the
       + *  header length field was shorter than TCP6_HDRSIZE.
       + */
       +int
       +ntohtcp6(Tcp *tcph, Block **bpp)
       +{
       +        Tcp6hdr *h;
       +        uchar *optr;
       +        ushort hdrlen;
       +        ushort optlen;
       +        int n;
       +
       +        *bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE);
       +        if(*bpp == nil)
       +                return -1;
       +
       +        h = (Tcp6hdr *)((*bpp)->rp);
       +        tcph->source = nhgets(h->tcpsport);
       +        tcph->dest = nhgets(h->tcpdport);
       +        tcph->seq = nhgetl(h->tcpseq);
       +        tcph->ack = nhgetl(h->tcpack);
       +        /* top 4 bits of tcpflag[0] count 32-bit words; this yields bytes */
       +        hdrlen = (h->tcpflag[0]>>2) & ~3;
       +        if(hdrlen < TCP6_HDRSIZE) {
       +                freeblist(*bpp);
       +                return -1;
       +        }
       +
       +        tcph->flags = h->tcpflag[1];
       +        tcph->wnd = nhgets(h->tcpwin);
       +        tcph->urg = nhgets(h->tcpurg);
       +        tcph->mss = 0;
       +        tcph->ws = 0;
       +        tcph->len = nhgets(h->ploadlen) - hdrlen;
       +
       +        /* make sure the options are contiguous too */
       +        *bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
       +        if(*bpp == nil)
       +                return -1;
       +
       +        /* pullupblock may have returned a different block: refetch the header */
       +        h = (Tcp6hdr *)((*bpp)->rp);
       +
       +        optr = h->tcpopt;
       +        n = hdrlen - TCP6_HDRSIZE;
       +        while(n > 0 && *optr != EOLOPT) {
       +                if(*optr == NOOPOPT) {
       +                        n--;
       +                        optr++;
       +                        continue;
       +                }
       +                /* need a length byte inside the header before reading it */
       +                if(n < 2)
       +                        break;
       +                optlen = optr[1];
       +                if(optlen < 2 || optlen > n)
       +                        break;
       +                switch(*optr) {
       +                case MSSOPT:
       +                        if(optlen == MSS_LENGTH)
       +                                tcph->mss = nhgets(optr+2);
       +                        break;
       +                case WSOPT:
       +                        /* shift counts above 14 are ignored */
       +                        if(optlen == WS_LENGTH && *(optr+2) <= 14)
       +                                tcph->ws = HaveWS | *(optr+2);
       +                        break;
       +                }
       +                n -= optlen;
       +                optr += optlen;
       +        }
       +        return hdrlen;
       +}
       +
       +/*
       + *  Decode the TCP header of the IPv4 packet at the head of *bpp
       + *  into tcph (ports, seq, ack, flags, window, urgent pointer,
       + *  payload length, and the mss / window scale options if present).
       + *
       + *  Returns the TCP header length in bytes, or -1 on failure.
       + *  On failure the block list is gone: either pullupblock returned
       + *  nil (stored in *bpp) or the list was freed here because the
       + *  header length field was shorter than TCP4_HDRSIZE.
       + */
       +int
       +ntohtcp4(Tcp *tcph, Block **bpp)
       +{
       +        Tcp4hdr *h;
       +        uchar *optr;
       +        ushort hdrlen;
       +        ushort optlen;
       +        int n;
       +
       +        *bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE);
       +        if(*bpp == nil)
       +                return -1;
       +
       +        h = (Tcp4hdr *)((*bpp)->rp);
       +        tcph->source = nhgets(h->tcpsport);
       +        tcph->dest = nhgets(h->tcpdport);
       +        tcph->seq = nhgetl(h->tcpseq);
       +        tcph->ack = nhgetl(h->tcpack);
       +
       +        /* top 4 bits of tcpflag[0] count 32-bit words; this yields bytes */
       +        hdrlen = (h->tcpflag[0]>>2) & ~3;
       +        if(hdrlen < TCP4_HDRSIZE) {
       +                freeblist(*bpp);
       +                return -1;
       +        }
       +
       +        tcph->flags = h->tcpflag[1];
       +        tcph->wnd = nhgets(h->tcpwin);
       +        tcph->urg = nhgets(h->tcpurg);
       +        tcph->mss = 0;
       +        tcph->ws = 0;
       +        tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
       +
       +        /* make sure the options are contiguous too */
       +        *bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
       +        if(*bpp == nil)
       +                return -1;
       +
       +        /* pullupblock may have returned a different block: refetch the header */
       +        h = (Tcp4hdr *)((*bpp)->rp);
       +
       +        optr = h->tcpopt;
       +        n = hdrlen - TCP4_HDRSIZE;
       +        while(n > 0 && *optr != EOLOPT) {
       +                if(*optr == NOOPOPT) {
       +                        n--;
       +                        optr++;
       +                        continue;
       +                }
       +                /* need a length byte inside the header before reading it */
       +                if(n < 2)
       +                        break;
       +                optlen = optr[1];
       +                if(optlen < 2 || optlen > n)
       +                        break;
       +                switch(*optr) {
       +                case MSSOPT:
       +                        if(optlen == MSS_LENGTH)
       +                                tcph->mss = nhgets(optr+2);
       +                        break;
       +                case WSOPT:
       +                        /* shift counts above 14 are ignored */
       +                        if(optlen == WS_LENGTH && *(optr+2) <= 14)
       +                                tcph->ws = HaveWS | *(optr+2);
       +                        break;
       +                }
       +                n -= optlen;
       +                optr += optlen;
       +        }
       +        return hdrlen;
       +}
       +
       +/*
       + *  For outgoing calls, pick a random initial send sequence
       + *  number, point every send-side sequence variable at it and
       + *  flag the connection so that a SYN gets sent.
       + */
       +void
       +tcpsndsyn(Conv *s, Tcpctl *tcb)
       +{
       +        /* two 16 bit random halves make up the 32 bit iss */
       +        tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
       +
       +        /* nothing is outstanding yet: everything starts at iss */
       +        tcb->rttseq = tcb->iss;
       +        tcb->snd.una = tcb->iss;
       +        tcb->snd.wl2 = tcb->iss;
       +        tcb->snd.nxt = tcb->rttseq;
       +        tcb->snd.ptr = tcb->rttseq;
       +
       +        /* one more flag to send, and force output */
       +        tcb->flgcnt += 1;
       +        tcb->flags = tcb->flags | FORCE;
       +        tcb->sndsyntime = NOW;
       +
       +        /* set desired mss and scale */
       +        tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
       +}
       +
       +/*
       + *  Reply to the segment seg with a RST.  source and dest are the
       + *  addresses of the offending segment (they are swapped for the
       + *  reply), length is its data length, reason is only logged.
       + *  seg is overwritten with the reply's header fields.
       + *  Nothing is sent in answer to a segment that itself carries RST.
       + */
       +void
       +sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason)
       +{
       +        Tcp4hdr v4hdr;
       +        Tcp6hdr v6hdr;
       +        Tcppriv *priv;
       +        Block *pkt;
       +        uchar outflags;
       +
       +        netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
       +
       +        priv = tcp->priv;
       +
       +        /* never answer a reset with a reset */
       +        if(seg->flags & RST)
       +                return;
       +
       +        /* make pseudo header, addresses and ports reversed */
       +        switch(version) {
       +        case V4:
       +                memset(&v4hdr, 0, sizeof(v4hdr));
       +                v4hdr.vihl = IP_VER4;
       +                v6tov4(v4hdr.tcpsrc, dest);
       +                v6tov4(v4hdr.tcpdst, source);
       +                v4hdr.proto = IP_TCPPROTO;
       +                hnputs(v4hdr.tcplen, TCP4_HDRSIZE);
       +                hnputs(v4hdr.tcpsport, seg->dest);
       +                hnputs(v4hdr.tcpdport, seg->source);
       +                break;
       +        case V6:
       +                memset(&v6hdr, 0, sizeof(v6hdr));
       +                v6hdr.vcf[0] = IP_VER6;
       +                ipmove(v6hdr.tcpsrc, dest);
       +                ipmove(v6hdr.tcpdst, source);
       +                v6hdr.proto = IP_TCPPROTO;
       +                hnputs(v6hdr.ploadlen, TCP6_HDRSIZE);
       +                hnputs(v6hdr.tcpsport, seg->dest);
       +                hnputs(v6hdr.tcpdport, seg->source);
       +                break;
       +        default:
       +                panic("sndrst: version %d", version);
       +        }
       +
       +        priv->stats[OutRsts]++;
       +
       +        /* convince the other end that this reset is in band */
       +        if((seg->flags & ACK) == 0) {
       +                /* no ack to echo: acknowledge everything the segment occupied */
       +                outflags = RST | ACK;
       +                seg->ack = seg->seq;
       +                seg->seq = 0;
       +                if(seg->flags & SYN)
       +                        seg->ack++;
       +                seg->ack += length;
       +                if(seg->flags & FIN)
       +                        seg->ack++;
       +        }
       +        else {
       +                /* take our sequence number from the peer's ack */
       +                outflags = RST;
       +                seg->seq = seg->ack;
       +                seg->ack = 0;
       +        }
       +
       +        seg->flags = outflags;
       +        seg->wnd = 0;
       +        seg->urg = 0;
       +        seg->mss = 0;
       +        seg->ws = 0;
       +
       +        if(version == V4){
       +                pkt = htontcp4(seg, nil, &v4hdr, nil);
       +                if(pkt == nil)
       +                        return;
       +                ipoput4(tcp->f, pkt, 0, MAXTTL, DFLTTOS, nil);
       +        }else if(version == V6){
       +                pkt = htontcp6(seg, nil, &v6hdr, nil);
       +                if(pkt == nil)
       +                        return;
       +                ipoput6(tcp->f, pkt, 0, MAXTTL, DFLTTOS, nil);
       +        }else
       +                panic("sndrst2: version %d", version);
       +}
       +
       +/*
       + *  send a reset to the remote side and close the conversation
       + *  called with s QLOCKed
       + *
       + *  The reset is only attempted when a remote address is set, and an
       + *  error raised while sending it is swallowed so the conversation is
       + *  still closed.  Returns nil, or commonerror() if an error escapes.
       + */
       +char*
       +tcphangup(Conv *s)
       +{
       +        Tcp seg;
       +        Tcpctl *tcb;
       +        Block *hbp;
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +        if(waserror())
       +                return commonerror();
       +        if(ipcmp(s->raddr, IPnoaddr) != 0) {
       +                /* best effort: an error while sending just skips the reset */
       +                if(!waserror()){
       +                        seg.flags = RST | ACK;
       +                        seg.ack = tcb->rcv.nxt;
       +                        tcb->rcv.una = 0;
       +                        seg.seq = tcb->snd.ptr;
       +                        seg.wnd = 0;
       +                        seg.urg = 0;
       +                        seg.mss = 0;
       +                        seg.ws = 0;
       +                        switch(s->ipversion) {
       +                        case V4:
       +                                tcb->protohdr.tcp4hdr.vihl = IP_VER4;
       +                                hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb);
       +                                /* as in sndrst/sndsynack: no header block, nothing to send */
       +                                if(hbp != nil)
       +                                        ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
       +                                break;
       +                        case V6:
       +                                tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
       +                                hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb);
       +                                if(hbp != nil)
       +                                        ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
       +                                break;
       +                        default:
       +                                panic("tcphangup: version %d", s->ipversion);
       +                        }
       +                        poperror();
       +                }
       +        }
       +        localclose(s, nil);
       +        poperror();
       +        return nil;
       +}
       +
       +/*
       + *  (re)send a SYN ACK
       + *
       + *  Builds the segment for the half-open call lp from scratch: seq is
       + *  lp->iss, ack is lp->irs+1, mss comes from tcpmtu.  A window scale
       + *  option is only offered when the peer sent one (lp->rcvscale).
       + *  Records the scale used in lp->sndscale and the send time in
       + *  lp->lastsend.
       + *
       + *  Returns 0 once the packet has been handed to ip, -1 if no header
       + *  block could be built.
       + */
       +int
       +sndsynack(Proto *tcp, Limbo *lp)
       +{
       +        Block *hbp;
       +        Tcp4hdr ph4;
       +        Tcp6hdr ph6;
       +        Tcp seg;
       +        int scale;
       +
       +        /* make pseudo header */
       +        switch(lp->version) {
       +        case V4:
       +                memset(&ph4, 0, sizeof(ph4));
       +                ph4.vihl = IP_VER4;
       +                v6tov4(ph4.tcpsrc, lp->laddr);
       +                v6tov4(ph4.tcpdst, lp->raddr);
       +                ph4.proto = IP_TCPPROTO;
       +                hnputs(ph4.tcplen, TCP4_HDRSIZE);
       +                hnputs(ph4.tcpsport, lp->lport);
       +                hnputs(ph4.tcpdport, lp->rport);
       +                break;
       +        case V6:
       +                memset(&ph6, 0, sizeof(ph6));
       +                ph6.vcf[0] = IP_VER6;
       +                ipmove(ph6.tcpsrc, lp->laddr);
       +                ipmove(ph6.tcpdst, lp->raddr);
       +                ph6.proto = IP_TCPPROTO;
       +                hnputs(ph6.ploadlen, TCP6_HDRSIZE);
       +                hnputs(ph6.tcpsport, lp->lport);
       +                hnputs(ph6.tcpdport, lp->rport);
       +                break;
       +        default:
       +                /* message used to blame sndrst */
       +                panic("sndsynack: version %d", lp->version);
       +        }
       +
       +        seg.seq = lp->iss;
       +        seg.ack = lp->irs+1;
       +        seg.flags = SYN|ACK;
       +        seg.urg = 0;
       +        seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
       +        seg.wnd = QMAX;
       +
       +        /* if the other side set scale, we should too */
       +        if(lp->rcvscale){
       +                seg.ws = scale;
       +                lp->sndscale = scale;
       +        } else {
       +                seg.ws = 0;
       +                lp->sndscale = 0;
       +        }
       +
       +        switch(lp->version) {
       +        case V4:
       +                hbp = htontcp4(&seg, nil, &ph4, nil);
       +                if(hbp == nil)
       +                        return -1;
       +                ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
       +                break;
       +        case V6:
       +                hbp = htontcp6(&seg, nil, &ph6, nil);
       +                if(hbp == nil)
       +                        return -1;
       +                ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
       +                break;
       +        default:
       +                panic("sndsynack2: version %d", lp->version);
       +        }
       +        lp->lastsend = NOW;
       +        return 0;
       +}
       +
       +/* limbo hash: last two bytes of address a plus port p, masked to a table index */
       +#define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + (p) )&LHTMASK )
       +
       +/*
       + *  put a call into limbo and respond with a SYN ACK
       + *
       + *  called with proto locked
       + *
       + *  A repeated SYN for a call already in limbo only refreshes its irs.
       + *  Otherwise a new entry is appended to the hash chain; once
       + *  Maxlimbo entries exist, the head of the chain is recycled instead
       + *  of allocating.  If the SYN ACK cannot be sent the entry is dropped.
       + */
       +static void
       +limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version)
       +{
       +        Limbo *lp, **l;
       +        Tcppriv *tpriv;
       +        int h;
       +
       +        tpriv = s->p->priv;
       +        h = hashipa(source, seg->source);
       +
       +        /* leaves l at the matching entry's link, or at the tail link */
       +        for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
       +                lp = *l;
       +                if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version)
       +                        continue;
       +                if(ipcmp(lp->raddr, source) != 0)
       +                        continue;
       +                if(ipcmp(lp->laddr, dest) != 0)
       +                        continue;
       +
       +                /* each new SYN restarts the retransmits */
       +                lp->irs = seg->seq;
       +                break;
       +        }
       +        lp = *l;
       +        if(lp == nil){
       +                if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){
       +                        /* table full: recycle the head of this chain */
       +                        lp = tpriv->lht[h];
       +                        tpriv->lht[h] = lp->next;
       +                        lp->next = nil;
       +                        /*
       +                         *  if the head was also the tail, l points at
       +                         *  lp's own next field; storing lp there would
       +                         *  link it to itself and lose it from the table.
       +                         *  the chain is empty now, so append at its head.
       +                         */
       +                        if(l == &lp->next)
       +                                l = &tpriv->lht[h];
       +                        /* NOTE(review): lp->rexmits is not reset when recycling - confirm intent */
       +                } else {
       +                        lp = malloc(sizeof(*lp));
       +                        if(lp == nil)
       +                                return;
       +                        tpriv->nlimbo++;
       +                }
       +                *l = lp;
       +                lp->version = version;
       +                ipmove(lp->laddr, dest);
       +                ipmove(lp->raddr, source);
       +                lp->lport = seg->dest;
       +                lp->rport = seg->source;
       +                lp->mss = seg->mss;
       +                lp->rcvscale = seg->ws;
       +                lp->irs = seg->seq;
       +                lp->iss = (nrand(1<<16)<<16)|nrand(1<<16);
       +        }
       +
       +        /* could not answer: forget the call */
       +        if(sndsynack(s->p, lp) < 0){
       +                *l = lp->next;
       +                tpriv->nlimbo--;
       +                free(lp);
       +        }
       +}
       +
       +/*
       + *  resend SYN ACK's once every SYNACK_RXTIMER ms.
       + *
       + *  Walks every limbo hash chain.  An entry becomes due once
       + *  (rexmits+1)*SYNACK_RXTIMER has passed since its last send; a due
       + *  entry is dropped after more than 5 retransmits, otherwise its
       + *  SYN ACK is resent.  Does nothing if the proto lock is busy.
       + */
       +static void
       +limborexmit(Proto *tcp)
       +{
       +        Tcppriv *tpriv;
       +        Limbo **l, *lp;
       +        int h;
       +        int seen;
       +        ulong now;
       +
       +        tpriv = tcp->priv;
       +
       +        if(!CANQLOCK(tcp))
       +                return;
       +        seen = 0;
       +        now = NOW;
       +        /* seen lets the scan stop once every entry has been visited */
       +        for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){
       +                for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){
       +                        lp = *l;
       +                        seen++;
       +                        /*
       +                         *  not due yet: step past it.  without the
       +                         *  advance the same entry was revisited until
       +                         *  seen ran out, ending the whole scan early.
       +                         */
       +                        if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER){
       +                                l = &lp->next;
       +                                continue;
       +                        }
       +
       +                        /* give up once it has been retransmitted more than 5 times */
       +                        if(++(lp->rexmits) > 5){
       +                                tpriv->nlimbo--;
       +                                *l = lp->next;
       +                                free(lp);
       +                                continue;
       +                        }
       +
       +                        /* if we're being attacked, don't bother resending SYN ACK's */
       +                        if(tpriv->nlimbo > 100){
       +                                l = &lp->next;
       +                                continue;
       +                        }
       +
       +                        if(sndsynack(tcp, lp) < 0){
       +                                tpriv->nlimbo--;
       +                                *l = lp->next;
       +                                free(lp);
       +                                continue;
       +                        }
       +
       +                        l = &lp->next;
       +                }
       +        }
       +        QUNLOCK(tcp);
       +}
       +
       +/*
       + *  a RST arrived: look the call up in limbo and, if the reset's
       + *  sequence number directly follows the SYN we recorded, throw
       + *  the call out.
       + *
       + *  called with proto locked
       + */
       +static void
       +limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
       +{
       +        Tcppriv *priv;
       +        Limbo **link, *ent;
       +        int bucket;
       +
       +        priv = s->p->priv;
       +        bucket = hashipa(src, segp->source);
       +
       +        /* walk the chain until ports, version and both addresses match */
       +        link = &priv->lht[bucket];
       +        while((ent = *link) != nil){
       +                if(ent->lport == segp->dest
       +                && ent->rport == segp->source
       +                && ent->version == version
       +                && ipcmp(ent->laddr, dst) == 0
       +                && ipcmp(ent->raddr, src) == 0){
       +                        /* RST can only follow the SYN */
       +                        if(segp->seq == ent->irs+1){
       +                                priv->nlimbo--;
       +                                *link = ent->next;
       +                                free(ent);
       +                        }
       +                        return;
       +                }
       +                link = &ent->next;
       +        }
       +}
       +
       +/*
       + *  come here when we finally get an ACK to our SYN-ACK.
       + *  lookup call in limbo.  if found, create a new conversation
       + *
       + *  called with proto locked
       + *
       + *  s is the listening conversation; its Tcpctl is copied into the
       + *  new one.  Returns the new conversation, already Established and
       + *  entered in the proto hash table, or nil when the segment is not
       + *  a plain ACK, matches nothing in limbo, carries the wrong seq/ack,
       + *  or Fsnewcall fails.
       + */
       +static Conv*
       +tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
       +{
       +        Conv *new;
       +        Tcpctl *tcb;
       +        Tcppriv *tpriv;
       +        Tcp4hdr *h4;
       +        Tcp6hdr *h6;
       +        Limbo *lp, **l;
       +        int h;
       +
       +        /* unless it's just an ack, it can't be someone coming out of limbo */
       +        if((segp->flags & SYN) || (segp->flags & ACK) == 0)
       +                return nil;
       +
       +        tpriv = s->p->priv;
       +
       +        /* find a call in limbo */
       +        h = hashipa(src, segp->source);
       +        for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
       +                netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d\n",
       +                        src, segp->source, lp->raddr, lp->rport,
       +                        dst, segp->dest, lp->laddr, lp->lport,
       +                        version, lp->version
       +                 );
       +
       +                if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
       +                        continue;
       +                if(ipcmp(lp->laddr, dst) != 0)
       +                        continue;
       +                if(ipcmp(lp->raddr, src) != 0)
       +                        continue;
       +
       +                /* we're assuming no data with the initial SYN */
       +                if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
       +                        netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
       +                                segp->seq, lp->irs+1, segp->ack, lp->iss+1);
       +                        /* wrong numbers: leave the entry in limbo */
       +                        lp = nil;
       +                } else {
       +                        /* unlink it; from here on lp is ours to free */
       +                        tpriv->nlimbo--;
       +                        *l = lp->next;
       +                }
       +                break;
       +        }
       +        if(lp == nil)
       +                return nil;
       +
       +        new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
       +        if(new == nil){
       +                /* lp is already off the limbo list: free it or it leaks */
       +                free(lp);
       +                return nil;
       +        }
       +
       +        /* start from the listener's control block, then rebind the timers */
       +        memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
       +        tcb = (Tcpctl*)new->ptcl;
       +        tcb->flags &= ~CLONE;
       +        tcb->timer.arg = new;
       +        tcb->timer.state = TcptimerOFF;
       +        tcb->acktimer.arg = new;
       +        tcb->acktimer.state = TcptimerOFF;
       +        tcb->katimer.arg = new;
       +        tcb->katimer.state = TcptimerOFF;
       +        tcb->rtt_timer.arg = new;
       +        tcb->rtt_timer.state = TcptimerOFF;
       +
       +        /* receive side: the peer's SYN used up one sequence number */
       +        tcb->irs = lp->irs;
       +        tcb->rcv.nxt = tcb->irs+1;
       +        tcb->rcv.urg = tcb->rcv.nxt;
       +
       +        /* send side: our SYN is already sent and acked */
       +        tcb->iss = lp->iss;
       +        tcb->rttseq = tcb->iss;
       +        tcb->snd.wl2 = tcb->iss;
       +        tcb->snd.una = tcb->iss+1;
       +        tcb->snd.ptr = tcb->iss+1;
       +        tcb->snd.nxt = tcb->iss+1;
       +        tcb->flgcnt = 0;
       +        tcb->flags |= SYNACK;
       +
       +        /* our sending max segment size cannot be bigger than what he asked for */
       +        if(lp->mss != 0 && lp->mss < tcb->mss)
       +                tcb->mss = lp->mss;
       +
       +        /* window scaling */
       +        tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
       +
       +        /* the congestion window always starts out as a single segment */
       +        tcb->snd.wnd = segp->wnd;
       +        tcb->cwind = tcb->mss;
       +
       +        /* set initial round trip time */
       +        tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
       +        tcpsynackrtt(new);
       +
       +        free(lp);
       +
       +        /* set up proto header */
       +        switch(version){
       +        case V4:
       +                h4 = &tcb->protohdr.tcp4hdr;
       +                memset(h4, 0, sizeof(*h4));
       +                h4->proto = IP_TCPPROTO;
       +                hnputs(h4->tcpsport, new->lport);
       +                hnputs(h4->tcpdport, new->rport);
       +                v6tov4(h4->tcpsrc, dst);
       +                v6tov4(h4->tcpdst, src);
       +                break;
       +        case V6:
       +                h6 = &tcb->protohdr.tcp6hdr;
       +                memset(h6, 0, sizeof(*h6));
       +                h6->proto = IP_TCPPROTO;
       +                hnputs(h6->tcpsport, new->lport);
       +                hnputs(h6->tcpdport, new->rport);
       +                ipmove(h6->tcpsrc, dst);
       +                ipmove(h6->tcpdst, src);
       +                break;
       +        default:
       +                panic("tcpincoming: version %d", new->ipversion);
       +        }
       +
       +        tcpsetstate(new, Established);
       +
       +        iphtadd(&tpriv->ht, new);
       +
       +        return new;
       +}
       +
       +/*
       + *  1 if x lies in the closed range [low, high], else 0.
       + *  low > high means the range wraps past the largest ulong.
       + */
       +int
       +seq_within(ulong x, ulong low, ulong high)
       +{
       +        /* wrapped range: x is in the upper or the lower piece */
       +        if(low > high)
       +                return x >= low || x <= high;
       +
       +        return low <= x && x <= high;
       +}
       +
       +/* x < y in sequence space: decided by the sign of x-y taken as an int */
       +int
       +seq_lt(ulong x, ulong y)
       +{
       +        int diff;
       +
       +        diff = (int)(x-y);
       +        return diff < 0;
       +}
       +
       +/* x <= y in sequence space: decided by the sign of x-y taken as an int */
       +int
       +seq_le(ulong x, ulong y)
       +{
       +        int diff;
       +
       +        diff = (int)(x-y);
       +        return diff <= 0;
       +}
       +
       +/* x > y in sequence space: decided by the sign of x-y taken as an int */
       +int
       +seq_gt(ulong x, ulong y)
       +{
       +        int diff;
       +
       +        diff = (int)(x-y);
       +        return diff > 0;
       +}
       +
       +/* x >= y in sequence space: decided by the sign of x-y taken as an int */
       +int
       +seq_ge(ulong x, ulong y)
       +{
       +        int diff;
       +
       +        diff = (int)(x-y);
       +        return diff >= 0;
       +}
       +
       +/*
       + *  use the time between the first SYN and its ack as the
       + *  initial round trip time: it seeds both srtt and mdev
       + */
       +void
       +tcpsynackrtt(Conv *s)
       +{
       +        Tcppriv *priv;
       +        Tcpctl *ctl;
       +        int elapsed;
       +
       +        priv = s->p->priv;
       +        ctl = (Tcpctl*)s->ptcl;
       +
       +        /* time since the SYN went out, kept in the estimators' scaled form */
       +        elapsed = NOW - ctl->sndsyntime;
       +        ctl->mdev = elapsed<<LOGDGAIN;
       +        ctl->srtt = elapsed<<LOGAGAIN;
       +
       +        /* halt round trip timer */
       +        tcphalt(priv, &ctl->rtt_timer);
       +}
       +
       +/*
       + *  Process the acknowledgement carried by seg for conversation s:
       + *  count duplicate acks (calling tcprxmit at TCPREXMTTHRESH),
       + *  refresh the send window, grow the congestion window, fold a
       + *  finished round trip measurement into srtt/mdev, discard the
       + *  acked bytes from s->wq and advance snd.una.
       + */
       +void
       +update(Conv *s, Tcp *seg)
       +{
       +        int rtt, delta;
       +        Tcpctl *tcb;
       +        ulong acked;
       +        ulong expand;
       +        Tcppriv *tpriv;
       +
       +        tpriv = s->p->priv;
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        /* ack is beyond snd.nxt, i.e. past anything sent: force output and ignore it */
       +        if(seq_gt(seg->ack, tcb->snd.nxt)) {
       +                tcb->flags |= FORCE;
       +                return;
       +        }
       +
       +        /*
       +         *  added by Dong Lin for fast retransmission.
       +         *  a duplicate ack: same ack as snd.una while data is still
       +         *  outstanding, no data in the segment, window unchanged.
       +         */
       +        if(seg->ack == tcb->snd.una
       +        && tcb->snd.una != tcb->snd.nxt
       +        && seg->len == 0
       +        && seg->wnd == tcb->snd.wnd) {
       +
       +                /* this is a pure ack w/o window update */
       +                netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
       +                        tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
       +
       +                if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
       +                        /*
       +                         *  tahoe tcp rxt the packet, half ssthresh,
       +                         *  and set cwnd to one packet
       +                         *  (presumably done inside tcprxmit - confirm)
       +                         */
       +                        tcb->snd.recovery = 1;
       +                        tcb->snd.rxt = tcb->snd.nxt;
       +                        netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
       +                        tcprxmit(s);
       +                } else {
       +                        /* do reno tcp here. */
       +                }
       +        }
       +
       +        /*
       +         *  update window: take it from a newer ack, or from the
       +         *  same ack when it advertises a larger window
       +         */
       +        if(seq_gt(seg->ack, tcb->snd.wl2)
       +        || (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
       +                tcb->snd.wnd = seg->wnd;
       +                tcb->snd.wl2 = seg->ack;
       +        }
       +
       +        /* nothing new is acknowledged: stop here */
       +        if(!seq_gt(seg->ack, tcb->snd.una)){
       +                /*
       +                 *  don't let us hangup if sending into a closed window and
       +                 *  we're still getting acks
       +                 */
       +                if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
       +                        tcb->backedoff = MAXBACKMS/4;
       +                }
       +                return;
       +        }
       +
       +        /*
       +         *  any positive ack turns off fast rxt,
       +         *  (should we do new-reno on partial acks?)
       +         *  while recovering, only an ack at or past snd.rxt ends it.
       +         */
       +        if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
       +                tcb->snd.dupacks = 0;
       +                tcb->snd.recovery = 0;
       +        } else
       +                netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
       +
       +        /* number of sequence numbers newly acknowledged */
       +        acked = seg->ack - tcb->snd.una;
       +
       +        /*
       +         *  avoid slow start and timers for SYN acks.
       +         *  SYNACK not yet set means this ack covers our SYN: the SYN
       +         *  took one sequence number that is not queued data.
       +         */
       +        if((tcb->flags & SYNACK) == 0) {
       +                tcb->flags |= SYNACK;
       +                acked--;
       +                tcb->flgcnt--;
       +                goto done;
       +        }
       +
       +        /* slow start as long as we're not recovering from lost packets */
       +        if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
       +                if(tcb->cwind < tcb->ssthresh) {
       +                        /* below ssthresh: grow by what was acked, at most one mss */
       +                        expand = tcb->mss;
       +                        if(acked < expand)
       +                                expand = acked;
       +                }
       +                else
       +                        expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
       +
       +                /* on overflow, or past the send window, grow only up to snd.wnd */
       +                if(tcb->cwind + expand < tcb->cwind)
       +                        expand = tcb->snd.wnd - tcb->cwind;
       +                if(tcb->cwind + expand > tcb->snd.wnd)
       +                        expand = tcb->snd.wnd - tcb->cwind;
       +                tcb->cwind += expand;
       +        }
       +
       +        /* Adjust the timers according to the round trip time */
       +        if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
       +                tcphalt(tpriv, &tcb->rtt_timer);
       +                /* the measurement is only used when the RETRAN flag is clear */
       +                if((tcb->flags&RETRAN) == 0) {
       +                        tcb->backoff = 0;
       +                        tcb->backedoff = 0;
       +                        rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
       +                        if(rtt == 0)
       +                                rtt = 1;        /* otherwise all close systems will rexmit in 0 time */
       +                        rtt *= MSPTICK;
       +                        if(tcb->srtt == 0) {
       +                                /* first sample seeds both estimators */
       +                                tcb->srtt = rtt << LOGAGAIN;
       +                                tcb->mdev = rtt << LOGDGAIN;
       +                        } else {
       +                                /* srtt and mdev are kept scaled by 2^LOGAGAIN and 2^LOGDGAIN */
       +                                delta = rtt - (tcb->srtt>>LOGAGAIN);
       +                                tcb->srtt += delta;
       +                                if(tcb->srtt <= 0)
       +                                        tcb->srtt = 1;
       +
       +                                delta = abs(delta) - (tcb->mdev>>LOGDGAIN);
       +                                tcb->mdev += delta;
       +                                if(tcb->mdev <= 0)
       +                                        tcb->mdev = 1;
       +                        }
       +                        tcpsettimer(tcb);
       +                }
       +        }
       +
       +done:
       +        /* fewer bytes discarded than acked: the rest acknowledged a flag (presumably FIN - confirm) */
       +        if(qdiscard(s->wq, acked) < acked)
       +                tcb->flgcnt--;
       +
       +        tcb->snd.una = seg->ack;
       +        if(seq_gt(seg->ack, tcb->snd.urg))
       +                tcb->snd.urg = seg->ack;
       +
       +        /* tcpgo the timer while data is still unacked, tcphalt it otherwise */
       +        if(tcb->snd.una != tcb->snd.nxt)
       +                tcpgo(tpriv, &tcb->timer);
       +        else
       +                tcphalt(tpriv, &tcb->timer);
       +
       +        /* never leave the send pointer behind acknowledged data */
       +        if(seq_lt(tcb->snd.ptr, tcb->snd.una))
       +                tcb->snd.ptr = tcb->snd.una;
       +
       +        tcb->flags &= ~RETRAN;
       +        tcb->backoff = 0;
       +        tcb->backedoff = 0;
       +}
       +
       +/*
       + *  tcpiput: process one received TCP segment (IPv4 or IPv6).
       + *
       + *  Checks the TCP checksum, converts the header into seg with
       + *  ntohtcp4/ntohtcp6, trims the block to the length claimed by the
       + *  datagram, looks up the matching conversation and then runs the
       + *  receive state machine on it.  The Ipifc argument is not used.
       + *
       + *  Locking: tcp is qlocked for the conversation lookup and for the
       + *  handling of Listen conversations; it is released only after the
       + *  conversation s has been qlocked.  Every exit past that point
       + *  unlocks s and pops the error label set up by waserror().
       + *
       + *  Exit labels:
       + *    reset  - no usable conversation: unlock tcp, send a RST, free bp
       + *    output - call tcpoutput(s), then unlock s
       + *    raise  - unlock s, free bp, then tcpkick(s)
       + */
       +void
       +tcpiput(Proto *tcp, Ipifc* _, Block *bp)
       +{
       +        Tcp seg;
       +        Tcp4hdr *h4;
       +        Tcp6hdr *h6;
       +        int hdrlen;
       +        Tcpctl *tcb;
       +        ushort length, csum;
       +        uchar source[IPaddrlen], dest[IPaddrlen];
       +        Conv *s;
       +        Fs *f;
       +        Tcppriv *tpriv;
       +        uchar version;
       +
       +        f = tcp->f;
       +        tpriv = tcp->priv;
       +
       +        tpriv->stats[InSegs]++;
       +
       +        /* both views alias the start of the packet; the version bits pick one */
       +        h4 = (Tcp4hdr*)(bp->rp);
       +        h6 = (Tcp6hdr*)(bp->rp);
       +
       +        if((h4->vihl&0xF0)==IP_VER4) {
       +                version = V4;
       +                length = nhgets(h4->length);
       +                v4tov6(dest, h4->tcpdst);
       +                v4tov6(source, h4->tcpsrc);
       +
       +                /*
       +                 *  header fields are rewritten in place before checksumming
       +                 *  (presumably to form the TCP pseudo header - confirm).
       +                 *  The checksum is not verified when the block is flagged
       +                 *  Btcpck or when the checksum field is zero.
       +                 */
       +                h4->Unused = 0;
       +                hnputs(h4->tcplen, length-TCP4_PKT);
       +                if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) &&
       +                        ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
       +                        tpriv->stats[CsumErrs]++;
       +                        tpriv->stats[InErrs]++;
       +                        netlog(f, Logtcp, "bad tcp proto cksum\n");
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /*
       +                 *  NOTE(review): this error return and the trimblock one
       +                 *  below do not free bp; presumably ntohtcp4 (given &bp)
       +                 *  and trimblock dispose of the block on failure - confirm.
       +                 */
       +                hdrlen = ntohtcp4(&seg, &bp);
       +                if(hdrlen < 0){
       +                        tpriv->stats[HlenErrs]++;
       +                        tpriv->stats[InErrs]++;
       +                        netlog(f, Logtcp, "bad tcp hdr len\n");
       +                        return;
       +                }
       +
       +                /* trim the packet to the size claimed by the datagram */
       +                length -= hdrlen+TCP4_PKT;
       +                bp = trimblock(bp, hdrlen+TCP4_PKT, length);
       +                if(bp == nil){
       +                        tpriv->stats[LenErrs]++;
       +                        tpriv->stats[InErrs]++;
       +                        netlog(f, Logtcp, "tcp len < 0 after trim\n");
       +                        return;
       +                }
       +        }
       +        else {
       +                /* saved because both fields are overwritten for the checksum and restored after it */
       +                int ttl = h6->ttl;
       +                int proto = h6->proto;
       +
       +                version = V6;
       +                length = nhgets(h6->ploadlen);
       +                ipmove(dest, h6->tcpdst);
       +                ipmove(source, h6->tcpsrc);
       +
       +                /*
       +                 *  rewrite header fields in place for the checksum
       +                 *  (presumably the v6 pseudo header - confirm).
       +                 *  NOTE(review): unlike the v4 path, Btcpck is not
       +                 *  consulted here.
       +                 */
       +                h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
       +                h6->ttl = proto;
       +                hnputl(h6->vcf, length);
       +                if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
       +                    (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
       +                        tpriv->stats[CsumErrs]++;
       +                        tpriv->stats[InErrs]++;
       +                        netlog(f, Logtcp,
       +                            "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
       +                                h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
       +                        freeblist(bp);
       +                        return;
       +                }
       +                /* put back ttl, proto and ploadlen; vcf is left as rewritten above */
       +                h6->ttl = ttl;
       +                h6->proto = proto;
       +                hnputs(h6->ploadlen, length);
       +
       +                /* NOTE(review): as in the v4 path, the two error returns below do not free bp */
       +                hdrlen = ntohtcp6(&seg, &bp);
       +                if(hdrlen < 0){
       +                        tpriv->stats[HlenErrs]++;
       +                        tpriv->stats[InErrs]++;
       +                        netlog(f, Logtcp, "bad tcpv6 hdr len\n");
       +                        return;
       +                }
       +
       +                /*
       +                 *  trim the packet to the size claimed by the datagram;
       +                 *  length came from ploadlen, so only hdrlen is subtracted
       +                 *  here (the v4 path also subtracts TCP4_PKT)
       +                 */
       +                length -= hdrlen;
       +                bp = trimblock(bp, hdrlen+TCP6_PKT, length);
       +                if(bp == nil){
       +                        tpriv->stats[LenErrs]++;
       +                        tpriv->stats[InErrs]++;
       +                        netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
       +                        return;
       +                }
       +        }
       +
       +        /* from here on length is the segment length after removing the headers */
       +
       +        /* lock protocol while searching for a conversation */
       +        QLOCK(tcp);
       +
       +        /* Look for a matching conversation */
       +        s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
       +        if(s == nil){
       +                netlog(f, Logtcp, "iphtlook failed\n");
       +                /* also entered by goto from the Listen case when tcpincoming returns nil */
       +reset:
       +                QUNLOCK(tcp);
       +                sndrst(tcp, source, dest, length, &seg, version, "no conversation");
       +                freeblist(bp);
       +                return;
       +        }
       +
       +        /* if it's a listener, look for the right flags and get a new conv */
       +        tcb = (Tcpctl*)s->ptcl;
       +        if(tcb->state == Listen){
       +                /* a RST to a listener is passed to limborst and the segment dropped */
       +                if(seg.flags & RST){
       +                        limborst(s, &seg, source, dest, version);
       +                        QUNLOCK(tcp);
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /* if this is a new SYN, put the call into limbo */
       +                if((seg.flags & SYN) && (seg.flags & ACK) == 0){
       +                        limbo(s, source, dest, &seg, version);
       +                        QUNLOCK(tcp);
       +                        freeblist(bp);
       +                        return;
       +                }
       +
       +                /*
       +                 *  if there's a matching call in limbo, tcpincoming will
       +                 *  return it in state Syn_received
       +                 */
       +                s = tcpincoming(s, &seg, source, dest, version);
       +                if(s == nil)
       +                        goto reset;
       +        }
       +
       +        /* The rest of the input state machine is run with the control block
       +         * locked and implements the state machine directly out of the RFC.
       +         * Out-of-band data is ignored - it was always a bad idea.
       +         */
       +        tcb = (Tcpctl*)s->ptcl;
       +        /*
       +         *  error recovery: unlock s and pass the error on.  The label is
       +         *  installed before s is locked; tcp is released only once s is held.
       +         */
       +        if(waserror()){
       +                QUNLOCK(s);
       +                nexterror();
       +        }
       +        QLOCK(s);
       +        QUNLOCK(tcp);
       +
       +        /* fix up window */
       +        seg.wnd <<= tcb->rcv.scale;
       +
       +        /* every input packet puts off the keep alive time out */
       +        tcpsetkacounter(tcb);
       +
       +        /* states handled before trimming; any state not listed goes straight on */
       +        switch(tcb->state) {
       +        case Closed:
       +                sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
       +                goto raise;
       +        case Syn_sent:
       +                /* an ack outside iss+1 .. snd.nxt is answered with a reset */
       +                if(seg.flags & ACK) {
       +                        if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) {
       +                                sndrst(tcp, source, dest, length, &seg, version,
       +                                         "bad seq in Syn_sent");
       +                                goto raise;
       +                        }
       +                }
       +                /* RST with ACK closes the conversation as refused; a bare RST is just dropped */
       +                if(seg.flags & RST) {
       +                        if(seg.flags & ACK)
       +                                localclose(s, Econrefused);
       +                        goto raise;
       +                }
       +
       +                /*
       +                 *  SYN+ACK moves to Established, a bare SYN to Syn_received.
       +                 *  Data or a FIN on the segment falls out of the switch to
       +                 *  the common processing below; otherwise just answer.
       +                 */
       +                if(seg.flags & SYN) {
       +                        procsyn(s, &seg);
       +                        if(seg.flags & ACK){
       +                                update(s, &seg);
       +                                tcpsynackrtt(s);
       +                                tcpsetstate(s, Established);
       +                                tcpsetscale(s, tcb, seg.ws, tcb->scale);
       +                        }
       +                        else {
       +                                tcb->time = NOW;
       +                                tcpsetstate(s, Syn_received);        /* DLP - shouldn't this be a reset? */
       +                        }
       +
       +                        if(length != 0 || (seg.flags & FIN))
       +                                break;
       +
       +                        freeblist(bp);
       +                        goto output;
       +                }
       +                else
       +                        freeblist(bp);
       +
       +                /* neither SYN nor RST: the segment was freed above, nothing more to do */
       +                QUNLOCK(s);
       +                poperror();
       +                return;
       +        case Syn_received:
       +                /* doesn't matter if it's the correct ack, we're just trying to set timing */
       +                if(seg.flags & ACK)
       +                        tcpsynackrtt(s);
       +                break;
       +        }
       +
       +        /*
       +         *  One DOS attack is to open connections to us and then forget about them,
       +         *  thereby tying up a conv at no long term cost to the attacker.
       +         *  This is an attempt to defeat these stateless DOS attacks.  See
       +         *  corresponding code in tcpsendka().
       +         *
       +         *  NOTE(review): after localclose() below there is no goto or
       +         *  return, so processing of the segment continues - confirm
       +         *  that this is intended.
       +         */
       +        if(tcb->state != Syn_received && (seg.flags & RST) == 0){
       +                if(tcpporthogdefense
       +                && seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
       +                        print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n",
       +                                source, seg.source, dest, seg.dest, seg.flags,
       +                                tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
       +                        localclose(s, "stateless hog");
       +                }
       +        }
       +
       +        /*
       +         *  Cut the data to fit the receive window.
       +         *  When tcptrim returns -1 the ack is still processed (including
       +         *  the Closing -> Time_wait transition) and, unless the segment
       +         *  was a RST, an ack is forced out in reply.
       +         *  NOTE(review): bp is not freed on this path; presumably tcptrim
       +         *  (given &bp) released it - confirm.
       +         */
       +        if(tcptrim(tcb, &seg, &bp, &length) == -1) {
       +                netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
       +                update(s, &seg);
       +                if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
       +                        tcphalt(tpriv, &tcb->rtt_timer);
       +                        tcphalt(tpriv, &tcb->acktimer);
       +                        tcphalt(tpriv, &tcb->katimer);
       +                        tcpsetstate(s, Time_wait);
       +                        tcb->timer.start = MSL2*(1000 / MSPTICK);
       +                        tcpgo(tpriv, &tcb->timer);
       +                }
       +                if(!(seg.flags & RST)) {
       +                        tcb->flags |= FORCE;
       +                        goto output;
       +                }
       +                QUNLOCK(s);
       +                poperror();
       +                return;
       +        }
       +
       +        /* Cannot accept so answer with a rst */
       +        if(length && tcb->state == Closed) {
       +                sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
       +                goto raise;
       +        }
       +
       +        /* The segment is beyond the current receive pointer so
       +         * queue the data in the resequence queue
       +         * (bp is handed to addreseq and not freed here; an ack is forced)
       +         */
       +        if(seg.seq != tcb->rcv.nxt)
       +        if(length != 0 || (seg.flags & (SYN|FIN))) {
       +                update(s, &seg);
       +                if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
       +                        print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
       +                tcb->flags |= FORCE;
       +                goto output;
       +        }
       +
       +        /*
       +         *  keep looping till we've processed this packet plus any
       +         *  adjacent packets in the resequence queue
       +         */
       +        for(;;) {
       +                /* a RST closes the conversation, whatever state it is in */
       +                if(seg.flags & RST) {
       +                        if(tcb->state == Established) {
       +                                tpriv->stats[EstabResets]++;
       +                                if(tcb->rcv.nxt != seg.seq)
       +                                        print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt, seg.seq);
       +                        }
       +                        localclose(s, Econrefused);
       +                        goto raise;
       +                }
       +
       +                /* segments without an ACK are dropped */
       +                if((seg.flags&ACK) == 0)
       +                        goto raise;
       +
       +                /* ack processing, by state */
       +                switch(tcb->state) {
       +                case Syn_received:
       +                        if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){
       +                                sndrst(tcp, source, dest, length, &seg, version,
       +                                        "bad seq in Syn_received");
       +                                goto raise;
       +                        }
       +                        update(s, &seg);
       +                        tcpsetstate(s, Established);
       +                        /* falls through, so update() is called a second time for this segment */
       +                case Established:
       +                case Close_wait:
       +                        update(s, &seg);
       +                        break;
       +                case Finwait1:
       +                        update(s, &seg);
       +                        /* nothing left unacknowledged: move to Finwait2 and arm katimer */
       +                        if(qlen(s->wq)+tcb->flgcnt == 0){
       +                                tcphalt(tpriv, &tcb->rtt_timer);
       +                                tcphalt(tpriv, &tcb->acktimer);
       +                                tcpsetkacounter(tcb);
       +                                tcb->time = NOW;
       +                                tcpsetstate(s, Finwait2);
       +                                tcb->katimer.start = MSL2 * (1000 / MSPTICK);
       +                                tcpgo(tpriv, &tcb->katimer);
       +                        }
       +                        break;
       +                case Finwait2:
       +                        update(s, &seg);
       +                        break;
       +                case Closing:
       +                        update(s, &seg);
       +                        /* nothing left unacknowledged: enter Time_wait */
       +                        if(qlen(s->wq)+tcb->flgcnt == 0) {
       +                                tcphalt(tpriv, &tcb->rtt_timer);
       +                                tcphalt(tpriv, &tcb->acktimer);
       +                                tcphalt(tpriv, &tcb->katimer);
       +                                tcpsetstate(s, Time_wait);
       +                                tcb->timer.start = MSL2*(1000 / MSPTICK);
       +                                tcpgo(tpriv, &tcb->timer);
       +                        }
       +                        break;
       +                case Last_ack:
       +                        update(s, &seg);
       +                        if(qlen(s->wq)+tcb->flgcnt == 0) {
       +                                localclose(s, nil);
       +                                goto raise;
       +                        }
       +                        /* something is still outstanding: falls through to Time_wait */
       +                case Time_wait:
       +                        tcb->flags |= FORCE;
       +                        if(tcb->timer.state != TcptimerON)
       +                                tcpgo(tpriv, &tcb->timer);
       +                }
       +
       +                /*
       +                 *  urgent data: when the segment advances rcv.urg, record the
       +                 *  new value and pullblock seg.urg bytes of bp; without URG,
       +                 *  rcv.urg is kept from falling behind rcv.nxt
       +                 */
       +                if((seg.flags&URG) && seg.urg) {
       +                        if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
       +                                tcb->rcv.urg = seg.urg + seg.seq;
       +                                pullblock(&bp, seg.urg);
       +                        }
       +                }
       +                else
       +                if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
       +                        tcb->rcv.urg = tcb->rcv.nxt;
       +
       +                /* segment text, by state */
       +                if(length == 0) {
       +                        if(bp != nil)
       +                                freeblist(bp);
       +                }
       +                else {
       +                        switch(tcb->state){
       +                        default:
       +                                /* Ignore segment text */
       +                                if(bp != nil)
       +                                        freeblist(bp);
       +                                break;
       +
       +                        case Syn_received:
       +                        case Established:
       +                        case Finwait1:
       +                                /* If we still have some data place on
       +                                 * receive queue
       +                                 */
       +                                if(bp) {
       +                                        bp = packblock(bp);
       +                                        if(bp == nil)
       +                                                panic("tcp packblock");
       +                                        qpassnolim(s->rq, bp);
       +                                        bp = nil;
       +
       +                                        /*
       +                                         *  Force an ack every 2 data messages.  This is
       +                                         *  a hack for rob to make his home system run
       +                                         *  faster.
       +                                         *
       +                                         *  this also keeps the standard TCP congestion
       +                                         *  control working since it needs an ack every
       +                                         *  2 max segs worth.  This is not quite that,
       +                                         *  but under a real stream is equivalent since
       +                                         *  every packet has a max seg in it.
       +                                         *
       +                                         *  (rcv.una is the counter: bumped per block
       +                                         *  queued here, zeroed in tcpoutput)
       +                                         */
       +                                        if(++(tcb->rcv.una) >= 2)
       +                                                tcb->flags |= FORCE;
       +                                }
       +                                tcb->rcv.nxt += length;
       +
       +                                /*
       +                                 *  update our rcv window
       +                                 */
       +                                tcprcvwin(s);
       +
       +                                /*
       +                                 *  turn on the acktimer if there's something
       +                                 *  to ack
       +                                 */
       +                                if(tcb->acktimer.state != TcptimerON)
       +                                        tcpgo(tpriv, &tcb->acktimer);
       +
       +                                break;
       +                        case Finwait2:
       +                                /* no process to read the data, send a reset */
       +                                if(bp != nil)
       +                                        freeblist(bp);
       +                                sndrst(tcp, source, dest, length, &seg, version,
       +                                        "send to Finwait2");
       +                                QUNLOCK(s);
       +                                poperror();
       +                                return;
       +                        }
       +                }
       +
       +                /*
       +                 *  FIN processing: always force an ack; where the FIN is
       +                 *  accepted rcv.nxt is advanced by one for it
       +                 */
       +                if(seg.flags & FIN) {
       +                        tcb->flags |= FORCE;
       +
       +                        switch(tcb->state) {
       +                        case Syn_received:
       +                        case Established:
       +                                tcb->rcv.nxt++;
       +                                tcpsetstate(s, Close_wait);
       +                                break;
       +                        case Finwait1:
       +                                tcb->rcv.nxt++;
       +                                /* Time_wait if nothing of ours is outstanding, else Closing */
       +                                if(qlen(s->wq)+tcb->flgcnt == 0) {
       +                                        tcphalt(tpriv, &tcb->rtt_timer);
       +                                        tcphalt(tpriv, &tcb->acktimer);
       +                                        tcphalt(tpriv, &tcb->katimer);
       +                                        tcpsetstate(s, Time_wait);
       +                                        tcb->timer.start = MSL2*(1000/MSPTICK);
       +                                        tcpgo(tpriv, &tcb->timer);
       +                                }
       +                                else
       +                                        tcpsetstate(s, Closing);
       +                                break;
       +                        case Finwait2:
       +                                tcb->rcv.nxt++;
       +                                tcphalt(tpriv, &tcb->rtt_timer);
       +                                tcphalt(tpriv, &tcb->acktimer);
       +                                tcphalt(tpriv, &tcb->katimer);
       +                                tcpsetstate(s, Time_wait);
       +                                tcb->timer.start = MSL2 * (1000/MSPTICK);
       +                                tcpgo(tpriv, &tcb->timer);
       +                                break;
       +                        case Close_wait:
       +                        case Closing:
       +                        case Last_ack:
       +                                break;
       +                        case Time_wait:
       +                                tcpgo(tpriv, &tcb->timer);
       +                                break;
       +                        }
       +                }
       +
       +                /*
       +                 *  get next adjacent segment from the resequence queue.
       +                 *  dump/trim any overlapping segments
       +                 *  (seg, bp and length are replaced by the queued segment;
       +                 *  the outer loop then processes it like a fresh arrival)
       +                 */
       +                for(;;) {
       +                        if(tcb->reseq == nil)
       +                                goto output;
       +
       +                        if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
       +                                goto output;
       +
       +                        getreseq(tcb, &seg, &bp, &length);
       +
       +                        if(tcptrim(tcb, &seg, &bp, &length) == 0)
       +                                break;
       +                }
       +        }
       +        /* normal exit: let tcpoutput send whatever is due, then release s */
       +output:
       +        tcpoutput(s);
       +        QUNLOCK(s);
       +        poperror();
       +        return;
       +        /* drop exit: release s, free the segment, then tcpkick the conversation */
       +raise:
       +        QUNLOCK(s);
       +        poperror();
       +        freeblist(bp);
       +        tcpkick(s);
       +}
       +
       +/*
       + *  always enters and exits with the s locked.  We drop
       + *  the lock to ipoput the packet so some care has to be
       + *  taken by callers.
       + */
       +void
       +tcpoutput(Conv *s)
       +{
       +        Tcp seg;
       +        int msgs;
       +        Tcpctl *tcb;
       +        Block *hbp, *bp;
       +        int sndcnt, n;
       +        ulong ssize, dsize, usable, sent;
       +        Fs *f;
       +        Tcppriv *tpriv;
       +        uchar version;
       +
       +        f = s->p->f;
       +        tpriv = s->p->priv;
       +        version = s->ipversion;
       +
       +        for(msgs = 0; msgs < 100; msgs++) {
       +                tcb = (Tcpctl*)s->ptcl;
       +
       +                switch(tcb->state) {
       +                case Listen:
       +                case Closed:
       +                case Finwait2:
       +                        return;
       +                }
       +
       +                /* force an ack when a window has opened up */
       +                if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
       +                        tcb->rcv.blocked = 0;
       +                        tcb->flags |= FORCE;
       +                }
       +
       +                sndcnt = qlen(s->wq)+tcb->flgcnt;
       +                sent = tcb->snd.ptr - tcb->snd.una;
       +
       +                /* Don't send anything else until our SYN has been acked */
       +                if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
       +                        break;
       +
       +                /* Compute usable segment based on offered window and limit
       +                 * window probes to one
       +                 */
       +                if(tcb->snd.wnd == 0){
       +                        if(sent != 0) {
       +                                if((tcb->flags&FORCE) == 0)
       +                                        break;
       +//                                tcb->snd.ptr = tcb->snd.una;
       +                        }
       +                        usable = 1;
       +                }
       +                else {
       +                        usable = tcb->cwind;
       +                        if(tcb->snd.wnd < usable)
       +                                usable = tcb->snd.wnd;
       +                        usable -= sent;
       +                }
       +                ssize = sndcnt-sent;
       +                if(ssize && usable < 2)
       +                        netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
       +                                tcb->snd.wnd, tcb->cwind);
       +                if(usable < ssize)
       +                        ssize = usable;
       +                if(tcb->mss < ssize)
       +                        ssize = tcb->mss;
       +                dsize = ssize;
       +                seg.urg = 0;
       +
       +                if(ssize == 0)
       +                if((tcb->flags&FORCE) == 0)
       +                        break;
       +
       +                tcb->flags &= ~FORCE;
       +                tcprcvwin(s);
       +
       +                /* By default we will generate an ack */
       +                tcphalt(tpriv, &tcb->acktimer);
       +                tcb->rcv.una = 0;
       +                seg.source = s->lport;
       +                seg.dest = s->rport;
       +                seg.flags = ACK;
       +                seg.mss = 0;
       +                seg.ws = 0;
       +                switch(tcb->state){
       +                case Syn_sent:
       +                        seg.flags = 0;
       +                        if(tcb->snd.ptr == tcb->iss){
       +                                seg.flags |= SYN;
       +                                dsize--;
       +                                seg.mss = tcb->mss;
       +                                seg.ws = tcb->scale;
       +                        }
       +                        break;
       +                case Syn_received:
       +                        /*
       +                         *  don't send any data with a SYN/ACK packet
       +                         *  because Linux rejects the packet in its
       +                         *  attempt to solve the SYN attack problem
       +                         */
       +                        if(tcb->snd.ptr == tcb->iss){
       +                                seg.flags |= SYN;
       +                                dsize = 0;
       +                                ssize = 1;
       +                                seg.mss = tcb->mss;
       +                                seg.ws = tcb->scale;
       +                        }
       +                        break;
       +                }
       +                seg.seq = tcb->snd.ptr;
       +                seg.ack = tcb->rcv.nxt;
       +                seg.wnd = tcb->rcv.wnd;
       +
       +                /* Pull out data to send */
       +                bp = nil;
       +                if(dsize != 0) {
       +                        bp = qcopy(s->wq, dsize, sent);
       +                        if(BLEN(bp) != dsize) {
       +                                seg.flags |= FIN;
       +                                dsize--;
       +                        }
       +                }
       +
       +                if(sent+dsize == sndcnt)
       +                        seg.flags |= PSH;
       +
       +                /* keep track of balance of resent data */
       +                if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
       +                        n = tcb->snd.nxt - tcb->snd.ptr;
       +                        if(ssize < n)
       +                                n = ssize;
       +                        tcb->resent += n;
       +                        netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
       +                                s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
       +                        tpriv->stats[RetransSegs]++;
       +                }
       +
       +                tcb->snd.ptr += ssize;
       +
       +                /* Pull up the send pointer so we can accept acks
       +                 * for this window
       +                 */
       +                if(seq_gt(tcb->snd.ptr,tcb->snd.nxt))
       +                        tcb->snd.nxt = tcb->snd.ptr;
       +
       +                /* Build header, link data and compute cksum */
       +                switch(version){
       +                case V4:
       +                        tcb->protohdr.tcp4hdr.vihl = IP_VER4;
       +                        hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
       +                        if(hbp == nil) {
       +                                freeblist(bp);
       +                                return;
       +                        }
       +                        break;
       +                case V6:
       +                        tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
       +                        hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
       +                        if(hbp == nil) {
       +                                freeblist(bp);
       +                                return;
       +                        }
       +                        break;
       +                default:
       +                        hbp = nil;        /* to suppress a warning */
       +                        panic("tcpoutput: version %d", version);
       +                }
       +
       +                /* Start the transmission timers if there is new data and we
       +                 * expect acknowledges
       +                 */
       +                if(ssize != 0){
       +                        if(tcb->timer.state != TcptimerON)
       +                                tcpgo(tpriv, &tcb->timer);
       +
       +                        /*  If round trip timer isn't running, start it.
       +                         *  measure the longest packet only in case the
       +                         *  transmission time dominates RTT
       +                         */
       +                        if(tcb->rtt_timer.state != TcptimerON)
       +                        if(ssize == tcb->mss) {
       +                                tcpgo(tpriv, &tcb->rtt_timer);
       +                                tcb->rttseq = tcb->snd.ptr;
       +                        }
       +                }
       +
       +                tpriv->stats[OutSegs]++;
       +
       +                /* put off the next keep alive */
       +                tcpgo(tpriv, &tcb->katimer);
       +
       +                switch(version){
       +                case V4:
       +                        if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){
       +                                /* a negative return means no route */
       +                                localclose(s, "no route");
       +                        }
       +                        break;
       +                case V6:
       +                        if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){
       +                                /* a negative return means no route */
       +                                localclose(s, "no route");
       +                        }
       +                        break;
       +                default:
       +                        panic("tcpoutput2: version %d", version);
       +                }
       +                if((uint)(msgs%4) == 1){
       +                        QUNLOCK(s);
       +                        sched();
       +                        QLOCK(s);
       +                }
       +        }
       +}
       +
       +/*
       + *  the BSD convention (hack?) for keep alives.  resend last uchar acked.
       + *
       + *  The probe carries ACK|PSH and sequence number snd.una-1; when
       + *  tcpporthogdefense is set the sequence number is instead pushed
       + *  back by 1<<30 plus a random amount below 1<<20.  In Finwait2 a
       + *  FIN with no data is sent, in every other state a single byte
       + *  whose content is not set here.
       + */
       +void
       +tcpsendka(Conv *s)
       +{
       +        Tcp seg;
       +        Tcpctl *tcb;
       +        Block *pkt, *payload;
       +        int v4;
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +        payload = nil;
       +
       +        /* fixed header fields */
       +        seg.source = s->lport;
       +        seg.dest = s->rport;
       +        seg.flags = ACK|PSH;
       +        seg.urg = 0;
       +        seg.mss = 0;
       +        seg.ws = 0;
       +
       +        if(!tcpporthogdefense)
       +                seg.seq = tcb->snd.una-1;
       +        else
       +                seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20);
       +        seg.ack = tcb->rcv.nxt;
       +        tcb->rcv.una = 0;
       +        seg.wnd = tcb->rcv.wnd;
       +
       +        if(tcb->state != Finwait2){
       +                payload = allocb(1);
       +                payload->wp++;
       +        } else
       +                seg.flags |= FIN;
       +
       +        /* Build header, link data and compute cksum */
       +        v4 = isv4(s->raddr);
       +        if(v4){
       +                tcb->protohdr.tcp4hdr.vihl = IP_VER4;
       +                pkt = htontcp4(&seg, payload, &tcb->protohdr.tcp4hdr, tcb);
       +        } else {
       +                tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
       +                pkt = htontcp6(&seg, payload, &tcb->protohdr.tcp6hdr, tcb);
       +        }
       +        if(pkt == nil){
       +                /* no header block: give the payload back and send nothing */
       +                freeblist(payload);
       +                return;
       +        }
       +
       +        if(v4)
       +                ipoput4(s->p->f, pkt, 0, s->ttl, s->tos, s);
       +        else
       +                ipoput6(s->p->f, pkt, 0, s->ttl, s->tos, s);
       +}
       +
       +/*
       + *  set connection to time out after 12 minutes
       + *
       + *  kacounter becomes the number of keep-alive timer periods
       + *  (katimer.start ticks of MSPTICK ms each) that fit in 12 minutes,
       + *  but never fewer than 3.
       + *  NOTE(review): divides by katimer.start*MSPTICK; assumes the
       + *  caller has already given katimer.start a non-zero value.
       + */
       +void
       +tcpsetkacounter(Tcpctl *tcb)
       +{
       +        enum {
       +                Kalifems = 12 * 60 * 1000,        /* 12 minutes in ms */
       +                Minprobes = 3
       +        };
       +
       +        tcb->kacounter = Kalifems / (tcb->katimer.start*MSPTICK);
       +        if(tcb->kacounter < Minprobes)
       +                tcb->kacounter = Minprobes;
       +}
       +
       +/*
       + *  Keep-alive expiry routine; v is the Conv (presumably installed
       + *  as the katimer callback -- the registration is not in this part
       + *  of the file).  Each call uses up one unit of kacounter: once it
       + *  reaches zero the conversation is closed with Etimedout, otherwise
       + *  a keepalive is sent and the timer restarted.  A Closed connection
       + *  is left alone.
       + */
       +void
       +tcpkeepalive(void *v)
       +{
       +        Conv *s;
       +        Tcpctl *tcb;
       +
       +        s = v;
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        /* release the lock again if anything below raises an error */
       +        if(waserror()){
       +                QUNLOCK(s);
       +                nexterror();
       +        }
       +        QLOCK(s);
       +        if(tcb->state == Closed){
       +                /* nothing to probe */
       +        } else if(--(tcb->kacounter) > 0){
       +                tcpsendka(s);
       +                tcpgo(s->p->priv, &tcb->katimer);
       +        } else
       +                localclose(s, Etimedout);
       +        QUNLOCK(s);
       +        poperror();
       +}
       +
       +/*
       + *  start keepalive timer ("keepalive [ms]" control request)
       + *
       + *  f[1], when present (n > 1), is the probe interval in
       + *  milliseconds; a value smaller than one tick (MSPTICK) is ignored
       + *  and the current katimer.start is kept.  The timeout counter is
       + *  recomputed and the timer started.
       + *
       + *  Returns nil on success, or an error string when the connection
       + *  is not in the Established state.
       + */
       +char*
       +tcpstartka(Conv *s, char **f, int n)
       +{
       +        Tcpctl *tcb;
       +        int x;
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +        if(tcb->state != Established)
       +                return "connection must be in Established state";
       +        if(n > 1){
       +                x = atoi(f[1]);
       +                if(x >= MSPTICK)
       +                        tcb->katimer.start = x/MSPTICK;
       +        }
       +        tcpsetkacounter(tcb);
       +        tcpgo(s->p->priv, &tcb->katimer);
       +
       +        return nil;
       +}
       +
       +/*
       + *  turn checksums on/off ("checksum n" control request)
       + *
       + *  f[1] is converted with atoi: zero sets tcb->nochecksum, any
       + *  other value clears it.  n is the number of fields in f; with
       + *  fewer than two there is no f[1] to read, so an error string is
       + *  returned instead.  Returns nil on success.
       + */
       +char*
       +tcpsetchecksum(Conv *s, char **f, int n)
       +{
       +        Tcpctl *tcb;
       +
       +        if(n < 2)
       +                return "checksum request needs an argument";
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +        tcb->nochecksum = !atoi(f[1]);
       +
       +        return nil;
       +}
       +
       +/*
       + *  Force a retransmission: rewind the send pointer to the oldest
       + *  unacknowledged byte, shrink the congestion state to one segment
       + *  and call tcpoutput.
       + */
       +void
       +tcprxmit(Conv *s)
       +{
       +        Tcpctl *tcb;
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        tcb->snd.ptr = tcb->snd.una;
       +        tcb->flags |= RETRAN|FORCE;
       +
       +        /*
       +         *  pull window down to a single packet
       +         */
       +        tcb->cwind = tcb->mss;
       +
       +        /*
       +         *  We should be halving the slow start threshhold (down to one
       +         *  mss) but leaving it at mss seems to work well enough
       +         */
       +        tcb->ssthresh = tcb->mss;
       +
       +        tcpoutput(s);
       +}
       +
       +/*
       + *  Retransmission timer expiry; arg is the Conv.
       + *
       + *  Closed: nothing to do.  Time_wait: close the conversation.
       + *  Any other state: back off, and either give up with Etimedout
       + *  once the accumulated back-off reaches the limit (MAXBACKMS,
       + *  halved for Syn_sent) or recompute the timer and retransmit.
       + */
       +void
       +tcptimeout(void *arg)
       +{
       +        Conv *s;
       +        Tcpctl *tcb;
       +        Tcppriv *tpriv;
       +        int maxback;
       +
       +        s = (Conv*)arg;
       +        tcb = (Tcpctl*)s->ptcl;
       +        tpriv = s->p->priv;
       +
       +        /* release the lock again if anything below raises an error */
       +        if(waserror()){
       +                QUNLOCK(s);
       +                nexterror();
       +        }
       +        QLOCK(s);
       +        switch(tcb->state){
       +        case Closed:
       +                break;
       +        case Time_wait:
       +                localclose(s, nil);
       +                break;
       +        default:
       +                tcb->backoff++;
       +                maxback = (tcb->state == Syn_sent) ? MAXBACKMS/2 : MAXBACKMS;
       +                tcb->backedoff += tcb->timer.start * MSPTICK;
       +                if(tcb->backedoff >= maxback) {
       +                        localclose(s, Etimedout);
       +                        break;
       +                }
       +                netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
       +                tcpsettimer(tcb);
       +                tcprxmit(s);
       +                tpriv->stats[RetransTimeouts]++;
       +                tcb->snd.dupacks = 0;
       +                break;
       +        }
       +        QUNLOCK(s);
       +        poperror();
       +}
       +
       +/*
       + *  Report whether seq lies inside the receive window, i.e. within
       + *  [rcv.nxt, rcv.nxt+rcv.wnd-1] as judged by seq_within.
       + */
       +int
       +inwindow(Tcpctl *tcb, int seq)
       +{
       +        return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd - 1);
       +}
       +
       +/*
       + *  set up state for a received SYN (or SYN ACK) packet
       + *
       + *  Records the peer's initial sequence number, starts the receive
       + *  sequence and urgent pointers one past it, clamps our mss to the
       + *  peer's, takes over its advertised window and sets FORCE.
       + */
       +void
       +procsyn(Conv *s, Tcp *seg)
       +{
       +        Tcpctl *tcb;
       +
       +        tcb = (Tcpctl*)s->ptcl;
       +
       +        tcb->irs = seg->seq;
       +        tcb->rcv.nxt = seg->seq + 1;
       +        tcb->rcv.urg = tcb->rcv.nxt;
       +
       +        tcb->flags |= FORCE;
       +
       +        /* our sending max segment size cannot be bigger than what he asked for */
       +        if(seg->mss != 0 && seg->mss < tcb->mss)
       +                tcb->mss = seg->mss;
       +
       +        /* the congestion window always starts out as a single segment */
       +        tcb->cwind = tcb->mss;
       +        tcb->snd.wnd = seg->wnd;
       +}
       +
       +/*
       + *  Queue an out-of-order segment (header *seg, data bp, length
       + *  bytes) on tcb->reseq, which is kept sorted by starting sequence
       + *  number.  bp is always consumed: it is either stored on the queue
       + *  or freed.
       + *
       + *  Returns 0 normally (also when the Reseq cannot be allocated).
       + *  When the new entry is not placed at the head and the lengths of
       + *  the entries in front of it add up to more than QMAX<<rcv.scale,
       + *  the first entries are printed, the whole queue (new entry
       + *  included) is freed and -1 is returned.
       + */
       +int
       +addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
       +{
       +        Reseq *new, *cur, *next;
       +        int i, rqlen, qmax;
       +
       +        new = malloc(sizeof(Reseq));
       +        if(new == nil){
       +                freeblist(bp);        /* bp always consumed by add_reseq */
       +                return 0;
       +        }
       +
       +        new->seg = *seg;
       +        new->bp = bp;
       +        new->length = length;
       +
       +        /* Place on reassembly list sorting by starting seq number */
       +        cur = tcb->reseq;
       +        if(cur == nil || seq_lt(seg->seq, cur->seg.seq)) {
       +                /* new head of the list */
       +                new->next = cur;
       +                tcb->reseq = new;
       +                if(new->next != nil)
       +                        tpriv->stats[OutOfOrder]++;
       +                return 0;
       +        }
       +
       +        /* walk to the entry the new one goes after, summing lengths */
       +        rqlen = 0;
       +        for(;;) {
       +                rqlen += cur->length;
       +                if(cur->next == nil || seq_lt(seg->seq, cur->next->seg.seq)) {
       +                        new->next = cur->next;
       +                        cur->next = new;
       +                        if(new->next != nil)
       +                                tpriv->stats[OutOfOrder]++;
       +                        break;
       +                }
       +                cur = cur->next;
       +        }
       +
       +        qmax = QMAX<<tcb->rcv.scale;
       +        if(rqlen <= qmax)
       +                return 0;
       +
       +        print("resequence queue > window: %d > %d\n", rqlen, qmax);
       +        i = 0;
       +        for(cur = tcb->reseq; cur != nil; cur = cur->next){
       +                print("%#lux %#lux %#ux\n", cur->seg.seq,
       +                        cur->seg.ack, cur->seg.flags);
       +                if(i++ > 10){
       +                        print("...\n");
       +                        break;
       +                }
       +        }
       +
       +        /*
       +         * delete entire reassembly queue; wait for retransmit.
       +         * - should we be smarter and only delete the tail?
       +         */
       +        for(cur = tcb->reseq; cur != nil; cur = next){
       +                next = cur->next;
       +                freeblist(cur->bp);
       +                free(cur);
       +        }
       +        tcb->reseq = nil;
       +
       +        return -1;
       +}
       +
       +/*
       + *  Remove the first entry of tcb->reseq and hand its header, data
       + *  and length back through seg, bp and length.  With an empty
       + *  queue the three outputs are left untouched.
       + */
       +void
       +getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
       +{
       +        Reseq *head;
       +
       +        head = tcb->reseq;
       +        if(head != nil){
       +                tcb->reseq = head->next;
       +
       +                *seg = head->seg;
       +                *bp = head->bp;
       +                *length = head->length;
       +
       +                free(head);
       +        }
       +}
       +
       +/*
       + *  Fit a received segment to the receive window.
       + *
       + *  Returns 0 with *seg, *bp and *length adjusted when the segment
       + *  is acceptable: data (and a SYN) before rcv.nxt is dropped from
       + *  the front, data beyond rcv.nxt+rcv.wnd is cut from the end
       + *  (which also clears FIN), and the urgent pointer is adjusted to
       + *  match.  Returns -1 after freeing *bp when no part of the
       + *  segment is acceptable.  With a zero window only an empty segment
       + *  at exactly rcv.nxt is accepted.
       + */
       +int
       +tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
       +{
       +        ushort len;
       +        uchar ok;
       +        int ndup, nover;
       +
       +        /* sequence space occupied: data plus one each for SYN and FIN */
       +        len = *length;
       +        if(seg->flags & SYN)
       +                len++;
       +        if(seg->flags & FIN)
       +                len++;
       +
       +        ok = 0;
       +        if(tcb->rcv.wnd == 0) {
       +                if(len == 0 && seg->seq == tcb->rcv.nxt)
       +                        return 0;
       +        }
       +        else if(inwindow(tcb, seg->seq)) {
       +                /* Some part of the segment should be in the window */
       +                ok = 1;
       +        }
       +        else if(len != 0) {
       +                if(inwindow(tcb, seg->seq+len-1) ||
       +                seq_within(tcb->rcv.nxt, seg->seq, seg->seq+len-1))
       +                        ok = 1;
       +        }
       +        if(!ok) {
       +                freeblist(*bp);
       +                return -1;
       +        }
       +
       +        /* drop what we have already received from the front */
       +        ndup = tcb->rcv.nxt - seg->seq;
       +        if(ndup > 0){
       +                tcb->rerecv += ndup;
       +                if(seg->flags & SYN){
       +                        seg->flags &= ~SYN;
       +                        seg->seq++;
       +
       +                        if(seg->urg > 1)
       +                                seg->urg--;
       +                        else
       +                                seg->flags &= ~URG;
       +                        ndup--;
       +                }
       +                if(ndup > 0){
       +                        pullblock(bp, (ushort)ndup);
       +                        seg->seq += ndup;
       +                        *length -= ndup;
       +
       +                        if(seg->urg > ndup)
       +                                seg->urg -= ndup;
       +                        else {
       +                                seg->flags &= ~URG;
       +                                seg->urg = 0;
       +                        }
       +                }
       +        }
       +
       +        /* cut off what lies beyond the end of the window */
       +        nover = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd);
       +        if(nover > 0) {
       +                tcb->rerecv += nover;
       +                *length -= nover;
       +                *bp = trimblock(*bp, 0, *length);
       +                if(*bp == nil)
       +                        panic("presotto is a boofhead");
       +                seg->flags &= ~FIN;
       +        }
       +        return 0;
       +}
       +
       +/*
       + *  Apply an advisory message to the conversation a packet belongs to.
       + *
       + *  bp starts with an IPv4 or IPv6 TCP header.  The first non-Closed
       + *  conversation whose remote address/port equal the header's
       + *  destination and whose local address/port equal its source is
       + *  looked up; if it is in Syn_sent it is closed with msg.
       + *  bp is always freed.
       + */
       +void
       +tcpadvise(Proto *tcp, Block *bp, char *msg)
       +{
       +        Tcp4hdr *h4;
       +        Tcp6hdr *h6;
       +        Tcpctl *tcb;
       +        uchar source[IPaddrlen];
       +        uchar dest[IPaddrlen];
       +        ushort psource, pdest;
       +        Conv *s, **p;
       +
       +        h4 = (Tcp4hdr*)(bp->rp);
       +        h6 = (Tcp6hdr*)(bp->rp);
       +
       +        /* pull addresses and ports out of whichever header this is */
       +        if((h4->vihl&0xF0)==IP_VER4) {
       +                v4tov6(dest, h4->tcpdst);
       +                v4tov6(source, h4->tcpsrc);
       +                psource = nhgets(h4->tcpsport);
       +                pdest = nhgets(h4->tcpdport);
       +        }
       +        else {
       +                ipmove(dest, h6->tcpdst);
       +                ipmove(source, h6->tcpsrc);
       +                psource = nhgets(h6->tcpsport);
       +                pdest = nhgets(h6->tcpdport);
       +        }
       +
       +        /* Look for a connection */
       +        QLOCK(tcp);
       +        for(p = tcp->conv; *p; p++) {
       +                s = *p;
       +                tcb = (Tcpctl*)s->ptcl;
       +                if(s->rport != pdest || s->lport != psource)
       +                        continue;
       +                if(tcb->state == Closed)
       +                        continue;
       +                if(ipcmp(s->raddr, dest) != 0 || ipcmp(s->laddr, source) != 0)
       +                        continue;
       +
       +                /* found it: swap the protocol lock for the conversation's */
       +                QLOCK(s);
       +                QUNLOCK(tcp);
       +                if(tcb->state == Syn_sent)
       +                        localclose(s, msg);
       +                QUNLOCK(s);
       +                freeblist(bp);
       +                return;
       +        }
       +        QUNLOCK(tcp);
       +        freeblist(bp);
       +}
       +
       +/*
       + *  Set the global tcpporthogdefense flag from "on" or "off".
       + *  Returns nil on success, an error string for any other value.
       + */
       +static char*
       +tcpporthogdefensectl(char *val)
       +{
       +        if(strcmp(val, "on") == 0){
       +                tcpporthogdefense = 1;
       +                return nil;
       +        }
       +        if(strcmp(val, "off") == 0){
       +                tcpporthogdefense = 0;
       +                return nil;
       +        }
       +        return "unknown value for tcpporthogdefense";
       +}
       +
       +/*
       + *  Dispatch a TCP control request; f holds n fields, f[0] being the
       + *  request name.  Returns nil on success or an error string.
       + *
       + *        hangup                        (exactly one field)
       + *        keepalive [ms]
       + *        checksum n
       + *        tcpporthogdefense on|off
       + *
       + *  "checksum" and "tcpporthogdefense" read f[1], so they are
       + *  rejected when that field was not supplied.
       + *
       + *  called with c QLOCKed
       + */
       +char*
       +tcpctl(Conv* c, char** f, int n)
       +{
       +        if(n == 1 && strcmp(f[0], "hangup") == 0)
       +                return tcphangup(c);
       +        if(n >= 1 && strcmp(f[0], "keepalive") == 0)
       +                return tcpstartka(c, f, n);
       +        if(n >= 1 && strcmp(f[0], "checksum") == 0){
       +                if(n < 2)
       +                        return "control request needs an argument";
       +                return tcpsetchecksum(c, f, n);
       +        }
       +        if(n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0){
       +                if(n < 2)
       +                        return "control request needs an argument";
       +                return tcpporthogdefensectl(f[1]);
       +        }
       +        return "unknown control request";
       +}
       +
       +/*
       + *  Format the protocol's statistics into buf (at most len bytes),
       + *  one "name: value" line per counter.  Returns the distance from
       + *  buf to the position seprint stopped at.
       + */
       +int
       +tcpstats(Proto *tcp, char *buf, int len)
       +{
       +        Tcppriv *priv;
       +        char *p, *e;
       +        int i;
       +
       +        priv = tcp->priv;
       +        e = buf+len;
       +        p = buf;
       +        i = 0;
       +        while(i < Nstats){
       +                p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
       +                i++;
       +        }
       +        return p - buf;
       +}
       +
       +/*
       + *  garbage collect any stale conversations:
       + *        - SYN received but no SYN-ACK after 5 seconds (could be the SYN attack)
       + *        - Finwait2 after 5 minutes
       + *
       + *  this is called whenever we run out of channels.  Both checks are
       + *  of questionable validity so we try to use them only when we're
       + *  up against the wall.
       + *
       + *  Conversations whose lock cannot be taken immediately are
       + *  skipped.  Returns the number of conversations closed.
       + */
       +int
       +tcpgc(Proto *tcp)
       +{
       +        Conv **slot, **end;
       +        Conv *c;
       +        Tcpctl *tcb;
       +        int nclosed;
       +
       +        nclosed = 0;
       +        end = &tcp->conv[tcp->nc];
       +        /* the table is scanned up to its first empty slot */
       +        for(slot = tcp->conv; slot < end && (c = *slot) != nil; slot++) {
       +                if(!CANQLOCK(c))
       +                        continue;
       +                tcb = (Tcpctl*)c->ptcl;
       +                switch(tcb->state){
       +                case Syn_received:
       +                        if(NOW - tcb->time > 5000){
       +                                localclose(c, "timed out");
       +                                nclosed++;
       +                        }
       +                        break;
       +                case Finwait2:
       +                        if(NOW - tcb->time > 5*60*1000){
       +                                localclose(c, "timed out");
       +                                nclosed++;
       +                        }
       +                        break;
       +                }
       +                QUNLOCK(c);
       +        }
       +        return nclosed;
       +}
       +
       +/*
       + *  Recompute the retransmission timer period (in ticks) from the
       + *  back-off level, mdev and the smoothed round trip time, and store
       + *  it in tcb->timer.start.
       + */
       +void
       +tcpsettimer(Tcpctl *tcb)
       +{
       +        int x;
       +
       +        /* round trip dependency */
       +        x = backoff(tcb->backoff) * (tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
       +
       +        /* bounded twixt 1/2 and 64 seconds */
       +        if(x > (64000/MSPTICK))
       +                x = 64000/MSPTICK;
       +        else if(x < 500/MSPTICK)
       +                x = 500/MSPTICK;
       +
       +        tcb->timer.start = x;
       +}
       +
       +/*
       + *  Allocate the TCP Proto and its private data, fill in the
       + *  protocol's entry points and sizes, and register it with the
       + *  IP stack instance fs.
       + */
       +void
       +tcpinit(Fs *fs)
       +{
       +        Proto *tcp;
       +        Tcppriv *tpriv;
       +
       +        tcp = smalloc(sizeof(Proto));
       +        tpriv = smalloc(sizeof(Tcppriv));
       +        tcp->priv = tpriv;
       +
       +        /* identity */
       +        tcp->name = "tcp";
       +        tcp->ipproto = IP_TCPPROTO;
       +
       +        /* entry points */
       +        tcp->connect = tcpconnect;
       +        tcp->announce = tcpannounce;
       +        tcp->ctl = tcpctl;
       +        tcp->state = tcpstate;
       +        tcp->create = tcpcreate;
       +        tcp->close = tcpclose;
       +        tcp->rcv = tcpiput;
       +        tcp->advise = tcpadvise;
       +        tcp->stats = tcpstats;
       +        tcp->inuse = tcpinuse;
       +        tcp->gc = tcpgc;
       +
       +        /* sizing */
       +        tcp->nc = scalednconv();
       +        tcp->ptclsize = sizeof(Tcpctl);
       +        tpriv->stats[MaxConn] = tcp->nc;
       +
       +        Fsproto(fs, tcp);
       +}
       +
       +/*
       + *  Record the window scale factors for a connection and size the
       + *  read queue to match.  A zero rcvscale turns scaling off in both
       + *  directions; otherwise the low 8 bits of each argument are kept
       + *  and the window becomes QMAX shifted by the send scale.
       + */
       +void
       +tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
       +{
       +        if(!rcvscale){
       +                tcb->rcv.scale = 0;
       +                tcb->snd.scale = 0;
       +                tcb->window = QMAX;
       +        } else {
       +                tcb->rcv.scale = rcvscale & 0xff;
       +                tcb->snd.scale = sndscale & 0xff;
       +                tcb->window = QMAX<<tcb->snd.scale;
       +        }
       +        qsetlimit(s->rq, tcb->window);
       +}
 (DIR) diff --git a/src/9vx/a/ip/tripmedium.c b/src/9vx/a/ip/tripmedium.c
       @@ -0,0 +1,398 @@
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +
       +#include "ip.h"
       +#include "trip.h"
       +
       +static void        tripread(void *a);        /* body of the "tripread" kproc started by tripbind */
       +static void        tripbind(Ipifc *ifc, int argc, char **argv);
       +static void        tripunbind(Ipifc *ifc);
       +static void        tripbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
       +static void        tripaddmulti(Ipifc *ifc, uchar*, uchar*);
       +static void        tripremmulti(Ipifc *ifc, uchar*, uchar*);
       +static void        tripaddroute(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);        /* flags, addr, mask, gate, route type */
       +static void        tripremroute(Ipifc *ifc, int, uchar*, uchar*);        /* flags, addr, mask */
       +static void        tripares(Fs*, int, uchar*, uchar*, int, int);
       +
       +Medium tripmedium =        /* Medium entry named "trip", wired to the trip* routines of this file */
       +{
       +.name=                "trip",
       +.mintu=        20,
       +.maxtu=        64*1024,
       +.maclen=        LCIMACSIZE,
       +.bind=                tripbind,        /* open the device file and start the reader */
       +.unbind=        tripunbind,
       +.bwrite=        tripbwrite,        /* hand one block to the device's bwrite */
       +.addmulti=        tripaddmulti,
       +.remmulti=        tripremmulti,
       +.addroute=        tripaddroute,        /* send a route add message via i2osend */
       +.remroute=        tripremroute,        /* send a route delete message via i2osend */
       +.ares=                tripares,
       +};
       +
       +typedef struct        Tripinfo Tripinfo;
       +struct Tripinfo        /* per-interface state; allocated in tripbind and kept in ifc->arg */
       +{
       +        Fs*        fs;                /* my instance of the IP stack */
       +        Ipifc*        ifc;                /* IP interface */
       +        Card*        dev;                /* value returned by tripsetifc() in tripbind */
       +        Proc*        readp;                /* reading process; tripread stores up here */
       +        Chan*        mchan;                /* Data channel */
       +};
       +
       +/*
       + *  called to bind an IP ifc to a trip device
       + *  called with ifc qlock'd
       + *
       + *  argv[2] names the device file to open, so at least three
       + *  arguments are required; fewer raise Ebadarg.  The file must
       + *  belong to the device whose devtab character is 'T', otherwise
       + *  Enoport is raised.  On success the per-interface Tripinfo is
       + *  stored in ifc->arg and the "tripread" kproc is started.
       + */
       +static void
       +tripbind(Ipifc *ifc, int argc, char **argv)
       +{
       +        int fd;
       +        Chan *mchan;
       +        Tripinfo *er;
       +
       +        /* argv[2] is used below */
       +        if(argc < 3)
       +                error(Ebadarg);
       +
       +        fd = kopen(argv[2], ORDWR);
       +        if(fd < 0)
       +                error("trip open failed");
       +
       +        /* keep the channel, drop the descriptor */
       +        mchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);
       +        kclose(fd);
       +
       +        if(devtab[mchan->type]->dc != 'T') {
       +                cclose(mchan);
       +                error(Enoport);
       +        }
       +
       +        er = smalloc(sizeof(*er));
       +        er->mchan = mchan;
       +        er->ifc = ifc;
       +        er->dev = tripsetifc(mchan, ifc);
       +        er->fs = ifc->conv->p->f;
       +
       +        ifc->arg = er;
       +
       +        kproc("tripread", tripread, ifc);
       +}
       +
       +/*
       + *  Undo tripbind: pause, close the data channel and free the
       + *  per-interface state.
       + *  called with ifc qlock'd
       + *
       + *  NOTE(review): the reader kproc is not notified (the postnote
       + *  below is disabled) and ifc->arg is not cleared here -- confirm
       + *  that nothing touches the freed Tripinfo afterwards.
       + */
       +static void
       +tripunbind(Ipifc *ifc)
       +{
       +        Tripinfo *ti;
       +
       +        ti = ifc->arg;
       +
       +        /*
       +         * disabled:
       +         *        if(er->readp)
       +         *                postnote(er->readp, 1, "unbind", 0);
       +         */
       +        tsleep(&up->sleep, return0, 0, 300);
       +
       +        if(ti->mchan != nil)
       +                cclose(ti->mchan);
       +
       +        free(ti);
       +}
       +
       +/*
       + *  called by ipoput with a single block to write
       + *
       + *  The block goes straight to the bwrite routine of the data
       + *  channel's device.  If that raises an error a message is printed
       + *  and the output counter is not advanced.
       + */
       +static void
       +tripbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
       +{
       +        Tripinfo *ti;
       +        Chan *ch;
       +
       +        /*
       +         * Packet is rerouted at linecard
       +         * so the gateway is ignored
       +         */
       +        USED(version);
       +        USED(ip);
       +
       +        ti = ifc->arg;
       +
       +        if(waserror()) {
       +                print("tripwrite failed\n");
       +                return;
       +        }
       +
       +        ch = ti->mchan;
       +        devtab[ch->type]->bwrite(ch, bp, 0);
       +        poperror();
       +        ifc->out++;
       +}
       +
       +/*
       + *  process to read from the trip interface
       + *
       + *  a is the Ipifc.  Loops forever: read one block of up to
       + *  ifc->maxtu bytes from the data channel, count it and pass it to
       + *  ipiput4.  The loop has no exit, so the pexit after it is not
       + *  reached by normal control flow.
       + */
       +static void
       +tripread(void *a)
       +{
       +        Ipifc *ifc;
       +        Tripinfo *ti;
       +        Block *pkt;
       +
       +        ifc = a;
       +        ti = ifc->arg;
       +        ti->readp = up;        /* hide identity under a rock for unbind */
       +
       +        while(1) {
       +                pkt = devtab[ti->mchan->type]->bread(ti->mchan, ifc->maxtu, 0);
       +                ifc->in++;
       +                ipiput4(ti->fs, ifc, pkt);
       +        }
       +
       +        pexit("hangup", 1);
       +}
       +
       +/*
       + *  Send a route-add message (T_ROUTEADMIN, RTADD4 or RTADD6) for
       + *  addr/mask/gate with route type t to the interface's card.
       + *  v selects the address family through its Rv4 bit.  Nothing is
       + *  sent when the card is not routing or addr is multicast.
       + *
       + *  NOTE(review): msg is not cleared first, so any bytes of
       + *  addr/mask/gate beyond the copied length hold whatever was on the
       + *  stack -- confirm the receiver only reads the copied part.
       + */
       +static void
       +tripaddroute(Ipifc *ifc, int v, uchar *addr, uchar *mask, uchar *gate, int t)
       +{
       +        int nbytes;
       +        MTroute msg;
       +        Tripinfo *ti;
       +
       +        ti = ifc->arg;
       +
       +        /*
       +         * Multicast addresses are handled on the linecard by
       +         * the multicast port driver, so the route load is dumped.
       +         *        loaded by addmulti/remmulti for SBC routes
       +         *                  joinmulti/leavemulti for inter LC
       +         */
       +        if(!ti->dev->routing || ipismulticast(addr))
       +                return;
       +
       +        msg.type = T_ROUTEADMIN;
       +        msg.rtype = t;
       +        if(!(v & Rv4)) {
       +                msg.op = RTADD6;
       +                nbytes = IPaddrlen;
       +        }
       +        else {
       +                msg.op = RTADD4;
       +                nbytes = IPv4addrlen;
       +        }
       +        memmove(msg.addr, addr, nbytes);
       +        memmove(msg.mask, mask, nbytes);
       +        memmove(msg.gate, gate, nbytes);
       +
       +        i2osend(ti->dev, &msg, sizeof(msg));
       +}
       +
       +/*
       + *  Send a route-delete message (T_ROUTEADMIN, RTDEL4 or RTDEL6)
       + *  for addr/mask to the interface's card.  v selects the address
       + *  family through its Rv4 bit.  Nothing is sent when the card is
       + *  not routing or addr is multicast.
       + *
       + *  NOTE(review): msg.rtype and msg.gate are never assigned and msg
       + *  is not cleared, so those bytes go out as found on the stack --
       + *  confirm the receiver ignores them for deletes.
       + */
       +static void
       +tripremroute(Ipifc *ifc, int v, uchar *addr, uchar *mask)
       +{
       +        int nbytes;
       +        MTroute msg;
       +        Tripinfo *ti;
       +
       +        ti = ifc->arg;
       +        if(!ti->dev->routing || ipismulticast(addr))
       +                return;
       +
       +        msg.type = T_ROUTEADMIN;
       +        if(!(v & Rv4)) {
       +                msg.op = RTDEL6;
       +                nbytes = IPaddrlen;
       +        }
       +        else {
       +                msg.op = RTDEL4;
       +                nbytes = IPv4addrlen;
       +        }
       +        memmove(msg.addr, addr, nbytes);
       +        memmove(msg.mask, mask, nbytes);
       +
       +        i2osend(ti->dev, &msg, sizeof(msg));
       +}
       +
       +/*
       + *  Route-walk callback: convert route r with convroute and pass it
       + *  to tripaddroute for the interface kept in rw->state.  For Rv4
       + *  routes the address, mask and gateway are passed starting at
       + *  offset IPv4off.
       + */
       +static void
       +tripxmitroute(Route *r, Routewalk *rw)
       +{
       +        int nifc;
       +        char t[5];
       +        uchar a[IPaddrlen], m[IPaddrlen], g[IPaddrlen];
       +
       +        convroute(r, a, m, g, t, &nifc);
       +        if(r->type & Rv4)
       +                tripaddroute(rw->state, Rv4, a+IPv4off, m+IPv4off, g+IPv4off, r->type);
       +        else
       +                tripaddroute(rw->state, 0, a, m, g, r->type);
       +}
       +
       +/*
       + *  Tell the card behind dest about every other IP interface of the
       + *  same stack: one T_CTLIFADMIN message (IFADD4 or IFADD6) is sent
       + *  per local address of each interface.  port is taken from the
       + *  other interface's card when it too uses tripmedium, else it is 0.
       + *
       + *  NOTE(review): msg is reused without being cleared, so after an
       + *  IPv6 address a following IPv4 one leaves the tail of addr/mask
       + *  as it was -- confirm the receiver only reads the IPv4 part.
       + */
       +static void
       +sendifcinfo(Ipifc *dest)
       +{
       +        Conv **cp, **end;
       +        Iplifc *lifc;
       +        Ipifc *ifc;
       +        MTifctl msg;
       +        Tripinfo *mine, *other;
       +        Proto *ipifc;
       +
       +        mine = dest->arg;
       +
       +        /* Install interfaces */
       +        ipifc = mine->fs->ipifc;
       +        end = &ipifc->conv[ipifc->nc];
       +        for(cp = ipifc->conv; cp < end; cp++) {
       +                if(*cp == nil)
       +                        continue;
       +
       +                ifc = (Ipifc*)(*cp)->ptcl;
       +                if(ifc == dest)
       +                        continue;
       +
       +                msg.type = T_CTLIFADMIN;
       +                msg.maxtu = ifc->maxtu;
       +                msg.mintu = ifc->mintu;
       +
       +                if(ifc->m == &tripmedium) {
       +                        other = ifc->arg;
       +                        msg.port = other->dev->bar[0].bar;
       +                }
       +                else
       +                        msg.port = 0;
       +
       +                lifc = ifc->lifc;
       +                while(lifc != nil) {
       +                        if(!isv4(lifc->local)) {
       +                                msg.op = IFADD6;
       +                                memmove(msg.addr, lifc->local, sizeof(msg.addr));
       +                                memmove(msg.mask, lifc->mask, sizeof(msg.mask));
       +                        }
       +                        else {
       +                                msg.op = IFADD4;
       +                                memmove(msg.addr, lifc->local+IPv4off, IPv4addrlen);
       +                                memmove(msg.mask, lifc->mask+IPv4off, IPv4addrlen);
       +                        }
       +
       +                        i2osend(mine->dev, &msg, sizeof(msg));
       +                        lifc = lifc->next;
       +                }
       +        }
       +}
       +
       +/*
       + * Bring a line card up to date: mirror the route table into
       + * it, then announce the interfaces that already exist elsewhere.
       + */
       +void
       +tripsync(Ipifc *ifc)
       +{
       +        Routewalk walker;
       +
       +        if(ifc != nil) {
       +                /* Mirror the route table into the linecard */
       +                walker.walk = tripxmitroute;
       +                walker.state = ifc;
       +                walker.o = 0;
       +                walker.n = (1<<22);
       +                ipwalkroutes(ifc->conv->p->f, &walker);
       +
       +                /* Tell the linecard about interfaces that already exist elsewhere */
       +                sendifcinfo(ifc);
       +                return;
       +        }
       +
       +        print("tripsync: interface not bound\n");
       +}
       +
       +/*
       + * Tell a line card the SBC is interested in listening
       + * to a multicast address.  Nothing is sent unless the
       + * card's dev->routing flag is set.
       + */
       +static void
       +tripaddmulti(Ipifc *ifc, uchar *addr, uchar *ifca)
       +{
       +        Tripinfo *tinfo = ifc->arg;
       +        MTmultiears msg;
       +
       +        /* print("tripaddmulti %I %I\n", addr, ifca); /**/
       +
       +        if(tinfo->dev->routing) {
       +                msg.type = T_MULTIEAR;
       +                msg.op = ADDMULTI;
       +                memmove(msg.addr, addr, sizeof(msg.addr));
       +                memmove(msg.ifca, ifca, sizeof(msg.ifca));
       +
       +                i2osend(tinfo->dev, &msg, sizeof(msg));
       +        }
       +}
       +
       +/*
       + * Tell a line card the SBC is no longer interested in listening
       + * to a multicast address.  Nothing is sent unless the
       + * card's dev->routing flag is set.
       + */
       +static void
       +tripremmulti(Ipifc *ifc, uchar *addr, uchar *ifca)
       +{
       +        Tripinfo *tinfo = ifc->arg;
       +        MTmultiears msg;
       +
       +        if(tinfo->dev->routing) {
       +                msg.type = T_MULTIEAR;
       +                msg.op = REMMULTI;
       +                memmove(msg.addr, addr, sizeof(msg.addr));
       +                memmove(msg.ifca, ifca, sizeof(msg.ifca));
       +
       +                i2osend(tinfo->dev, &msg, sizeof(msg));
       +        }
       +}
       +
       +/*
       + * Send one address-resolution entry (ip -> mac, l bytes of mac) to the
       + * line card reached through the route for ip.  vers is V4 or V6; the
       + * card is always given the 16-byte v6 form of the address.
       + */
       +static void
       +tripares(Fs *fs, int vers, uchar *ip, uchar *mac, int l, int)
       +{
       +        Route *r;
       +        Ipifc *ifc;
       +        MTaresenter ta;
       +        Tripinfo *tinfo;
       +        uchar v6ip[IPaddrlen];
       +
       +        if(vers == V4) {
       +                r = v4lookup(fs, ip);
       +                /* ip is in v6 form from here on; converting again would read v6ip's prefix */
       +                v4tov6(v6ip, ip);
       +                ip = v6ip;
       +        }
       +        else
       +                r = v6lookup(fs, ip);
       +
       +        if(r == nil) {
       +                print("tripares: no route for entry\n");
       +                return;
       +        }
       +
       +        ifc = r->ifc;
       +        tinfo = ifc->arg;        /* NOTE(review): assumes r->ifc is a trip interface - confirm */
       +        if(!tinfo->dev->routing)
       +                return;
       +
       +        ta.type = T_ARESENTER;
       +        ta.maclen = l;
       +        memmove(ta.addr, ip, IPaddrlen);
       +        memmove(ta.amac, mac, l);
       +
       +        i2osend(tinfo->dev, &ta, sizeof(ta));
       +}
       +
       +void
       +tripmediumlink(void)
       +{
       +        addipmedium(&tripmedium);        /* hand tripmedium to addipmedium(); presumably registers it - confirm */
       +}
 (DIR) diff --git a/src/9vx/a/ip/udp.c b/src/9vx/a/ip/udp.c
       @@ -0,0 +1,619 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +
       +#include        "ip.h"
       +#include        "ipv6.h"
       +
       +
       +#define DPRINT if(0)print
       +
       +enum
       +{
       +        UDP_UDPHDR_SZ        = 8,        /* sport, dport, len, cksum: 2 bytes each */
       +
       +        UDP4_PHDR_OFF = 8,        /* offset of Udp4hdr.Unused, where the v4 pseudo header starts */
       +        UDP4_PHDR_SZ = 12,        /* Udp4hdr.Unused through Udp4hdr.udpdst */
       +        UDP4_IPHDR_SZ = 20,        /* bytes of Udp4hdr before udpsport */
       +        UDP6_IPHDR_SZ = 40,        /* bytes of Udp6hdr before udpsport (with IPaddrlen 16) */
       +        UDP6_PHDR_SZ = 40,        /* the whole v6 header doubles as pseudo header */
       +        UDP6_PHDR_OFF = 0,
       +
       +        IP_UDPPROTO        = 17,
       +        UDP_USEAD7        = 52,        /* "headers" prefix: 3 addresses + 2 ports (assumes IPaddrlen is 16) */
       +
       +        Udprxms                = 200,        /* the next three are not referenced in this file */
       +        Udptickms        = 100,
       +        Udpmaxxmit        = 10,
       +};
       +
       +typedef struct Udp4hdr Udp4hdr;
       +struct Udp4hdr
       +{
       +        /* ip header */
       +        uchar        vihl;                /* Version and header length */
       +        uchar        tos;                /* Type of service */
       +        uchar        length[2];        /* packet length */
       +        uchar        id[2];                /* Identification */
       +        uchar        frag[2];        /* Fragment information */
       +        uchar        Unused;                /* zeroed while checksumming; udpiput saves and restores it as ottl */
       +        uchar        udpproto;        /* Protocol */
       +        uchar        udpplen[2];        /* UDP header plus data length while checksumming (pseudo header) */
       +        uchar        udpsrc[IPv4addrlen];        /* Ip source */
       +        uchar        udpdst[IPv4addrlen];        /* Ip destination */
       +
       +        /* udp header */
       +        uchar        udpsport[2];        /* Source port */
       +        uchar        udpdport[2];        /* Destination port */
       +        uchar        udplen[2];        /* UDP header plus data length */
       +        uchar        udpcksum[2];        /* Checksum */
       +};
       +
       +typedef struct Udp6hdr Udp6hdr;
       +struct Udp6hdr {
       +        uchar viclfl[4];        /* first byte carries IP_VER6; holds the UDP length while checksumming */
       +        uchar len[2];                /* set to UDP header plus data length on output */
       +        uchar nextheader;        /* IP_UDPPROTO in the real header */
       +        uchar hoplimit;                /* holds IP_UDPPROTO while checksumming (pseudo header) */
       +        uchar udpsrc[IPaddrlen];
       +        uchar udpdst[IPaddrlen];
       +
       +        /* udp header */
       +        uchar        udpsport[2];        /* Source port */
       +        uchar        udpdport[2];        /* Destination port */
       +        uchar        udplen[2];        /* UDP header plus data length */
       +        uchar        udpcksum[2];        /* Checksum */
       +};
       +
       +/* MIB II counters, reported by udpstats() */
       +typedef struct Udpstats Udpstats;
       +struct Udpstats
       +{
       +        ulong        udpInDatagrams;                /* every packet entering udpiput */
       +        ulong        udpNoPorts;                /* no conversation matched */
       +        ulong        udpInErrors;                /* checksum failures */
       +        ulong        udpOutDatagrams;        /* packets sent by udpkick */
       +};
       +
       +typedef struct Udppriv Udppriv;
       +struct Udppriv
       +{
       +        Ipht                ht;        /* conversation table used with iphtadd/iphtlook/iphtrem */
       +
       +        /* MIB counters */
       +        Udpstats        ustats;
       +
       +        /* non-MIB stats */
       +        ulong                csumerr;                /* checksum errors (never updated in this file) */
       +        ulong                lenerr;                        /* short packet: trimblock failed in udpiput */
       +};
       +
       +void (*etherprofiler)(char *name, int qlen);
       +void udpkick(void *x, Block *bp);
       +
       +/*
       + *  protocol specific part of Conv (reached through c->ptcl)
       + */
       +typedef struct Udpcb Udpcb;
       +struct Udpcb
       +{
       +        QLock        qlock;                /* not referenced in this file */
       +        uchar        headers;        /* 0, or 7 once the "headers" ctl was given; reset by udpclose */
       +};
       +
       +/* connect: standard connect, then enter the conv in the lookup table */
       +static char*
       +udpconnect(Conv *c, char **argv, int argc)
       +{
       +        Udppriv *priv;
       +        char *err;
       +
       +        priv = c->p->priv;
       +        err = Fsstdconnect(c, argv, argc);
       +        Fsconnected(c, err);        /* called on failure too, with the error string */
       +
       +        if(err == nil)
       +                iphtadd(&priv->ht, c);
       +        return err;
       +}
       +
       +
       +static int
       +udpstate(Conv *c, char *state, int n)
       +{
       +        int nin, nout;        /* queue lengths; 0 when the queue is missing */
       +
       +        nin = c->rq ? qlen(c->rq) : 0;
       +        nout = c->wq ? qlen(c->wq) : 0;
       +        return snprint(state, n, "%s qin %d qout %d\n", c->inuse ? "Open" : "Closed", nin, nout);
       +}
       +
       +/* announce: standard announce; on success mark connected and index the conv */
       +static char*
       +udpannounce(Conv *c, char** argv, int argc)
       +{
       +        Udppriv *priv;
       +        char *err;
       +
       +        priv = c->p->priv;
       +        err = Fsstdannounce(c, argv, argc);
       +        if(err == nil) {
       +                Fsconnected(c, nil);
       +                iphtadd(&priv->ht, c);
       +        }
       +        return err;
       +}
       +
       +static void
       +udpcreate(Conv *c)
       +{
       +        c->rq = qopen(128*1024, Qmsg, 0, 0);        /* read queue, filled by udpiput via qpass */
       +        c->wq = qbypass(udpkick, c);        /* write side: blocks are handed to udpkick with c */
       +}
       +
       +/* close: drop the conv from the lookup table and reset its state */
       +static void
       +udpclose(Conv *c)
       +{
       +        Udppriv *priv = c->p->priv;
       +        Udpcb *cb = (Udpcb*)c->ptcl;
       +
       +        iphtrem(&priv->ht, c);
       +        c->state = 0;
       +
       +        qclose(c->rq);
       +        qclose(c->wq);
       +        qclose(c->eq);
       +
       +        /* forget both endpoints */
       +        ipmove(c->laddr, IPnoaddr);
       +        ipmove(c->raddr, IPnoaddr);
       +        c->lport = c->rport = 0;
       +
       +        cb->headers = 0;
       +}
       +
       +void        /* write routine given to qbypass() by udpcreate: wrap bp in UDP/IP headers and send it */
       +udpkick(void *x, Block *bp)
       +{
       +        Conv *c = x;
       +        Udp4hdr *uh4;
       +        Udp6hdr *uh6;
       +        ushort rport;
       +        uchar laddr[IPaddrlen], raddr[IPaddrlen];
       +        Udpcb *ucb;
       +        int dlen, ptcllen;
       +        Udppriv *upriv;
       +        Fs *f;
       +        int version;
       +        Conv *rc;
       +
       +        upriv = c->p->priv;
       +        f = c->p->f;
       +
       +        netlog(c->p->f, Logudp, "udp: kick\n");
       +        if(bp == nil)
       +                return;
       +
       +        ucb = (Udpcb*)c->ptcl;
       +        switch(ucb->headers) {        /* 7: data is preceded by a UDP_USEAD7-byte address prefix */
       +        case 7:
       +                /* get user specified addresses: raddr, laddr, ifc addr, rport, lport */
       +                bp = pullupblock(bp, UDP_USEAD7);
       +                if(bp == nil)
       +                        return;
       +                ipmove(raddr, bp->rp);
       +                bp->rp += IPaddrlen;
       +                ipmove(laddr, bp->rp);
       +                bp->rp += IPaddrlen;
       +                /* pick interface closest to dest */
       +                if(ipforme(f, laddr) != Runi)
       +                        findlocalip(f, laddr, raddr);
       +                bp->rp += IPaddrlen;                /* Ignore ifc address */
       +                rport = nhgets(bp->rp);
       +                bp->rp += 2+2;                        /* Ignore local port */
       +                break;
       +        default:
       +                rport = 0;        /* laddr/raddr stay unset; they are only read when headers != 0 */
       +                break;
       +        }
       +
       +        if(ucb->headers) {        /* choose v4 or v6 from the user supplied local address ... */
       +                if(memcmp(laddr, v4prefix, IPv4off) == 0
       +                || ipcmp(laddr, IPnoaddr) == 0)
       +                        version = 4;
       +                else
       +                        version = 6;
       +        } else {        /* ... or from the conversation's own addresses */
       +                if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
       +                        memcmp(c->laddr, v4prefix, IPv4off) == 0)
       +                        || ipcmp(c->raddr, IPnoaddr) == 0)
       +                        version = 4;
       +                else
       +                        version = 6;
       +        }
       +
       +        dlen = blocklen(bp);
       +
       +        /* fill in pseudo header and compute checksum */
       +        switch(version){        /* NOTE(review): relies on V4 == 4 and V6 == 6 - confirm in ip.h */
       +        case V4:
       +                bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
       +                if(bp == nil)
       +                        return;
       +
       +                uh4 = (Udp4hdr *)(bp->rp);
       +                ptcllen = dlen + UDP_UDPHDR_SZ;
       +                uh4->Unused = 0;
       +                uh4->udpproto = IP_UDPPROTO;
       +                uh4->frag[0] = 0;
       +                uh4->frag[1] = 0;
       +                hnputs(uh4->udpplen, ptcllen);
       +                if(ucb->headers) {
       +                        v6tov4(uh4->udpdst, raddr);
       +                        hnputs(uh4->udpdport, rport);
       +                        v6tov4(uh4->udpsrc, laddr);
       +                        rc = nil;
       +                } else {
       +                        v6tov4(uh4->udpdst, c->raddr);
       +                        hnputs(uh4->udpdport, c->rport);
       +                        if(ipcmp(c->laddr, IPnoaddr) == 0)
       +                                findlocalip(f, c->laddr, c->raddr);
       +                        v6tov4(uh4->udpsrc, c->laddr);
       +                        rc = c;
       +                }
       +                hnputs(uh4->udpsport, c->lport);
       +                hnputs(uh4->udplen, ptcllen);
       +                uh4->udpcksum[0] = 0;        /* checksum field must be zero while summing */
       +                uh4->udpcksum[1] = 0;
       +                hnputs(uh4->udpcksum,
       +                       ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
       +                uh4->vihl = IP_VER4;
       +                ipoput4(f, bp, 0, c->ttl, c->tos, rc);
       +                break;
       +
       +        case V6:
       +                bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
       +                if(bp == nil)
       +                        return;
       +
       +                /*
       +                 * using the v6 ip header to create pseudo header
       +                 * first then reset it to the normal ip header
       +                 */
       +                uh6 = (Udp6hdr *)(bp->rp);
       +                memset(uh6, 0, 8);
       +                ptcllen = dlen + UDP_UDPHDR_SZ;
       +                hnputl(uh6->viclfl, ptcllen);
       +                uh6->hoplimit = IP_UDPPROTO;
       +                if(ucb->headers) {
       +                        ipmove(uh6->udpdst, raddr);
       +                        hnputs(uh6->udpdport, rport);
       +                        ipmove(uh6->udpsrc, laddr);
       +                        rc = nil;
       +                } else {
       +                        ipmove(uh6->udpdst, c->raddr);
       +                        hnputs(uh6->udpdport, c->rport);
       +                        if(ipcmp(c->laddr, IPnoaddr) == 0)
       +                                findlocalip(f, c->laddr, c->raddr);
       +                        ipmove(uh6->udpsrc, c->laddr);
       +                        rc = c;
       +                }
       +                hnputs(uh6->udpsport, c->lport);
       +                hnputs(uh6->udplen, ptcllen);
       +                uh6->udpcksum[0] = 0;
       +                uh6->udpcksum[1] = 0;
       +                hnputs(uh6->udpcksum,
       +                       ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
       +                memset(uh6, 0, 8);        /* wipe the pseudo header fields, then write the real ones */
       +                uh6->viclfl[0] = IP_VER6;
       +                hnputs(uh6->len, ptcllen);
       +                uh6->nextheader = IP_UDPPROTO;
       +                ipoput6(f, bp, 0, c->ttl, c->tos, rc);
       +                break;
       +
       +        default:
       +                panic("udpkick: version %d", version);
       +        }
       +        upriv->ustats.udpOutDatagrams++;
       +}
       +
       +/* Receive one UDP packet: verify its checksum, find or create the conv, queue the data on c->rq */
       +void
       +udpiput(Proto *udp, Ipifc *ifc, Block *bp)
       +{
       +        int len;
       +        Udp4hdr *uh4;
       +        Udp6hdr *uh6;
       +        Conv *c;
       +        Udpcb *ucb;
       +        uchar raddr[IPaddrlen], laddr[IPaddrlen];
       +        ushort rport, lport;
       +        Udppriv *upriv;
       +        Fs *f;
       +        int version;
       +        int ottl, oviclfl, olen;
       +        uchar *p;
       +
       +        upriv = udp->priv;
       +        f = udp->f;
       +        upriv->ustats.udpInDatagrams++;
       +
       +        uh4 = (Udp4hdr*)(bp->rp);
       +        version = ((uh4->vihl&0xF0)==IP_VER6) ? 6 : 4;
       +
       +        /* Put back pseudo header for checksum
       +         * (remember old values for icmpnoconv()) */
       +        switch(version) {
       +        case V4:
       +                ottl = uh4->Unused;
       +                uh4->Unused = 0;
       +                len = nhgets(uh4->udplen);
       +                olen = nhgets(uh4->udpplen);
       +                hnputs(uh4->udpplen, len);
       +
       +                v4tov6(raddr, uh4->udpsrc);
       +                v4tov6(laddr, uh4->udpdst);
       +                lport = nhgets(uh4->udpdport);
       +                rport = nhgets(uh4->udpsport);
       +
       +                if(nhgets(uh4->udpcksum)) {        /* a zero checksum field is not verified */
       +                        if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) {
       +                                upriv->ustats.udpInErrors++;
       +                                netlog(f, Logudp, "udp: checksum error %I\n", raddr);
       +                                DPRINT("udp: checksum error %I\n", raddr);
       +                                freeblist(bp);
       +                                return;
       +                        }
       +                }
       +                uh4->Unused = ottl;
       +                hnputs(uh4->udpplen, olen);
       +                break;
       +        case V6:
       +                uh6 = (Udp6hdr*)(bp->rp);
       +                len = nhgets(uh6->udplen);
       +                oviclfl = nhgetl(uh6->viclfl);
       +                olen = nhgets(uh6->len);
       +                ottl = uh6->hoplimit;
       +                ipmove(raddr, uh6->udpsrc);
       +                ipmove(laddr, uh6->udpdst);
       +                lport = nhgets(uh6->udpdport);
       +                rport = nhgets(uh6->udpsport);
       +                memset(uh6, 0, 8);
       +                hnputl(uh6->viclfl, len);
       +                uh6->hoplimit = IP_UDPPROTO;
       +                if(ptclcsum(bp, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) {        /* always verified for v6 */
       +                        upriv->ustats.udpInErrors++;
       +                        netlog(f, Logudp, "udp: checksum error %I\n", raddr);
       +                        DPRINT("udp: checksum error %I\n", raddr);
       +                        freeblist(bp);
       +                        return;
       +                }
       +                hnputl(uh6->viclfl, oviclfl);
       +                hnputs(uh6->len, olen);
       +                uh6->nextheader = IP_UDPPROTO;
       +                uh6->hoplimit = ottl;
       +                break;
       +        default:
       +                panic("udpiput: version %d", version);
       +                return;        /* to avoid a warning */
       +        }
       +
       +        QLOCK(udp);
       +
       +        c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
       +        if(c == nil){
       +                /* no conversation found */
       +                upriv->ustats.udpNoPorts++;
       +                QUNLOCK(udp);
       +                netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
       +                       laddr, lport);
       +
       +                switch(version){
       +                case V4:
       +                        icmpnoconv(f, bp);
       +                        break;
       +                case V6:
       +                        icmphostunr(f, ifc, bp, Icmp6_port_unreach, 0);
       +                        break;
       +                default:
       +                        panic("udpiput2: version %d", version);
       +                }
       +
       +                freeblist(bp);
       +                return;
       +        }
       +        ucb = (Udpcb*)c->ptcl;
       +
       +        if(c->state == Announced){
       +                if(ucb->headers == 0){
       +                        /* create a new conversation */
       +                        if(ipforme(f, laddr) != Runi) {
       +                                switch(version){
       +                                case V4:
       +                                        ipmove(laddr, ifc->lifc->local);        /* local is already 16-byte; v4tov6 here read its prefix */
       +                                        break;
       +                                case V6:
       +                                        ipmove(laddr, ifc->lifc->local);
       +                                        break;
       +                                default:
       +                                        panic("udpiput3: version %d", version);
       +                                }
       +                        }
       +                        c = Fsnewcall(c, raddr, rport, laddr, lport, version);
       +                        if(c == nil){
       +                                QUNLOCK(udp);
       +                                freeblist(bp);
       +                                return;
       +                        }
       +                        iphtadd(&upriv->ht, c);
       +                        ucb = (Udpcb*)c->ptcl;
       +                }
       +        }
       +
       +        QLOCK(c);        /* take the conv lock before releasing the protocol lock */
       +        QUNLOCK(udp);
       +
       +        /*
       +         * Trim the packet down to data size
       +         */
       +        len -= UDP_UDPHDR_SZ;
       +        switch(version){
       +        case V4:
       +                bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len);
       +                break;
       +        case V6:
       +                bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len);
       +                break;
       +        default:
       +                bp = nil;
       +                panic("udpiput4: version %d", version);
       +        }
       +        if(bp == nil){
       +                QUNLOCK(c);
       +                netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport,
       +                       laddr, lport);
       +                upriv->lenerr++;
       +                return;
       +        }
       +
       +        netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport,
       +               laddr, lport, len);
       +
       +        switch(ucb->headers){
       +        case 7:
       +                /* pass the src address: raddr, laddr, ifc addr, rport, lport */
       +                bp = padblock(bp, UDP_USEAD7);
       +                p = bp->rp;
       +                ipmove(p, raddr); p += IPaddrlen;
       +                ipmove(p, laddr); p += IPaddrlen;
       +                ipmove(p, ifc->lifc->local); p += IPaddrlen;
       +                hnputs(p, rport); p += 2;
       +                hnputs(p, lport);
       +                break;
       +        }
       +
       +        if(bp->next)
       +                bp = concatblock(bp);
       +
       +        if(qfull(c->rq)){        /* reader is not keeping up: drop the packet */
       +                QUNLOCK(c);
       +                netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
       +                       laddr, lport);
       +                freeblist(bp);
       +                return;
       +        }
       +
       +        qpass(c->rq, bp);
       +        QUNLOCK(c);
       +}
       +
       +/* ctl requests: only the single word "headers" is understood;
       + * it switches the conversation to header mode 7 */
       +char*
       +udpctl(Conv *c, char **f, int n)
       +{
       +        Udpcb *cb;
       +
       +        if(n != 1 || strcmp(f[0], "headers") != 0)
       +                return "unknown control request";
       +
       +        cb = (Udpcb*)c->ptcl;
       +        cb->headers = 7;        /* new headers format */
       +        return nil;
       +}
       +
       +/* Hang up (with msg) the conv whose local end equals the source in bp's headers
       + * and whose remote end equals the destination, unless it ignores advice; frees bp */
       +void
       +udpadvise(Proto *udp, Block *bp, char *msg)
       +{
       +        Udp4hdr *h4;
       +        Udp6hdr *h6;
       +        uchar src[IPaddrlen], dst[IPaddrlen];
       +        ushort sport, dport;
       +        Conv *s, **cp;
       +        int version;
       +
       +        h4 = (Udp4hdr*)(bp->rp);
       +        version = ((h4->vihl&0xF0)==IP_VER6) ? 6 : 4;
       +
       +        switch(version) {
       +        case V6:
       +                h6 = (Udp6hdr*)(bp->rp);
       +                ipmove(dst, h6->udpdst);
       +                ipmove(src, h6->udpsrc);
       +                sport = nhgets(h6->udpsport);
       +                dport = nhgets(h6->udpdport);
       +                break;
       +        case V4:
       +                v4tov6(dst, h4->udpdst);
       +                v4tov6(src, h4->udpsrc);
       +                sport = nhgets(h4->udpsport);
       +                dport = nhgets(h4->udpdport);
       +                break;
       +        default:
       +                panic("udpadvise: version %d", version);
       +                return;  /* to avoid a warning */
       +        }
       +
       +        QLOCK(udp);        /* Look for a connection */
       +        for(cp = udp->conv; *cp != nil; cp++) {
       +                s = *cp;
       +                if(s->rport != dport || s->lport != sport)
       +                        continue;
       +                if(ipcmp(s->raddr, dst) != 0 || ipcmp(s->laddr, src) != 0)
       +                        continue;
       +                if(s->ignoreadvice)
       +                        break;
       +                QLOCK(s);
       +                QUNLOCK(udp);
       +                qhangup(s->rq, msg);
       +                qhangup(s->wq, msg);
       +                QUNLOCK(s);
       +                freeblist(bp);
       +                return;
       +        }
       +        QUNLOCK(udp);
       +        freeblist(bp);
       +}
       +
       +/* Format the four MIB counters into buf (at most len bytes);
       + * the return value is snprint's */
       +int
       +udpstats(Proto *udp, char *buf, int len)
       +{
       +        Udpstats *st;
       +
       +        st = &((Udppriv*)udp->priv)->ustats;
       +        return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
       +                st->udpInDatagrams, st->udpNoPorts,
       +                st->udpInErrors, st->udpOutDatagrams);
       +}
       +
       +/* Allocate the UDP Proto, fill in its entry points and hand it to Fsproto */
       +void
       +udpinit(Fs *fs)
       +{
       +        Proto *p;
       +
       +        p = smalloc(sizeof(Proto));
       +        p->priv = smalloc(sizeof(Udppriv));
       +        p->name = "udp";
       +        p->ipproto = IP_UDPPROTO;
       +        p->nc = Nchans;
       +        p->ptclsize = sizeof(Udpcb);
       +        p->create = udpcreate;
       +        p->connect = udpconnect;
       +        p->announce = udpannounce;
       +        p->close = udpclose;
       +        p->ctl = udpctl;
       +        p->state = udpstate;
       +        p->rcv = udpiput;
       +        p->advise = udpadvise;
       +        p->stats = udpstats;
       +        Fsproto(fs, p);
       +}
 (DIR) diff --git a/src/9vx/a/kfs.h b/src/9vx/a/kfs.h
       @@ -0,0 +1,57 @@
       +typedef struct Qid9p1 Qid9p1;
       +typedef struct Dentry Dentry;
       +typedef struct Kfsfile Kfsfile;
       +typedef struct Kfs Kfs;
       +
       +/* DONT TOUCH, this is the disk structure (embedded in Dentry as qid) */
       +struct        Qid9p1
       +{
       +        long        path;
       +        long        version;
       +};
       +
       +#define        NAMELEN                28                /* size of names */
       +#define        NDBLOCK                6                /* number of direct blocks in Dentry */
       +
       +/* DONT TOUCH, this is the disk structure: one directory entry */
       +struct        Dentry
       +{
       +        char        name[NAMELEN];
       +        short        uid;
       +        short        gid;
       +        ushort        mode;        /* bit values are listed in the comment below */
       +/*
       +                #define        DALLOC        0x8000
       +                #define        DDIR        0x4000
       +                #define        DAPND        0x2000
       +                #define        DLOCK        0x1000
       +                #define        DREAD        0x4
       +                #define        DWRITE        0x2
       +                #define        DEXEC        0x1
       +*/
       +        Qid9p1        qid;
       +        long        size;
       +        long        dblock[NDBLOCK];        /* direct blocks */
       +        long        iblock;                /* presumably the indirect block - TODO confirm */
       +        long        diblock;        /* presumably the double indirect block - TODO confirm */
       +        long        atime;
       +        long        mtime;
       +};
       +
       +struct Kfsfile
       +{
       +        Dentry _;        /* the file's directory entry, embedded by value */
       +        long off;        /* NOTE(review): presumably where the entry lives on disk - confirm against users */
       +};
       +
       +struct Kfs        /* NOTE(review): presumably filled in by kfsinit(); no values are set in this header */
       +{
       +        int        RBUFSIZE;
       +        int        BUFSIZE;
       +        int        DIRPERBUF;
       +        int        INDPERBUF;
       +        int        INDPERBUF2;
       +};
       +
       +extern int kfsinit(Fs*);
       +
 (DIR) diff --git a/src/9vx/a/netif.c b/src/9vx/a/netif.c
       @@ -0,0 +1,761 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +#include        "error.h"
       +#include        "netif.h"
       +
       +static int netown(Netfile*, char*, int);
       +static int openfile(Netif*, int);
       +static char* matchtoken(char*, char*);
       +static char* netmulti(Netif*, Netfile*, uchar*, int);
       +static int parseaddr(uchar*, char*, int);
       +
       +int        netifdebug;
       +#define        dprint(...)        if(netifdebug)print(__VA_ARGS__); else USED(netifdebug)
       +
       +/*
       + *  set up a new network interface with a table of nfile file slots
       + */
       +void
       +netifinit(Netif *nif, char *name, int nfile, ulong limit)
       +{
       +        strncpy(nif->name, name, KNAMELEN-1);        /* longer names are truncated */
       +        nif->name[KNAMELEN-1] = 0;        /* strncpy does not terminate when it truncates */
       +        nif->nfile = nfile;
       +        nif->f = xalloc(nfile*sizeof(Netfile*));        /* one Netfile pointer per slot */
       +        if (nif->f == nil)
       +                panic("netifinit: no memory");
       +        nif->limit = limit;
       +}
       +
       +#define DD(c,q,nam,n,owner,perm,dp) dprint("%lux.%llux %s\n", q.type, q.path, nam); devdir(c,q,nam,n,owner,perm,dp)
       +
       +/*
       + *  generate a 3 level directory
       + *
       + *  level 1 (qid path 0) holds one directory named after the interface,
       + *  level 2 holds clone, addr, stats, ifstats and one numbered
       + *  directory per allocated conversation,
       + *  level 3 holds data, ctl, stats, type and ifstats of a conversation.
       + *
       + *  vp is really the Netif; i is the entry index (or DEVDOTDOT).
       + *  returns 1 when dp was filled in, 0 when slot i is empty and
       + *  should be skipped, -1 when i is past the end of the directory.
       + */
       +static int
       +netifgen(Chan *c, char *dummy, Dirtab *vp, int dummy1, int i, Dir *dp)
       +{
       +        Qid q;
       +        Netif *nif = (Netif*)vp;
       +        Netfile *f;
       +        int t, perm;
       +        char *o;
       +
       +        memset(&q, 0, sizeof q);
       +        q.type = QTFILE;
       +        q.vers = 0;
       +
       +        dprint("gen %d %llud %.2d        ", c->dri, c->qid.path, i);
       +        /* top level directory contains the name of the network */
       +        if(c->qid.path == 0){
       +                switch(i){
       +                case DEVDOTDOT:
       +                        q.path = 0;
       +                        q.type = QTDIR;
       +                        DD(c, q, ".", 0, eve, 0555, dp);
       +                        break;
       +                case 0:
       +                        q.path = N2ndqid;
       +                        q.type = QTDIR;
       +                        strcpy(up->genbuf, nif->name);
       +                        DD(c, q, up->genbuf, 0, eve, 0555, dp);
       +                        break;
       +                default:
       +                        dprint("-> -1 (top)\n");
       +                        return -1;
       +                }
       +                return 1;
       +        }
       +
       +        /* second level contains clone plus all the conversations */
       +        t = NETTYPE(c->qid.path);
       +        if(t == N2ndqid || t == Ncloneqid || t == Naddrqid || t == Nstatqid || t == Nifstatqid){
       +                switch(i){
       +                case DEVDOTDOT:
       +                        q.type = QTDIR;
       +                        q.path = 0;
       +                        DD(c, q, ".", 0, eve, DMDIR|0555, dp);
       +                        break;
       +                case 0:
       +                        q.path = Ncloneqid;
       +                        DD(c, q, "clone", 0, eve, 0666, dp);
       +                        break;
       +                case 1:
       +                        q.path = Naddrqid;
       +                        DD(c, q, "addr", 0, eve, 0666, dp);
       +                        break;
       +                case 2:
       +                        q.path = Nstatqid;
       +                        DD(c, q, "stats", 0, eve, 0444, dp);
       +                        break;
       +                case 3:
       +                        q.path = Nifstatqid;
       +                        DD(c, q, "ifstats", 0, eve, 0444, dp);
       +                        break;
       +                default:
       +                        /* entries 4 and up map onto conversation slots 0.. */
       +                        i -= 4;
       +                        if(i >= nif->nfile){
       +                                dprint("-> -1 (2d): %d %d\n", i, nif->nfile);
       +                                return -1;
       +                        }
       +                        /* slot never allocated: tell the caller to skip it */
       +                        if(nif->f[i] == 0){
       +                                dprint("nif->f[%d] -> 0\n", i);
       +                                return 0;
       +                        }
       +                        q.type = QTDIR;
       +                        q.path = NETQID(i, N3rdqid);
       +                        sprint(up->genbuf, "%d", i);
       +                        DD(c, q, up->genbuf, 0, eve, DMDIR|0555, dp);
       +                        break;
       +                }
       +                return 1;
       +        }
       +
       +        /* third level */
       +        f = nif->f[NETID(c->qid.path)];
       +        if(f == 0){
       +                dprint("->f 0\n");
       +                return -1;
       +        }
       +        /* data and ctl show the conversation's owner and mode; unowned ones show eve/0666 */
       +        if(*f->owner){
       +                o = f->owner;
       +                perm = f->mode;
       +        } else {
       +                o = eve;
       +                perm = 0666;
       +        }
       +        switch(i){
       +        case DEVDOTDOT:
       +                q.type = QTDIR;
       +                q.path = N2ndqid;
       +                strcpy(up->genbuf, nif->name);
       +                DD(c, q, up->genbuf, 0, eve, DMDIR|0555, dp);
       +                break;
       +        case 0:
       +                q.path = NETQID(NETID(c->qid.path), Ndataqid);
       +                DD(c, q, "data", 0, o, perm, dp);
       +                break;
       +        case 1:
       +                q.path = NETQID(NETID(c->qid.path), Nctlqid);
       +                DD(c, q, "ctl", 0, o, perm, dp);
       +                break;
       +        case 2:
       +                q.path = NETQID(NETID(c->qid.path), Nstatqid);
       +                DD(c, q, "stats", 0, eve, 0444, dp);
       +                break;
       +        case 3:
       +                q.path = NETQID(NETID(c->qid.path), Ntypeqid);
       +                DD(c, q, "type", 0, eve, 0444, dp);
       +                break;
       +        case 4:
       +                q.path = NETQID(NETID(c->qid.path), Nifstatqid);
       +                DD(c, q, "ifstats", 0, eve, 0444, dp);
       +                break;
       +        default:
       +                dprint("-> -1 (third)\n");
       +                return -1;
       +        }
       +        return 1;
       +}
       +
       +/*
       + *  debugging aid: when netifdebug is set, print the channel's
       + *  qid path and the space-separated names about to be walked
       + */
       +static void
       +prwalk(Netif *nif, Chan *c, Chan *nc, char **name, int nname)
       +{
       +        char line[512], *end, *w;
       +        int k;
       +
       +        if(!netifdebug)
       +                return;
       +
       +        end = line + sizeof line;
       +        w = line;
       +        k = 0;
       +        while(k < nname){
       +                w = seprint(w, end, "%s ", name[k]);
       +                k++;
       +        }
       +        /* step back over the last character written, if there was one */
       +        if(w != line)
       +                w--;
       +        *w = 0;
       +        print("netifwalk %lld [%s]\n", c->qid.path, line);
       +}
       +
       +/*
       + *  walk within the interface's directory tree: trace the request,
       + *  then let devwalk do the work with netifgen as the generator
       + */
       +Walkqid*
       +netifwalk(Netif *nif, Chan *c, Chan *nc, char **name, int nname)
       +{
       +        Walkqid *wq;
       +
       +        prwalk(nif, c, nc, name, nname);
       +        wq = devwalk(c, nc, name, nname, (Dirtab*)nif, 0, netifgen);
       +        return wq;
       +}
       +
       +/*
       + *  open a file in the interface's directory tree.
       + *
       + *  directories may only be opened OREAD (else Eperm).  opening
       + *  clone picks an unused conversation via openfile(nif, -1) and
       + *  turns the channel into that conversation's ctl file; opening
       + *  data or ctl takes a reference on the existing conversation.
       + *  any other file may only be opened OREAD (else Ebadarg).
       + *  returns c, marked open.
       + */
       +Chan*
       +netifopen(Netif *nif, Chan *c, int omode)
       +{
       +        int id;
       +        Netfile *f;
       +
       +        /*
       +         *  qid.path is printed with %llud, as netifgen does; c is
       +         *  dereferenced unconditionally below, so no nil test here
       +         */
       +        dprint("netifopen %p %llud\n", nif, c->qid.path);
       +        id = 0;
       +        if(c->qid.type & QTDIR){
       +                if(omode != OREAD)
       +                        error(Eperm);
       +        } else {
       +                switch(NETTYPE(c->qid.path)){
       +                case Ndataqid:
       +                case Nctlqid:
       +                        id = NETID(c->qid.path);
       +                        openfile(nif, id);
       +                        break;
       +                case Ncloneqid:
       +                        id = openfile(nif, -1);
       +                        c->qid.path = NETQID(id, Nctlqid);
       +                        break;
       +                default:
       +                        if(omode != OREAD)
       +                                error(Ebadarg);
       +                }
       +                /* the path may have just been rewritten by the clone case */
       +                switch(NETTYPE(c->qid.path)){
       +                case Ndataqid:
       +                case Nctlqid:
       +                        f = nif->f[id];
       +                        /*
       +                         *  NOTE(review): on this failure the inuse count
       +                         *  taken by openfile above is not given back
       +                         */
       +                        if(netown(f, up->user, omode&7) < 0)
       +                                error(Eperm);
       +                        break;
       +                }
       +        }
       +        c->mode = openmode(omode);
       +        c->flag |= COPEN;
       +        c->offset = 0;
       +        c->iounit = qiomaxatomic;
       +        return c;
       +}
       +
       +/*
       + *  read from a file in the interface's directory tree.
       + *
       + *  directories are generated by netifgen.  data reads come from the
       + *  conversation's input queue, ctl yields the conversation number,
       + *  stats a text summary of the interface counters, addr the
       + *  interface address in hex, type the conversation's packet type.
       + *  ifstats reads as empty here.  anything else raises Ebadarg.
       + *  returns the number of bytes placed in a.
       + */
       +long
       +netifread(Netif *nif, Chan *c, void *a, long n, ulong offset)
       +{
       +        int i, j;
       +        Netfile *f;
       +        char *p;
       +
       +        /* qid.path is printed with %llud, as netifgen does */
       +        dprint("netifread %llud %lud\n", c->qid.path, NETTYPE(c->qid.path));
       +        if(c->qid.type&QTDIR)
       +                return devdirread(c, a, n, (Dirtab*)nif, 0, netifgen);
       +
       +        switch(NETTYPE(c->qid.path)){
       +        case Ndataqid:
       +                f = nif->f[NETID(c->qid.path)];
       +                return qread(f->in, a, n);
       +        case Nctlqid:
       +                return readnum(offset, a, n, NETID(c->qid.path), NUMSIZE);
       +        case Nstatqid:
       +                dprint("netstatqid\n");
       +                p = smalloc(READSTR);
       +                j = snprint(p, READSTR, "in: %llud\n", nif->inpackets);
       +                j += snprint(p+j, READSTR-j, "link: %d\n", nif->link);
       +                j += snprint(p+j, READSTR-j, "out: %llud\n", nif->outpackets);
       +                j += snprint(p+j, READSTR-j, "crc errs: %d\n", nif->crcs);
       +                j += snprint(p+j, READSTR-j, "overflows: %d\n", nif->overflows);
       +                j += snprint(p+j, READSTR-j, "soft overflows: %d\n", nif->soverflows);
       +                j += snprint(p+j, READSTR-j, "framing errs: %d\n", nif->frames);
       +                j += snprint(p+j, READSTR-j, "buffer errs: %d\n", nif->buffs);
       +                j += snprint(p+j, READSTR-j, "output errs: %d\n", nif->oerrs);
       +                j += snprint(p+j, READSTR-j, "prom: %d\n", nif->prom);
       +                j += snprint(p+j, READSTR-j, "mbps: %d\n", nif->mbps);
       +                j += snprint(p+j, READSTR-j, "addr: ");
       +                for(i = 0; i < nif->alen; i++)
       +                        j += snprint(p+j, READSTR-j, "%2.2ux", nif->addr[i]);
       +                snprint(p+j, READSTR-j, "\n");
       +                n = readstr(offset, a, n, p);
       +                free(p);
       +                return n;
       +        case Naddrqid:
       +                /*
       +                 *  allocate the same way as the stats case rather than
       +                 *  using an unchecked malloc result
       +                 */
       +                p = smalloc(READSTR);
       +                p[0] = 0;        /* a valid empty string even when alen is 0 */
       +                j = 0;
       +                for(i = 0; i < nif->alen; i++)
       +                        j += snprint(p+j, READSTR-j, "%2.2ux", nif->addr[i]);
       +                n = readstr(offset, a, n, p);
       +                free(p);
       +                return n;
       +        case Ntypeqid:
       +                f = nif->f[NETID(c->qid.path)];
       +                return readnum(offset, a, n, f->type, NUMSIZE);
       +        case Nifstatqid:
       +                return 0;
       +        }
       +        error(Ebadarg);
       +        return -1;        /* not reached */
       +}
       +
       +/*
       + *  block read: a conversation's data file is served straight from
       + *  its input queue, everything else goes through devbread
       + */
       +Block*
       +netifbread(Netif *nif, Chan *c, long n, ulong offset)
       +{
       +        Netfile *f;
       +
       +        if(!(c->qid.type & QTDIR) && NETTYPE(c->qid.path) == Ndataqid){
       +                f = nif->f[NETID(c->qid.path)];
       +                return qbread(f->in, n);
       +        }
       +        return devbread(c, n, offset);
       +}
       +
       +/*
       + *  report whether some allocated conversation on this device is
       + *  already connected to the given type; types <= 0 never count
       + */
       +static int
       +typeinuse(Netif *nif, int type)
       +{
       +        int slot;
       +
       +        if(type <= 0)
       +                return 0;
       +
       +        for(slot = 0; slot < nif->nfile; slot++)
       +                if(nif->f[slot] != 0 && nif->f[slot]->type == type)
       +                        return 1;
       +        return 0;
       +}
       +
       +/*
       + *  the devxxx.c that calls us handles writing data, it knows best
       + *
       + *  here only ctl files are accepted (anything else raises Eperm).
       + *  recognised commands: connect, promiscuous, scanbs, bridge,
       + *  headersonly, addmulti, remmulti.  returns n (clamped to the size
       + *  of the local buffer) on success, or -1 for an unrecognised
       + *  command so the caller can try its own.
       + */
       +long
       +netifwrite(Netif *nif, Chan *c, void *a, long n)
       +{
       +        Netfile *f;
       +        int type;
       +        char *p, buf[64];
       +        uchar binaddr[Nmaxaddr];
       +
       +        if(NETTYPE(c->qid.path) != Nctlqid)
       +                error(Eperm);
       +
       +        /* work on a terminated private copy; longer requests are truncated */
       +        if(n >= sizeof(buf))
       +                n = sizeof(buf)-1;
       +        memmove(buf, a, n);
       +        buf[n] = 0;
       +
       +        /* any error() raised below releases the interface lock on the way out */
       +        if(waserror()){
       +                QUNLOCK(nif);
       +                nexterror();
       +        }
       +
       +        QLOCK(nif);
       +        f = nif->f[NETID(c->qid.path)];
       +        if((p = matchtoken(buf, "connect")) != 0){
       +                type = atoi(p);
       +                if(typeinuse(nif, type))
       +                        error(Einuse);
       +                f->type = type;
       +                /* negative types are counted in nif->all */
       +                if(f->type < 0)
       +                        nif->all++;
       +        } else if(matchtoken(buf, "promiscuous")){
       +                if(f->prom == 0){
       +                        /* only the first promiscuous conversation calls the driver hook */
       +                        if(nif->prom == 0 && nif->promiscuous != nil)
       +                                nif->promiscuous(nif->arg, 1);
       +                        f->prom = 1;
       +                        nif->prom++;
       +                }
       +        } else if((p = matchtoken(buf, "scanbs")) != 0){
       +                /* scan for base stations */
       +                if(f->scan == 0){
       +                        type = atoi(p);
       +                        /* the argument is raised to at least 5 */
       +                        if(type < 5)
       +                                type = 5;
       +                        if(nif->scanbs != nil)
       +                                nif->scanbs(nif->arg, type);
       +                        f->scan = type;
       +                        nif->scan++;
       +                }
       +        } else if(matchtoken(buf, "bridge")){
       +                f->bridge = 1;
       +        } else if(matchtoken(buf, "headersonly")){
       +                f->headersonly = 1;
       +        } else if((p = matchtoken(buf, "addmulti")) != 0){
       +                if(parseaddr(binaddr, p, nif->alen) < 0)
       +                        error("bad address");
       +                p = netmulti(nif, f, binaddr, 1);
       +                if(p)
       +                        error(p);
       +        } else if((p = matchtoken(buf, "remmulti")) != 0){
       +                if(parseaddr(binaddr, p, nif->alen) < 0)
       +                        error("bad address");
       +                p = netmulti(nif, f, binaddr, 0);
       +                if(p)
       +                        error(p);
       +        } else
       +                n = -1;
       +        QUNLOCK(nif);
       +        poperror();
       +        return n;
       +}
       +
       +/*
       + *  change the owner and/or mode of a conversation.
       + *
       + *  raises Enonexist if the conversation slot is empty, Eperm if
       + *  netown refuses the caller write access, Eshortstat if the
       + *  stat buffer does not convert.  returns what convM2D returned.
       + */
       +int
       +netifwstat(Netif *nif, Chan *c, uchar *db, int n)
       +{
       +        Dir *dir;
       +        Netfile *f;
       +        int m;
       +
       +        f = nif->f[NETID(c->qid.path)];
       +        if(f == 0)
       +                error(Enonexist);
       +
       +        if(netown(f, up->user, OWRITE) < 0)
       +                error(Eperm);
       +
       +        /* the converted strings are placed right after the Dir itself */
       +        dir = smalloc(sizeof(Dir)+n);
       +        m = convM2D(db, n, &dir[0], (char*)&dir[1]);
       +        if(m == 0){
       +                free(dir);
       +                error(Eshortstat);
       +        }
       +        if(!emptystr(dir[0].uid)){
       +                /*
       +                 *  the uid comes off the wire and may be long: bound the
       +                 *  copy and always terminate, as netifinit does for names,
       +                 *  because netifgen later uses owner as a string
       +                 */
       +                strncpy(f->owner, dir[0].uid, KNAMELEN-1);
       +                f->owner[KNAMELEN-1] = 0;
       +        }
       +        /* ~0 means "leave the mode alone" */
       +        if(dir[0].mode != ~0UL)
       +                f->mode = dir[0].mode;
       +        free(dir);
       +        return m;
       +}
       +
       +/*
       + *  stat a file in the interface's tree: trace the request, then
       + *  let devstat do the work with netifgen as the generator
       + */
       +int
       +netifstat(Netif *nif, Chan *c, uchar *db, int n)
       +{
       +        int len;
       +
       +        dprint("netifstat %s nfile %d %lld type=%d\n", nif->name, nif->nfile, c->qid.path, c->type);
       +        len = devstat(c, db, n, (Dirtab*)nif, 0, netifgen);
       +        return len;
       +}
       +
       +/*
       + *  close a data or ctl file.  closing anything else, or a channel
       + *  that was never opened, does nothing.
       + *
       + *  when the last reference to the conversation goes away, undo
       + *  everything it asked of the interface (promiscuous mode, base
       + *  station scanning, multicast addresses, the nif->all count),
       + *  clear its owner and settings, and close its input queue.
       + */
       +void
       +netifclose(Netif *nif, Chan *c)
       +{
       +        Netfile *f;
       +        int t;
       +        Netaddr *ap;
       +
       +        if((c->flag & COPEN) == 0)
       +                return;
       +
       +        t = NETTYPE(c->qid.path);
       +        if(t != Ndataqid && t != Nctlqid)
       +                return;
       +
       +        f = nif->f[NETID(c->qid.path)];
       +        QLOCK(f);
       +        if(--(f->inuse) == 0){
       +                if(f->prom){
       +                        QLOCK(nif);
       +                        if(--(nif->prom) == 0 && nif->promiscuous != nil)
       +                                nif->promiscuous(nif->arg, 0);
       +                        QUNLOCK(nif);
       +                        f->prom = 0;
       +                }
       +                if(f->scan){
       +                        QLOCK(nif);
       +                        if(--(nif->scan) == 0 && nif->scanbs != nil)
       +                                nif->scanbs(nif->arg, 0);
       +                        QUNLOCK(nif);
       +                        f->scan = 0;
       +                }
       +                if(f->nmaddr){
       +                        QLOCK(nif);
       +                        /*
       +                         *  bit t of f->maddr belongs to the t'th address on
       +                         *  nif->maddr (that is how netmulti numbers them), so
       +                         *  t has to advance with ap; netmulti only tracks
       +                         *  the first 8*sizeof(f->maddr) addresses per file
       +                         */
       +                        t = 0;
       +                        for(ap = nif->maddr; ap; ap = ap->next){
       +                                if(t >= 8*sizeof(f->maddr))
       +                                        break;
       +                                if(f->maddr[t/8] & (1<<(t%8)))
       +                                        netmulti(nif, f, ap->addr, 0);
       +                                t++;
       +                        }
       +                        QUNLOCK(nif);
       +                        f->nmaddr = 0;
       +                }
       +                if(f->type < 0){
       +                        QLOCK(nif);
       +                        --(nif->all);
       +                        QUNLOCK(nif);
       +                }
       +                f->owner[0] = 0;
       +                /* tracing only: stay silent unless netifdebug is set */
       +                dprint("drop type %.4ux\n", f->type);
       +                f->type = 0;
       +                f->bridge = 0;
       +                f->headersonly = 0;
       +                qclose(f->in);
       +        }
       +        QUNLOCK(f);
       +}
       +
       +Lock netlock;
       +
       +/*
       + *  check user o against the conversation's owner and mode for the
       + *  access requested in omode.  an unowned conversation is simply
       + *  given to o with mode 0660.  returns 0 if allowed, -1 if not.
       + */
       +static int
       +netown(Netfile *p, char *o, int omode)
       +{
       +        static int access[] = { 0400, 0200, 0600, 0100 };
       +        int bits, want, rv;
       +
       +        lock(&netlock);
       +        if(*p->owner == 0){
       +                /* nobody owns it yet: the caller takes it */
       +                strncpy(p->owner, o, KNAMELEN);
       +                p->mode = 0660;
       +                unlock(&netlock);
       +                return 0;
       +        }
       +
       +        /* shift the relevant permission triple up into the owner position */
       +        if(strncmp(o, p->owner, KNAMELEN) == 0)
       +                bits = p->mode;                        /* User */
       +        else if(strncmp(o, eve, KNAMELEN) == 0)
       +                bits = p->mode<<3;                /* Bootes is group */
       +        else
       +                bits = p->mode<<6;                /* Other */
       +
       +        want = access[omode&3];
       +        rv = -1;
       +        if((want & bits) == want)
       +                rv = 0;
       +        unlock(&netlock);
       +        return rv;
       +}
       +
       +/*
       + *  Increment the reference count of a network device.
       + *  If id < 0, return an unused ether device.
       + *
       + *  With id >= 0 the existing conversation's queue is reopened and
       + *  its inuse count bumped; Enodev is raised if the slot is empty.
       + *  With id < 0 the slots are scanned in order for one that is
       + *  empty (a Netfile and queue are then created) or not in use;
       + *  the caller becomes its owner.  Enodev is raised if none is free.
       + *  Returns the slot number.
       + */
       +static int
       +openfile(Netif *nif, int id)
       +{
       +        Netfile *f, **fp, **efp;
       +
       +        if(id >= 0){
       +                f = nif->f[id];
       +                if(f == 0)
       +                        error(Enodev);
       +                QLOCK(f);
       +                qreopen(f->in);
       +                f->inuse++;
       +                QUNLOCK(f);
       +                return id;
       +        }
       +
       +        QLOCK(nif);
       +        /* errors raised during the scan must not leave the interface locked */
       +        if(waserror()){
       +                QUNLOCK(nif);
       +                nexterror();
       +        }
       +        efp = &nif->f[nif->nfile];
       +        for(fp = nif->f; fp < efp; fp++){
       +                f = *fp;
       +                if(f == 0){
       +                        /* NOTE(review): the new Netfile's fields are not cleared here; presumably malloc returns zeroed memory - confirm */
       +                        f = malloc(sizeof(Netfile));
       +                        if(f == 0)
       +                                exhausted("memory");
       +                        f->in = qopen(nif->limit, Qmsg, 0, 0);
       +                        if(f->in == nil){
       +                                free(f);
       +                                exhausted("memory");
       +                        }
       +                        *fp = f;
       +                        QLOCK(f);
       +                } else {
       +                        QLOCK(f);
       +                        if(f->inuse){
       +                                QUNLOCK(f);
       +                                continue;
       +                        }
       +                }
       +                /* f is locked here and free for the taking */
       +                f->inuse = 1;
       +                qreopen(f->in);
       +                netown(f, up->user, 0);
       +                QUNLOCK(f);
       +                QUNLOCK(nif);
       +                poperror();
       +                return fp - nif->f;
       +        }
       +        /* every slot is busy; the waserror handler above drops the lock */
       +        error(Enodev);
       +        return -1;        /* not reached */
       +}
       +
       +/*
       + *  check that string p begins with token followed by end of string
       + *  or white space (space, tab, newline).  returns a pointer to the
       + *  first character after that white space, or 0 if p doesn't match.
       + */
       +static char*
       +matchtoken(char *p, char *token)
       +{
       +        int len;
       +        char *rest;
       +
       +        len = strlen(token);
       +        if(strncmp(p, token, len) != 0)
       +                return 0;
       +
       +        rest = p + len;
       +        switch(*rest){
       +        case 0:
       +                /* the token is the whole string */
       +                return rest;
       +        case ' ':
       +        case '\t':
       +        case '\n':
       +                break;
       +        default:
       +                /* token is only a prefix of a longer word */
       +                return 0;
       +        }
       +        do
       +                rest++;
       +        while(*rest == ' ' || *rest == '\t' || *rest == '\n');
       +        return rest;
       +}
       +
       +/* store the 64-bit v at p, most significant byte first */
       +void
       +hnputv(void *p, uvlong v)
       +{
       +        uchar *hi, *lo;
       +
       +        hi = p;
       +        lo = hi + 4;
       +        hnputl(hi, v>>32);
       +        hnputl(lo, v);
       +}
       +
       +/* store the low 32 bits of v at p, most significant byte first */
       +void
       +hnputl(void *p, uint v)
       +{
       +        uchar *a;
       +        int k;
       +
       +        a = p;
       +        /* fill from the last byte backwards, peeling off 8 bits a time */
       +        for(k = 3; k >= 0; k--){
       +                a[k] = v;
       +                v >>= 8;
       +        }
       +}
       +
       +/* store the 16-bit v at p, most significant byte first */
       +void
       +hnputs(void *p, ushort v)
       +{
       +        uchar *a;
       +
       +        a = p;
       +        a[1] = v;
       +        a[0] = v>>8;
       +}
       +
       +/* fetch a 64-bit value stored at p most significant byte first */
       +uvlong
       +nhgetv(void *p)
       +{
       +        uchar *a;
       +
       +        a = p;
       +        /* shift as unsigned: a set top bit would overflow a signed vlong */
       +        return ((uvlong)nhgetl(a) << 32) | nhgetl(a+4);
       +}
       +
       +/* fetch a 32-bit value stored at p most significant byte first */
       +uint
       +nhgetl(void *p)
       +{
       +        uchar *a;
       +
       +        a = p;
       +        /* a[0] promotes to int; widen it to uint so bytes >= 0x80 don't overflow the shift */
       +        return ((uint)a[0]<<24)|(a[1]<<16)|(a[2]<<8)|(a[3]<<0);
       +}
       +
       +/* fetch a 16-bit value stored at p most significant byte first */
       +ushort
       +nhgets(void *p)
       +{
       +        uchar *a;
       +        int hi, lo;
       +
       +        a = p;
       +        hi = a[0];
       +        lo = a[1];
       +        return (hi<<8)|lo;
       +}
       +
       +/* map the len bytes at a onto a bucket number in [0, Nmhash) */
       +static ulong
       +hash(uchar *a, int len)
       +{
       +        ulong sum;
       +        int k;
       +
       +        sum = 0;
       +        for(k = 0; k < len; k++)
       +                sum = (sum << 1) + a[k];
       +        return sum%Nmhash;
       +}
       +
       +/*
       + *  return 1 if addr is on the interface's multicast hash chain
       + *  with a non-zero reference count, 0 otherwise.  only the first
       + *  chain entry matching addr is consulted.
       + */
       +int
       +activemulti(Netif *nif, uchar *addr, int alen)
       +{
       +        Netaddr *hp;
       +
       +        hp = nif->mhash[hash(addr, alen)];
       +        while(hp != nil && memcmp(addr, hp->addr, alen) != 0)
       +                hp = hp->hnext;
       +        if(hp != nil && hp->ref)
       +                return 1;
       +        return 0;
       +}
       +
       +/*
       + *  convert alen pairs of hex digits from the string from into
       + *  bytes at to; a ':' after a pair is skipped.  returns -1 if the
       + *  string ends before alen pairs have been read, 0 otherwise.
       + */
       +static int
       +parseaddr(uchar *to, char *from, int alen)
       +{
       +        char pair[4];
       +        int k;
       +
       +        for(k = 0; k < alen; k++){
       +                /* both digits of this byte must be present */
       +                if(from[0] == 0 || from[1] == 0)
       +                        return -1;
       +                pair[0] = from[0];
       +                pair[1] = from[1];
       +                pair[2] = 0;
       +                from += 2;
       +                to[k] = strtoul(pair, 0, 16);
       +                if(*from == ':')
       +                        from++;
       +        }
       +        return 0;
       +}
       +
       +/*
       + *  keep track of multicast addresses
       + *
       + *  add != 0 takes a reference to addr on behalf of file f, add == 0
       + *  drops one.  the interface's multicast hook is called when an
       + *  address's reference count goes 0 -> 1 or 1 -> 0.  each file
       + *  remembers its addresses as bits in f->maddr, indexed by the
       + *  address's position on the nif->maddr list.
       + *  returns an error string if the interface has no multicast
       + *  hook, otherwise 0.
       + */
       +static char*
       +netmulti(Netif *nif, Netfile *f, uchar *addr, int add)
       +{
       +        Netaddr **l, *ap;
       +        int i;
       +        ulong h;
       +
       +        if(nif->multicast == nil)
       +                return "interface does not support multicast";
       +
       +        /* find addr on the list; i ends up as its position, l as the link to it (or to the tail) */
       +        l = &nif->maddr;
       +        i = 0;
       +        for(ap = *l; ap; ap = *l){
       +                if(memcmp(addr, ap->addr, nif->alen) == 0)
       +                        break;
       +                i++;
       +                l = &ap->next;
       +        }
       +
       +        if(add){
       +                if(ap == 0){
       +                        /* new address: append to the list and push onto its hash chain */
       +                        *l = ap = smalloc(sizeof(*ap));
       +                        memmove(ap->addr, addr, nif->alen);
       +                        ap->next = 0;
       +                        ap->ref = 1;
       +                        h = hash(addr, nif->alen);
       +                        ap->hnext = nif->mhash[h];
       +                        nif->mhash[h] = ap;
       +                } else {
       +                        ap->ref++;
       +                }
       +                if(ap->ref == 1){
       +                        nif->nmaddr++;
       +                        nif->multicast(nif->arg, addr, 1);
       +                }
       +                /* positions beyond the bitmap are not recorded per file */
       +                if(i < 8*sizeof(f->maddr)){
       +                        if((f->maddr[i/8] & (1<<(i%8))) == 0)
       +                                f->nmaddr++;
       +                        f->maddr[i/8] |= 1<<(i%8);
       +                }
       +        } else {
       +                /* unknown or already unreferenced address: nothing to drop */
       +                if(ap == 0 || ap->ref == 0)
       +                        return 0;
       +                ap->ref--;
       +                /* the entry itself stays on the list, so positions of later entries don't move */
       +                if(ap->ref == 0){
       +                        nif->nmaddr--;
       +                        nif->multicast(nif->arg, addr, 0);
       +                }
       +                if(i < 8*sizeof(f->maddr)){
       +                        if((f->maddr[i/8] & (1<<(i%8))) != 0)
       +                                f->nmaddr--;
       +                        f->maddr[i/8] &= ~(1<<(i%8));
       +                }
       +        }
       +        return 0;
       +}
 (DIR) diff --git a/src/9vx/a/netif.h b/src/9vx/a/netif.h
       @@ -31,7 +31,7 @@ enum
         */
        struct Netfile
        {
       -        QLock lk;
       +        QLock        qlock; 
        
                int        inuse;
                ulong        mode;
       @@ -64,7 +64,7 @@ struct Netaddr
         */
        struct Netif
        {
       -        QLock lk;
       +        QLock        qlock;
        
                /* multiplexing */
                char        name[KNAMELEN];                /* for top level directory */
       @@ -87,8 +87,8 @@ struct Netif
        
                /* statistics */
                int        misses;
       -        int        inpackets;
       -        int        outpackets;
       +        uvlong        inpackets;
       +        uvlong        outpackets;
                int        crcs;                /* input crc errors */
                int        oerrs;                /* output errors */
                int        frames;                /* framing errors */
 (DIR) diff --git a/src/9vx/a/part.c b/src/9vx/a/part.c
       @@ -0,0 +1,341 @@
       +#include        "u.h"
       +#include        "lib.h"
       +#include        "mem.h"
       +#include        "dat.h"
       +#include        "fns.h"
       +
       +#include        "sd.h"
       +#include        "fs.h"
       +
       +enum {
       +        Npart = 32        /* the table parsers split at most Npart+1 lines */
       +};
       +
       +/* sector buffers: mbrbuf is read by mbrpart, partbuf by the plan 9 table parsers */
       +uchar *mbrbuf, *partbuf;
       +int nbuf;
       +/* constant 0, so the if(trace) diagnostic prints in this file never run */
       +#define trace 0
       +
       +/*
       + *  read into a through the unit's bio routine, at byte offset off
       + *  (converted to sectors) from the start of part.  when mbr is
       + *  non-zero the buffer must also end in the 0x55 0xAA signature at
       + *  offsets 0x1FE/0x1FF.  returns 0 on success, -1 if bio did not
       + *  return unit->secsize or the signature is missing.
       + */
       +int
       +tsdbio(SDunit *unit, SDpart *part, void *a, vlong off, int mbr)
       +{
       +        uchar *b;
       +
       +        /* NOTE(review): the 0, 0, ..., 1 arguments look like lun, read, one sector - confirm against the bio interface */
       +        if(unit->dev->ifc->bio(unit, 0, 0, a, 1, (off/unit->secsize) + part->start) != unit->secsize){
       +                if(trace)
       +                        print("%s: read %lud at %lld failed\n", unit->dev->name,
       +                                unit->secsize, (vlong)part->start*unit->secsize+off);
       +                return -1;
       +        }
       +        b = a;
       +        if(mbr && (b[0x1FE] != 0x55 || b[0x1FF] != 0xAA)){
       +                if(trace)
       +                        print("%s: bad magic %.2ux %.2ux at %lld\n",
       +                                unit->dev->name, b[0x1FE], b[0x1FF],
       +                                (vlong)part->start*unit->secsize+off);
       +                return -1;
       +        }
       +        return 0;
       +}
       +
       +/*
       + *  read partition table.  The partition table is just ascii strings.
       + *
       + *  The table is a sector starting with MAGIC, followed by lines of
       + *  "name start end".  It is looked for on the second to last sector
       + *  of the unit, then on the last.  If found, the sector holding it
       + *  is itself kept as a partition slot and every well-formed line is
       + *  handed to sdaddpart; parsing stops at the first malformed line.
       + */
       +#define MAGIC "plan9 partitions"
       +static void
       +oldp9part(SDunit *unit)
       +{
       +        SDpart *pp;
       +        char *field[3], *line[Npart+1];
       +        ulong n, start, end;
       +        int i;
       +
       +        /*
       +         *  We have some partitions already.
       +         */
       +        /* pp is the first unused slot; it only becomes a real partition if a table is found */
       +        pp = &unit->part[unit->npart];
       +
       +        /*
       +         * We prefer partition tables on the second to last sector,
       +         * but some old disks use the last sector instead.
       +         */
       +        pp->start = unit->sectors - 2;
       +        pp->end = unit->sectors - 1;
       +
       +        if(tsdbio(unit, pp, partbuf, 0, 0) < 0)
       +                return;
       +
       +        if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0) {
       +                /* not found on 2nd last sector; look on last sector */
       +                pp->start++;
       +                pp->end++;
       +                if(tsdbio(unit, pp, partbuf, 0, 0) < 0)
       +                        return;
       +                if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0)
       +                        return;
       +                print("%s: using old plan9 partition table on last sector\n", unit->dev->name);
       +        }else
       +                print("%s: using old plan9 partition table on 2nd-to-last sector\n", unit->dev->name);
       +
       +        /* we found a partition table, so add a partition partition */
       +        unit->npart++;
       +        /* make sure the sector is a terminated string before splitting it */
       +        partbuf[unit->secsize-1] = '\0';
       +
       +        /*
       +         * parse partition table
       +         */
       +        n = getfields((char*)partbuf, line, Npart+1, 0, "\n");
       +        if(n && strncmp(line[0], MAGIC, sizeof(MAGIC)-1) == 0){
       +                /* line 0 is the magic; stop when the unit's partition array is full */
       +                for(i = 1; i < n && unit->npart < SDnpart; i++){
       +                        if(getfields(line[i], field, 3, 0, " ") != 3)
       +                                break;
       +                        start = strtoull(field[1], 0, 0);
       +                        end = strtoull(field[2], 0, 0);
       +                        if(start >= end || end > unit->sectors)
       +                                break;
       +                        sdaddpart(unit, field[0], start, end);
       +                }
       +        }        
       +}
       +
       +/*
       + *  read the plan 9 partition table kept inside the partition called
       + *  name.  the table is read at byte offset unit->secsize within that
       + *  partition and consists of lines "part name start end", with
       + *  start and end relative to the enclosing partition.  each
       + *  well-formed line is handed to sdaddpart; parsing stops at the
       + *  first line that is not.  does nothing if name does not exist.
       + */
       +static void
       +p9part(SDunit *unit, char *name)
       +{
       +        SDpart *p;
       +        char *field[4], *line[Npart+1];
       +        uvlong start, end;
       +        int i, n;
       +        
       +        p = sdfindpart(unit, name);
       +        if(p == nil)
       +                return;
       +
       +        if(tsdbio(unit, p, partbuf, unit->secsize, 0) < 0)
       +                return;
       +        /* make sure the sector is a terminated string before splitting it */
       +        partbuf[unit->secsize-1] = '\0';
       +
       +        if(strncmp((char*)partbuf, "part ", 5) != 0)
       +                return;
       +
       +        n = getfields((char*)partbuf, line, Npart+1, 0, "\n");
       +        if(n == 0)
       +                return;
       +        for(i = 0; i < n /* && unit->npart < SDnpart */; i++){
       +                if(strncmp(line[i], "part ", 5) != 0)
       +                        break;
       +                if(getfields(line[i], field, 4, 0, " ") != 4)
       +                        break;
       +                start = strtoull(field[2], 0, 0);
       +                end = strtoull(field[3], 0, 0);
       +                if(start >= end || end > unit->sectors)
       +                        break;
       +                /* table entries are relative to the enclosing partition */
       +                sdaddpart(unit, field[1], p->start+start, p->start+end);
       +        }
       +}
       +
       +/* Return 1 if MBR partition type t is one of the FAT types, 0 otherwise. */
       +int
       +isdos(int t)
       +{
       +        if(t == FAT12 || t == FAT16 || t == FATHUGE)
       +                return 1;
       +        if(t == FAT32 || t == FAT32X)
       +                return 1;
       +        return 0;
       +}
       +
       +/* Return 1 if MBR partition type t is one of the extended types, 0 otherwise. */
       +int
       +isextend(int t)
       +{
       +        if(t == EXTEND)
       +                return 1;
       +        if(t == EXTHUGE || t == LEXTEND)
       +                return 1;
       +        return 0;
       +}
       +
       +/* 
       + * Fetch the first dos and all plan9 partitions out of the MBR partition table.
       + * We return -1 if we did not find a plan9 partition.
       + *
       + * taboffset is the sector number of the table currently in mbrbuf;
       + * dp points at the four table entries inside mbrbuf.  Each plan9
       + * partition is added as "plan9", "plan9.1", ... and handed to p9part
       + * to pick up the partitions described inside it.
       + */
       +static int
       +mbrpart(SDunit *unit)
       +{
       +        Dospart *dp;
       +        ulong taboffset, start, end;
       +        ulong firstxpart, nxtxpart;
       +        int havedos, i, nplan9;
       +        char name[10];
       +
       +        taboffset = 0;
       +        dp = (Dospart*)&mbrbuf[0x1BE];
       +        if(1) {
       +                /* get the MBR (allowing for DMDDO) */
       +                if(tsdbio(unit, &unit->part[0], mbrbuf, (vlong)taboffset*unit->secsize, 1) < 0)
       +                        return -1;
       +                for(i=0; i<4; i++)
       +                        if(dp[i].type == DMDDO) {
       +                                if(trace)
       +                                        print("DMDDO partition found\n");
       +                                /* the real table is taken from sector 63 instead */
       +                                taboffset = 63;
       +                                if(tsdbio(unit, &unit->part[0], mbrbuf, (vlong)taboffset*unit->secsize, 1) < 0)
       +                                        return -1;
       +                                i = -1;        /* start over */
       +                        }
       +        }
       +
       +        /*
       +         * Read the partitions, first from the MBR and then
       +         * from successive extended partition tables.
       +         * NOTE(review): nothing here stops a chain of extended
       +         * tables that links back to itself.
       +         */
       +        nplan9 = 0;
       +        havedos = 0;
       +        firstxpart = 0;
       +        for(;;) {
       +                if(tsdbio(unit, &unit->part[0], mbrbuf, (vlong)taboffset*unit->secsize, 1) < 0)
       +                        return -1;
       +                if(trace) {
       +                        if(firstxpart)
       +                                print("%s ext %lud ", unit->dev->name, taboffset);
       +                        else
       +                                print("%s mbr ", unit->dev->name);
       +                }
       +                nxtxpart = 0;
       +                for(i=0; i<4; i++) {
       +                        if(trace)
       +                                print("dp %d...", dp[i].type);
       +                        /* entry offsets are relative to the table that holds them */
       +                        start = taboffset+GLONG(dp[i].start);
       +                        end = start+GLONG(dp[i].len);
       +
       +                        if(dp[i].type == PLAN9) {
       +                                /*
       +                                 * The number of plan9 entries is decided by what
       +                                 * is on the disk, so keep the name inside name[].
       +                                 */
       +                                if(nplan9 == 0)
       +                                        strcpy(name, "plan9");
       +                                else
       +                                        snprint(name, sizeof name, "plan9.%d", nplan9);
       +                                sdaddpart(unit, name, start, end);
       +                                p9part(unit, name);
       +                                nplan9++;
       +                        }
       +
       +                        /*
       +                         * We used to take the active partition (and then the first
       +                         * when none are active).  We have to take the first here,
       +                         * so that the partition we call ``dos'' agrees with the
       +                         * partition disk/fdisk calls ``dos''. 
       +                         */
       +                        if(havedos==0 && isdos(dp[i].type)){
       +                                havedos = 1;
       +                                sdaddpart(unit, "dos", start, end);
       +                        }
       +
       +                        /* nxtxpart is relative to firstxpart (or 0), not taboffset */
       +                        if(isextend(dp[i].type)){
       +                                nxtxpart = start-taboffset+firstxpart;
       +                                if(trace)
       +                                        print("link %lud...", nxtxpart);
       +                        }
       +                }
       +                if(trace)
       +                        print("\n");
       +
       +                /* no extended entry in this table: the chain ends here */
       +                if(!nxtxpart)
       +                        break;
       +                if(!firstxpart)
       +                        firstxpart = nxtxpart;
       +                taboffset = nxtxpart;
       +        }
       +        return nplan9 ? 0 : -1;
       +}
       +
       +/*
       + * To facilitate booting from CDs, we create a partition for
       + * the boot floppy image embedded in a bootable CD.
       + *
       + * Only units with 2048-byte sectors are examined.  Sector 17 must
       + * carry the El Torito signature; the 32-bit little-endian value at
       + * offset 0x47 of that sector names the next sector to read, which is
       + * checked for a 01 00 00 00 header, 55 AA at offset 30 and 0x88 at
       + * offset 0x20.  The byte at 0x21 selects the image size (1200, 1440
       + * or 2880 KB) and the value at 0x28 is the image's first sector.
       + * Returns 0 after adding the "cdboot" partition, -1 otherwise.
       + */
       +static int
       +part9660(SDunit *unit)
       +{
       +        uchar buf[2048];
       +        ulong a, n;
       +        uchar *p;
       +
       +        if(unit->secsize != 2048)
       +                return -1;
       +
       +        if(unit->dev->ifc->bio(unit, 0, 0, buf, 2048/unit->secsize, (17*2048)/unit->secsize) < 0)
       +                return -1;
       +
       +        /*
       +         * The literal is split after \x01 on purpose: in ISO C a hex
       +         * escape swallows every following hex digit, so "\x01EL" would
       +         * be the single byte 0x1E followed by "L" and never match.
       +         */
       +        if(buf[0] || strcmp((char*)buf+1, "CD001\x01" "EL TORITO SPECIFICATION") != 0)
       +                return -1;
       +
       +        /* cast before shifting so the top byte cannot land in an int's sign bit */
       +        p = buf+0x47;
       +        a = p[0] | (p[1]<<8) | (p[2]<<16) | ((ulong)p[3]<<24);
       +
       +        if(unit->dev->ifc->bio(unit, 0, 0, buf, 2048/unit->secsize, (a*2048)/unit->secsize) < 0)
       +                return -1;
       +
       +        if(memcmp(buf, "\x01\x00\x00\x00", 4) != 0
       +        || memcmp(buf+30, "\x55\xAA", 2) != 0
       +        || buf[0x20] != 0x88)
       +                return -1;
       +
       +        p = buf+0x28;
       +        a = p[0] | (p[1]<<8) | (p[2]<<16) | ((ulong)p[3]<<24);
       +
       +        switch(buf[0x21]){
       +        case 0x01:
       +                n = 1200*1024;
       +                break;
       +        case 0x02:
       +                n = 1440*1024;
       +                break;
       +        case 0x03:
       +                n = 2880*1024;
       +                break;
       +        default:
       +                return -1;
       +        }
       +        n /= 2048;        /* image size in 2048-byte sectors */
       +
       +        print("found partition %s!cdboot; %lud+%lud\n", unit->dev->name, a, n);
       +        sdaddpart(unit, "cdboot", a, a+n);
       +        return 0;
       +}
       +
       +/* Bits selecting which partition table formats partition() tries. */
       +enum {
       +        NEW = 1<<0,        /* tried with mbrpart() */
       +        OLD = 1<<1        /* tried with oldp9part() */
       +};
       +
       +/*
       + * Discover the partitions of unit.  A unit without a partition array
       + * is left alone, and a unit on which part9660 succeeds needs nothing
       + * more.  Otherwise the shared sector buffers are grown to the unit's
       + * sector size if necessary and the table formats selected by the
       + * (currently fixed) choice "new" are tried.
       + */
       +void
       +partition(SDunit *unit)
       +{
       +        char *kind;
       +        int which;
       +
       +        if(unit->part == 0)
       +                return;
       +        if(part9660(unit) == 0)
       +                return;
       +
       +        kind = "new";
       +        which = NEW|OLD;
       +        if(kind != nil){
       +                if(strncmp(kind, "new", 3) == 0)
       +                        which = NEW;
       +                else if(strncmp(kind, "old", 3) == 0)
       +                        which = OLD;
       +        }
       +
       +        /* mbrbuf and partbuf must each hold one sector of this unit */
       +        if(nbuf < unit->secsize){
       +                free(mbrbuf);
       +                free(partbuf);
       +                mbrbuf = malloc(unit->secsize);
       +                partbuf = malloc(unit->secsize);
       +                if(mbrbuf == nil || partbuf == nil){
       +                        free(mbrbuf);
       +                        free(partbuf);
       +                        mbrbuf = nil;
       +                        partbuf = nil;
       +                        nbuf = 0;
       +                        return;
       +                }
       +                nbuf = unit->secsize;
       +        }
       +
       +        /* the old format is only consulted when the new one yields nothing */
       +        if((which & NEW) && mbrpart(unit) >= 0)
       +                return;
       +        if(which & OLD)
       +                oldp9part(unit);
       +}
 (DIR) diff --git a/src/9vx/a/pgrp.c b/src/9vx/a/pgrp.c
       @@ -180,7 +180,7 @@ dupfgrp(Fgrp *f)
                lock(&f->ref.lk);
                /* Make new fd list shorter if possible, preserving quantization */
                new->nfd = f->maxfd+1;
       -        i = new->nfd%DELTAFD;
       +        i = (uint)new->nfd%DELTAFD;
                if(i != 0)
                        new->nfd += DELTAFD - i;
                new->fd = malloc(new->nfd*sizeof(Chan*));
 (DIR) diff --git a/src/9vx/a/portfns.h b/src/9vx/a/portfns.h
       @@ -32,8 +32,8 @@ void                callwithureg(void(*)(Ureg*));
        char*                chanpath(Chan*);
        int                canlock(Lock*);
        int                canpage(Proc*);
       -int                canqlock(QLock*);
       -int                canrlock(RWlock*);
       +int                __canqlock(QLock*);
       +int                __canrlock(RWlock*);
        void                chandevinit(void);
        void                chandevreset(void);
        void                chandevshutdown(void);
       @@ -166,7 +166,7 @@ void                ksetenv(char*, char*, int);
        void                kstrcpy(char*, char*, int);
        void                kstrdup(char**, char*);
        long                latin1(Rune*, int);
       -int                lock(Lock*);
       +int                __lock(Lock*);
        void                logopen(Log*);
        void                logclose(Log*);
        char*                logctl(Log*, int, char**, Logflag*);
       @@ -277,7 +277,7 @@ void                qhangup(Queue*, char*);
        int                qisclosed(Queue*);
        int                qiwrite(Queue*, void*, int);
        int                qlen(Queue*);
       -void                qlock(QLock*);
       +void                __qlock(QLock*);
        Queue*                qopen(int, int, void (*)(void*), void*);
        int                qpass(Queue*, Block*);
        int                qpassnolim(Queue*, Block*);
       @@ -287,7 +287,7 @@ long                qread(Queue*, void*, int);
        Block*                qremove(Queue*);
        void                qreopen(Queue*);
        void                qsetlimit(Queue*, int);
       -void                qunlock(QLock*);
       +void                __qunlock(QLock*);
        int                qwindow(Queue*);
        int                qwrite(Queue*, void*, int);
        void                qnoblock(Queue*, int);
       @@ -305,9 +305,9 @@ void                renameuser(char*, char*);
        void                resched(char*);
        void                resrcwait(char*);
        int                return0(void*);
       -void                rlock(RWlock*);
       +void                __rlock(RWlock*);
        long                rtctime(void);
       -void                runlock(RWlock*);
       +void                __runlock(RWlock*);
        Proc*                runproc(void);
        void                savefpregs(FPsave*);
        void                sched(void);
       @@ -361,7 +361,7 @@ int                uartstageoutput(Uart*);
        void                unbreak(Proc*);
        void                uncachepage(Page*);
        long                unionread(Chan*, void*, long);
       -void                unlock(Lock*);
       +void                __unlock(Lock*);
        uvlong                us2fastticks(uvlong);
        void                userinit(void);
        ulong                userpc(void);
       @@ -372,8 +372,8 @@ void                validstat(uchar*, int);
        void*                vmemchr(void*, int, int);
        Proc*                wakeup(Rendez*);
        int                walk(Chan**, char**, int, int, int*);
       -void                wlock(RWlock*);
       -void                wunlock(RWlock*);
       +void                __wlock(RWlock*);
       +void                __wunlock(RWlock*);
        void*                xalloc(ulong);
        void*                xallocz(ulong, int);
        void                xfree(void*);
 (DIR) diff --git a/src/9vx/a/qlock.c b/src/9vx/a/qlock.c
       @@ -5,6 +5,8 @@
        #include "dat.h"
        #include "fns.h"
        
       +int tracelock = 0;
       +
        struct {
                ulong rlock;
                ulong rlockq;
       @@ -15,7 +17,7 @@ struct {
        } rwstats;
        
        void
       -qlock(QLock *q)
       +__qlock(QLock *q)
        {
                Proc *p;
        
       @@ -50,7 +52,7 @@ qlock(QLock *q)
        }
        
        int
       -canqlock(QLock *q)
       +__canqlock(QLock *q)
        {
                if(!canlock(&q->use))
                        return 0;
       @@ -64,7 +66,7 @@ canqlock(QLock *q)
        }
        
        void
       -qunlock(QLock *q)
       +__qunlock(QLock *q)
        {
                Proc *p;
        
       @@ -86,7 +88,7 @@ qunlock(QLock *q)
        }
        
        void
       -rlock(RWlock *q)
       +__rlock(RWlock *q)
        {
                Proc *p;
        
       @@ -115,7 +117,7 @@ rlock(RWlock *q)
        }
        
        void
       -runlock(RWlock *q)
       +__runlock(RWlock *q)
        {
                Proc *p;
        
       @@ -138,7 +140,7 @@ runlock(RWlock *q)
        }
        
        void
       -wlock(RWlock *q)
       +__wlock(RWlock *q)
        {
                Proc *p;
        
       @@ -170,7 +172,7 @@ wlock(RWlock *q)
        }
        
        void
       -wunlock(RWlock *q)
       +__wunlock(RWlock *q)
        {
                Proc *p;
        
       @@ -209,7 +211,7 @@ wunlock(RWlock *q)
        
        /* same as rlock but punts if there are any writers waiting */
        int
       -canrlock(RWlock *q)
       +__canrlock(RWlock *q)
        {
                lock(&q->use);
                rwstats.rlock++;
 (DIR) diff --git a/src/9vx/a/sd.h b/src/9vx/a/sd.h
       @@ -129,9 +129,14 @@ extern void sdadddevs(SDev*);
        extern int sdsetsense(SDreq*, int, int, int, int);
        extern int sdmodesense(SDreq*, uchar*, void*, int);
        extern int sdfakescsi(SDreq*, void*, int);
       +extern void sdaddpart(SDunit*, char*, uvlong, uvlong);
       +extern SDpart* sdfindpart(SDunit*, char*);
        
        /* sdscsi.c */
        extern int scsiverify(SDunit*);
        extern int scsionline(SDunit*);
        extern long scsibio(SDunit*, int, int, void*, long, uvlong);
        extern SDev* scsiid(SDev*, SDifc*);
       +
       +/* part.c */
       +extern void partition(SDunit*);
 (DIR) diff --git a/src/9vx/a/sdaoe.c b/src/9vx/a/sdaoe.c
       @@ -0,0 +1,652 @@
       +/*
       + * aoe sd driver, copyright © 2007 coraid
       + */
       +
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "io.h"
       +#include "error.h"
       +#include "sd.h"
       +#include "netif.h"
       +#include "aoe.h"
       +
       +extern        char        Echange[];
       +extern        char        Enotup[];
       +
       +/* format into up->genbuf; no trailing semicolon, so it behaves like an ordinary call */
       +#define uprint(...)        snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__)
       +
       +enum {
       +        Nctlr        = 32,                /* not referenced in the code that follows */
       +        Maxpath        = 128,                /* size of Ctlr.path */
       +};
       +
       +/* Feature bits kept in Ctlr.feat; flagname[] below names them in bit order. */
       +enum {
       +        /* sync with ahci.h */
       +        Dllba         = 1<<0,
       +        Dsmart        = 1<<1,
       +        Dpower        = 1<<2,
       +        Dnop        = 1<<3,
       +        Datapi        = 1<<4,
       +        Datapi16= 1<<5,
       +};
       +
       +/* Printable names of the feature bits; entry n belongs to bit 1<<n (see pflag). */
       +static char *flagname[] = {
       +        "llba",
       +        "smart",
       +        "power",
       +        "nop",
       +        "atapi",
       +        "atapi16",
       +};
       +
       +/* Per-target state; one Ctlr exists for each aoe path that has been probed. */
       +typedef struct Ctlr Ctlr;
       +struct Ctlr{
       +        QLock        qlock;
       +
       +        Ctlr        *next;                /* link in the list starting at head */
       +        SDunit        *unit;
       +
       +        char        path[Maxpath];        /* target path; "/ident" and "/data" are appended to it */
       +        Chan        *c;                /* open "path/data" channel, or 0 */
       +
       +        ulong        vers;                /* bumped by identify() when it records a change */
       +        uchar        mediachange;        /* set by identify() and aoeverify(), cleared by aoeonline() */
       +        uchar        flag;
       +        uchar        smart;                /* index into smarttab[] */
       +        uchar        smartrs;        /* 0xff and 0 are reported specially by aoerctl() */
       +        uchar        feat;                /* D* feature bits */
       +
       +        uvlong        sectors;        /* sector count taken from the identify data */
       +        char        serial[20+1];
       +        char        firmware[8+1];
       +        char        model[40+1];
       +        char        ident[0x100];        /* raw data read from "path/ident" */
       +};
       +
       +/* singly linked list of all controllers; ctlrlock is held while it is walked or changed */
       +static        Lock        ctlrlock;
       +static        Ctlr        *head;
       +static        Ctlr        *tail;
       +
       +SDifc sdaoeifc;
       +
       +/*
       + * Copy an identify string of n bytes from the 16-bit words at a
       + * into p, high byte of each word first, then strip trailing and
       + * leading blanks.  p must have room for n+1 bytes; everything after
       + * the stripped text is left zeroed, so the result does not depend
       + * on where the blanks were.
       + */
       +static void
       +idmove(char *p, ushort *a, int n)
       +{
       +        int i, len;
       +        char *op, *e, *lim;
       +
       +        op = p;
       +        for(i = 0; i < n/2; i++){
       +                *p++ = a[i] >> 8;
       +                *p++ = a[i];
       +        }
       +        *p = 0;
       +        lim = p;                        /* the terminating NUL */
       +        while(p > op && *--p == ' ')
       +                *p = 0;
       +        e = p;                                /* last character kept (op if nothing is left) */
       +        for(p = op; *p == ' '; p++)
       +                ;
       +        /*
       +         * Move exactly the text between p and e.  The old length,
       +         * n - (e - p), moved too little when only a few leading
       +         * blanks were present and read past the buffer when there
       +         * were many.
       +         */
       +        len = e - p + 1;
       +        memmove(op, p, len);
       +        memset(op+len, 0, lim - (op+len) + 1);
       +}
       +
       +/* Fetch a 16-bit little-endian value from a. */
       +static ushort
       +gbit16(void *a)
       +{
       +        uchar *b;
       +
       +        b = a;
       +        return b[0] | b[1] << 8;
       +}
       +
       +/*
       + * Fetch a 32-bit little-endian value from a.  Each byte is widened
       + * before it is shifted, so the top byte is never shifted into the
       + * sign bit of an int.
       + */
       +static ulong
       +gbit32(void *a)
       +{
       +        ulong j;
       +        uchar *i;
       +
       +        i = a;
       +        j  = (ulong)i[3] << 24;
       +        j |= (ulong)i[2] << 16;
       +        j |= (ulong)i[1] << 8;
       +        j |= i[0];
       +        return j;
       +}
       +
       +/* Fetch a 64-bit little-endian value from a as two 32-bit halves. */
       +static uvlong
       +gbit64(void *a)
       +{
       +        uchar *b;
       +        uvlong lo, hi;
       +
       +        b = a;
       +        lo = gbit32(b);
       +        hi = gbit32(b+4);
       +        return hi<<32 | lo;
       +}
       +
       +/*
       + * Decode the identify data at id into c: the Dllba, Dpower, Dsmart
       + * and Dnop feature bits, the sector count and the serial, firmware
       + * and model strings.  Always returns 0.
       + */
       +static int
       +identify(Ctlr *c, ushort *id)
       +{
       +        int i;
       +        uchar oserial[21];
       +        uvlong osectors, s;
       +
       +        /* keep the previous values so a change can be detected below */
       +        osectors = c->sectors;
       +        memmove(oserial, c->serial, sizeof c->serial);
       +
       +        c->feat &= ~(Dllba|Dpower|Dsmart|Dnop);
       +        /* bit 10 of word 83 or 86 selects the 64-bit count at word 100 over the 32-bit one at word 60 */
       +        i = gbit16(id+83) | gbit16(id+86);
       +        if(i & (1<<10)){
       +                c->feat |= Dllba;
       +                s = gbit64(id+100);
       +        }else
       +                s = gbit32(id+60);
       +
       +        /* words 82 and 83 are only consulted when the top two bits of word 83 read 01 */
       +        i = gbit16(id+83);
       +        if((i>>14) == 1) {
       +                if(i & (1<<3))
       +                        c->feat |= Dpower;
       +                i = gbit16(id+82);
       +                if(i & 1)
       +                        c->feat |= Dsmart;
       +                if(i & (1<<14))
       +                        c->feat |= Dnop;
       +        }
       +
       +        idmove(c->serial, id+10, 20);
       +        idmove(c->firmware, id+23, 8);
       +        idmove(c->model, id+27, 40);
       +
       +        /*
       +         * NOTE(review): sectors, mediachange and vers are updated only
       +         * when the serial differs from the previous one, so a size change
       +         * under an unchanged serial is ignored.  Confirm that && rather
       +         * than || is intended.
       +         */
       +        if((osectors == 0 || osectors != s) &&
       +            memcmp(oserial, c->serial, sizeof oserial) != 0){
       +                c->sectors = s;
       +                c->mediachange = 1;
       +                c->vers++;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * must call with d qlocked
       + *
       + * Read "path/ident" into d->ident, decode it with identify() and
       + * build the inquiry data of u from the model string.  Errors raised
       + * while opening or reading are reported with iprint and passed on
       + * to the caller; otherwise 0 is returned.
       + */
       +static int
       +aoeidentify(Ctlr *d, SDunit *u)
       +{
       +        Chan *c;
       +
       +        c = nil;
       +        if(waserror()){
       +                /* c is only non-nil once namec has succeeded */
       +                if(c)
       +                        cclose(c);
       +                iprint("aoeidentify: %s\n", up->errstr);
       +                nexterror();
       +        }
       +
       +        uprint("%s/ident", d->path);
       +        c = namec(up->genbuf, Aopen, OREAD, 0);
       +        devtab[c->type]->read(c, d->ident, sizeof d->ident, 0);
       +
       +        poperror();
       +        cclose(c);
       +
       +        d->feat = 0;
       +        d->smart = 0;
       +        identify(d, (ushort*)d->ident);
       +
       +        memset(u->inquiry, 0, sizeof u->inquiry);
       +        u->inquiry[2] = 2;
       +        u->inquiry[3] = 2;
       +        u->inquiry[4] = sizeof u->inquiry - 4;
       +        memmove(u->inquiry+8, d->model, 40);
       +
       +        return 0;
       +}
       +
       +/* Return the controller whose path equals path, or nil if there is none. */
       +static Ctlr*
       +ctlrlookup(char *path)
       +{
       +        Ctlr *hit;
       +
       +        lock(&ctlrlock);
       +        hit = head;
       +        while(hit != nil && strcmp(hit->path, path) != 0)
       +                hit = hit->next;
       +        unlock(&ctlrlock);
       +        return hit;
       +}
       +
       +/*
       + * Allocate a controller for path and append it to the list.
       + * Raises Eexist if the path is already known; returns 0 if the
       + * allocation fails.
       + */
       +static Ctlr*
       +newctlr(char *path)
       +{
       +        Ctlr *nc;
       +
       +        /* race? */
       +        if(ctlrlookup(path) != nil)
       +                error(Eexist);
       +
       +        nc = malloc(sizeof *nc);
       +        if(nc == nil)
       +                return 0;
       +        kstrcpy(nc->path, path, sizeof nc->path);
       +
       +        lock(&ctlrlock);
       +        if(head == nil)
       +                head = nc;
       +        else
       +                tail->next = nc;
       +        tail = nc;
       +        unlock(&ctlrlock);
       +        return nc;
       +}
       +
       +/*
       + * Unlink the controller whose path matches c->path from the list,
       + * close its data channel if one is open and free it.
       + * Raises Enonexist (after dropping the lock) if no entry matches.
       + */
       +static void
       +delctlr(Ctlr *c)
       +{
       +        Ctlr *x, *prev;
       +
       +        lock(&ctlrlock);
       +
       +        /* step along the list itself; advancing from c->next skipped every entry but the head */
       +        for(prev = 0, x = head; x; prev = x, x = x->next)
       +                if(strcmp(c->path, x->path) == 0)
       +                        break;
       +        if(x == 0){
       +                unlock(&ctlrlock);
       +                error(Enonexist);
       +        }
       +
       +        if(prev)
       +                prev->next = x->next;
       +        else
       +                head = x->next;
       +        if(x->next == nil)
       +                tail = prev;
       +        unlock(&ctlrlock);
       +
       +        if(x->c)
       +                cclose(x->c);
       +        free(x);
       +}
       +
       +/*
       + * Bring up the target named by path: write "discover <last element>"
       + * to the ctl file next to it, poll every 200ms until "path/ident" can
       + * be opened (raising Etimedout once more than 8000ms have been
       + * counted), then create a controller for path and attach it to s.
       + * s is allocated here when the caller passes nil.
       + * NOTE(review): if that allocation fails nil is returned while the
       + * controller just created stays on the list.
       + */
       +static SDev*
       +aoeprobe(char *path, SDev *s)
       +{
       +        int n, i;
       +        char *p;
       +        Chan *c;
       +        Ctlr *ctlr;
       +
       +        if((p = strrchr(path, '/')) == 0)
       +                error(Ebadarg);
       +        /* cut path at its last '/' just long enough to format the ctl name */
       +        *p = 0;
       +        uprint("%s/ctl", path);
       +        *p = '/';
       +
       +        c = namec(up->genbuf, Aopen, OWRITE, 0);
       +        if(waserror()) {
       +                cclose(c);
       +                nexterror();
       +        }
       +        n = uprint("discover %s", p+1);
       +        devtab[c->type]->write(c, up->genbuf, n, 0);
       +        poperror();
       +        cclose(c);
       +
       +        for(i = 0;; i += 200){
       +                /* an error raised during the sleep is reported as a timeout as well */
       +                if(i > 8000 || waserror())
       +                        error(Etimedout);
       +                tsleep(&up->sleep, return0, 0, 200);
       +                poperror();
       +
       +                uprint("%s/ident", path);
       +                /* a failed open just means the target is not there yet */
       +                if(waserror())
       +                        continue;
       +                c = namec(up->genbuf, Aopen, OREAD, 0);
       +                poperror();
       +                cclose(c);
       +
       +                ctlr = newctlr(path);
       +                break;
       +        }
       +
       +        if(s == nil && (s = malloc(sizeof *s)) == nil)
       +                return nil;
       +        s->ctlr = ctlr;
       +        s->ifc = &sdaoeifc;
       +        s->nunit = 1;
       +        return s;
       +}
       +
       +/* probe strings split out by aoepnp(), consumed one at a time by pnpprobe() */
       +static char         *probef[32];
       +static int         nprobe;
       +
       +/*
       + * Pick the id letter for probe string s: the first character when the
       + * string has the form "X!...", otherwise 'e'.  Strings shorter than
       + * two characters yield 0.
       + */
       +static int
       +pnpprobeid(char *s)
       +{
       +        if(strlen(s) < 2)
       +                return 0;
       +        return s[1] == '!' ? s[0] : 'e';
       +}
       +
       +/*
       + * Build one SDev per usable entry of the probe list.
       + * The getconf lookup is commented out and replaced by if(1), so as
       + * written this always returns 0 and the code below the return is
       + * never reached (p is never assigned).
       + */
       +static SDev*
       +aoepnp(void)
       +{
       +        int i, id;
       +        char *p;
       +        SDev *h, *t, *s;
       +
       +//        if((p = getconf("aoedev")) == 0)
       +        if(1)
       +                return 0;
       +        nprobe = tokenize(p, probef, nelem(probef));
       +        h = t = 0;
       +        for(i = 0; i < nprobe; i++){
       +                id = pnpprobeid(probef[i]);
       +                if(id == 0)
       +                        continue;
       +                s = malloc(sizeof *s);
       +                if(s == nil)
       +                        break;
       +                /* the controller is filled in later; aoeverify() calls pnpprobe() when it is nil */
       +                s->ctlr = 0;
       +                s->idno = id;
       +                s->ifc = &sdaoeifc;
       +                s->nunit = 1;
       +
       +                if(h)
       +                        t->next = s;
       +                else
       +                        h = s;
       +                t = s;
       +        }
       +        return h;
       +}
       +
       +/*
       + * Probe the next unused entry of probef[] on behalf of sd, retrying
       + * aoeprobe() with 200ms pauses after each error until more than
       + * 8000ms have been counted.  Returns the controller attached to sd,
       + * or 0 when the list is exhausted, the entry is too short or the
       + * probe keeps failing.
       + */
       +static Ctlr*
       +pnpprobe(SDev *sd)
       +{
       +        int j;
       +        char *p;
       +        static int i;                /* next entry of probef[] to use */
       +
       +        /* probef[nprobe] is not a valid entry, so stop at nprobe itself */
       +        if(i >= nprobe)
       +                return 0;
       +        p = probef[i++];
       +        if(strlen(p) < 2)
       +                return 0;
       +        /* skip the "X!" prefix that pnpprobeid() interprets */
       +        if(p[1] == '!')
       +                p += 2;
       +
       +        for(j = 0;; j += 200){
       +                if(j > 8000){
       +                        print("#æ: pnpprobe: %s: %s\n", probef[i-1], up->errstr);
       +                        return 0;
       +                }
       +                if(waserror()){
       +                        tsleep(&up->sleep, return0, 0, 200);
       +                        continue;
       +                }
       +                sd = aoeprobe(p, sd);
       +                poperror();
       +                break;
       +        }
       +        print("#æ: pnpprobe establishes %s in %dms\n", probef[i-1], j);
       +        return sd->ctlr;
       +}
       +
       +
       +/*
       + * Make sure the device behind u has a controller, probing for one
       + * if necessary, and flag a media change on it.
       + * Returns 1 on success and 0 when no controller could be found.
       + */
       +static int
       +aoeverify(SDunit *u)
       +{
       +        SDev *dev;
       +        Ctlr *ctlr;
       +
       +        dev = u->dev;
       +        ctlr = dev->ctlr;
       +        if(ctlr == nil){
       +                ctlr = pnpprobe(dev);
       +                dev->ctlr = ctlr;
       +                if(ctlr == nil)
       +                        return 0;
       +        }
       +        ctlr->mediachange = 1;
       +        return 1;
       +}
       +
       +/*
       + * Refresh the identify data of u's controller and (re)open the
       + * "path/data" channel of c, all between QLOCK(c) and QUNLOCK(c).
       + * Returns 0 on success and -1 if any step raised an error.
       + */
       +static int
       +aoeconnect(SDunit *u, Ctlr *c)
       +{
       +        QLOCK(c);
       +        if(waserror()){
       +                QUNLOCK(c);
       +                return -1;
       +        }
       +
       +        aoeidentify(u->dev->ctlr, u);
       +        /* drop any channel left over from an earlier connect */
       +        if(c->c)
       +                cclose(c->c);
       +        c->c = 0;
       +        uprint("%s/data", c->path);
       +        c->c = namec(up->genbuf, Aopen, ORDWR, 0);
       +        QUNLOCK(c);
       +        poperror();
       +
       +        return 0;
       +}
       +
       +/*
       + * Report the state of unit u.
       + * With no media change pending the answer is 1.  Otherwise the
       + * controller is reconnected: units flagged Datapi return whatever
       + * scsionline() says (0 if the connect failed), all others take their
       + * geometry from the controller and return 2, or 0 if the connect failed.
       + */
       +static int
       +aoeonline(SDunit *u)
       +{
       +        Ctlr *ctlr;
       +        int r;
       +
       +        ctlr = u->dev->ctlr;
       +        if(!ctlr->mediachange)
       +                return 1;
       +
       +        if(ctlr->feat & Datapi){
       +                if(aoeconnect(u, ctlr) != 0)
       +                        return 0;
       +                r = scsionline(u);
       +                if(r > 0)
       +                        ctlr->mediachange = 0;
       +                return r;
       +        }
       +
       +        if(aoeconnect(u, ctlr) == -1)
       +                return 0;
       +        ctlr->mediachange = 0;
       +        u->sectors = ctlr->sectors;
       +        u->secsize = Aoesectsz;
       +        return 2;
       +}
       +
       +/*
       + * Carry out request r against the data channel of its controller.
       + * Commands 0x35 and 0x91 are answered with a check condition, commands
       + * understood by sdfakescsi() are answered by it, and 0x28/0x88 and
       + * 0x2a/0x8a become a read or write of count sectors at lba through
       + * the channel's device.  Anything else is rejected with SDcheck.
       + * An error raised by the transfer is passed on to the caller; when
       + * its text is Echange or Enotup the unit's sector count is zeroed first.
       + */
       +static int
       +aoerio(SDreq *r)
       +{
       +        int i, count;
       +        uvlong lba;
       +        char *name;
       +        uchar *cmd;
       +        long (*rio)(Chan*, void*, long, vlong);
       +        Ctlr *c;
       +        SDunit *unit;
       +
       +        unit = r->unit;
       +        c = unit->dev->ctlr;
       +//        if(c->feat & Datapi)
       +//                return aoeriopkt(r, d);
       +
       +        cmd = r->cmd;
       +        name = unit->perm.name;
       +
       +        if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){
       +//                QLOCK(c);
       +//                i = flushcache();
       +//                QUNLOCK(c);
       +//                if(i == 0)
       +//                        return sdsetsense(r, SDok, 0, 0, 0);
       +                return sdsetsense(r, SDcheck, 3, 0xc, 2);
       +        }
       +
       +        if((i = sdfakescsi(r, c->ident, sizeof c->ident)) != SDnostatus){
       +                r->status = i;
       +                return i;
       +        }
       +
       +        switch(*cmd){
       +        case 0x88:
       +        case 0x28:
       +                rio = devtab[c->c->type]->read;
       +                break;
       +        case 0x8a:
       +        case 0x2a:
       +                rio = devtab[c->c->type]->write;
       +                break;
       +        default:
       +                print("%s: bad cmd %#.2ux\n", name, cmd[0]);
       +                r->status = SDcheck;
       +                return SDcheck;
       +        }
       +
       +        if(r->data == nil)
       +                return SDok;
       +
       +        /*
       +         * Widen every address byte before shifting: shifted as an int,
       +         * a byte with its top bit set in the <<24 position would turn
       +         * negative and fill the upper half of lba with ones.
       +         */
       +        if(r->clen == 16){
       +                /* only the low six address bytes are supported */
       +                if(cmd[2] || cmd[3])
       +                        return sdsetsense(r, SDcheck, 3, 0xc, 2);
       +                lba = (uvlong)cmd[4]<<40 | (uvlong)cmd[5]<<32;
       +                lba |= (uvlong)cmd[6]<<24 | (uvlong)cmd[7]<<16 | (uvlong)cmd[8]<<8 | cmd[9];
       +                count = cmd[10]<<24 | cmd[11]<<16 | cmd[12]<<8 | cmd[13];
       +        }else{
       +                lba = (uvlong)cmd[2]<<24 | (uvlong)cmd[3]<<16 | (uvlong)cmd[4]<<8 | cmd[5];
       +                count = cmd[7]<<8 | cmd[8];
       +        }
       +
       +        count *= Aoesectsz;
       +
       +        /* never move more than the caller's buffer holds, rounded down to 512 bytes */
       +        if(r->dlen < count)
       +                count = r->dlen & ~0x1ff;
       +
       +        if(waserror()){
       +                if(strcmp(up->errstr, Echange) == 0 ||
       +                    strcmp(up->errstr, Enotup) == 0)
       +                        unit->sectors = 0;
       +                nexterror();
       +        }
       +        r->rlen = rio(c->c, r->data, count, Aoesectsz * lba);
       +        poperror();
       +        r->status = SDok;
       +        return SDok;
       +}
       +
       +/* text printed by aoerctl() for each value of Ctlr.smart */
       +static char *smarttab[] = {
       +        "unset",
       +        "error",
       +        "threshold exceeded",
       +        "normal"
       +};
       +
       +/*
       + * Append the names of the feature bits set in f to the buffer s..e,
       + * each followed by a blank, then a newline; returns the new end of
       + * the text.  Only bits that have an entry in flagname[] are looked
       + * at, so stray high bits cannot index past the table.
       + */
       +static char *
       +pflag(char *s, char *e, uchar f)
       +{
       +        int i;
       +
       +        for(i = 0; i < nelem(flagname); i++)
       +                if(f & (1<<i))
       +                        s = seprint(s, e, "%s ", flagname[i]);
       +        return seprint(s, e, "\n");
       +}
       +
       +/*
       + * Format the unit's ctl text (model, serial, firmware, smart state,
       + * feature flags, geometry) into the l-byte buffer at p.
       + * Returns the number of bytes produced, or 0 when the device
       + * has no controller attached.
       + */
       +static int
       +aoerctl(SDunit *u, char *p, int l)
       +{
       +        Ctlr *ctlr;
       +        char *start, *end;
       +
       +        ctlr = u->dev->ctlr;
       +        if(ctlr == nil)
       +                return 0;
       +        start = p;
       +        end = p+l;
       +
       +        p = seprint(p, end, "model\t%s\n", ctlr->model);
       +        p = seprint(p, end, "serial\t%s\n", ctlr->serial);
       +        p = seprint(p, end, "firm        %s\n", ctlr->firmware);
       +        switch(ctlr->smartrs){
       +        case 0xff:
       +                p = seprint(p, end, "smart\tenable error\n");
       +                break;
       +        case 0:
       +                p = seprint(p, end, "smart\tdisabled\n");
       +                break;
       +        default:
       +                p = seprint(p, end, "smart\t%s\n", smarttab[ctlr->smart]);
       +                break;
       +        }
       +        p = seprint(p, end, "flag        ");
       +        p = pflag(p, end, ctlr->feat);
       +        p = seprint(p, end, "geometry %llud %d\n", ctlr->sectors, Aoesectsz);
       +        return p-start;
       +}
       +
       +/*
       + * Per-unit ctl write.  No commands are implemented here:
       + * every request is handed to cmderror with Ebadarg.
       + */
       +static int
       +aoewctl(SDunit *d1, Cmdbuf *cmd)
       +{
       +        cmderror(cmd, Ebadarg);
       +        return 0;
       +}
       +
       +/*
       + * Probe entry used by the SD interface table.  The target path is
       + * the part of c->type starting at its first '/'.  A missing or
       + * over-long path raises Ebadarg; a path already known to
       + * ctlrlookup raises Einuse.  Otherwise the work is done by aoeprobe.
       + */
       +static SDev*
       +aoeprobew(DevConf *c)
       +{
       +        char *path;
       +
       +        path = strchr(c->type, '/');
       +        if(path == nil || strlen(path) > Maxpath - 11)
       +                error(Ebadarg);
       +        /* hack: step over the '/' when it is directly followed by '#' */
       +        if(path[1] == '#')
       +                path++;
       +        if(ctlrlookup(path))
       +                error(Einuse);
       +        return aoeprobe(path, 0);
       +}
       +
       +/* Clear hook: hands the device's controller to delctlr. */
       +static void
       +aoeclear(SDev *s)
       +{
       +        delctlr((Ctlr *)s->ctlr);
       +}
       +
       +/*
       + * Top-level ctl read: emits one line "<device name> aoe <path>"
       + * into [p, e) and returns seprint's result.
       + */
       +static char*
       +aoertopctl(SDev *s, char *p, char *e)
       +{
       +        Ctlr *c;
       +
       +        c = s->ctlr;
       +        return seprint(p, e, "%s aoe %s\n", s->name, c->path);
       +}
       +
       +/*
       + * Top-level ctl write.  The switch has only a default arm, so
       + * whatever the field count, the command goes to cmderror with Ebadarg.
       + */
       +static int
       +aoewtopctl(SDev *d1, Cmdbuf *cmd)
       +{
       +        switch(cmd->nf){
       +        default:
       +                cmderror(cmd, Ebadarg);
       +        }
       +        return 0;
       +}
       +
       +/*
       + * SD interface table for the "aoe" driver.  Slots are positional;
       + * the legacy, enable and disable hooks are left unset.
       + */
       +SDifc sdaoeifc = {
       +        "aoe",
       +
       +        aoepnp,
       +        nil,                /* legacy */
       +        nil,                /* enable */
       +        nil,                /* disable */
       +
       +        aoeverify,
       +        aoeonline,
       +        aoerio,
       +        aoerctl,
       +        aoewctl,
       +
       +        scsibio,
       +        aoeprobew,        /* probe */
       +        aoeclear,        /* clear */
       +        aoertopctl,
       +        aoewtopctl,
       +};
 (DIR) diff --git a/src/9vx/bootcode.9 b/src/9vx/bootcode.9
       Binary files differ.
 (DIR) diff --git a/src/9vx/devip.c b/src/9vx/devip.c
       @@ -883,7 +883,7 @@ cswrite(Chan *c, void *a, long n, vlong offset)
                return n;
        }
        
       -Dev ipdevtab = 
       +Dev pipdevtab = 
        {
                'I',
                "ip",
 (DIR) diff --git a/src/9vx/devtab.c b/src/9vx/devtab.c
       @@ -5,6 +5,7 @@
        #include "fns.h"
        #include "error.h"
        
       +extern Dev aoedevtab;
        extern Dev consdevtab;
        extern Dev rootdevtab;
        extern Dev pipedevtab;
       @@ -24,14 +25,18 @@ extern Dev mntloopdevtab;
        extern Dev dupdevtab;
        extern Dev sddevtab;
        extern Dev capdevtab;
       +extern Dev etherdevtab;
        
        Dev *devtab[] = {
                &rootdevtab,        /* must be first */
       +        &aoedevtab,
                &audiodevtab,
       +        &capdevtab,
                &consdevtab,
                &drawdevtab,
                &dupdevtab,
                &envdevtab,
       +        &etherdevtab,
                &fsdevtab,
                &ipdevtab,
                &mntdevtab,
       @@ -40,11 +45,9 @@ Dev *devtab[] = {
                &pipedevtab,
                &procdevtab,
                &ramdevtab,
       +        &sddevtab,
                &srvdevtab,
                &ssldevtab,
                &tlsdevtab,
       -        &sddevtab,
       -        &capdevtab,
                0
        };
       -
 (DIR) diff --git a/src/9vx/etherpcap.c b/src/9vx/etherpcap.c
       @@ -0,0 +1,189 @@
       +/*
       + * etherpcap - portable Virtual Ethernet driver for 9vx.
       + * 
       + * Copyright (c) 2008 Devon H. O'Dell
       + * copyright © 2008 erik quanstrom
       + * copyright © 2010 Jesus Galan Lopez
       + *
       + * Released under 2-clause BSD license.
       + */
       +
       +#include "u.h"
       +
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "io.h"
       +#include "error.h"
       +#include "netif.h"
       +#include "etherif.h"
       +#include "vether.h"
       +
       +#include <pcap.h>
       +
       +static        uvlong        txerrs;
       +
       +extern        int        eafrom(char *ma, uchar ea[6]);
       +
       +/* Per-interface driver state: just the open pcap capture handle. */
       +typedef struct Ctlr Ctlr;
       +struct Ctlr {
       +        pcap_t        *pd;
       +};
       +
       +/*
       + * Print err with a "ve: " prefix via iprint and return nil,
       + * so a caller can write `return veerror("...")`.
       + */
       +static void *
       +veerror(char* err)
       +{
       +        iprint("ve: %s\n", err);
       +        return nil;
       +}
       +
       +/*
       + * Open a promiscuous pcap capture on dev (or on the device pcap
       + * picks when dev is nil) and install a filter that only passes
       + * frames whose destination is the ethernet address ea.
       + *
       + * Returns the pcap handle, or nil on failure.  On every failure
       + * after the handle has been opened it is closed again, and the
       + * compiled filter is released on both the success and error paths.
       + */
       +static pcap_t *
       +setup(char *dev, uchar *ea)
       +{
       +        char        filter[30];
       +        char        errbuf[PCAP_ERRBUF_SIZE];
       +        pcap_t        *pd;
       +        struct bpf_program prog;
       +        bpf_u_int32 net;
       +        bpf_u_int32 mask;
       +
       +        /* "ether dst " + 17 characters of address + NUL fits in filter[] */
       +        if(snprint(filter, sizeof filter, "ether dst %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux",
       +        ea[0], ea[1], ea[2],ea[3], ea[4], ea[5]) < 0)
       +                return veerror("cannot create pcap filter");
       +
       +        if (!dev && (dev = pcap_lookupdev(errbuf)) == nil)
       +                return veerror("cannot find network device");
       +
       +        if ((pd = pcap_open_live(dev, 65000, 1, 1, errbuf)) == nil)
       +                return nil;
       +
       +        /*
       +         * The netmask only matters for IPv4 broadcast tests in a filter,
       +         * which this filter does not use; fall back to 0 when the lookup
       +         * fails rather than passing an uninitialised value.
       +         */
       +        if (pcap_lookupnet(dev, &net, &mask, errbuf) == -1)
       +                net = mask = 0;
       +
       +        if (pcap_compile(pd, &prog, filter, 0, mask) == -1) {
       +                pcap_close(pd);
       +                return veerror("cannot compile pcap filter");
       +        }
       +
       +        if (pcap_setfilter(pd, &prog) == -1) {
       +                pcap_freecode(&prog);
       +                pcap_close(pd);
       +                return nil;
       +        }
       +
       +        pcap_freecode(&prog);
       +
       +        return pd;
       +}
       +
       +/*
       + * Fetch the next captured frame and return it as a freshly
       + * allocated Block.  pcap_next is retried until it yields a packet,
       + * so this never returns without one.  The checksum flags are set
       + * on the block before it is handed back.
       + */
       +static Block *
       +pcappkt(Ctlr *c)
       +{
       +        struct pcap_pkthdr hdr;
       +        uchar *data;
       +        Block *pkt;
       +
       +        do
       +                data = pcap_next(c->pd, &hdr);
       +        while(data == nil);
       +
       +        pkt = allocb(hdr.caplen);
       +        memcpy(pkt->rp, data, hdr.caplen);
       +        pkt->wp += hdr.caplen;
       +        pkt->flag |= Btcpck|Budpck|Bpktck;
       +
       +/*
       +        iprint("+++++++++++ packet %d (len %d):\n", ++fn, hdr.caplen);
       +        int i=0; uchar* u;
       +        static int fn=0;
       +
       +        for(u=b->rp; u<b->wp; u++){
       +                if (i%16 == 0) iprint("%.4ux", i);
       +                if (i%8 == 0) iprint("   ");
       +                iprint("%2.2ux ", *u);
       +                if (++i%16 == 0) iprint("\n");
       +        }
       +        iprint("\n-------------\n");
       +*/
       +
       +        return pkt;
       +}
       +
       +/*
       + * Receive kproc body: v is the Ether.  Each block obtained from
       + * pcappkt is queued with etheriq; the loop ends only if pcappkt
       + * returns nil.
       + */
       +static void
       +pcaprecvkproc(void *v)
       +{
       +        Ether *ether;
       +        Block *pkt;
       +
       +        ether = v;
       +        for(;;){
       +                pkt = pcappkt(ether->ctlr);
       +                if(pkt == nil)
       +                        break;
       +                etheriq(ether, pkt, 1);
       +        }
       +}
       +
       +/*
       + * Drain the output queue: inject every pending block on the pcap
       + * handle, count failed injections in txerrs, and free the block.
       + */
       +static void
       +pcaptransmit(Ether* e)
       +{
       +        Block *pkt;
       +        Ctlr *ctlr;
       +
       +        ctlr = e->ctlr;
       +        for(;;){
       +                pkt = qget(e->oq);
       +                if(pkt == nil)
       +                        break;
       +                if(pcap_inject(ctlr->pd, (const u_char*)pkt->rp, BLEN(pkt)) == -1)
       +                        txerrs++;
       +                freeb(pkt);
       +        }
       +}
       +
       +/*
       + * ifstat read: report the driver-wide transmit error count.
       + * txerrs is a uvlong, so it is printed with %llud.
       + */
       +static long
       +pcapifstat(Ether *e, void *a, long n, ulong offset)
       +{
       +        char buf[128];
       +
       +        snprint(buf, sizeof buf, "txerrors: %llud\n", txerrs);
       +        return readstr(offset, a, n, buf);
       +}
       +
       +/* Attach hook: start the "pcaprecv" kproc running pcaprecvkproc on e. */
       +static void
       +pcapattach(Ether* e)
       +{
       +        kproc("pcaprecv", pcaprecvkproc, e);
       +}
       +
       +/*
       + * Configure e from the next ve[] entry that is not a tap device.
       + * cve persists across calls so each entry is consumed at most once,
       + * whether or not it could be set up.
       + * Returns 0 on success, -1 when no entry is left or setup fails.
       + */
       +static int
       +pcappnp(Ether* e)
       +{
       +        Ctlr c;
       +        static int cve = 0;
       +
       +        while(cve < nve && ve[cve].tap == 1)
       +                cve++;
       +        if(cve >= nve)
       +                return -1;
       +
       +        memset(&c, 0, sizeof(c));
       +        c.pd = setup(ve[cve].dev, ve[cve].ea);
       +        if (c.pd == nil) {
       +                iprint("ve: pcap failed to initialize\n");
       +                cve++;
       +                return -1;
       +        }
       +        e->ctlr = malloc(sizeof(c));
       +        if (e->ctlr == nil) {
       +                /* don't copy through nil; give the capture handle back */
       +                iprint("ve: pcap out of memory\n");
       +                pcap_close(c.pd);
       +                cve++;
       +                return -1;
       +        }
       +        memcpy(e->ctlr, &c, sizeof(c));
       +        e->tbdf = BUSUNKNOWN;
       +        memcpy(e->ea, ve[cve].ea, Eaddrlen);
       +        e->attach = pcapattach;
       +        e->transmit = pcaptransmit;
       +        e->ifstat = pcapifstat;
       +        e->ni.arg = e;
       +        e->ni.link = 1;
       +        cve++;
       +        return 0;
       +}
       +
       +/* Register pcappnp with addethercard under the name "pcap". */
       +void
       +etherpcaplink(void)
       +{
       +        addethercard("pcap", pcappnp);
       +}
 (DIR) diff --git a/src/9vx/ethertap.c b/src/9vx/ethertap.c
       @@ -0,0 +1,185 @@
       +/*
       + * ethertap: tap device ethernet driver
       + * copyright © 2008 erik quanstrom
       + * copyright © 2010 Tully Gray
       + * copyright © 2010 Jesus Galan Lopez
       + */
       +
       +#include "u.h"
       +#include "lib.h"
       +#include "mem.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "io.h"
       +#include "error.h"
       +#include "netif.h"
       +#include "etherif.h"
       +#include "vether.h"
       +
       +#include <net/if.h>
       +#include <sys/ioctl.h>
       +
       +#ifdef linux
       +#include <netpacket/packet.h>
       +#include <linux/if_tun.h>
       +#elif defined(__FreeBSD__)
       +#include <net/if_tun.h>
       +#endif
       +
       +/* Per-interface driver state for a tap device. */
       +typedef struct Ctlr Ctlr;
       +struct Ctlr {
       +        int        fd;                /* descriptor returned by opentap */
       +        int        txerrs;                /* failed write() calls in taptransmit */
       +        uchar        ea[Eaddrlen];        /* our address, used to filter received frames */
       +};
       +
       +/* all-ones (broadcast) ethernet address, compared against frame headers */
       +static        uchar        anyea[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff,};
       +
       +#ifdef linux
       +/*
       + * Open the Linux tun/tap clone device and bind it to the tap
       + * interface named dev ("tap0" when dev is nil), without the
       + * packet-information header.  Returns the descriptor or -1.
       + */
       +static int
       +opentap(char *dev)
       +{
       +        int fd;
       +        char *tap0 = "tap0";
       +        struct ifreq ifr;
       +
       +        if(dev == nil)
       +                dev = tap0;
       +        if((fd = open("/dev/net/tun", O_RDWR)) < 0)
       +                return -1;
       +        memset(&ifr, 0, sizeof ifr);
       +        /* leave the last byte zero so an over-long name stays terminated */
       +        strncpy(ifr.ifr_name, dev, sizeof ifr.ifr_name - 1);
       +        ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
       +        if(ioctl(fd, TUNSETIFF, &ifr) < 0){
       +                close(fd);
       +                return -1;
       +        }
       +        return fd;
       +}
       +#elif defined(__FreeBSD__)
       +/*
       + * FreeBSD variant: open /dev/tap.  The dev argument is not used
       + * here.  Returns the descriptor or -1.
       + */
       +static int
       +opentap(char *dev)
       +{
       +        int fd;
       +
       +        if((fd = open("/dev/tap", O_RDWR)) < 0)
       +                return -1;
       +        return fd;
       +}
       +#endif
       +
       +/* Thin wrapper: open the tap device for dev via the per-OS opentap. */
       +static int
       +setup(char *dev)
       +{
       +        return opentap(dev);
       +}
       +
       +/*
       + * Read frames from the tap descriptor until one arrives whose
       + * destination is either broadcast or our own address, and return
       + * it in a Block.  A failed or empty read panics.
       + */
       +Block*
       +tappkt(Ctlr *c)
       +{
       +        int len;
       +        Block *pkt;
       +
       +        pkt = allocb(1514);
       +        do{
       +                len = read(c->fd, pkt->rp, BALLOC(pkt));
       +                if(len <= 0)
       +                        panic("fd %d read %d", c->fd, len);
       +        }while(memcmp(pkt->rp + 0, anyea, 6) != 0
       +            && memcmp(pkt->rp + 0, c->ea, 6) != 0);
       +        pkt->wp += len;
       +        pkt->flag |= Btcpck|Budpck|Bpktck;
       +        return pkt;
       +}
       +
       +/*
       + * Receive kproc body: v is the Ether.  Blocks from tappkt are
       + * queued with etheriq; if tappkt ever returns nil the kproc
       + * exits through pexit.
       + */
       +static void
       +taprecvkproc(void *v)
       +{
       +        Ether *ether;
       +        Block *pkt;
       +
       +        ether = v;
       +        for(;;){
       +                pkt = tappkt(ether->ctlr);
       +                if(pkt == nil)
       +                        break;
       +                etheriq(ether, pkt, 1);
       +        }
       +        pexit("read fail", 1);
       +}
       +
       +/*
       + * Drain the output queue.  Frames matching the loopback test are
       + * duplicated and fed back to the input side with etheriq; every
       + * frame is then written to the tap descriptor, with failed writes
       + * counted in the controller's txerrs.
       + */
       +static void
       +taptransmit(Ether* e)
       +{
       +        Block *b, *h;
       +        Ctlr *c;
       +
       +        c = e->ctlr;
       +        while ((b = qget(e->oq)) != nil) {
       +                /*
       +                 * NOTE(review): the first test looks at offset 6 (the source
       +                 * address); a broadcast check would normally use offset 0.
       +                 * Left as found -- confirm the intent.
       +                 */
       +                if(memcmp(b->rp + 6, anyea, 6) == 0 ||
       +                memcmp(b->rp + 0, c->ea, 6) == 0){
       +                        h = allocb(BLEN(b));
       +                        /* copy the payload itself: it starts at rp, not wp */
       +                        memcpy(h->rp, b->rp, BLEN(b));
       +                        h->wp += BLEN(b);
       +                        h->flag |= Btcpck|Budpck|Bpktck;
       +                        etheriq(e, h, 1);
       +                }
       +                if(write(c->fd, b->rp, BLEN(b)) == -1)
       +                        c->txerrs++;
       +                freeb(b);
       +        }
       +}
       +
       +/*
       + * ifstat read: report this interface's transmit error count.
       + * The controller comes from e->ctlr; a is only the caller's
       + * destination buffer for readstr.
       + */
       +static long
       +tapifstat(Ether *e, void *a, long n, ulong offset)
       +{
       +        char buf[128];
       +        Ctlr *c;
       +
       +        c = e->ctlr;
       +        snprint(buf, sizeof buf, "txerrors: %d\n", c->txerrs);
       +        return readstr(offset, a, n, buf);
       +}
       +
       +/* Attach hook: start the "taprecv" kproc running taprecvkproc on e. */
       +static void
       +tapattach(Ether* e)
       +{
       +        kproc("taprecv", taprecvkproc, e);
       +}
       +
       +/*
       + * Configure e from the next ve[] entry that is a tap device.
       + * cve persists across calls so each entry is consumed at most once,
       + * whether or not it could be set up.
       + * Returns 0 on success, -1 when no entry is left or setup fails.
       + */
       +static int
       +tappnp(Ether* e)
       +{
       +        Ctlr c;
       +        static int cve = 0;
       +
       +        while(cve < nve && ve[cve].tap == 0)
       +                cve++;
       +        if(cve == nve)
       +                return -1;
       +
       +        memset(&c, 0, sizeof c);
       +        c.fd = setup(ve[cve].dev);
       +        memcpy(c.ea, ve[cve].ea, Eaddrlen);
       +        if(c.fd== -1){
       +                iprint("ve: tap failed to initialize\n");
       +                cve++;
       +                return -1;
       +        }
       +        e->ctlr = malloc(sizeof c);
       +        if(e->ctlr == nil){
       +                /* don't copy through nil; give the descriptor back */
       +                iprint("ve: tap out of memory\n");
       +                close(c.fd);
       +                cve++;
       +                return -1;
       +        }
       +        memcpy(e->ctlr, &c, sizeof c);
       +        e->tbdf = BUSUNKNOWN;
       +        memcpy(e->ea, ve[cve].ea, Eaddrlen);
       +        e->attach = tapattach;
       +        e->transmit = taptransmit;
       +        e->ifstat = tapifstat;
       +        e->ni.arg = e;
       +        e->ni.link = 1;
       +        cve++;
       +        return 0;
       +}
       +
       +/* Register tappnp with addethercard under the name "tap". */
       +void
       +ethertaplink(void)
       +{
       +        addethercard("tap", tappnp);
       +}
 (DIR) diff --git a/src/9vx/fossil.9 b/src/9vx/fossil.9
       Binary files differ.
 (DIR) diff --git a/src/9vx/main.c b/src/9vx/main.c
       @@ -25,13 +25,24 @@
        #include        "arg.h"
        #include        "tos.h"
        
       +#include "fs.h"
       +
       +#include "netif.h"
       +#include "etherif.h"
       +#include "vether.h"
       +
        #define Image IMAGE
        #include        "draw.h"
        #include        "memdraw.h"
        #include        "cursor.h"
        #include        "screen.h"
        
       +#define        BOOTLINELEN        64
       +#define        BOOTARGSLEN        (3584-0x200-BOOTLINELEN)
       +#define        MAXCONF                100
       +
        extern Dev ipdevtab;
       +extern Dev pipdevtab;
        extern Dev drawdevtab;
        extern Dev fsdevtab;
        extern Dev audiodevtab;
       @@ -42,8 +53,14 @@ char*        argv0;
        char*        conffile = "9vx";
        Conf        conf;
        
       +static char*        inifile;
       +static char        inibuf[BOOTARGSLEN];
       +static char        *iniline[MAXCONF];
        static int        bootboot;        /* run /boot/boot instead of bootscript */
       +static int        nofork;        /* do not fork at init */
        static int        initrc;        /* run rc instead of init */
       +static int        nogui;        /* do not start the gui */
       +static int        usetty;        /* use tty for input/output */
        static char*        username;
        static Mach mach0;
        
       @@ -56,13 +73,19 @@ static int singlethread;
        static void        bootinit(void);
        static void        siginit(void);
        
       +static int        readini(char *fn);
       +static void        inifields(void (*fp)(char*, char*));
       +static void        iniopt(char *name, char *value);
       +static void        inienv(char *name, char *value);
       +
        static char*        getuser(void);
        static char*        findroot(void);
        
        void
        usage(void)
        {
       -        fprint(2, "usage: 9vx [-gt] [-r root] [-u user]\n");
       +        // TODO(yy): add debug and other options by ron
       +        fprint(2, "usage: 9vx [-p file.ini] [-bfgit] [-n [tap] [netdev]] [-m macaddr] [-r root] [-u user]\n");
                exit(1);
        }
        
       @@ -74,9 +97,8 @@ nop(void)
        int
        main(int argc, char **argv)
        {
       -        int usetty;
       -        int nogui;
       -        int nofork;
       +        int vetap;
       +        char *vedev;
                char buf[1024];
                
                /* Minimal set up to make print work. */
       @@ -87,6 +109,7 @@ main(int argc, char **argv)
                nogui = 0;
                nofork = 0;
                usetty = 0;
       +        nve = 0;
                localroot = nil;
                ARGBEGIN{
                /* debugging options */
       @@ -102,9 +125,6 @@ main(int argc, char **argv)
                case 'K':
                        tracekdev++;
                        break;
       -        case 'F':
       -                nofork = 1;
       -                break;
                case 'M':
                        tracemmu++;
                        break;
       @@ -125,6 +145,9 @@ main(int argc, char **argv)
                case 'b':
                        bootboot = 1;
                        break;
       +        case 'f':
       +                nofork = 1;
       +                break;
                case 'g':
                        nogui = 1;
                        usetty = 1;
       @@ -132,6 +155,26 @@ main(int argc, char **argv)
                case 'i':
                        initrc = 1;
                        break;
       +        case 'p':
       +                inifile = EARGF(usage());
       +                break;
       +        case 'm':
       +                setmac(EARGF(usage()));
       +                break;
       +        case 'n':
       +                vetap = 0;
       +                vedev = ARGF();
       +                if(vedev != nil && strcmp(vedev, "tap") == 0){
       +                        vetap = 1;
       +                        vedev = ARGF();
       +                }
       +                if(vedev != nil && vedev[0] == '-'){
       +                        vedev = nil;
       +                        argc++;
       +                        argv--;
       +                }
       +                addve(vedev, vetap);
       +                break;
                case 'r':
                        localroot = EARGF(usage());
                        break;
       @@ -148,6 +191,13 @@ main(int argc, char **argv)
                if(argc != 0)
                        usage();
                
       +        if(inifile){
       +                if(readini(inifile) != 0)
       +                        panic("error reading config file %s", inifile);
       +                conffile=inifile;
       +                inifields(&iniopt);
       +        }
       +
                if(!bootboot){
                        if(localroot == nil && (localroot = findroot()) == nil)
                                panic("cannot find plan 9 root; use -r");
       @@ -188,14 +238,34 @@ main(int argc, char **argv)
                /*
                 * Debugging: tell user what options we guessed.
                 */
       -        print("9vx %s-r %s -u %s\n", usetty ? "-t " : "", localroot, username);
       +        print("9vx ");
       +        if(inifile)
       +                print("-p %s ", inifile);
       +        if(bootboot | nofork | nogui | initrc | usetty)
       +                print("-%s%s%s%s%s ", bootboot ? "b" : "", nofork ? "f " : "",
       +                        nogui ? "g" : "", initrc ? "i " : "", usetty ? "t " : "");
       +        for(int i=0; i<nve; i++){
       +                print("-n %s", ve[i].tap ? "tap ": "");
       +                if(ve[i].dev != nil)
       +                        print("%s ", ve[i].dev);
       +                if(ve[i].mac != nil)
       +                        print("-m %s ", ve[i].mac);
       +        }
       +        print("-r %s -u %s\n", localroot, username);
       +
       +        if(nve == 0)
       +                ipdevtab = pipdevtab;
        
                printinit();
                procinit0();
                initseg();
       +        if(nve > 0)
       +                links();
       +
                chandevreset();
                if(!singlethread){
       -                makekprocdev(&ipdevtab);
       +                if(nve == 0)
       +                        makekprocdev(&ipdevtab);
                        makekprocdev(&fsdevtab);
                        makekprocdev(&drawdevtab);
                        makekprocdev(&audiodevtab);
       @@ -218,6 +288,144 @@ main(int argc, char **argv)
        }
        
        /*
       + *  read configuration file
       + */
       +/*
       + * Load the configuration file fn into inibuf, normalise its
       + * whitespace, drop comment and blank lines, and split the result
       + * into iniline[].  fn "-" means standard input (descriptor 0),
       + * which is left open for its other users.
       + * Returns 0 on success, -1 if the file cannot be opened or is empty.
       + */
       +int
       +readini(char *fn)
       +{
       +        int blankline, incomment, inspace, n, fd;
       +        char *cp, *p, *q;
       +
       +        if(strcmp(fn, "-") == 0)
       +                fd = 0;
       +        else if((fd = open(fn, OREAD)) < 0)
       +                return -1;
       +
       +        cp = inibuf;
       +        *cp = 0;
       +        n = read(fd, cp, BOOTARGSLEN-1);
       +        if(fd != 0)
       +                close(fd);
       +        if(n <= 0)
       +                return -1;
       +
       +        cp[n] = 0;
       +
       +        /*
       +         * Strip out '\r', change '\t' -> ' '.
       +         * Change runs of spaces into single spaces.
       +         * Strip out trailing spaces, blank lines.
       +         *
       +         * We do this before we make the copy so that if we
       +         * need to change the copy, it is already fairly clean.
       +         * The main need is in the case when plan9.ini has been
       +         * padded with lots of trailing spaces, as is the case
       +         * for those created during a distribution install.
       +         */
       +        p = cp;
       +        blankline = 1;
       +        incomment = inspace = 0;
       +        for(q = cp; *q; q++){
       +                if(*q == '\r')
       +                        continue;
       +                if(*q == '\t')
       +                        *q = ' ';
       +                if(*q == ' '){
       +                        inspace = 1;
       +                        continue;
       +                }
       +                if(*q == '\n'){
       +                        if(!blankline){
       +                                if(!incomment)
       +                                        *p++ = '\n';
       +                                blankline = 1;
       +                        }
       +                        incomment = inspace = 0;
       +                        continue;
       +                }
       +                /* a pending space is only emitted between two words */
       +                if(inspace){
       +                        if(!blankline && !incomment)
       +                                *p++ = ' ';
       +                        inspace = 0;
       +                }
       +                /* '#' as the first non-blank character starts a comment line */
       +                if(blankline && *q == '#')
       +                        incomment = 1;
       +                blankline = 0;
       +                if(!incomment)
       +                        *p++ = *q;
       +        }
       +        if(p > cp && p[-1] != '\n')
       +                *p++ = '\n';
       +        *p++ = 0;
       +
       +        getfields(cp, iniline, MAXCONF, 0, "\n");
       +
       +        return 0;
       +}
       +
       +/*
       + * Call fp(name, value) for every "name=value" line in iniline[].
       + * Lines without '=' are skipped.  The line is split in place for
       + * the duration of the call; names of NAMELEN or more characters
       + * are presented truncated to NAMELEN-1.  Both modifications are
       + * undone afterwards so the table can be walked again.
       + */
       +void
       +inifields(void (*fp)(char*, char*))
       +{
       +        int i;
       +        char *cp, *cut, saved;
       +
       +        for(i = 0; i < MAXCONF; i++){
       +                if(!iniline[i])
       +                        break;
       +                cp = strchr(iniline[i], '=');
       +                if(cp == 0)
       +                        continue;
       +                *cp++ = 0;
       +                cut = nil;
       +                saved = 0;
       +                if(cp - iniline[i] >= NAMELEN+1){
       +                        /* remember the byte we overwrite so it can be put back */
       +                        cut = iniline[i]+NAMELEN-1;
       +                        saved = *cut;
       +                        *cut = 0;
       +                }
       +                (fp)(iniline[i], cp);
       +                if(cut != nil)
       +                        *cut = saved;
       +                *(cp-1) = '=';
       +        }
       +}
       +
       +/*
       + * Apply one configuration entry to the start-up options.
       + * A leading '*' on the name is ignored.  localroot and user only
       + * take effect when not already set.  For netdev, the value is
       + * either "tap", "tap <device>" or "<device>", mirroring the -n
       + * command line option; a bare "tap" passes a nil device to addve.
       + * Unknown names are ignored.
       + */
       +void
       +iniopt(char *name, char *value)
       +{
       +        char *vedev;
       +        int vetap;
       +
       +        if(*name == '*')
       +                name++;
       +        if(strcmp(name, "bootboot") == 0)
       +                bootboot = 1;
       +        else if(strcmp(name, "initrc") == 0)
       +                initrc = 1;
       +        else if(strcmp(name, "nofork") == 0)
       +                nofork = 1;
       +        else if(strcmp(name, "localroot") == 0 && !localroot)
       +                localroot = value;
       +        else if(strcmp(name, "user") == 0 && !username)
       +                username = value;
       +        else if(strcmp(name, "usetty") == 0)
       +                usetty = 1;
       +        else if(strcmp(name, "macaddr") == 0)
       +                setmac(value);
       +        else if(strcmp(name, "netdev") == 0){
       +                vetap = 0;
       +                vedev = value;
       +                if(strcmp(value, "tap") == 0){
       +                        /* no device named: never step past the terminator */
       +                        vetap = 1;
       +                        vedev = nil;
       +                }else if(strncmp(value, "tap ", 4) == 0){
       +                        vetap = 1;
       +                        vedev = value + 4;
       +                }
       +                addve(vedev, vetap);
       +        }
       +        else if(strcmp(name, "nogui") == 0){
       +                nogui = 1;
       +                usetty = 1;
       +        }
       +}
       +
       +/*
       + * inifields callback: export name=value with ksetenv, except for
       + * entries whose name starts with '*'.
       + */
       +void
       +inienv(char *name, char *value)
       +{
       +        if(*name != '*')
       +                ksetenv(name, value, 0);
       +}
       +
       +/*
         * Search for Plan 9 /386/bin/rc to find root.
         */
        static char*
       @@ -228,8 +436,7 @@ findroot(void)
                char buf[1024];
                char *dir[] = {
                        cwd,
       -                "/Users/rsc/9vx",
       -                "/home/rsc/plan9/4e"
       +                "/usr/local/9vx"
                };
                
                if(getcwd(cwd, sizeof cwd) == nil){
       @@ -304,6 +511,10 @@ bootinit(void)
                 */
                extern uchar factotumcode[];
                extern long factotumlen;
       +        extern uchar fossilcode[];
       +        extern long fossillen;
       +        extern uchar venticode[];
       +        extern long ventilen;
        
                if(bootboot){
                        extern uchar bootcode[];
       @@ -314,6 +525,8 @@ bootinit(void)
                else
                        addbootfile("boot", (uchar*)bootscript, strlen(bootscript));
                addbootfile("factotum", factotumcode, factotumlen);
       +        addbootfile("fossil", fossilcode, fossillen);
       +        addbootfile("venti", venticode, ventilen);
        }
        
        static uchar *sp;        /* user stack of init proc */
       @@ -484,7 +697,8 @@ init0(void)
                        ksetenv("service", "terminal", 0);
                ksetenv("user", username, 0);
                ksetenv("sysname", "vx32", 0);
       -        
       +        inifields(&inienv);
       +
                /* if we're not running /boot/boot, mount / and create /srv/boot */
                if(!bootboot){
                        kbind("#Zplan9/", "/", MAFTER);
       @@ -556,8 +770,13 @@ sigsegv(int signo, siginfo_t *info, void *v)
        #elif defined(__FreeBSD__)
                mcontext_t *mc;
                mc = &uc->uc_mcontext;
       +#ifdef __i386__
                eip = mc->mc_eip;
                esp = mc->mc_esp;
       +#elif defined(__amd64__)
       +        eip = mc->mc_rip;
       +        esp = mc->mc_rsp;
       +#endif
                addr = (ulong)info->si_addr;
                if(__FreeBSD__ < 7){
                        /*
 (DIR) diff --git a/src/9vx/mmu.c b/src/9vx/mmu.c
       @@ -26,7 +26,7 @@ int tracemmu;
         * Plan 9 assumes this, and while it's not a ton of work to break that
         * assumption, it was easier not to.
         */
       -#define MEMSIZE (256<<20)
       +#define MEMSIZE (256<<20)        // same as ../a/devether.c:13 (TODO: var)
        
        static int pagefile;
        static char* pagebase;
       @@ -35,6 +35,19 @@ static Uspace uspace[16];
        static Uspace *ulist[nelem(uspace)];
        int nuspace = 1;
        
       +#ifdef __i386__
       +#define BIT32 0
       +#define HINT nil
       +#elif defined(__amd64__)
       +#ifdef linux
       +#define BIT32 MAP_32BIT
       +#define HINT nil
       +#elif defined(__FreeBSD__)
       +#define BIT32 MAP_FIXED
       +#define HINT (caddr_t)0x40000000
       +#endif
       +#endif
       +
        int
        isuaddr(void *v)
        {
       @@ -56,15 +69,14 @@ mapzero(void)
        {
                int fd, bit32;
                void *v;
       +        void *hint;
                
       -#ifdef i386
       -        bit32 = 0;
       -#else
       -        bit32 = MAP_32BIT;
       -#endif
       +        bit32 = BIT32;
       +        hint = HINT;
       +
                /* First try mmaping /dev/zero.  Some OS'es don't allow this. */
                if((fd = open("/dev/zero", O_RDONLY)) >= 0){
       -                v = mmap(nil, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0);
       +                v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0);
                        if(v != MAP_FAILED) {
                                if((uint32_t)(uintptr)v != (uintptr)v) {
                                        iprint("mmap returned 64-bit pointer %p\n", v);
       @@ -75,7 +87,7 @@ mapzero(void)
                }
                
                /* Next try an anonymous map. */
       -        v = mmap(nil, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
       +        v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
                if(v != MAP_FAILED) {
                        if((uint32_t)(uintptr)v != (uintptr)v) {
                                iprint("mmap returned 64-bit pointer %p\n", v);
 (DIR) diff --git a/src/9vx/sched.c b/src/9vx/sched.c
       @@ -174,7 +174,7 @@ struct Pwaiter
        };
        
        void
       -plock(Psleep *p)
       +__plock(Psleep *p)
        {
                int r;
        
       @@ -193,7 +193,7 @@ plock(Psleep *p)
        }
        
        void
       -punlock(Psleep *p)
       +__punlock(Psleep *p)
        {
                int r;
        
       @@ -202,7 +202,7 @@ punlock(Psleep *p)
        }
        
        void
       -psleep(Psleep *p)
       +__psleep(Psleep *p)
        {
                int r;
                Pwaiter w;
       @@ -218,7 +218,7 @@ psleep(Psleep *p)
        }
        
        void
       -pwakeup(Psleep *p)
       +__pwakeup(Psleep *p)
        {
                int r;
                Pwaiter *w;
 (DIR) diff --git a/src/9vx/sdloop.c b/src/9vx/sdloop.c
       @@ -22,6 +22,7 @@ struct Ctlr{
                Chan        *c;
                int                mode;
                uvlong        qidpath;
       +        char                fn[20];
        };
        
        static        Lock        ctlrlock;
       @@ -30,9 +31,47 @@ static        Ctlr        *ctlrtail;
        
        SDifc sdloopifc;
        
       +/*
       + * Open the backing file named by c->fn with mode c->mode the first
       + * time the controller is used; later calls find c->c set and do nothing.
       + */
       +static void
       +loopopen(Ctlr *c)
       +{
       +        if(c->c != nil)
       +                return;
       +        c->c = namec(c->fn, Aopen, c->mode, 0);
       +}
       +
        static SDev*
        looppnp(void)
        {
       +        struct stat sbuf;
       +        char c, c2;
       +        char fn[20];
       +
       +        for(c = 'a'; c <= 'j'; ++c){
       +                sprint(fn, "#Z/dev/sd%c", c);
       +                if(stat(fn+2, &sbuf) == 0)
       +                        loopdev(fn, ORDWR);
       +        }
       +        for(c = '0'; c <= '9'; ++c){
       +                sprintf(fn, "#Z/dev/sd%c",c);
       +                if(stat(fn+2, &sbuf) == 0)
       +                        loopdev(fn, ORDWR);
       +        }
       +        for(c = 'a'; c <= 'j'; ++c){
       +                sprint(fn, "#Z/dev/hd%c", c);
       +                if(stat(fn+2, &sbuf) == 0)
       +                        loopdev(fn, ORDWR);
       +        }
       +        for(c = '0'; c <= '9'; ++c){
       +                sprint(fn, "#Z/dev/wd%c", c);
       +                if(stat(fn+2, &sbuf) == 0)
       +                        loopdev(fn, ORDWR);
       +        }
       +        for(c = '0'; c <= '8'; ++c){
       +                for(c2 = '0'; c2 <= '8'; ++c2){
       +                        sprint(fn, "#Z/dev/cciss/c%cd%c", c, c2);
       +                        if(stat(fn+2, &sbuf) == 0)
       +                                loopdev(fn, ORDWR);
       +                }
       +        }
                return nil;
        }
        
       @@ -69,6 +108,7 @@ looponline(SDunit *unit)
        
                sdev = unit->dev;
                ctlr = sdev->ctlr;
       +        loopopen(ctlr);
                c = ctlr->c;
                n = devtab[c->type]->stat(c, buf, sizeof buf);
                if(convM2D(buf, n, &dir, nil) == 0)
       @@ -99,6 +139,7 @@ looprio(SDreq *r)
                unit = r->unit;
                sdev = unit->dev;
                ctlr = sdev->ctlr;
       +        loopopen(ctlr);
                cmd = r->cmd;
        
                if((status = sdfakescsi(r, nil, 0)) != SDnostatus){
       @@ -141,6 +182,7 @@ looprctl(SDunit *unit, char *p, int l)
                char *e, *op;
                
                ctlr = unit->dev->ctlr;
       +        loopopen(ctlr);
                e = p+l;
                op = p;
                
       @@ -170,7 +212,8 @@ loopclear1(Ctlr *ctlr)
                        ctlrtail = ctlr->prev;
                unlock(&ctlrlock);
                
       -        cclose(ctlr->c);
       +        if(ctlr->c)
       +                cclose(ctlr->c);
                free(ctlr);
        }
        
       @@ -187,6 +230,7 @@ looprtopctl(SDev *s, char *p, char *e)
                char *r;
        
                c = s->ctlr;
       +        loopopen(c);
                r = "ro";
                if(c->mode == ORDWR)
                        r = "rw";
       @@ -219,9 +263,9 @@ loopdev(char *name, int mode)
                Ctlr *volatile ctlr;
                SDev *volatile sdev;
        
       -        c = namec(name, Aopen, mode, 0);
                ctlr = nil;
                sdev = nil;
       +/*
                if(waserror()){
                        cclose(c);
                        if(ctlr)
       @@ -230,6 +274,7 @@ loopdev(char *name, int mode)
                                free(sdev);
                        nexterror();
                }
       +*/
        
                ctlr = smalloc(sizeof *ctlr);
                sdev = smalloc(sizeof *sdev);
       @@ -238,9 +283,11 @@ loopdev(char *name, int mode)
                sdev->nunit = 1;
                sdev->idno = '0';
                ctlr->sdev = sdev;
       -        ctlr->c = c;
       +        strcpy(ctlr->fn, name);
                ctlr->mode = mode;
       +/*
                poperror();
       +*/
        
                lock(&ctlrlock);
                ctlr->next = nil;
       @@ -277,11 +324,5 @@ SDifc sdloopifc = {
                loopwtopctl,
        };
        
       -SDifc *sdifc[] = 
       -{
       -        &sdloopifc,
       -        nil
       -};
       -
        
        
 (DIR) diff --git a/src/9vx/u.h b/src/9vx/u.h
       @@ -17,3 +17,4 @@ typedef int socklen_t;
        #define nil ((void*)0)
        #define sleep _ksleep
        #define syscall _ksyscall
       +/* Route atoi through strtol with base 0, so a 0x or leading-0 prefix selects hex or octal; the result is a long. */
       +#define atoi(x) strtol(x, 0, 0)
 (DIR) diff --git a/src/9vx/venti.9 b/src/9vx/venti.9
       Binary files differ.
 (DIR) diff --git a/src/9vx/vether.c b/src/9vx/vether.c
       @@ -0,0 +1,122 @@
       +#include "u.h"
       +#include "mem.h"
       +#include "lib.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "error.h"
       +#include "ip/ip.h"
       +#include "netif.h"
       +#include "etherif.h"
       +#include "vether.h"
       +#include "sd.h"
       +
       +extern int nettap;
       +extern void ethertaplink(void);
       +extern void etherpcaplink(void);
       +extern void ethermediumlink(void);
       +extern void loopbackmediumlink(void);
       +extern void netdevmediumlink(void);
       +
       +extern void ilinit(Fs*);
       +extern void tcpinit(Fs*);
       +extern void udpinit(Fs*);
       +extern void ipifcinit(Fs*);
       +extern void icmpinit(Fs*);
       +extern void icmp6init(Fs*);
       +extern void greinit(Fs*);
       +extern void ipmuxinit(Fs*);
       +extern void espinit(Fs*);
       +
       +extern SDifc sdloopifc;
       +extern SDifc sdaoeifc;
       +
       +/*
       + * Assign an explicit MAC address to the most recently added virtual
       + * card (ve[nve-1]).  macaddr holds Eaddrlen hex octets, each followed
       + * by a one-character separator; the string pointer itself is kept in
       + * .mac and the parsed octets are stored in .ea.  Does nothing when no
       + * card has been added yet or macaddr is nil.  Octets missing from a
       + * short string are stored as 0.
       + */
       +void
       +setmac(char *macaddr)
       +{
       +        int i;
       +        char *p, *end;
       +
       +        if(nve == 0 || macaddr == nil)
       +                return;
       +        ve[nve-1].mac = macaddr;
       +        p = macaddr;
       +        for(i = 0; i < Eaddrlen; i++){
       +                ve[nve-1].ea[i] = (uchar)strtoul(p, &end, 16);
       +                /* skip the separator, but never step past the terminating NUL */
       +                p = *end ? end+1 : end;
       +        }
       +}
       +
       +/*
       + * Report whether address ea is already taken by a virtual card.
       + * Only cards with an index below n, or with an explicit MAC string
       + * (mac != nil), are compared.  Returns -1 if taken, 0 if free.
       + */
       +static int
       +eainuse(int n, uchar ea[Eaddrlen])
       +{
       +        int k;
       +        Vether *v;
       +
       +        for(k = 0, v = ve; k < nve; k++, v++){
       +                if(k >= n && v->mac == nil)
       +                        continue;
       +                if(memcmp(ea, v->ea, Eaddrlen) != 0)
       +                        continue;
       +                return -1;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * Append a virtual ethernet card to ve[]: record its device name and
       + * tap flag and leave its MAC unset (nil).  Panics once MaxEther cards
       + * are already present.
       + */
       +void
       +addve(char *dev, int tap)
       +{
       +        Vether *slot;
       +
       +        if(nve == MaxEther)
       +                panic("too many virtual ether cards");
       +        slot = &ve[nve++];
       +        slot->tap = tap;
       +        slot->dev = dev;
       +        slot->mac = nil;
       +}
       +
       +/*
       + * Call the three medium link hooks, then for each configured virtual
       + * card call ethertaplink() when tap == 1 and etherpcaplink() otherwise.
       + * A card without an explicit MAC (mac == nil) is given the next unused
       + * address, counting the last octet up from 00:00:09:00:00:00; ea is
       + * static, so the search continues where the previous card left off.
       + */
       +void
       +links(void)
       +{
       +        static uchar ea[Eaddrlen] = {0x00, 0x00, 0x09, 0x00, 0x00, 0x00};
       +        int i;
       +
       +        ethermediumlink();
       +        loopbackmediumlink();
       +        netdevmediumlink();
       +        for(i = 0; i < nve; i++){
       +                if(ve[i].mac == nil){
       +                        /* bump the last octet until the address is free */
       +                        while(eainuse(i, ea))
       +                                ea[5]++;
       +                        memcpy(ve[i].ea, ea, Eaddrlen);
       +                }
       +                if(ve[i].tap == 1)
       +                        ethertaplink();
       +                else
       +                        etherpcaplink();
       +        }
       +}
       +
       +/*
       + * nil-terminated table of protocol initialisation functions, each
       + * taking an Fs*.  The consumer is not in this file; presumably the IP
       + * stack calls them in this order at start-up -- confirm before reordering.
       + */
       +void (*ipprotoinit[])(Fs*) = {
       +        ilinit,
       +        tcpinit,
       +        udpinit,
       +        ipifcinit,
       +        icmpinit,
       +        icmp6init,
       +        greinit,
       +        ipmuxinit,
       +        espinit,
       +        nil,
       +};
       +
       +/*
       + * Parse six hex octets, each followed by a one-character separator,
       + * from ma into ea.  Returns 0 on success and -1 when ma is nil or an
       + * octet is missing (no hex digits where one is expected), in which
       + * case ea may be partially written.
       + */
       +int
       +eafrom(char *ma, uchar ea[6])
       +{
       +        int i;
       +        char *end;
       +
       +        if(ma == nil)
       +                return -1;
       +        for(i = 0; i < 6; i++){
       +                ea[i] = (uchar)strtoul(ma, &end, 16);
       +                if(end == ma)        /* nothing parsed: string too short or malformed */
       +                        return -1;
       +                /* skip the separator, but never step past the terminating NUL */
       +                ma = *end ? end+1 : end;
       +        }
       +        return 0;
       +}
       +
       +/* nil-terminated table of SD interface descriptors: sdloopifc and sdaoeifc */
       +SDifc *sdifc[] = {
       +        &sdloopifc,
       +        &sdaoeifc,
       +        nil,
       +};
 (DIR) diff --git a/src/9vx/vether.h b/src/9vx/vether.h
       @@ -0,0 +1,15 @@
       +/* One configured virtual ethernet card; entries live in ve[] and are filled by addve()/setmac(). */
       +typedef struct Vether Vether;
       +struct Vether
       +{
       +        int        tap;        /* links() uses ethertaplink() when this is 1, etherpcaplink() otherwise */
       +        char        *dev;        /* device name given to addve() */
       +        char        *mac;        /* MAC string given to setmac(); nil when none was supplied */
       +        uchar ea[Eaddrlen];        /* binary ethernet address */
       +};
       +
       +/*
       + * Table of configured virtual cards and the number in use.
       + * NOTE(review): these are definitions, not extern declarations, so
       + * every file that includes this header carries its own tentative
       + * definition and the build relies on the linker merging common
       + * symbols; consider `extern` here plus one definition in a .c file.
       + */
       +Vether ve[MaxEther+1];
       +int nve;
       +
       +void        setmac(char*);
       +void        addve(char*, int);
       +void        links(void);
 (DIR) diff --git a/src/libvx32/Makefrag b/src/libvx32/Makefrag
       @@ -1,8 +1,12 @@
        ifeq ($(ARCH),x86_64)
        VX32_RUN = run64.o
        else
       +ifeq ($(ARCH),amd64)
       +VX32_RUN = run64.o
       +else
        VX32_RUN = run32.o 
        endif
       +endif
        
        ifeq ($(OS),darwin)
        VX32_RUN := $(VX32_RUN) darwin-asm.o
 (DIR) diff --git a/src/libvx32/freebsd.c b/src/libvx32/freebsd.c
       @@ -20,18 +20,34 @@
        #warning "libvx32 and FreeBSD 5 and 6's libpthread are not compatible."
        #endif
        
       +/*
       + * Store a segment base address in a descriptor: the low 24 bits go
       + * into sd_lobase and the remaining bits (base >> 24) into sd_hibase.
       + * Only the descriptor type differs between i386 and amd64.
       + */
       +#ifdef __i386__
        static void setbase(struct segment_descriptor *desc, unsigned long base)
       +#elif defined __amd64__
       +static void setbase(struct user_segment_descriptor *desc, unsigned long base)
       +#endif
        {
                desc->sd_lobase = base & 0xffffff;
                desc->sd_hibase = base >> 24;
        }
        
       +/*
       + * Store a segment limit in a descriptor: the low 16 bits go into
       + * sd_lolimit and the remaining bits (limit >> 16) into sd_hilimit.
       + * Only the descriptor type differs between i386 and amd64.
       + */
       +#ifdef __i386__
        static void setlimit(struct segment_descriptor *desc, unsigned long limit)
       +#elif defined __amd64__
       +static void setlimit(struct user_segment_descriptor *desc, unsigned long limit)
       +#endif
        {
                desc->sd_lolimit = limit & 0xffff;
                desc->sd_hilimit = limit >> 16;
        }
        
       +/*
       +#ifdef __amd64__
       +union descriptor {
       +        struct user_segment_descriptor sd;
       +        struct gate_descriptor gd;
       +};
       +#endif
       +*/
        
        int vxemu_map(vxemu *emu, vxmmap *mm)
        {
       @@ -52,27 +68,44 @@ int vxemu_map(vxemu *emu, vxmmap *mm)
                        desc.sd.sd_def32 = 1;
                        desc.sd.sd_gran = 1;
                        if(emu->datasel == 0){
       +#ifdef __i386__
                                if ((s = i386_set_ldt(LDT_AUTO_ALLOC, &desc, 1)) < 0)
       +#elif defined __amd64__
       +                        if ((s = sysarch(I386_SET_GSBASE, &desc)) < 0)
       +#endif
                                        return -1;
                                emu->datasel = (s<<3) + 4 + 3;        // 4=LDT, 3=RPL
       -                }else if(i386_set_ldt(emu->datasel >> 3, &desc, 1) < 0)
       +#ifdef __i386__
       +                }else if (i386_set_ldt(emu->datasel >> 3, &desc, 1) < 0)
       +#elif defined __amd64__
       +                }else if (sysarch(I386_SET_GSBASE, &desc) < 0)
       +#endif
                                return -1;
        
                        // Set up the process's vxemu segment selector (for FS).
                        setbase(&desc.sd, (unsigned long)emu);
                        setlimit(&desc.sd, (VXCODEBUFSIZE - 1) >> VXPAGESHIFT);
                        if(emu->emusel == 0){
       +#ifdef __i386__
                                if ((s = i386_set_ldt(LDT_AUTO_ALLOC, &desc, 1)) < 0)
       +#elif defined __amd64__
       +                        if ((s = sysarch(I386_SET_GSBASE, &desc)) < 0)
       +#endif
                                        return -1;
                                emu->emusel = (s<<3) + 4 + 3;        // 4=LDT, 3=RPL
       -                }else if(i386_set_ldt(emu->emusel >> 3, &desc, 1) < 0)
       +#ifdef __i386__
       +                }else if (i386_set_ldt(emu->emusel >> 3, &desc, 1) < 0)
       +#elif defined __amd64__
       +                }else if (sysarch(I386_SET_GSBASE, &desc) < 0)
       +#endif
                                return -1;
        
                        emu->ldt_base = (uintptr_t)mm->base;
                        emu->ldt_size = mm->size;
                }
        
       -#ifdef __x86_64
       +#ifdef __amd64__
       +/*
                // Set up 32-bit mode code and data segments (not vxproc-specific),
                // giving access to the full low 32-bit of linear address space.
                // The code segment is necessary to get into 32-bit compatibility mode;
       @@ -80,11 +113,9 @@ int vxemu_map(vxemu *emu, vxmmap *mm)
                // doesn't give 64-bit processes a "real" data segment by default
                // but instead just loads zero into the data segment selectors!
                emu->runptr.sel = FLATCODE;
       -        desc.entry_number = emu->runptr.sel / 8;
       -        desc.base_addr = 0;
       -        desc.limit = 0xfffff;
       -        desc.contents = MODIFY_LDT_CONTENTS_CODE;
       -        if (modify_ldt(1, &desc, sizeof(desc)) < 0)
       +        setbase(&desc.sd, 0);
       +        setlimit(&desc.sd, 0xfffff);
       +        if ((s = sysarch(I386_SET_GSBASE, &desc)) < 0)
                        return -1;
        
                desc.entry_number = FLATDATA / 8;
       @@ -97,6 +128,7 @@ int vxemu_map(vxemu *emu, vxmmap *mm)
                extern void vxrun_return();
                asm volatile("movw %%cs,%0" : "=r" (emu->retptr.sel));
                emu->retptr.ofs = (uint32_t)(intptr_t)vxrun_return;
       +*/
        #endif
        
                return 0;
       @@ -122,28 +154,35 @@ static void dumpmcontext(mcontext_t *ctx, uint32_t cr2)
                        "r12 %016lx  r13 %016lx\nr14 %016lx  r15 %016lx\n"
                        "rip %016lx  efl %016lx  cs %04x  ss %04x\n"
                        "err %016lx  trapno %016lx  cr2 %016lx\n",
       -                ctx->rax, ctx->rbx, ctx->rcx, ctx->rdx,
       -                ctx->rsi, ctx->rdi, ctx->rbp, ctx->rsp,
       -                ctx->r8, ctx->r9, ctx->r10, ctx->r11,
       -                ctx->r12, ctx->r13, ctx->r14, ctx->r15,
       -                ctx->rip, ctx->eflags, ctx->cs, ctx->__pad0,
       -                ctx->err, ctx->trapno, ctx->cr2);
       +                ctx->mc_rax, ctx->mc_rbx, ctx->mc_rcx, ctx->mc_rdx,
       +                ctx->mc_rsi, ctx->mc_rdi, ctx->mc_rbp, ctx->mc_rsp,
       +                ctx->mc_r8, ctx->mc_r9, ctx->mc_r10, ctx->mc_r11,
       +                ctx->mc_r12, ctx->mc_r13, ctx->mc_r14, ctx->mc_r15,
       +                ctx->mc_rip, ctx->mc_rflags, ctx->mc_cs, ctx->mc_ss,
       +                ctx->mc_err, ctx->mc_trapno, cr2);
        #endif
        }
        
       +/*
       + * Reload floating-point state from `state`.  fmt selects the
       + * instruction: _MC_FPFMT_387 uses frstor (compiled on i386 only),
       + * _MC_FPFMT_XMM uses fxrstor on a 16-byte aligned copy of the
       + * 512-byte area; any other format aborts.
       + */
        static void
        fprestore(int *state, int fmt)
        {
       +#ifdef __i386__
                if(fmt == _MC_FPFMT_387)
                        asm volatile("frstor 0(%%eax); fwait\n" : : "a" (state) : "memory");
       -        else if(fmt == _MC_FPFMT_XMM){
       +        else
       +#endif
       +        if(fmt == _MC_FPFMT_XMM){
                        /* Have to 16-align the 512-byte state */
                        char buf[512+16], *p;
                        p = buf;
       +                /* parses as (16 - (long)p) & 15, which equals 16 - ((long)p & 15) whenever the guard below holds */
                        if((long)p&15)
                                p += 16 - (long)p&15;
                        memmove(p, state, 512);
       +#ifdef __i386__
                        asm volatile("fxrstor 0(%%eax); fwait\n" : : "a" (p) : "memory");
       +#elif defined(__amd64__)
       +                asm volatile("fxrstor 0(%%rax); fwait\n" : : "a" (p) : "memory");
       +#endif
                }else
                        abort();
        }
       @@ -167,12 +206,22 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
        
                // First sanity check vxproc segment number.
                // FreeBSD reset the register before entering the handler!
       +#ifdef __i386__
                asm("movw %"VSEGSTR",%0"
                        : "=r" (oldvs));
                vs = mc->mc_vs & 0xFFFF;        /* mc_vs #defined in os.h */
       +#elif defined(__amd64__)
       +        if (sysarch(I386_GET_GSBASE, &vs) < 0)
       +                return 0;
       +#endif
        
       +#ifdef __i386__
                if(0) vxprint("vx32_sighandler signo=%d eip=%#x esp=%#x vs=%#x currentvs=%#x\n",
                        signo, mc->mc_eip, mc->mc_esp, vs, oldvs);
       +#elif defined(__amd64__)
       +        if(0) vxprint("vx32_sighandler signo=%d rip=%#x rsp=%#x vs=%#x currentvs=%#x\n",
       +                signo, mc->mc_rip, mc->mc_rsp, vs, oldvs);
       +#endif
        
                if ((vs & 7) != 7)        // LDT, RPL=3
                        return 0;
       @@ -192,12 +241,21 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
                // Okay, we're convinced.
        
                // Find current vxproc and vxemu.
       +#ifdef __i386__
                asm("movw %"VSEGSTR",%1\n"
                        "movw %2,%"VSEGSTR"\n"
                        "movl %"VSEGSTR":%3,%0\n"
                        "movw %1,%"VSEGSTR"\n"
                        : "=r" (vxp), "=r" (oldvs)
                        : "r" (vs), "m" (((vxemu*)0)->proc));
       +#elif defined(__amd64__)
       +        asm("movw %"VSEGSTR",%1\n"
       +                "movw %2,%"VSEGSTR"\n"
       +                "movw %"VSEGSTR":%3,%0\n"
       +                "movw %1,%"VSEGSTR"\n"
       +                : "=r" (vxp), "=r" (oldvs)
       +                : "r" (vs), "m" (((vxemu*)0)->proc));
       +#endif
                emu = vxp->emu;
        
                // Get back our regular host segment register state,
       @@ -212,7 +270,11 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
                switch(signo){
                case SIGSEGV:
                        newtrap = VXTRAP_PAGEFAULT;
       +#ifdef __i386__
                        addr = (uint32_t)si->si_addr;
       +#elif defined(__amd64__)
       +                addr = (uint64_t)si->si_addr;
       +#endif
                        break;
                case SIGBUS:
                        /*
       @@ -242,7 +304,11 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
                        // before entering the signal handler.
                        addr = 0;
                        newtrap = VXTRAP_SINGLESTEP;
       +#ifdef __i386__
                        mc->mc_eflags &= ~EFLAGS_TF;        // Just in case.
       +#elif defined(__amd64__)
       +                mc->mc_rflags &= ~EFLAGS_TF;        // Just in case.
       +#endif
                        break;
        
                default:
       @@ -264,51 +330,111 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
                }
                emu->cpu_trap = newtrap;
        
       +#ifdef __i386__
                r = vxemu_sighandler(emu, mc->mc_eip);
       +#elif defined(__amd64__)
       +        r = vxemu_sighandler(emu, mc->mc_rip);
       +#endif
        
                if (r == VXSIG_SINGLESTEP){
                        // Vxemu_sighandler wants us to single step.
                        // Execution state is in intermediate state - don't touch.
       +#ifdef __i386__
                        mc->mc_eflags |= EFLAGS_TF;                // x86 TF (single-step) bit
       +#elif defined(__amd64__)
       +                mc->mc_rflags |= EFLAGS_TF;
       +#endif
                        vxrun_setup(emu);
                        return 1;
                }
        
                // Copy execution state into emu.
                if ((r & VXSIG_SAVE_ALL) == VXSIG_SAVE_ALL) {
       +#ifdef __i386__
                        emu->cpu.reg[EAX] = mc->mc_eax;
                        emu->cpu.reg[EBX] = mc->mc_ebx;
                        emu->cpu.reg[ECX] = mc->mc_ecx;
                        emu->cpu.reg[EDX] = mc->mc_edx;
       -                emu->cpu.reg[ESI] =  mc->mc_esi;
       +                emu->cpu.reg[ESI] = mc->mc_esi;
                        emu->cpu.reg[EDI] = mc->mc_edi;
                        emu->cpu.reg[ESP] = mc->mc_esp;        // or esp_at_signal ???
                        emu->cpu.reg[EBP] = mc->mc_ebp;
                        emu->cpu.eflags = mc->mc_eflags;
       +#elif defined(__amd64__)
       +                emu->cpu.reg[EAX] = mc->mc_rax;
       +                emu->cpu.reg[EBX] = mc->mc_rbx;
       +                emu->cpu.reg[ECX] = mc->mc_rcx;
       +                emu->cpu.reg[EDX] = mc->mc_rdx;
       +                emu->cpu.reg[ESI] = mc->mc_rsi;
       +                emu->cpu.reg[EDI] = mc->mc_rdi;
       +                emu->cpu.reg[ESP] = mc->mc_rsp;        // or esp_at_signal ???
       +                emu->cpu.reg[EBP] = mc->mc_rbp;
       +                emu->cpu.eflags = mc->mc_rflags;
       +#endif
                } else if (r & VXSIG_SAVE_ALL) {
                        if (r & VXSIG_SAVE_EAX)
       +#ifdef __i386__
                                emu->cpu.reg[EAX] = mc->mc_eax;
       +#elif defined(__amd64__)
       +                        emu->cpu.reg[EAX] = mc->mc_rax;
       +#endif
                        if (r & VXSIG_SAVE_EBX)
       +#ifdef __i386__
                                emu->cpu.reg[EBX] = mc->mc_ebx;
       +#elif defined(__amd64__)
       +                        emu->cpu.reg[EBX] = mc->mc_rbx;
       +#endif
                        if (r & VXSIG_SAVE_ECX)
       +#ifdef __i386__
                                emu->cpu.reg[ECX] = mc->mc_ecx;
       +#elif defined(__amd64__)
       +                        emu->cpu.reg[ECX] = mc->mc_rcx;
       +#endif
                        if (r & VXSIG_SAVE_EDX)
       +#ifdef __i386__
                                emu->cpu.reg[EDX] = mc->mc_edx;
       +#elif defined(__amd64__)
       +                        emu->cpu.reg[EDX] = mc->mc_rdx;
       +#endif
                        if (r & VXSIG_SAVE_ESI)
       +#ifdef __i386__
                                emu->cpu.reg[ESI] =  mc->mc_esi;
       +#elif defined(__amd64__)
       +                        emu->cpu.reg[ESI] =  mc->mc_rsi;
       +#endif
                        if (r & VXSIG_SAVE_EDI)
       +#ifdef __i386__
                                emu->cpu.reg[EDI] = mc->mc_edi;
       +#elif defined(__amd64__)
       +                        emu->cpu.reg[EDI] = mc->mc_rdi;
       +#endif
                        if (r & VXSIG_SAVE_ESP)
       +#ifdef __i386__
                                emu->cpu.reg[ESP] = mc->mc_esp;        // or esp_at_signal ???
       +#elif defined(__amd64__)
       +                        emu->cpu.reg[ESP] = mc->mc_rsp;        // or esp_at_signal ???
       +#endif
                        if (r & VXSIG_SAVE_EBP)
       +#ifdef __i386__
                                emu->cpu.reg[EBP] = mc->mc_ebp;
       +#elif defined(__amd64__)
       +                        emu->cpu.reg[EBP] = mc->mc_rbp;
       +#endif
                        if (r & VXSIG_SAVE_EFLAGS)
       +#ifdef __i386__
                                emu->cpu.eflags = mc->mc_eflags;
       +#elif defined(__amd64__)
       +                        emu->cpu.eflags = mc->mc_rflags;
       +#endif
                }
                r &= ~VXSIG_SAVE_ALL;
        
                if (r & VXSIG_SAVE_EBX_AS_EIP)
       +#ifdef __i386__
                        emu->cpu.eip = mc->mc_ebx;
       +#elif defined(__amd64__)
       +                emu->cpu.eip = mc->mc_rbx;
       +#endif
                r &= ~VXSIG_SAVE_EBX_AS_EIP;
        
                if (r & VXSIG_ADD_COUNT_TO_ESP) {
       @@ -327,7 +453,11 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
                                return 0;
                        emu->cpu.traperr = mc->mc_err;
                        emu->cpu.trapva = addr;
       +#ifdef __i386__
                        memmove(&mc->mc_gs, &emu->trapenv->mc_gs, 19*4);
       +#elif defined(__amd64__)
       +                memmove(&mc->mc_onstack, &emu->trapenv->mc_onstack, sizeof(mcontext_t));
       +#endif
                        return 1;
                }
        
 (DIR) diff --git a/src/libvx32/run64.S b/src/libvx32/run64.S
       @@ -79,7 +79,11 @@ vxrun:
                movl        VXEMU_EDI(%r8),%edi
        
                // Run translated code
       +#ifndef __FreeBSD__
                ljmpl        *VXEMU_RUNPTR(%r8)        // 'ljmpq' doesn't work - gas bug??
       +#else
       +        ljmpq        *VXEMU_RUNPTR(%r8)
       +#endif
        
        
        // Return from running translated code to the normal host environment.