From cd83bbd2aaa80ac5f185b8f8f6af7f3bcd218548 Mon Sep 17 00:00:00 2001 From: Bruce M Simpson Date: Sun, 4 Feb 2007 16:32:46 +0000 Subject: [PATCH] Implement ifnet cloning for tun(4)/tap(4). Make devfs cloning a sysctl/tunable which defaults to on. If devfs cloning is enabled, only the super-user may create tun(4)/tap(4)/vmnet(4) instances. Devfs cloning is still enabled by default; it may be disabled from the loader or via sysctl with "net.link.tap.devfs_cloning" and "net.link.tun.devfs_cloning". Disabling its use affects potentially all tun(4)/tap(4) consumers including OpenSSH, OpenVPN and VMware. PR: 105228 (potentially also 90413, 105570) Submitted by: Landon Fuller Tested by: Andrej Tobola Approved by: core (rwatson) MFC after: 4 weeks --- share/man/man4/tap.4 | 39 +++++++++-- share/man/man4/tun.4 | 39 +++++++++-- sys/net/if_tap.c | 158 ++++++++++++++++++++++++++++++++++--------- sys/net/if_tun.c | 84 +++++++++++++++++++++-- 4 files changed, 270 insertions(+), 50 deletions(-) diff --git a/share/man/man4/tap.4 b/share/man/man4/tap.4 index c82220b2ebe8..98861d85b66d 100644 --- a/share/man/man4/tap.4 +++ b/share/man/man4/tap.4 @@ -1,7 +1,7 @@ .\" $FreeBSD$ .\" Based on PR#2411 .\" -.Dd July 9, 2000 +.Dd February 4, 2007 .Os .Dt TAP 4 .Sh NAME @@ -41,11 +41,26 @@ The network interfaces are named etc., one for each control device that has been opened. These Ethernet network interfaces persist until .Pa if_tap.ko -module is unloaded (if -.Nm -is built into your kernel, the network interfaces cannot be removed). +module is unloaded, or until removed with "ifconfig destroy" (see below). .Pp -The +.Nm +devices are created using interface cloning. +This is done using the +.Dq ifconfig tap Ns Sy N No create +command. +This is the preferred method of creating +.Nm +devices. +The same method allows removal of interfaces. +For this, use the +.Dq ifconfig tap Ns Sy N No destroy +command. 
+.Pp +If the +.Xr sysctl 8 +variable +.Va net.link.tap.devfs_cloning +is non-zero, the .Nm interface permits opens on the special control device @@ -57,9 +72,21 @@ will return a handle for the lowest unused device (use .Xr devname 3 to determine which). +.Pp +.Bf Em +Disabling the legacy devfs cloning functionality may break existing +applications which use +.Nm , +such as +.Tn VMware +and +.Xr ssh 1 . +It therefore defaults to being enabled until further notice. +.Ef +.Pp Control devices (once successfully opened) persist until .Pa if_tap.ko -is unloaded in the same way that network interfaces persist (see above). +is unloaded or the interface is destroyed. .Pp Each interface supports the usual Ethernet network interface .Xr ioctl 2 Ns s , diff --git a/share/man/man4/tun.4 b/share/man/man4/tun.4 index d6dd86213f14..e162ef431bb8 100644 --- a/share/man/man4/tun.4 +++ b/share/man/man4/tun.4 @@ -2,7 +2,7 @@ .\" $FreeBSD$ .\" Based on PR#2411 .\" -.Dd October 9, 2006 +.Dd February 4, 2007 .Dt TUN 4 .Os .Sh NAME @@ -42,11 +42,28 @@ The network interfaces are named etc., one for each control device that has been opened. These network interfaces persist until the .Pa if_tun.ko -module is unloaded (if -.Nm -is built into your kernel, the network interfaces cannot be removed). +module is unloaded, or until removed with the +.Xr ifconfig 8 +command. .Pp -The +.Nm +devices are created using interface cloning. +This is done using the +.Dq ifconfig tun Ns Sy N No create +command. +This is the preferred method of creating +.Nm +devices. +The same method allows removal of interfaces. +For this, use the +.Dq ifconfig tun Ns Sy N No destroy +command. +.Pp +If the +.Xr sysctl 8 +variable +.Va net.link.tun.devfs_cloning +is non-zero, the .Nm interface permits opens on the special control device @@ -58,6 +75,18 @@ will return a handle for the lowest unused device (use .Xr devname 3 to determine which). 
+.Pp +.Bf Em +Disabling the legacy devfs cloning functionality may break existing +applications which use +.Nm , +such as +.Xr ppp 8 +and +.Xr ssh 1 . +It therefore defaults to being enabled until further notice. +.Ef +.Pp Control devices (once successfully opened) persist until .Pa if_tun.ko is unloaded in the same way that network interfaces persist (see above). diff --git a/sys/net/if_tap.c b/sys/net/if_tap.c index 653d618bfec7..9fa10b5cb6c7 100644 --- a/sys/net/if_tap.c +++ b/sys/net/if_tap.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -93,6 +94,14 @@ static void tapifstart(struct ifnet *); static int tapifioctl(struct ifnet *, u_long, caddr_t); static void tapifinit(void *); +static int tap_clone_create(struct if_clone *, int, caddr_t); +static void tap_clone_destroy(struct ifnet *); +static int vmnet_clone_create(struct if_clone *, int, caddr_t); +static void vmnet_clone_destroy(struct ifnet *); + +IFC_SIMPLE_DECLARE(tap, 0); +IFC_SIMPLE_DECLARE(vmnet, 0); + /* character device */ static d_open_t tapopen; static d_close_t tapclose; @@ -142,6 +151,7 @@ static struct cdevsw tap_cdevsw = { static struct mtx tapmtx; static int tapdebug = 0; /* debug flag */ static int tapuopen = 0; /* allow user open() */ +static int tapdclone = 1; /* enable devfs cloning */ static SLIST_HEAD(, tap_softc) taphead; /* first device */ static struct clonedevs *tapclones; @@ -154,10 +164,87 @@ SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, "Ethernet tunnel software network interface"); SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, "Allow user to open /dev/tap (based on node permissions)"); +SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tapdclone, 0, + "Enably legacy devfs interface creation"); SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, ""); +TUNABLE_INT("net.link.tap.devfs_cloning", &tapdclone); + DEV_MODULE(if_tap, tapmodevent, NULL); +static int 
+tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + struct cdev *dev; + int i; + int extra; + + if (strcmp(ifc->ifc_name, VMNET) == 0) + extra = VMNET_DEV_MASK; + else + extra = 0; + + /* find any existing device, or allocate new unit number */ + i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, extra); + if (i) { + dev = make_dev(&tap_cdevsw, unit2minor(unit | extra), + UID_ROOT, GID_WHEEL, 0600, "%s%d", ifc->ifc_name, unit); + if (dev != NULL) { + dev_ref(dev); + dev->si_flags |= SI_CHEAPCLONE; + } + } + + tapcreate(dev); + return (0); +} + +/* vmnet devices are tap devices in disguise */ +static int +vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + return tap_clone_create(ifc, unit, params); +} + +static void +tap_destroy(struct tap_softc *tp) +{ + struct ifnet *ifp = tp->tap_ifp; + int s; + + /* Unlocked read. */ + KASSERT(!(tp->tap_flags & TAP_OPEN), + ("%s flags is out of sync", ifp->if_xname)); + + knlist_destroy(&tp->tap_rsel.si_note); + destroy_dev(tp->tap_dev); + s = splimp(); + ether_ifdetach(ifp); + if_free_type(ifp, IFT_ETHER); + splx(s); + + mtx_destroy(&tp->tap_mtx); + free(tp, M_TAP); +} + +static void +tap_clone_destroy(struct ifnet *ifp) +{ + struct tap_softc *tp = ifp->if_softc; + + mtx_lock(&tapmtx); + SLIST_REMOVE(&taphead, tp, tap_softc, tap_next); + mtx_unlock(&tapmtx); + tap_destroy(tp); +} + +/* vmnet devices are tap devices in disguise */ +static void +vmnet_clone_destroy(struct ifnet *ifp) +{ + tap_clone_destroy(ifp); +} + /* * tapmodevent * @@ -169,7 +256,6 @@ tapmodevent(module_t mod, int type, void *data) static eventhandler_tag eh_tag = NULL; struct tap_softc *tp = NULL; struct ifnet *ifp = NULL; - int s; switch (type) { case MOD_LOAD: @@ -186,6 +272,8 @@ tapmodevent(module_t mod, int type, void *data) mtx_destroy(&tapmtx); return (ENOMEM); } + if_clone_attach(&tap_cloner); + if_clone_attach(&vmnet_cloner); return (0); case MOD_UNLOAD: @@ -207,6 +295,8 @@ tapmodevent(module_t mod, int 
type, void *data) mtx_unlock(&tapmtx); EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); + if_clone_detach(&tap_cloner); + if_clone_detach(&vmnet_cloner); mtx_lock(&tapmtx); while ((tp = SLIST_FIRST(&taphead)) != NULL) { @@ -217,19 +307,7 @@ tapmodevent(module_t mod, int type, void *data) TAPDEBUG("detaching %s\n", ifp->if_xname); - /* Unlocked read. */ - KASSERT(!(tp->tap_flags & TAP_OPEN), - ("%s flags is out of sync", ifp->if_xname)); - - knlist_destroy(&tp->tap_rsel.si_note); - destroy_dev(tp->tap_dev); - s = splimp(); - ether_ifdetach(ifp); - if_free_type(ifp, IFT_ETHER); - splx(s); - - mtx_destroy(&tp->tap_mtx); - free(tp, M_TAP); + tap_destroy(tp); mtx_lock(&tapmtx); } mtx_unlock(&tapmtx); @@ -255,38 +333,63 @@ tapmodevent(module_t mod, int type, void *data) static void tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) { + char devname[SPECNAMELEN + 1]; + int i, unit, append_unit; int extra; - int i, unit; - char *device_name = name; if (*dev != NULL) return; - device_name = TAP; + /* + * If tap cloning is enabled, only the superuser can create + * an interface. + */ + if (!tapdclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0) + return; + + unit = 0; + append_unit = 0; extra = 0; + + /* We're interested in only tap/vmnet devices. 
*/ if (strcmp(name, TAP) == 0) { unit = -1; } else if (strcmp(name, VMNET) == 0) { - device_name = VMNET; - extra = VMNET_DEV_MASK; unit = -1; - } else if (dev_stdclone(name, NULL, device_name, &unit) != 1) { - device_name = VMNET; extra = VMNET_DEV_MASK; - if (dev_stdclone(name, NULL, device_name, &unit) != 1) + } else if (dev_stdclone(name, NULL, TAP, &unit) != 1) { + if (dev_stdclone(name, NULL, VMNET, &unit) != 1) { return; + } else { + extra = VMNET_DEV_MASK; + } } + if (unit == -1) + append_unit = 1; + /* find any existing device, or allocate new unit number */ i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra); if (i) { + if (append_unit) { + /* + * We were passed 'tap' or 'vmnet', with no unit specified + * so we'll need to append it now. + */ + namelen = snprintf(devname, sizeof(devname), "%s%d", name, + unit); + name = devname; + } + *dev = make_dev(&tap_cdevsw, unit2minor(unit | extra), - UID_ROOT, GID_WHEEL, 0600, "%s%d", device_name, unit); + UID_ROOT, GID_WHEEL, 0600, "%s", name); if (*dev != NULL) { dev_ref(*dev); (*dev)->si_flags |= SI_CHEAPCLONE; } } + + if_clone_create(name, namelen, NULL); } /* tapclone */ @@ -385,16 +488,7 @@ tapopen(struct cdev *dev, int flag, int mode, struct thread *td) if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT) return (ENXIO); - /* - * XXXRW: Non-atomic test-and-set of si_drv1. Currently protected - * by Giant, but the race actually exists under memory pressure as - * well even when running with Giant, as malloc() may sleep. 
- */ tp = dev->si_drv1; - if (tp == NULL) { - tapcreate(dev); - tp = dev->si_drv1; - } mtx_lock(&tp->tap_mtx); if (tp->tap_flags & TAP_OPEN) { diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c index b313b18b18e0..efd57a2ce7fa 100644 --- a/sys/net/if_tun.c +++ b/sys/net/if_tun.c @@ -45,6 +45,7 @@ #include #include +#include #include #include #include @@ -105,13 +106,22 @@ struct tun_softc { static struct mtx tunmtx; static MALLOC_DEFINE(M_TUN, TUNNAME, "Tunnel Interface"); static int tundebug = 0; +static int tundclone = 1; static struct clonedevs *tunclones; static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); +SYSCTL_DECL(_net_link); +SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, + "IP tunnel software network interface."); +SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RW, &tundclone, 0, + "Enable legacy devfs interface creation."); + +TUNABLE_INT("net.link.tun.devfs_cloning", &tundclone); + static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev); -static void tuncreate(struct cdev *dev); +static void tuncreate(const char *name, struct cdev *dev); static int tunifioctl(struct ifnet *, u_long, caddr_t); static int tuninit(struct ifnet *); static int tunmodevent(module_t, int, void *); @@ -119,6 +129,11 @@ static int tunoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *rt); static void tunstart(struct ifnet *); +static int tun_clone_create(struct if_clone *, int, caddr_t); +static void tun_clone_destroy(struct ifnet *); + +IFC_SIMPLE_DECLARE(tun, 0); + static d_open_t tunopen; static d_close_t tunclose; static d_read_t tunread; @@ -158,15 +173,45 @@ static struct cdevsw tun_cdevsw = { .d_name = TUNNAME, }; +static int +tun_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + struct cdev *dev; + int i; + + /* find any existing device, or allocate new unit number */ + i = 
clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0); + if (i) { + /* No preexisting struct cdev *, create one */ + dev = make_dev(&tun_cdevsw, unit2minor(unit), + UID_UUCP, GID_DIALER, 0600, "%s%d", ifc->ifc_name, unit); + if (dev != NULL) { + dev_ref(dev); + dev->si_flags |= SI_CHEAPCLONE; + } + } + tuncreate(ifc->ifc_name, dev); + + return (0); +} + static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) { - int u, i; + char devname[SPECNAMELEN + 1]; + int u, i, append_unit; if (*dev != NULL) return; + /* + * If tun cloning is enabled, only the superuser can create an + * interface. + */ + if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0) + return; + if (strcmp(name, TUNNAME) == 0) { u = -1; } else if (dev_stdclone(name, NULL, TUNNAME, &u) != 1) @@ -174,17 +219,29 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen, if (u != -1 && u > IF_MAXUNIT) return; /* Unit number too high */ + if (u == -1) + append_unit = 1; + else + append_unit = 0; + /* find any existing device, or allocate new unit number */ i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0); if (i) { + if (append_unit) { + namelen = snprintf(devname, sizeof(devname), "%s%d", name, + u); + name = devname; + } /* No preexisting struct cdev *, create one */ *dev = make_dev(&tun_cdevsw, unit2minor(u), - UID_UUCP, GID_DIALER, 0600, "tun%d", u); + UID_UUCP, GID_DIALER, 0600, "%s", name); if (*dev != NULL) { dev_ref(*dev); (*dev)->si_flags |= SI_CHEAPCLONE; } } + + if_clone_create(name, namelen, NULL); } static void @@ -206,6 +263,17 @@ tun_destroy(struct tun_softc *tp) free(tp, M_TUN); } +static void +tun_clone_destroy(struct ifnet *ifp) +{ + struct tun_softc *tp = ifp->if_softc; + + mtx_lock(&tunmtx); + TAILQ_REMOVE(&tunhead, tp, tun_list); + mtx_unlock(&tunmtx); + tun_destroy(tp); +} + static int tunmodevent(module_t mod, int type, void *data) { @@ -219,8 +287,10 @@ tunmodevent(module_t mod, int type, void *data) tag = 
EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); if (tag == NULL) return (ENOMEM); + if_clone_attach(&tun_cloner); break; case MOD_UNLOAD: + if_clone_detach(&tun_cloner); EVENTHANDLER_DEREGISTER(dev_clone, tag); mtx_lock(&tunmtx); @@ -281,7 +351,7 @@ tunstart(struct ifnet *ifp) /* XXX: should return an error code so it can fail. */ static void -tuncreate(struct cdev *dev) +tuncreate(const char *name, struct cdev *dev) { struct tun_softc *sc; struct ifnet *ifp; @@ -299,8 +369,8 @@ tuncreate(struct cdev *dev) ifp = sc->tun_ifp = if_alloc(IFT_PPP); if (ifp == NULL) panic("%s%d: failed to if_alloc() interface.\n", - TUNNAME, dev2unit(dev)); - if_initname(ifp, TUNNAME, dev2unit(dev)); + name, dev2unit(dev)); + if_initname(ifp, name, dev2unit(dev)); ifp->if_mtu = TUNMTU; ifp->if_ioctl = tunifioctl; ifp->if_output = tunoutput; @@ -331,7 +401,7 @@ tunopen(struct cdev *dev, int flag, int mode, struct thread *td) */ tp = dev->si_drv1; if (!tp) { - tuncreate(dev); + tuncreate(TUNNAME, dev); tp = dev->si_drv1; }