divert: declare PF_DIVERT domain and stop abusing PF_INET

The divert(4) is not a protocol of IPv4.  It is a socket to
intercept packets from ipfw(4) to userland and re-inject them
back.  It can divert and re-inject IPv4 and IPv6 packets today,
but potentially it is not limited to these two protocols.
IPPROTO_DIVERT is not one of the known IP protocols; its value
doesn't even fit into u_char.  I guess the implementation of
divert(4) was done the way it is done basically because it was
easier to do it this way, back when protocols for sockets were
intertwined with IP protocols and domains were statically
compiled in.

Moving divert(4) out of inetsw accomplished two important things:

1) IPDIVERT gets much closer to not depending on INET.
   This will be finalized in following changes.
2) Now a divert socket no longer aliases with a raw IPv4 socket.
   Domain/proto selection code won't need a hack for SOCK_RAW and
   multiple entries in inetsw implementing different flavors of
   raw socket can merge into one without requirement of raw IPv4
   being the last member of dom_protosw.

Differential revision:	https://reviews.freebsd.org/D36379
This commit is contained in:
Gleb Smirnoff 2022-08-30 15:09:21 -07:00
parent 603677334a
commit 8624f4347e
11 changed files with 74 additions and 63 deletions

View file

@ -28,7 +28,7 @@
.\" From: @(#)socket.2 8.1 (Berkeley) 6/4/93
.\" $FreeBSD$
.\"
.Dd August 26, 2022
.Dd August 30, 2022
.Dt SOCKET 2
.Os
.Sh NAME
@ -60,6 +60,7 @@ PF_LOCAL Host-internal protocols (alias for PF_UNIX),
PF_UNIX Host-internal protocols,
PF_INET Internet version 4 protocols,
PF_INET6 Internet version 6 protocols,
PF_DIVERT Firewall packet diversion/re-injection,
PF_ROUTE Internal routing protocol,
PF_KEY Internal key-management function,
PF_NETGRAPH Netgraph sockets,
@ -283,6 +284,7 @@ The socket type is not supported by the protocol.
.Xr accept 2 ,
.Xr bind 2 ,
.Xr connect 2 ,
.Xr divert 4 ,
.Xr getpeername 2 ,
.Xr getsockname 2 ,
.Xr getsockopt 2 ,

View file

@ -31,10 +31,10 @@
quit Exit program
+
# Now let's create a ng_ksocket(4) node, in the family PF_INET,
# of type SOCK_RAW, and protocol IPPROTO_DIVERT:
# Now let's create a ng_ksocket(4) node, in the family PF_DIVERT,
# of type SOCK_RAW:
+ mkpeer ksocket foo inet/raw/divert
+ mkpeer ksocket foo divert/raw/0
# Note that ``foo'' is the hook name on the socket node, which can be
# anything. The ``divert/raw/0'' is the hook name on the ksocket

View file

@ -1,6 +1,6 @@
.\" $FreeBSD$
.\"
.Dd December 17, 2004
.Dd August 30, 2022
.Dt DIVERT 4
.Os
.Sh NAME
@ -11,7 +11,7 @@
.In sys/socket.h
.In netinet/in.h
.Ft int
.Fn socket PF_INET SOCK_RAW IPPROTO_DIVERT
.Fn socket PF_DIVERT SOCK_RAW 0
.Pp
To enable support for divert sockets, place the following lines in the
kernel configuration file:
@ -30,24 +30,30 @@ ipfw_load="YES"
ipdivert_load="YES"
.Ed
.Sh DESCRIPTION
Divert sockets are similar to raw IP sockets, except that they
can be bound to a specific
Divert sockets allow intercepting and re-injecting packets flowing through
the
.Xr ipfw 4
firewall.
A divert socket can be bound to a specific
.Nm
port via the
.Xr bind 2
system call.
The IP address in the bind is ignored; only the port
number is significant.
The sockaddr argument shall be sockaddr_in with sin_port set to the
desired value.
Note that the
.Nm
port has nothing to do with TCP/UDP ports.
It is just a cookie number that makes it possible to differentiate between different
divert points in the
.Xr ipfw 4
ruleset.
A divert socket bound to a divert port will receive all packets diverted
to that port by some (here unspecified) kernel mechanism(s).
Packets may also be written to a divert port, in which case they
re-enter kernel IP packet processing.
to that port by
.Xr ipfw 4 .
Packets may also be written to a divert port, in which case they re-enter
firewall processing at the next rule.
.Pp
Divert sockets are normally used in conjunction with
.Fx Ns 's
packet filtering implementation and the
.Xr ipfw 8
program.
By reading from and writing to a divert socket, matching packets
can be passed through an arbitrary ``filter'' as they travel through
the host machine, special routing tricks can be done, etc.
@ -154,7 +160,9 @@ Packets written as incoming and having incorrect checksums will be dropped.
Otherwise, all header fields are unchanged (and therefore in network order).
.Pp
Binding to port numbers less than 1024 requires super-user access, as does
creating a socket of type SOCK_RAW.
creating a
.Nm
socket.
.Sh ERRORS
Writing to a divert socket can return these errors, along with
the usual errors possible when writing raw packets:

View file

@ -509,6 +509,17 @@ socreate(int dom, struct socket **aso, int type, int proto,
struct socket *so;
int error;
/*
* XXX: divert(4) historically abused PF_INET. Keep this compatibility
* shim until all applications have been updated.
*/
if (__predict_false(dom == PF_INET && type == SOCK_RAW &&
proto == IPPROTO_DIVERT)) {
dom = PF_DIVERT;
printf("%s uses obsolete way to create divert(4) socket\n",
td->td_proc->p_comm);
}
if (proto)
prp = pffindproto(dom, proto, type);
else

View file

@ -121,6 +121,7 @@ static const struct ng_ksocket_alias ng_ksocket_families[] = {
{ "inet", PF_INET },
{ "inet6", PF_INET6 },
{ "atm", PF_ATM },
{ "divert", PF_DIVERT },
{ NULL, -1 },
};
@ -147,7 +148,6 @@ static const struct ng_ksocket_alias ng_ksocket_protos[] = {
{ "ah", IPPROTO_AH, PF_INET },
{ "swipe", IPPROTO_SWIPE, PF_INET },
{ "encap", IPPROTO_ENCAP, PF_INET },
{ "divert", IPPROTO_DIVERT, PF_INET },
{ "pim", IPPROTO_PIM, PF_INET },
{ NULL, -1 },
};

View file

@ -1751,13 +1751,9 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
INP_WLOCK(inp);
imo = inp->inp_moptions;
/*
* If socket is neither of type SOCK_RAW or SOCK_DGRAM,
* or is a divert socket, reject it.
*/
if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
(inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
/* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. */
if (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) {
INP_WUNLOCK(inp);
return (EOPNOTSUPP);
}
@ -2717,13 +2713,9 @@ inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
error = 0;
/*
* If socket is neither of type SOCK_RAW or SOCK_DGRAM,
* or is a divert socket, reject it.
*/
if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
(inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
/* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. */
if (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)
return (EOPNOTSUPP);
switch (sopt->sopt_name) {

View file

@ -36,7 +36,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet6.h"
#include "opt_sctp.h"
#ifndef INET
#error "IPDIVERT requires INET"
#error "IPDIVERT requires INET" /* XXX! */
#endif
#include <sys/param.h>
@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@ -716,7 +717,6 @@ SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist,
static struct protosw div_protosw = {
.pr_type = SOCK_RAW,
.pr_protocol = IPPROTO_DIVERT,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_attach = div_attach,
.pr_bind = div_bind,
@ -729,6 +729,13 @@ static struct protosw div_protosw = {
.pr_sosetlabel = in_pcbsosetlabel
};
/*
 * Dedicated socket domain for divert(4), identified by PF_DIVERT.
 * It carries exactly one protosw entry, div_protosw, and is registered
 * with domain_add() and removed with domain_remove() in div_modevent().
 */
static struct domain divertdomain = {
.dom_family = PF_DIVERT,
.dom_name = "divert",
.dom_nprotosw = 1,
.dom_protosw = { &div_protosw },
};
static int
div_modevent(module_t mod, int type, void *unused)
{
@ -736,12 +743,7 @@ div_modevent(module_t mod, int type, void *unused)
switch (type) {
case MOD_LOAD:
/*
* Protocol will be initialized by pf_proto_register().
*/
err = protosw_register(&inetdomain, &div_protosw);
if (err != 0)
return (err);
domain_add(&divertdomain);
ip_divert_ptr = divert_packet;
break;
case MOD_QUIESCE:
@ -763,6 +765,9 @@ div_modevent(module_t mod, int type, void *unused)
* XXXRW: Note that there is a slight race here, as a new
* socket open request could be spinning on the lock and then
* we destroy the lock.
*
* XXXGL: One more reason this code is incorrect is that it
* checks only the current vnet.
*/
INP_INFO_WLOCK(&V_divcbinfo);
if (V_divcbinfo.ipi_count != 0) {
@ -771,7 +776,7 @@ div_modevent(module_t mod, int type, void *unused)
break;
}
ip_divert_ptr = NULL;
err = protosw_unregister(&div_protosw);
domain_remove(&divertdomain);
INP_INFO_WUNLOCK(&V_divcbinfo);
#ifndef VIMAGE
div_destroy(NULL);

View file

@ -1772,13 +1772,9 @@ ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt)
INP_WLOCK(inp);
im6o = inp->in6p_moptions;
/*
* If socket is neither of type SOCK_RAW or SOCK_DGRAM,
* or is a divert socket, reject it.
*/
if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
(inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
/* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. */
if (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) {
INP_WUNLOCK(inp);
return (EOPNOTSUPP);
}
@ -2655,13 +2651,9 @@ ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
error = 0;
/*
* If socket is neither of type SOCK_RAW or SOCK_DGRAM,
* or is a divert socket, reject it.
*/
if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
(inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
/* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. */
if (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)
return (EOPNOTSUPP);
switch (sopt->sopt_name) {

View file

@ -268,7 +268,8 @@ struct accept_filter_arg {
#define AF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */
#define AF_INET6_SDP 42 /* OFED Socket Direct Protocol ipv6 */
#define AF_HYPERV 43 /* HyperV sockets */
#define AF_MAX 43
#define AF_DIVERT 44 /* divert(4) */
#define AF_MAX 44
/*
* When allocating a new AF_ constant, please only allocate
* even numbered constants for FreeBSD until 134 as odd numbered AF_
@ -393,6 +394,7 @@ struct sockproto {
#define PF_NETLINK AF_NETLINK
#define PF_INET_SDP AF_INET_SDP
#define PF_INET6_SDP AF_INET6_SDP
#define PF_DIVERT AF_DIVERT
#define PF_MAX AF_MAX

View file

@ -109,15 +109,14 @@ pcblist_sysctl(int proto, const char *name, char **bufp)
case IPPROTO_UDP:
mibvar = "net.inet.udp.pcblist";
break;
case IPPROTO_DIVERT:
mibvar = "net.inet.divert.pcblist";
break;
default:
mibvar = "net.inet.raw.pcblist";
break;
}
if (strncmp(name, "sdp", 3) == 0)
mibvar = "net.inet.sdp.pcblist";
else if (strncmp(name, "divert", 6) == 0)
mibvar = "net.inet.divert.pcblist";
len = 0;
if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) {
if (errno != ENOENT)
@ -272,7 +271,7 @@ protopr(u_long off, const char *name, int af1, int proto)
so = &inp->xi_socket;
/* Ignore sockets for protocols other than the desired one. */
if (so->xso_protocol != proto)
if (proto != 0 && so->xso_protocol != proto)
continue;
/* Ignore PCBs which were freed during copyout. */

View file

@ -101,7 +101,7 @@ static struct protox {
NULL, NULL, "sdp", 1, IPPROTO_TCP },
#endif
{ N_DIVCBINFO, -1, 1, protopr,
NULL, NULL, "divert", 1, IPPROTO_DIVERT },
NULL, NULL, "divert", 1, 0 },
{ N_RIPCBINFO, N_IPSTAT, 1, protopr,
ip_stats, NULL, "ip", 1, IPPROTO_RAW },
{ N_RIPCBINFO, N_ICMPSTAT, 1, protopr,