mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-15 12:54:27 +00:00
rtsock: do not use raw socket code
This makes the routing socket implementation self-contained and removes one of the last dependencies on the raw socket code and the pr_output method.

There are very subtle API-visible changes:
- A routing socket now returns EOPNOTSUPP instead of EINVAL for syscalls that are not supposed to be called on a routing socket.
- Routing socket buffer sizes are now controlled by the net.rtsock sysctls instead of net.raw. The latter were not documented anywhere, and even an Internet search does not find any references or discussions related to these sysctls.

Reviewed by: melifaro
Differential revision: https://reviews.freebsd.org/D36122
This commit is contained in:
parent
d94ec7490d
commit
36b10ac2cd
230
sys/net/rtsock.c
230
sys/net/rtsock.c
|
@ -60,7 +60,6 @@
|
||||||
#include <net/if_llatbl.h>
|
#include <net/if_llatbl.h>
|
||||||
#include <net/if_types.h>
|
#include <net/if_types.h>
|
||||||
#include <net/netisr.h>
|
#include <net/netisr.h>
|
||||||
#include <net/raw_cb.h>
|
|
||||||
#include <net/route.h>
|
#include <net/route.h>
|
||||||
#include <net/route/route_ctl.h>
|
#include <net/route/route_ctl.h>
|
||||||
#include <net/route/route_var.h>
|
#include <net/route/route_var.h>
|
||||||
|
@ -150,7 +149,7 @@ static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
|
||||||
int (*carp_get_vhid_p)(struct ifaddr *);
|
int (*carp_get_vhid_p)(struct ifaddr *);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Used by rtsock/raw_input callback code to decide whether to filter the update
|
* Used by rtsock callback code to decide whether to filter the update
|
||||||
* notification to a socket bound to a particular FIB.
|
* notification to a socket bound to a particular FIB.
|
||||||
*/
|
*/
|
||||||
#define RTS_FILTER_FIB M_PROTO8
|
#define RTS_FILTER_FIB M_PROTO8
|
||||||
|
@ -159,7 +158,14 @@ int (*carp_get_vhid_p)(struct ifaddr *);
|
||||||
*/
|
*/
|
||||||
#define m_rtsock_family m_pkthdr.PH_loc.eight[0]
|
#define m_rtsock_family m_pkthdr.PH_loc.eight[0]
|
||||||
|
|
||||||
|
struct rcb {
|
||||||
|
LIST_ENTRY(rcb) list;
|
||||||
|
struct socket *rcb_socket;
|
||||||
|
sa_family_t rcb_family;
|
||||||
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
LIST_HEAD(, rcb) cblist;
|
||||||
int ip_count; /* attached w/ AF_INET */
|
int ip_count; /* attached w/ AF_INET */
|
||||||
int ip6_count; /* attached w/ AF_INET6 */
|
int ip6_count; /* attached w/ AF_INET6 */
|
||||||
int any_count; /* total attached */
|
int any_count; /* total attached */
|
||||||
|
@ -198,7 +204,6 @@ static int sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh,
|
||||||
uint32_t weight, struct walkarg *w);
|
uint32_t weight, struct walkarg *w);
|
||||||
static int sysctl_iflist(int af, struct walkarg *w);
|
static int sysctl_iflist(int af, struct walkarg *w);
|
||||||
static int sysctl_ifmalist(int af, struct walkarg *w);
|
static int sysctl_ifmalist(int af, struct walkarg *w);
|
||||||
static int route_output(struct mbuf *m, struct socket *so, ...);
|
|
||||||
static void rt_getmetrics(const struct rtentry *rt,
|
static void rt_getmetrics(const struct rtentry *rt,
|
||||||
const struct nhop_object *nh, struct rt_metrics *out);
|
const struct nhop_object *nh, struct rt_metrics *out);
|
||||||
static void rt_dispatch(struct mbuf *, sa_family_t);
|
static void rt_dispatch(struct mbuf *, sa_family_t);
|
||||||
|
@ -267,84 +272,85 @@ VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
|
||||||
vnet_rts_uninit, 0);
|
vnet_rts_uninit, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int
|
static void
|
||||||
raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
|
rts_append_data(struct socket *so, struct mbuf *m)
|
||||||
struct rawcb *rp)
|
|
||||||
{
|
{
|
||||||
int fibnum;
|
|
||||||
|
|
||||||
KASSERT(m != NULL, ("%s: m is NULL", __func__));
|
if (sbappendaddr(&so->so_rcv, &route_src, m, NULL) == 0) {
|
||||||
KASSERT(proto != NULL, ("%s: proto is NULL", __func__));
|
soroverflow(so);
|
||||||
KASSERT(rp != NULL, ("%s: rp is NULL", __func__));
|
m_freem(m);
|
||||||
|
} else
|
||||||
/* No filtering requested. */
|
sorwakeup(so);
|
||||||
if ((m->m_flags & RTS_FILTER_FIB) == 0)
|
|
||||||
return (0);
|
|
||||||
|
|
||||||
/* Check if it is a rts and the fib matches the one of the socket. */
|
|
||||||
fibnum = M_GETFIB(m);
|
|
||||||
if (proto->sp_family != PF_ROUTE ||
|
|
||||||
rp->rcb_socket == NULL ||
|
|
||||||
rp->rcb_socket->so_fibnum == fibnum)
|
|
||||||
return (0);
|
|
||||||
|
|
||||||
/* Filtering requested and no match, the socket shall be skipped. */
|
|
||||||
return (1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
rts_input(struct mbuf *m)
|
rts_input(struct mbuf *m)
|
||||||
{
|
{
|
||||||
struct sockproto route_proto;
|
struct rcb *rcb;
|
||||||
|
struct socket *last;
|
||||||
|
|
||||||
route_proto.sp_family = PF_ROUTE;
|
last = NULL;
|
||||||
route_proto.sp_protocol = m->m_rtsock_family;
|
RTSOCK_LOCK();
|
||||||
|
LIST_FOREACH(rcb, &V_route_cb.cblist, list) {
|
||||||
|
if (rcb->rcb_family != AF_UNSPEC &&
|
||||||
|
rcb->rcb_family != m->m_rtsock_family)
|
||||||
|
continue;
|
||||||
|
if ((m->m_flags & RTS_FILTER_FIB) &&
|
||||||
|
M_GETFIB(m) != rcb->rcb_socket->so_fibnum)
|
||||||
|
continue;
|
||||||
|
if (last != NULL) {
|
||||||
|
struct mbuf *n;
|
||||||
|
|
||||||
raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb);
|
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
|
||||||
|
if (n != NULL)
|
||||||
|
rts_append_data(last, n);
|
||||||
}
|
}
|
||||||
|
last = rcb->rcb_socket;
|
||||||
/*
|
}
|
||||||
* It really doesn't make any sense at all for this code to share much
|
if (last != NULL)
|
||||||
* with raw_usrreq.c, since its functionality is so restricted. XXX
|
rts_append_data(last, m);
|
||||||
*/
|
else
|
||||||
static void
|
m_freem(m);
|
||||||
rts_abort(struct socket *so)
|
RTSOCK_UNLOCK();
|
||||||
{
|
|
||||||
|
|
||||||
raw_usrreqs.pru_abort(so);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
rts_close(struct socket *so)
|
rts_close(struct socket *so)
|
||||||
{
|
{
|
||||||
|
|
||||||
raw_usrreqs.pru_close(so);
|
soisdisconnected(so);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* pru_accept is EOPNOTSUPP */
|
static SYSCTL_NODE(_net, OID_AUTO, rtsock, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
|
||||||
|
"Routing socket infrastructure");
|
||||||
|
static u_long rts_sendspace = 8192;
|
||||||
|
SYSCTL_ULONG(_net_rtsock, OID_AUTO, sendspace, CTLFLAG_RW, &rts_sendspace, 0,
|
||||||
|
"Default routing socket send space");
|
||||||
|
static u_long rts_recvspace = 8192;
|
||||||
|
SYSCTL_ULONG(_net_rtsock, OID_AUTO, recvspace, CTLFLAG_RW, &rts_recvspace, 0,
|
||||||
|
"Default routing socket receive space");
|
||||||
|
|
||||||
static int
|
static int
|
||||||
rts_attach(struct socket *so, int proto, struct thread *td)
|
rts_attach(struct socket *so, int proto, struct thread *td)
|
||||||
{
|
{
|
||||||
struct rawcb *rp;
|
struct rcb *rcb;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
|
error = soreserve(so, rts_sendspace, rts_recvspace);
|
||||||
|
if (error)
|
||||||
|
return (error);
|
||||||
|
|
||||||
/* XXX */
|
rcb = malloc(sizeof(*rcb), M_PCB, M_WAITOK);
|
||||||
rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
|
rcb->rcb_socket = so;
|
||||||
|
rcb->rcb_family = proto;
|
||||||
|
|
||||||
so->so_pcb = (caddr_t)rp;
|
so->so_pcb = rcb;
|
||||||
so->so_fibnum = td->td_proc->p_fibnum;
|
so->so_fibnum = td->td_proc->p_fibnum;
|
||||||
error = raw_attach(so, proto);
|
so->so_options |= SO_USELOOPBACK;
|
||||||
rp = sotorawcb(so);
|
|
||||||
if (error) {
|
|
||||||
so->so_pcb = NULL;
|
|
||||||
free(rp, M_PCB);
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
RTSOCK_LOCK();
|
RTSOCK_LOCK();
|
||||||
switch(rp->rcb_proto.sp_protocol) {
|
LIST_INSERT_HEAD(&V_route_cb.cblist, rcb, list);
|
||||||
|
switch (proto) {
|
||||||
case AF_INET:
|
case AF_INET:
|
||||||
V_route_cb.ip_count++;
|
V_route_cb.ip_count++;
|
||||||
break;
|
break;
|
||||||
|
@ -355,36 +361,18 @@ rts_attach(struct socket *so, int proto, struct thread *td)
|
||||||
V_route_cb.any_count++;
|
V_route_cb.any_count++;
|
||||||
RTSOCK_UNLOCK();
|
RTSOCK_UNLOCK();
|
||||||
soisconnected(so);
|
soisconnected(so);
|
||||||
so->so_options |= SO_USELOOPBACK;
|
|
||||||
return 0;
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
|
|
||||||
{
|
|
||||||
|
|
||||||
return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
|
|
||||||
{
|
|
||||||
|
|
||||||
return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* pru_connect2 is EOPNOTSUPP */
|
|
||||||
/* pru_control is EOPNOTSUPP */
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
rts_detach(struct socket *so)
|
rts_detach(struct socket *so)
|
||||||
{
|
{
|
||||||
struct rawcb *rp = sotorawcb(so);
|
struct rcb *rcb = so->so_pcb;
|
||||||
|
|
||||||
KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
|
|
||||||
|
|
||||||
RTSOCK_LOCK();
|
RTSOCK_LOCK();
|
||||||
switch(rp->rcb_proto.sp_protocol) {
|
LIST_REMOVE(rcb, list);
|
||||||
|
switch(rcb->rcb_family) {
|
||||||
case AF_INET:
|
case AF_INET:
|
||||||
V_route_cb.ip_count--;
|
V_route_cb.ip_count--;
|
||||||
break;
|
break;
|
||||||
|
@ -394,66 +382,18 @@ rts_detach(struct socket *so)
|
||||||
}
|
}
|
||||||
V_route_cb.any_count--;
|
V_route_cb.any_count--;
|
||||||
RTSOCK_UNLOCK();
|
RTSOCK_UNLOCK();
|
||||||
raw_usrreqs.pru_detach(so);
|
free(rcb, M_PCB);
|
||||||
|
so->so_pcb = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
rts_disconnect(struct socket *so)
|
|
||||||
{
|
|
||||||
|
|
||||||
return (raw_usrreqs.pru_disconnect(so));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* pru_listen is EOPNOTSUPP */
|
|
||||||
|
|
||||||
static int
|
|
||||||
rts_peeraddr(struct socket *so, struct sockaddr **nam)
|
|
||||||
{
|
|
||||||
|
|
||||||
return (raw_usrreqs.pru_peeraddr(so, nam));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* pru_rcvd is EOPNOTSUPP */
|
|
||||||
/* pru_rcvoob is EOPNOTSUPP */
|
|
||||||
|
|
||||||
static int
|
|
||||||
rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
|
|
||||||
struct mbuf *control, struct thread *td)
|
|
||||||
{
|
|
||||||
|
|
||||||
return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* pru_sense is null */
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
rts_shutdown(struct socket *so)
|
rts_shutdown(struct socket *so)
|
||||||
{
|
{
|
||||||
|
|
||||||
return (raw_usrreqs.pru_shutdown(so));
|
socantsendmore(so);
|
||||||
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
rts_sockaddr(struct socket *so, struct sockaddr **nam)
|
|
||||||
{
|
|
||||||
|
|
||||||
return (raw_usrreqs.pru_sockaddr(so, nam));
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct pr_usrreqs route_usrreqs = {
|
|
||||||
.pru_abort = rts_abort,
|
|
||||||
.pru_attach = rts_attach,
|
|
||||||
.pru_bind = rts_bind,
|
|
||||||
.pru_connect = rts_connect,
|
|
||||||
.pru_detach = rts_detach,
|
|
||||||
.pru_disconnect = rts_disconnect,
|
|
||||||
.pru_peeraddr = rts_peeraddr,
|
|
||||||
.pru_send = rts_send,
|
|
||||||
.pru_shutdown = rts_shutdown,
|
|
||||||
.pru_sockaddr = rts_sockaddr,
|
|
||||||
.pru_close = rts_close,
|
|
||||||
};
|
|
||||||
|
|
||||||
#ifndef _SOCKADDR_UNION_DEFINED
|
#ifndef _SOCKADDR_UNION_DEFINED
|
||||||
#define _SOCKADDR_UNION_DEFINED
|
#define _SOCKADDR_UNION_DEFINED
|
||||||
/*
|
/*
|
||||||
|
@ -1021,9 +961,9 @@ alloc_sockaddr_aligned(struct linear_buffer *lb, int len)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*ARGSUSED*/
|
|
||||||
static int
|
static int
|
||||||
route_output(struct mbuf *m, struct socket *so, ...)
|
rts_send(struct socket *so, int flags, struct mbuf *m,
|
||||||
|
struct sockaddr *nam, struct mbuf *control, struct thread *td)
|
||||||
{
|
{
|
||||||
struct rt_msghdr *rtm = NULL;
|
struct rt_msghdr *rtm = NULL;
|
||||||
struct rt_addrinfo info;
|
struct rt_addrinfo info;
|
||||||
|
@ -1038,6 +978,13 @@ route_output(struct mbuf *m, struct socket *so, ...)
|
||||||
struct rib_cmd_info rc;
|
struct rib_cmd_info rc;
|
||||||
struct nhop_object *nh;
|
struct nhop_object *nh;
|
||||||
|
|
||||||
|
if ((flags & PRUS_OOB) || control != NULL) {
|
||||||
|
m_freem(m);
|
||||||
|
if (control != NULL)
|
||||||
|
m_freem(control);
|
||||||
|
return (EOPNOTSUPP);
|
||||||
|
}
|
||||||
|
|
||||||
fibnum = so->so_fibnum;
|
fibnum = so->so_fibnum;
|
||||||
#define senderr(e) { error = e; goto flush;}
|
#define senderr(e) { error = e; goto flush;}
|
||||||
if (m == NULL || ((m->m_len < sizeof(long)) &&
|
if (m == NULL || ((m->m_len < sizeof(long)) &&
|
||||||
|
@ -1231,7 +1178,7 @@ static void
|
||||||
send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m,
|
send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m,
|
||||||
sa_family_t saf, u_int fibnum, int rtm_errno)
|
sa_family_t saf, u_int fibnum, int rtm_errno)
|
||||||
{
|
{
|
||||||
struct rawcb *rp = NULL;
|
struct rcb *rcb = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check to see if we don't want our own messages.
|
* Check to see if we don't want our own messages.
|
||||||
|
@ -1244,7 +1191,7 @@ send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* There is another listener, so construct message */
|
/* There is another listener, so construct message */
|
||||||
rp = sotorawcb(so);
|
rcb = so->so_pcb;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rtm != NULL) {
|
if (rtm != NULL) {
|
||||||
|
@ -1265,15 +1212,15 @@ send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m,
|
||||||
if (m != NULL) {
|
if (m != NULL) {
|
||||||
M_SETFIB(m, fibnum);
|
M_SETFIB(m, fibnum);
|
||||||
m->m_flags |= RTS_FILTER_FIB;
|
m->m_flags |= RTS_FILTER_FIB;
|
||||||
if (rp) {
|
if (rcb) {
|
||||||
/*
|
/*
|
||||||
* XXX insure we don't get a copy by
|
* XXX insure we don't get a copy by
|
||||||
* invalidating our protocol
|
* invalidating our protocol
|
||||||
*/
|
*/
|
||||||
unsigned short family = rp->rcb_proto.sp_family;
|
sa_family_t family = rcb->rcb_family;
|
||||||
rp->rcb_proto.sp_family = 0;
|
rcb->rcb_family = AF_UNSPEC;
|
||||||
rt_dispatch(m, saf);
|
rt_dispatch(m, saf);
|
||||||
rp->rcb_proto.sp_family = family;
|
rcb->rcb_family = family;
|
||||||
} else
|
} else
|
||||||
rt_dispatch(m, saf);
|
rt_dispatch(m, saf);
|
||||||
}
|
}
|
||||||
|
@ -2696,13 +2643,20 @@ static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||||
|
|
||||||
static struct domain routedomain; /* or at least forward */
|
static struct domain routedomain; /* or at least forward */
|
||||||
|
|
||||||
|
static struct pr_usrreqs route_usrreqs = {
|
||||||
|
.pru_abort = rts_close,
|
||||||
|
.pru_attach = rts_attach,
|
||||||
|
.pru_detach = rts_detach,
|
||||||
|
.pru_send = rts_send,
|
||||||
|
.pru_shutdown = rts_shutdown,
|
||||||
|
.pru_close = rts_close,
|
||||||
|
};
|
||||||
|
|
||||||
static struct protosw routesw[] = {
|
static struct protosw routesw[] = {
|
||||||
{
|
{
|
||||||
.pr_type = SOCK_RAW,
|
.pr_type = SOCK_RAW,
|
||||||
.pr_domain = &routedomain,
|
.pr_domain = &routedomain,
|
||||||
.pr_flags = PR_ATOMIC|PR_ADDR,
|
.pr_flags = PR_ATOMIC|PR_ADDR,
|
||||||
.pr_output = route_output,
|
|
||||||
.pr_ctlinput = raw_ctlinput,
|
|
||||||
.pr_usrreqs = &route_usrreqs
|
.pr_usrreqs = &route_usrreqs
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue