protosw: retire pr_drain and use EVENTHANDLER(9) directly

The method was called for two different conditions: 1) the VM layer is
low on pages or 2) one of the UMA zones of the mbuf allocator is exhausted.
This change splits 2) into a new event handler, but all affected network
subsystems are modified to subscribe to both, so this change shall not
bring functional changes under different low memory situations.

There were three subsystems still using pr_drain: TCP, SCTP and frag6.
The latter had its protosw entry solely in order to register its
pr_drain method.

Reviewed by:		tuexen, melifaro
Differential revision:	https://reviews.freebsd.org/D36164
This commit is contained in:
Gleb Smirnoff 2022-08-17 11:50:31 -07:00
parent 1922eb3e9c
commit 81a34d374e
18 changed files with 54 additions and 108 deletions

View file

@ -39,14 +39,12 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/ktls.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/refcount.h>
#include <sys/sf_buf.h>
#include <sys/smp.h>
@ -396,14 +394,6 @@ mbuf_init(void *dummy)
uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);
/*
* Hook event handler for low-memory situation, used to
* drain protocols and push data back to the caches (UMA
* later pushes it back to VM).
*/
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
snd_tag_count = counter_u64_alloc(M_WAITOK);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
@ -828,26 +818,12 @@ mb_ctor_pack(void *mem, int size, void *arg, int how)
/*
* This is the protocol drain routine. Called by UMA whenever any of the
* mbuf zones is close to its limit; it is installed on the mbuf zones with
* uma_zone_set_maxaction(). Both arguments are unused.
*
* No locks should be held when this is called. The drain routines have to
* presently acquire some locks which raises the possibility of lock order
* reversal.
*
* NOTE(review): this hunk is rendered without +/- markers; the pr_drain
* walk below looks like the code this commit retires and the
* EVENTHANDLER_INVOKE() call its replacement -- confirm against the
* applied file.
*/
static void
mb_reclaim(uma_zone_t zone __unused, int pending __unused)
{
struct epoch_tracker et;
struct domain *dp;
struct protosw *pr;
/*
* Presumably complains (and panics, per WARN_PANIC) if the caller holds
* locks other than Giant or sleepable ones -- confirm against
* WITNESS_WARN semantics.
*/
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__);
/*
* Inside the network epoch, walk every protocol of every domain and
* call its pr_drain hook when one is set.
*/
NET_EPOCH_ENTER(et);
for (dp = domains; dp != NULL; dp = dp->dom_next)
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_drain != NULL)
(*pr->pr_drain)();
NET_EPOCH_EXIT(et);
/* Notify mbuf_lowmem subscribers, passing VM_LOW_MBUFS as the flag. */
EVENTHANDLER_INVOKE(mbuf_lowmem, VM_LOW_MBUFS);
}
/*

View file

@ -315,9 +315,6 @@ db_print_protosw(struct protosw *pr, const char *prname, int indent)
db_print_indent(indent);
db_printf("pr_ctloutput: %p ", pr->pr_ctloutput);
db_print_indent(indent);
db_printf("pr_drain: %p\n", pr->pr_drain);
}
static void

View file

@ -435,7 +435,6 @@ pf_proto_unregister(int family, int protocol, int type)
dpr->pr_protocol = PROTO_SPACER;
dpr->pr_flags = 0;
dpr->pr_ctloutput = NULL;
dpr->pr_drain = NULL;
dpr->pr_usrreqs = &nousrreqs;
/* Job is done, not more protection required. */

View file

@ -108,14 +108,6 @@ extern struct domain inetdomain;
}
struct protosw inetsw[] = {
{
.pr_type = 0,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IP,
.pr_flags = PR_CAPATTACH,
.pr_drain = ip_drain,
.pr_usrreqs = &nousrreqs
},
{
.pr_type = SOCK_DGRAM,
.pr_domain = &inetdomain,
@ -131,7 +123,6 @@ struct protosw inetsw[] = {
.pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD|
PR_CAPATTACH,
.pr_ctloutput = tcp_ctloutput,
.pr_drain = tcp_drain,
.pr_usrreqs = &tcp_usrreqs
},
#ifdef SCTP
@ -141,7 +132,6 @@ struct protosw inetsw[] = {
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_WANTRCVD,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = sctp_drain,
.pr_usrreqs = &sctp_usrreqs
},
{
@ -150,7 +140,6 @@ struct protosw inetsw[] = {
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = NULL, /* Covered by the SOCK_SEQPACKET entry. */
.pr_usrreqs = &sctp_usrreqs
},
#endif /* SCTP */

View file

@ -101,7 +101,6 @@ CTASSERT(sizeof(struct ip) == 20);
/* IP reassembly functions are defined in ip_reass.c. */
extern void ipreass_init(void);
extern void ipreass_drain(void);
#ifdef VIMAGE
extern void ipreass_destroy(void);
#endif
@ -845,20 +844,6 @@ ip_input(struct mbuf *m)
m_freem(m);
}
/*
* Call ipreass_drain() once in the context of every vnet. The vnet list
* read lock (no-sleep variant) is held for the whole walk.
*
* NOTE(review): this function appears to be on the removed side of the
* diff -- confirm against the applied file.
*/
void
ip_drain(void)
{
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
/* Switch the current vnet so ipreass_drain() sees its state. */
CURVNET_SET(vnet_iter);
ipreass_drain();
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
}
int
ipproto_register(uint8_t proto, ipproto_input_t input, ipproto_ctlinput_t ctl)
{

View file

@ -92,7 +92,6 @@ VNET_DEFINE_STATIC(int, ipreass_maxbucketsize);
#define V_ipreass_maxbucketsize VNET(ipreass_maxbucketsize)
void ipreass_init(void);
void ipreass_drain(void);
#ifdef VIMAGE
void ipreass_destroy(void);
#endif
@ -597,6 +596,31 @@ ipreass_timer_init(void *arg __unused)
}
SYSINIT(ipreass, SI_SUB_VNET_DONE, SI_ORDER_ANY, ipreass_timer_init, NULL);
/*
 * Drain off all datagram fragments, in every vnet.
 *
 * For each vnet, every reassembly hash bucket is locked in turn and all
 * queued fragment queues in it are dropped with ipq_drop(); the bucket's
 * count is then asserted to be zero before the bucket lock is released.
 *
 * This function is registered elsewhere in this file as a handler for
 * the vm_lowmem and mbuf_lowmem events.
 */
static void
ipreass_drain(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	/*
	 * Hold the vnet list read lock across the walk so the list cannot
	 * change under VNET_FOREACH(); the ip_drain() wrapper this function
	 * replaces, as well as tcp_drain() and sctp_drain(), do the same.
	 */
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (int i = 0; i < IPREASS_NHASH; i++) {
			IPQ_LOCK(i);
			/* Drop from the head until the bucket is empty. */
			while (!TAILQ_EMPTY(&V_ipq[i].head))
				ipq_drop(&V_ipq[i],
				    TAILQ_FIRST(&V_ipq[i].head));
			KASSERT(V_ipq[i].count == 0,
			    ("%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i,
			    V_ipq[i].count, V_ipq));
			IPQ_UNLOCK(i);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
* Initialize IP reassembly structures.
*/
@ -623,24 +647,10 @@ ipreass_init(void)
maxfrags = IP_MAXFRAGS;
EVENTHANDLER_REGISTER(nmbclusters_change, ipreass_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
}
}
/*
* Drain off all datagram fragments.
*/
void
ipreass_drain(void)
{
for (int i = 0; i < IPREASS_NHASH; i++) {
IPQ_LOCK(i);
while(!TAILQ_EMPTY(&V_ipq[i].head))
ipq_drop(&V_ipq[i], TAILQ_FIRST(&V_ipq[i].head));
KASSERT(V_ipq[i].count == 0,
("%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i,
V_ipq[i].count, V_ipq));
IPQ_UNLOCK(i);
EVENTHANDLER_REGISTER(vm_lowmem, ipreass_drain, NULL,
LOWMEM_PRI_DEFAULT);
EVENTHANDLER_REGISTER(mbuf_lowmem, ipreass_drain, NULL,
LOWMEM_PRI_DEFAULT);
}
}

View file

@ -214,7 +214,6 @@ int inp_getmoptions(struct inpcb *, struct sockopt *);
int inp_setmoptions(struct inpcb *, struct sockopt *);
int ip_ctloutput(struct socket *, struct sockopt *sopt);
void ip_drain(void);
int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags);
void ip_forward(struct mbuf *m, int srcrt);

View file

@ -61,7 +61,6 @@ struct protosw sctp_stream_protosw = {
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = sctp_drain,
.pr_usrreqs = &sctp_usrreqs,
};
@ -71,7 +70,6 @@ struct protosw sctp_seqpacket_protosw = {
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_WANTRCVD,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = sctp_drain,
.pr_usrreqs = &sctp_usrreqs,
};
#endif
@ -85,7 +83,6 @@ struct protosw sctp6_stream_protosw = {
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = sctp_drain,
.pr_usrreqs = &sctp6_usrreqs,
};
@ -95,9 +92,6 @@ struct protosw sctp6_seqpacket_protosw = {
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_WANTRCVD,
.pr_ctloutput = sctp_ctloutput,
#ifndef INET /* Do not call initialization and drain routines twice. */
.pr_drain = sctp_drain,
#endif
.pr_usrreqs = &sctp6_usrreqs,
};
#endif

View file

@ -6942,15 +6942,18 @@ sctp_drain_mbufs(struct sctp_tcb *stcb)
*/
}
void
static void
sctp_drain(void)
{
struct epoch_tracker et;
VNET_ITERATOR_DECL(vnet_iter);
NET_EPOCH_ENTER(et);
/*
* We must walk the PCB lists for ALL associations here. The system
* is LOW on MBUF's and needs help. This is where reneging will
* occur. We really hope this does NOT happen!
*/
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
@ -6962,6 +6965,7 @@ sctp_drain(void)
#ifdef VIMAGE
continue;
#else
NET_EPOCH_EXIT(et);
return;
#endif
}
@ -6981,7 +6985,10 @@ sctp_drain(void)
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
NET_EPOCH_EXIT(et);
}
EVENTHANDLER_DEFINE(vm_lowmem, sctp_drain, NULL, LOWMEM_PRI_DEFAULT);
EVENTHANDLER_DEFINE(mbuf_lowmem, sctp_drain, NULL, LOWMEM_PRI_DEFAULT);
/*
* start a new iterator

View file

@ -611,8 +611,6 @@ sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *,
bool
sctp_is_vtag_good(uint32_t, uint16_t lport, uint16_t rport, struct timeval *);
/* void sctp_drain(void); */
int sctp_destination_is_reachable(struct sctp_tcb *, struct sockaddr *);
int sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp);

View file

@ -327,7 +327,6 @@ int sctp_ctloutput(struct socket *, struct sockopt *);
void sctp_input_with_port(struct mbuf *, int, uint16_t);
int sctp_input(struct mbuf **, int *, int);
void sctp_pathmtu_adjustment(struct sctp_tcb *, uint32_t, bool);
void sctp_drain(void);
void
sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
uint8_t, uint8_t, uint16_t, uint32_t);

View file

@ -1448,6 +1448,8 @@ tcp_vnet_init(void *arg __unused)
VNET_SYSINIT(tcp_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
tcp_vnet_init, NULL);
static void tcp_drain(void);
static void
tcp_init(void *arg __unused)
{
@ -1506,6 +1508,8 @@ tcp_init(void *arg __unused)
ISN_LOCK_INIT();
EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
EVENTHANDLER_REGISTER(vm_lowmem, tcp_drain, NULL, LOWMEM_PRI_DEFAULT);
EVENTHANDLER_REGISTER(mbuf_lowmem, tcp_drain, NULL, LOWMEM_PRI_DEFAULT);
tcp_inp_lro_direct_queue = counter_u64_alloc(M_WAITOK);
tcp_inp_lro_wokeup_queue = counter_u64_alloc(M_WAITOK);
@ -2513,14 +2517,16 @@ tcp_close(struct tcpcb *tp)
return (tp);
}
void
static void
tcp_drain(void)
{
struct epoch_tracker et;
VNET_ITERATOR_DECL(vnet_iter);
if (!do_tcpdrain)
return;
NET_EPOCH_ENTER(et);
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
@ -2558,6 +2564,7 @@ tcp_drain(void)
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
NET_EPOCH_EXIT(et);
}
/*

View file

@ -1085,7 +1085,6 @@ void tcp_twclose(struct tcptw *, int);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *);
void tcp_drain(void);
void tcp_fini(void *);
char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, const void *,
const void *);

View file

@ -141,14 +141,6 @@ static struct pr_usrreqs nousrreqs;
}
struct protosw inet6sw[] = {
{
.pr_type = 0,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_IPV6,
.pr_flags = PR_CAPATTACH,
.pr_drain = frag6_drain,
.pr_usrreqs = &nousrreqs,
},
{
.pr_type = SOCK_DGRAM,
.pr_domain = &inet6domain,
@ -164,9 +156,6 @@ struct protosw inet6sw[] = {
.pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD|
PR_LISTEN|PR_CAPATTACH,
.pr_ctloutput = tcp_ctloutput,
#ifndef INET /* don't call initialization, timeout, and drain routines twice */
.pr_drain = tcp_drain,
#endif
.pr_usrreqs = &tcp6_usrreqs,
},
#ifdef SCTP
@ -176,9 +165,6 @@ struct protosw inet6sw[] = {
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_WANTRCVD,
.pr_ctloutput = sctp_ctloutput,
#ifndef INET /* Do not call initialization and drain routines twice. */
.pr_drain = sctp_drain,
#endif
.pr_usrreqs = &sctp6_usrreqs
},
{
@ -187,7 +173,6 @@ struct protosw inet6sw[] = {
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = NULL, /* Covered by the SOCK_SEQPACKET entry. */
.pr_usrreqs = &sctp6_usrreqs
},
#endif /* SCTP */

View file

@ -295,6 +295,10 @@ ip6_init(void *arg __unused)
IP6PROTO_REGISTER(IPPROTO_SCTP, sctp6_input, sctp6_ctlinput);
#endif
EVENTHANDLER_REGISTER(vm_lowmem, frag6_drain, NULL, LOWMEM_PRI_DEFAULT);
EVENTHANDLER_REGISTER(mbuf_lowmem, frag6_drain, NULL,
LOWMEM_PRI_DEFAULT);
netisr_register(&ip6_nh);
#ifdef RSS
netisr_register(&ip6_direct_nh);

View file

@ -205,6 +205,8 @@ EVENTHANDLER_DECLARE(power_suspend_early, power_change_fn);
typedef void (*vm_lowmem_handler_t)(void *, int);
#define LOWMEM_PRI_DEFAULT EVENTHANDLER_PRI_FIRST
EVENTHANDLER_DECLARE(vm_lowmem, vm_lowmem_handler_t);
/* Some of mbuf(9) zones reached maximum */
EVENTHANDLER_DECLARE(mbuf_lowmem, vm_lowmem_handler_t);
/* Root mounted event */
typedef void (*mountroot_handler_t)(void *);

View file

@ -52,9 +52,6 @@ struct sockopt;
* Each protocol has a handle initializing one of these structures,
* which is used for protocol-protocol and system-protocol communication.
*
* The system will call the pr_drain entry if it is low on space and
* this should throw away any non-critical data.
*
* In retrospect, it would be a lot nicer to use an interface
* similar to the vnode VOP interface.
*/
@ -65,7 +62,6 @@ struct uio;
/* USE THESE FOR YOUR PROTOTYPES ! */
typedef int pr_ctloutput_t(struct socket *, struct sockopt *);
typedef void pr_drain_t(void);
typedef void pr_abort_t(struct socket *);
typedef int pr_accept_t(struct socket *, struct sockaddr **);
typedef int pr_attach_t(struct socket *, int, struct thread *);
@ -117,7 +113,6 @@ struct protosw {
/* protocol-protocol hooks */
pr_ctloutput_t *pr_ctloutput; /* control output (from above) */
/* utility hooks */
pr_drain_t *pr_drain; /* flush any excess space possible */
struct pr_usrreqs *pr_usrreqs; /* user-protocol hook */
};

View file

@ -87,6 +87,7 @@ extern int vm_pageout_page_count;
*/
#define VM_LOW_KMEM 0x01
#define VM_LOW_PAGES 0x02
#define VM_LOW_MBUFS 0x04
/*
* Exported routines.