mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-17 05:43:59 +00:00
14f57a8b02
runs on boot and each time a vnet jail is created. Running cc_init() multiple times results in a panic when attempting to initialise the cc_list lock again, and so r215166 effectively broke the use of vnet jails. Switch to using a SYSINIT to run cc_init() on boot. CC algorithm modules loaded on boot register in the same SI_SUB_PROTO_IFATTACHDOMAIN category as is used in this patch, so cc_init() is run at SI_ORDER_FIRST to ensure the framework is initialised before module registration is attempted. Sponsored by: FreeBSD Foundation Reported and tested by: Mikolaj Golub <to.my.trociny at gmail com> MFC after: 11 weeks X-MFC with: r215166
343 lines
8.8 KiB
C
343 lines
8.8 KiB
C
/*-
|
|
* Copyright (c) 2007-2008
|
|
* Swinburne University of Technology, Melbourne, Australia.
|
|
* Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
|
|
* Copyright (c) 2010 The FreeBSD Foundation
|
|
* All rights reserved.
|
|
*
|
|
* This software was developed at the Centre for Advanced Internet
|
|
* Architectures, Swinburne University, by Lawrence Stewart and James Healy,
|
|
* made possible in part by a grant from the Cisco University Research Program
|
|
* Fund at Community Foundation Silicon Valley.
|
|
*
|
|
* Portions of this software were developed at the Centre for Advanced
|
|
* Internet Architectures, Swinburne University of Technology, Melbourne,
|
|
* Australia by David Hayes under sponsorship from the FreeBSD Foundation.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* This software was first released in 2007 by James Healy and Lawrence Stewart
|
|
* whilst working on the NewTCP research project at Swinburne University's
|
|
* Centre for Advanced Internet Architectures, Melbourne, Australia, which was
|
|
* made possible in part by a grant from the Cisco University Research Program
|
|
* Fund at Community Foundation Silicon Valley. More details are available at:
|
|
* http://caia.swin.edu.au/urp/newtcp/
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/libkern.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/module.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/queue.h>
|
|
#include <sys/rwlock.h>
|
|
#include <sys/sbuf.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/socketvar.h>
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <net/if.h>
|
|
#include <net/if_var.h>
|
|
|
|
#include <netinet/cc.h>
|
|
#include <netinet/in.h>
|
|
#include <netinet/in_pcb.h>
|
|
#include <netinet/tcp_var.h>
|
|
|
|
#include <netinet/cc/cc_module.h>
|
|
|
|
/*
|
|
* List of available cc algorithms on the current system. First element
|
|
* is used as the system default CC algorithm.
|
|
*/
|
|
struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);
|
|
|
|
/* Protects the cc_list TAILQ. */
|
|
struct rwlock cc_list_lock;
|
|
|
|
/*
|
|
* Set the default CC algorithm to new_default. The default is identified
|
|
* by being the first element in the cc_list TAILQ.
|
|
*/
|
|
static void
|
|
cc_set_default(struct cc_algo *new_default)
|
|
{
|
|
CC_LIST_WLOCK_ASSERT();
|
|
|
|
/*
|
|
* Make the requested system default CC algorithm the first element in
|
|
* the list if it isn't already.
|
|
*/
|
|
if (new_default != CC_DEFAULT()) {
|
|
STAILQ_REMOVE(&cc_list, new_default, cc_algo, entries);
|
|
STAILQ_INSERT_HEAD(&cc_list, new_default, entries);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Sysctl handler to show and change the default CC algorithm.
|
|
*/
|
|
static int
|
|
cc_default_algo(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
struct cc_algo *funcs;
|
|
int err, found;
|
|
|
|
err = found = 0;
|
|
|
|
if (req->newptr == NULL) {
|
|
char default_cc[TCP_CA_NAME_MAX];
|
|
|
|
/* Just print the current default. */
|
|
CC_LIST_RLOCK();
|
|
strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX);
|
|
CC_LIST_RUNLOCK();
|
|
err = sysctl_handle_string(oidp, default_cc, 1, req);
|
|
} else {
|
|
/* Find algo with specified name and set it to default. */
|
|
CC_LIST_WLOCK();
|
|
STAILQ_FOREACH(funcs, &cc_list, entries) {
|
|
if (strncmp((char *)req->newptr, funcs->name,
|
|
TCP_CA_NAME_MAX) == 0) {
|
|
found = 1;
|
|
cc_set_default(funcs);
|
|
}
|
|
}
|
|
CC_LIST_WUNLOCK();
|
|
|
|
if (!found)
|
|
err = ESRCH;
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/*
|
|
* Sysctl handler to display the list of available CC algorithms.
|
|
*/
|
|
static int
|
|
cc_list_available(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
struct cc_algo *algo;
|
|
struct sbuf *s;
|
|
int err, first;
|
|
|
|
err = 0;
|
|
first = 1;
|
|
s = sbuf_new(NULL, NULL, TCP_CA_NAME_MAX, SBUF_AUTOEXTEND);
|
|
|
|
if (s == NULL)
|
|
return (ENOMEM);
|
|
|
|
CC_LIST_RLOCK();
|
|
STAILQ_FOREACH(algo, &cc_list, entries) {
|
|
err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
|
|
if (err)
|
|
break;
|
|
first = 0;
|
|
}
|
|
CC_LIST_RUNLOCK();
|
|
|
|
if (!err) {
|
|
sbuf_finish(s);
|
|
err = sysctl_handle_string(oidp, sbuf_data(s), 1, req);
|
|
}
|
|
|
|
sbuf_delete(s);
|
|
return (err);
|
|
}
|
|
|
|
/*
|
|
* Initialise CC subsystem on system boot.
|
|
*/
|
|
static void
|
|
cc_init(void)
|
|
{
|
|
CC_LIST_LOCK_INIT();
|
|
STAILQ_INIT(&cc_list);
|
|
}
|
|
|
|
/*
|
|
* Returns non-zero on success, 0 on failure.
|
|
*/
|
|
int
|
|
cc_deregister_algo(struct cc_algo *remove_cc)
|
|
{
|
|
struct cc_algo *funcs, *tmpfuncs;
|
|
struct tcpcb *tp;
|
|
struct inpcb *inp;
|
|
int err;
|
|
|
|
err = ENOENT;
|
|
|
|
/* Never allow newreno to be deregistered. */
|
|
if (&newreno_cc_algo == remove_cc)
|
|
return (EPERM);
|
|
|
|
/* Remove algo from cc_list so that new connections can't use it. */
|
|
CC_LIST_WLOCK();
|
|
STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
|
|
if (funcs == remove_cc) {
|
|
/*
|
|
* If we're removing the current system default,
|
|
* reset the default to newreno.
|
|
*/
|
|
if (strncmp(CC_DEFAULT()->name, remove_cc->name,
|
|
TCP_CA_NAME_MAX) == 0)
|
|
cc_set_default(&newreno_cc_algo);
|
|
|
|
STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
|
|
err = 0;
|
|
break;
|
|
}
|
|
}
|
|
CC_LIST_WUNLOCK();
|
|
|
|
if (!err) {
|
|
/*
|
|
* Check all active control blocks and change any that are
|
|
* using this algorithm back to newreno. If the algorithm that
|
|
* was in use requires cleanup code to be run, call it.
|
|
*
|
|
* New connections already part way through being initialised
|
|
* with the CC algo we're removing will not race with this code
|
|
* because the INP_INFO_WLOCK is held during initialisation.
|
|
* We therefore don't enter the loop below until the connection
|
|
* list has stabilised.
|
|
*/
|
|
INP_INFO_RLOCK(&V_tcbinfo);
|
|
LIST_FOREACH(inp, &V_tcb, inp_list) {
|
|
INP_WLOCK(inp);
|
|
/* Important to skip tcptw structs. */
|
|
if (!(inp->inp_flags & INP_TIMEWAIT) &&
|
|
(tp = intotcpcb(inp)) != NULL) {
|
|
/*
|
|
* By holding INP_WLOCK here, we are
|
|
* assured that the connection is not
|
|
* currently executing inside the CC
|
|
* module's functions i.e. it is safe to
|
|
* make the switch back to newreno.
|
|
*/
|
|
if (CC_ALGO(tp) == remove_cc) {
|
|
tmpfuncs = CC_ALGO(tp);
|
|
/* Newreno does not require any init. */
|
|
CC_ALGO(tp) = &newreno_cc_algo;
|
|
if (tmpfuncs->cb_destroy != NULL)
|
|
tmpfuncs->cb_destroy(tp->ccv);
|
|
}
|
|
}
|
|
INP_WUNLOCK(inp);
|
|
}
|
|
INP_INFO_RUNLOCK(&V_tcbinfo);
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/*
|
|
* Returns 0 on success, non-zero on failure.
|
|
*/
|
|
int
|
|
cc_register_algo(struct cc_algo *add_cc)
|
|
{
|
|
struct cc_algo *funcs;
|
|
int err;
|
|
|
|
err = 0;
|
|
|
|
/*
|
|
* Iterate over list of registered CC algorithms and make sure
|
|
* we're not trying to add a duplicate.
|
|
*/
|
|
CC_LIST_WLOCK();
|
|
STAILQ_FOREACH(funcs, &cc_list, entries) {
|
|
if (funcs == add_cc || strncmp(funcs->name, add_cc->name,
|
|
TCP_CA_NAME_MAX) == 0)
|
|
err = EEXIST;
|
|
}
|
|
|
|
if (!err)
|
|
STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
|
|
|
|
CC_LIST_WUNLOCK();
|
|
|
|
return (err);
|
|
}
|
|
|
|
/*
|
|
* Handles kld related events. Returns 0 on success, non-zero on failure.
|
|
*/
|
|
int
|
|
cc_modevent(module_t mod, int event_type, void *data)
|
|
{
|
|
struct cc_algo *algo;
|
|
int err;
|
|
|
|
err = 0;
|
|
algo = (struct cc_algo *)data;
|
|
|
|
switch(event_type) {
|
|
case MOD_LOAD:
|
|
if (algo->mod_init != NULL)
|
|
err = algo->mod_init();
|
|
if (!err)
|
|
err = cc_register_algo(algo);
|
|
break;
|
|
|
|
case MOD_QUIESCE:
|
|
case MOD_SHUTDOWN:
|
|
case MOD_UNLOAD:
|
|
err = cc_deregister_algo(algo);
|
|
if (!err && algo->mod_destroy != NULL)
|
|
algo->mod_destroy();
|
|
if (err == ENOENT)
|
|
err = 0;
|
|
break;
|
|
|
|
default:
|
|
err = EINVAL;
|
|
break;
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);
|
|
|
|
/* Declare sysctl tree and populate it. */
|
|
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL,
|
|
"congestion control related settings");
|
|
|
|
SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW,
|
|
NULL, 0, cc_default_algo, "A", "default congestion control algorithm");
|
|
|
|
SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD,
|
|
NULL, 0, cc_list_available, "A",
|
|
"list available congestion control algorithms");
|