mirror of
https://github.com/freebsd/freebsd-src
synced 2024-07-24 11:47:20 +00:00
tcp: Rack fixes and misc updates
So over the past few weeks we have found several bugs and updated hybrid pacing to have more data in the low-level logging. We have also moved more of the BBlogs to "verbose" mode so that we don't generate a lot of the debug data unless you put verbose/debug on. There were a couple of notable bugs, one being the incorrect passing of percentage for reduction to timely and the other the incorrect use of 20% timely Beta instead of 80%. This also expands a simple idea to be able to pace a cwnd (fillcw) as an alternate pacing mechanism combining that with timely reduction/increase. Reviewed by: tuexen Sponsored by: Netflix Inc Differential Revision:https://reviews.freebsd.org/D40391
This commit is contained in:
parent
9121945d70
commit
e022f2b013
|
@ -499,6 +499,8 @@ struct tcp_log_user {
|
|||
#define TCP_HYBRID_PACING_ENABLE 0x0010 /* We are enabling hybrid pacing else disable */
|
||||
#define TCP_HYBRID_PACING_S_MSS 0x0020 /* Client wants us to set the mss overriding gp est in CU */
|
||||
#define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tells us we set the mss on this entry */
|
||||
#define TCP_HYBRID_PACING_WASSET 0x2000 /* We init to this to know if a hybrid command was issued */
|
||||
|
||||
|
||||
struct tcp_hybrid_req {
|
||||
struct tcp_snd_req req;
|
||||
|
|
|
@ -320,15 +320,28 @@ static int32_t rack_hbp_thresh = 3; /* what is the divisor max_rtt/min_rtt to d
|
|||
/* Part of pacing */
|
||||
static int32_t rack_max_per_above = 30; /* When we go to increment stop if above 100+this% */
|
||||
|
||||
/* Timely information */
|
||||
/* Combine these two gives the range of 'no change' to bw */
|
||||
/* ie the up/down provide the upper and lower bound */
|
||||
/* Timely information:
|
||||
*
|
||||
* Here we have various control parameters on how
|
||||
* timely may change the multiplier. rack_gain_p5_ub
|
||||
* is associated with timely but not directly influencing
|
||||
* the rate decision like the other variables. It controls
|
||||
* the way fill-cw interacts with timely and caps how much
|
||||
* timely can boost the fill-cw b/w.
|
||||
*
|
||||
* The other values are various boost/shrink numbers as well
|
||||
* as potential caps when adjustments are made to the timely
|
||||
* gain (returned by rack_get_output_gain()). Remember too that
|
||||
* the gain returned can be overridden by other factors such as
|
||||
* probeRTT as well as fixed-rate-pacing.
|
||||
*/
|
||||
static int32_t rack_gain_p5_ub = 250;
|
||||
static int32_t rack_gp_per_bw_mul_up = 2; /* 2% */
|
||||
static int32_t rack_gp_per_bw_mul_down = 4; /* 4% */
|
||||
static int32_t rack_gp_rtt_maxmul = 3; /* 3 x maxmin */
|
||||
static int32_t rack_gp_rtt_minmul = 1; /* minrtt + (minrtt/mindiv) is lower rtt */
|
||||
static int32_t rack_gp_rtt_mindiv = 4; /* minrtt + (minrtt * minmul/mindiv) is lower rtt */
|
||||
static int32_t rack_gp_decrease_per = 20; /* 20% decrease in multiplier */
|
||||
static int32_t rack_gp_decrease_per = 80; /* Beta value of timely decrease (.8) = 80 */
|
||||
static int32_t rack_gp_increase_per = 2; /* 2% increase in multiplier */
|
||||
static int32_t rack_per_lower_bound = 50; /* Don't allow to drop below this multiplier */
|
||||
static int32_t rack_per_upper_bound_ss = 0; /* Don't allow SS to grow above this */
|
||||
|
@ -713,7 +726,7 @@ static void
|
|||
rack_log_gpset(struct tcp_rack *rack, uint32_t seq_end, uint32_t ack_end_t,
|
||||
uint32_t send_end_t, int line, uint8_t mode, struct rack_sendmap *rsm)
|
||||
{
|
||||
if (tcp_bblogging_on(rack->rc_tp)) {
|
||||
if (tcp_bblogging_on(rack->rc_tp) && (rack_verbose_logging != 0)) {
|
||||
union tcp_log_stackspecific log;
|
||||
struct timeval tv;
|
||||
|
||||
|
@ -1175,8 +1188,8 @@ rack_init_sysctls(void)
|
|||
SYSCTL_ADD_S32(&rack_sysctl_ctx,
|
||||
SYSCTL_CHILDREN(rack_timely),
|
||||
OID_AUTO, "decrease", CTLFLAG_RW,
|
||||
&rack_gp_decrease_per, 20,
|
||||
"Rack timely decrease percentage of our GP multiplication factor");
|
||||
&rack_gp_decrease_per, 80,
|
||||
"Rack timely Beta value 80 = .8 (scaled by 100)");
|
||||
SYSCTL_ADD_S32(&rack_sysctl_ctx,
|
||||
SYSCTL_CHILDREN(rack_timely),
|
||||
OID_AUTO, "increase", CTLFLAG_RW,
|
||||
|
@ -1187,6 +1200,12 @@ rack_init_sysctls(void)
|
|||
OID_AUTO, "lowerbound", CTLFLAG_RW,
|
||||
&rack_per_lower_bound, 50,
|
||||
"Rack timely lowest percentage we allow GP multiplier to fall to");
|
||||
SYSCTL_ADD_S32(&rack_sysctl_ctx,
|
||||
SYSCTL_CHILDREN(rack_timely),
|
||||
OID_AUTO, "p5_upper", CTLFLAG_RW,
|
||||
&rack_gain_p5_ub, 250,
|
||||
"Profile 5 upper bound to timely gain");
|
||||
|
||||
SYSCTL_ADD_S32(&rack_sysctl_ctx,
|
||||
SYSCTL_CHILDREN(rack_timely),
|
||||
OID_AUTO, "upperboundss", CTLFLAG_RW,
|
||||
|
@ -1967,7 +1986,7 @@ rack_get_fixed_pacing_bw(struct tcp_rack *rack)
|
|||
static void
|
||||
rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, uint64_t cbw, uint64_t tim,
|
||||
uint64_t data, uint8_t mod, uint16_t aux,
|
||||
struct tcp_sendfile_track *cur)
|
||||
struct tcp_sendfile_track *cur, int line)
|
||||
{
|
||||
#ifdef TCP_REQUEST_TRK
|
||||
int do_log = 0;
|
||||
|
@ -1991,7 +2010,7 @@ rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, uint64_t cbw, uint64_t t
|
|||
* All other less noisy logs here except the measure which
|
||||
* also needs to come out on the point and the log.
|
||||
*/
|
||||
do_log = tcp_bblogging_on(rack->rc_tp);
|
||||
do_log = tcp_bblogging_on(rack->rc_tp);
|
||||
} else {
|
||||
do_log = tcp_bblogging_point_on(rack->rc_tp, TCP_BBPOINT_REQ_LEVEL_LOGGING);
|
||||
}
|
||||
|
@ -2004,6 +2023,7 @@ rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, uint64_t cbw, uint64_t t
|
|||
/* Convert our ms to a microsecond */
|
||||
memset(&log, 0, sizeof(log));
|
||||
|
||||
log.u_bbr.cwnd_gain = line;
|
||||
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
|
||||
log.u_bbr.rttProp = tim;
|
||||
log.u_bbr.bw_inuse = cbw;
|
||||
|
@ -2049,8 +2069,10 @@ rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, uint64_t cbw, uint64_t t
|
|||
/* localtime = <delivered | applimited>*/
|
||||
log.u_bbr.applimited = (uint32_t)(cur->localtime & 0x00000000ffffffff);
|
||||
log.u_bbr.delivered = (uint32_t)((cur->localtime >> 32) & 0x00000000ffffffff);
|
||||
#ifdef TCP_REQUEST_TRK
|
||||
off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
|
||||
log.u_bbr.bbr_substate = (uint8_t)(off / sizeof(struct tcp_sendfile_track));
|
||||
#endif
|
||||
log.u_bbr.flex4 = (uint32_t)(rack->rc_tp->t_sndbytes - cur->sent_at_fs);
|
||||
log.u_bbr.flex5 = (uint32_t)(rack->rc_tp->t_snd_rxt_bytes - cur->rxt_at_fs);
|
||||
log.u_bbr.flex7 = (uint16_t)cur->hybrid_flags;
|
||||
|
@ -2083,6 +2105,60 @@ rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, uint64_t cbw, uint64_t t
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifdef TCP_REQUEST_TRK
|
||||
static void
|
||||
rack_log_hybrid_sends(struct tcp_rack *rack, struct tcp_sendfile_track *cur, int line)
|
||||
{
|
||||
if (tcp_bblogging_point_on(rack->rc_tp, TCP_BBPOINT_REQ_LEVEL_LOGGING)) {
|
||||
union tcp_log_stackspecific log;
|
||||
struct timeval tv;
|
||||
uint64_t off;
|
||||
|
||||
/* Convert our ms to a microsecond */
|
||||
memset(&log, 0, sizeof(log));
|
||||
|
||||
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
|
||||
log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes;
|
||||
log.u_bbr.delRate = cur->sent_at_fs;
|
||||
log.u_bbr.rttProp = rack->rc_tp->t_snd_rxt_bytes;
|
||||
log.u_bbr.bw_inuse = cur->rxt_at_fs;
|
||||
log.u_bbr.cwnd_gain = line;
|
||||
off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
|
||||
log.u_bbr.bbr_substate = (uint8_t)(off / sizeof(struct tcp_sendfile_track));
|
||||
/* start = < flex1 | flex2 > */
|
||||
log.u_bbr.flex2 = (uint32_t)(cur->start & 0x00000000ffffffff);
|
||||
log.u_bbr.flex1 = (uint32_t)((cur->start >> 32) & 0x00000000ffffffff);
|
||||
/* end = < flex3 | flex4 > */
|
||||
log.u_bbr.flex4 = (uint32_t)(cur->end & 0x00000000ffffffff);
|
||||
log.u_bbr.flex3 = (uint32_t)((cur->end >> 32) & 0x00000000ffffffff);
|
||||
|
||||
/* localtime = <delivered | applimited>*/
|
||||
log.u_bbr.applimited = (uint32_t)(cur->localtime & 0x00000000ffffffff);
|
||||
log.u_bbr.delivered = (uint32_t)((cur->localtime >> 32) & 0x00000000ffffffff);
|
||||
/* client timestamp = <lt_epoch | epoch>*/
|
||||
log.u_bbr.epoch = (uint32_t)(cur->timestamp & 0x00000000ffffffff);
|
||||
log.u_bbr.lt_epoch = (uint32_t)((cur->timestamp >> 32) & 0x00000000ffffffff);
|
||||
/* now set all the flags in */
|
||||
log.u_bbr.pkts_out = cur->hybrid_flags;
|
||||
log.u_bbr.flex6 = cur->flags;
|
||||
/*
|
||||
* Last send time = <flex5 | pkt_epoch> note we do not distinguish cases
|
||||
* where a false retransmit occurred so first_send <-> lastsend may
|
||||
* include longer time then it actually took if we have a false rxt.
|
||||
*/
|
||||
log.u_bbr.pkt_epoch = (uint32_t)(rack->r_ctl.last_tmit_time_acked & 0x00000000ffffffff);
|
||||
log.u_bbr.flex5 = (uint32_t)((rack->r_ctl.last_tmit_time_acked >> 32) & 0x00000000ffffffff);
|
||||
|
||||
log.u_bbr.flex8 = HYBRID_LOG_SENT_LOST;
|
||||
tcp_log_event(rack->rc_tp, NULL,
|
||||
&rack->rc_inp->inp_socket->so_rcv,
|
||||
&rack->rc_inp->inp_socket->so_snd,
|
||||
TCP_HYBRID_PACING_LOG, 0,
|
||||
0, &log, false, NULL, __func__, __LINE__, &tv);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline uint64_t
|
||||
rack_compensate_for_linerate(struct tcp_rack *rack, uint64_t bw)
|
||||
{
|
||||
|
@ -2128,13 +2204,13 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int *capped)
|
|||
*/
|
||||
struct tcp_sendfile_track *ent;
|
||||
|
||||
ent = rack->r_ctl.rc_last_sft;
|
||||
ent = rack->r_ctl.rc_last_sft;
|
||||
microuptime(&tv);
|
||||
timenow = tcp_tv_to_lusectick(&tv);
|
||||
if (timenow >= ent->deadline) {
|
||||
/* No time left we do DGP only */
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
0, 0, 0, HYBRID_LOG_OUTOFTIME, 0, ent);
|
||||
0, 0, 0, HYBRID_LOG_OUTOFTIME, 0, ent, __LINE__);
|
||||
rack->r_ctl.bw_rate_cap = 0;
|
||||
return;
|
||||
}
|
||||
|
@ -2143,7 +2219,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int *capped)
|
|||
if (timeleft < HPTS_MSEC_IN_SEC) {
|
||||
/* If there is less than a ms left just use DGPs rate */
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
0, timeleft, 0, HYBRID_LOG_OUTOFTIME, 0, ent);
|
||||
0, timeleft, 0, HYBRID_LOG_OUTOFTIME, 0, ent, __LINE__);
|
||||
rack->r_ctl.bw_rate_cap = 0;
|
||||
return;
|
||||
}
|
||||
|
@ -2159,7 +2235,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int *capped)
|
|||
else {
|
||||
/* TSNH, we should catch it at the send */
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
0, timeleft, 0, HYBRID_LOG_CAPERROR, 0, ent);
|
||||
0, timeleft, 0, HYBRID_LOG_CAPERROR, 0, ent, __LINE__);
|
||||
rack->r_ctl.bw_rate_cap = 0;
|
||||
return;
|
||||
}
|
||||
|
@ -2178,7 +2254,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int *capped)
|
|||
else {
|
||||
/* TSNH, we should catch it at the send */
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
0, timeleft, lengone, HYBRID_LOG_CAPERROR, 0, ent);
|
||||
0, timeleft, lengone, HYBRID_LOG_CAPERROR, 0, ent, __LINE__);
|
||||
rack->r_ctl.bw_rate_cap = 0;
|
||||
return;
|
||||
}
|
||||
|
@ -2186,7 +2262,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int *capped)
|
|||
if (lenleft == 0) {
|
||||
/* We have it all sent */
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
0, timeleft, lenleft, HYBRID_LOG_ALLSENT, 0, ent);
|
||||
0, timeleft, lenleft, HYBRID_LOG_ALLSENT, 0, ent, __LINE__);
|
||||
if (rack->r_ctl.bw_rate_cap)
|
||||
goto normal_ratecap;
|
||||
else
|
||||
|
@ -2210,10 +2286,10 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int *capped)
|
|||
rack_log_type_pacing_sizes(rack->rc_tp, rack, rack->r_ctl.client_suggested_maxseg, orig_max, __LINE__, 5);
|
||||
}
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
calcbw, timeleft, lenleft, HYBRID_LOG_CAP_CALC, 0, ent);
|
||||
calcbw, timeleft, lenleft, HYBRID_LOG_CAP_CALC, 0, ent, __LINE__);
|
||||
if ((calcbw > 0) && (*bw > calcbw)) {
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
*bw, ent->deadline, lenleft, HYBRID_LOG_RATE_CAP, 0, ent);
|
||||
*bw, ent->deadline, lenleft, HYBRID_LOG_RATE_CAP, 0, ent, __LINE__);
|
||||
*capped = 1;
|
||||
*bw = calcbw;
|
||||
}
|
||||
|
@ -2241,7 +2317,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int *capped)
|
|||
*bw = rack->r_ctl.bw_rate_cap;
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
*bw, 0, 0,
|
||||
HYBRID_LOG_RATE_CAP, 1, NULL);
|
||||
HYBRID_LOG_RATE_CAP, 1, NULL, __LINE__);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2916,7 +2992,7 @@ rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick,
|
|||
static void
|
||||
rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_t cts, struct timeval *tv, int line)
|
||||
{
|
||||
if (tcp_bblogging_on(rack->rc_tp)) {
|
||||
if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
|
||||
union tcp_log_stackspecific log;
|
||||
|
||||
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
|
||||
|
@ -3116,7 +3192,7 @@ rack_log_alt_to_to_cancel(struct tcp_rack *rack,
|
|||
static void
|
||||
rack_log_to_processing(struct tcp_rack *rack, uint32_t cts, int32_t ret, int32_t timers)
|
||||
{
|
||||
if (tcp_bblogging_on(rack->rc_tp)) {
|
||||
if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
|
||||
union tcp_log_stackspecific log;
|
||||
struct timeval tv;
|
||||
|
||||
|
@ -3819,15 +3895,30 @@ rack_increase_bw_mul(struct tcp_rack *rack, int timely_says, uint64_t cur_bw, ui
|
|||
static uint32_t
|
||||
rack_get_decrease(struct tcp_rack *rack, uint32_t curper, int32_t rtt_diff)
|
||||
{
|
||||
/*
|
||||
/*-
|
||||
* norm_grad = rtt_diff / minrtt;
|
||||
* new_per = curper * (1 - B * norm_grad)
|
||||
*
|
||||
* B = rack_gp_decrease_per (default 10%)
|
||||
* B = rack_gp_decrease_per (default 80%)
|
||||
* rtt_dif = input var current rtt-diff
|
||||
* curper = input var current percentage
|
||||
* minrtt = from rack filter
|
||||
*
|
||||
* In order to do the floating point calculations above we
|
||||
* do an integer conversion. The code looks confusing so let me
|
||||
* translate it into something that use more variables and
|
||||
* is clearer for us humans :)
|
||||
*
|
||||
* uint64_t norm_grad, inverse, reduce_by, final_result;
|
||||
* uint32_t perf;
|
||||
*
|
||||
* norm_grad = (((uint64_t)rtt_diff * 1000000) /
|
||||
* (uint64_t)get_filter_small(&rack->r_ctl.rc_gp_min_rtt));
|
||||
* inverse = ((uint64_t)rack_gp_decrease * (uint64_t)1000000) * norm_grad;
|
||||
* inverse /= 1000000;
|
||||
* reduce_by = (1000000 - inverse);
|
||||
* final_result = (cur_per * reduce_by) / 1000000;
|
||||
* perf = (uint32_t)final_result;
|
||||
*/
|
||||
uint64_t perf;
|
||||
|
||||
|
@ -3852,7 +3943,7 @@ rack_decrease_highrtt(struct tcp_rack *rack, uint32_t curper, uint32_t rtt)
|
|||
* result = curper * (1 - (B * ( 1 - ------ ))
|
||||
* gp_srtt
|
||||
*
|
||||
* B = rack_gp_decrease_per (default 10%)
|
||||
* B = rack_gp_decrease_per (default .8 i.e. 80)
|
||||
* highrttthresh = filter_min * rack_gp_rtt_maxmul
|
||||
*/
|
||||
uint64_t perf;
|
||||
|
@ -3864,6 +3955,20 @@ rack_decrease_highrtt(struct tcp_rack *rack, uint32_t curper, uint32_t rtt)
|
|||
((uint64_t)rack_gp_decrease_per * ((uint64_t)1000000 -
|
||||
((uint64_t)highrttthresh * (uint64_t)1000000) /
|
||||
(uint64_t)rtt)) / 100)) /(uint64_t)1000000);
|
||||
if (tcp_bblogging_on(rack->rc_tp)) {
|
||||
uint64_t log1;
|
||||
|
||||
log1 = rtt;
|
||||
log1 <<= 32;
|
||||
log1 |= highrttthresh;
|
||||
rack_log_timely(rack,
|
||||
rack_gp_decrease_per,
|
||||
(uint64_t)curper,
|
||||
log1,
|
||||
perf,
|
||||
__LINE__,
|
||||
15);
|
||||
}
|
||||
return (perf);
|
||||
}
|
||||
|
||||
|
@ -3911,7 +4016,7 @@ rack_decrease_bw_mul(struct tcp_rack *rack, int timely_says, uint32_t rtt, int32
|
|||
/* Sent in SS */
|
||||
if (timely_says == 2) {
|
||||
new_per = rack_decrease_highrtt(rack, rack->r_ctl.rack_per_of_gp_ss, rtt);
|
||||
alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_rec, rtt_diff);
|
||||
alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_ss, rtt_diff);
|
||||
if (alt < new_per)
|
||||
val = alt;
|
||||
else
|
||||
|
@ -3944,7 +4049,7 @@ rack_decrease_bw_mul(struct tcp_rack *rack, int timely_says, uint32_t rtt, int32
|
|||
/* Sent in CA */
|
||||
if (timely_says == 2) {
|
||||
new_per = rack_decrease_highrtt(rack, rack->r_ctl.rack_per_of_gp_ca, rtt);
|
||||
alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_rec, rtt_diff);
|
||||
alt = rack_get_decrease(rack, rack->r_ctl.rack_per_of_gp_ca, rtt_diff);
|
||||
if (alt < new_per)
|
||||
val = alt;
|
||||
else
|
||||
|
@ -5040,7 +5145,7 @@ rack_do_goodput_measurement(struct tcpcb *tp, struct tcp_rack *rack,
|
|||
(rack->r_ctl.num_measurements >= rack->r_ctl.req_measurements)) {
|
||||
/* We have enough measurements now */
|
||||
rack->gp_ready = 1;
|
||||
if ((rack->rc_always_pace && (rack->use_fixed_rate == 0)) ||
|
||||
if (rack->dgp_on ||
|
||||
rack->rack_hibeta)
|
||||
rack_set_cc_pacing(rack);
|
||||
if (rack->defer_options)
|
||||
|
@ -6860,21 +6965,20 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
|
|||
* even a SACK should not disturb us (with
|
||||
* the exception of r_rr_config 3).
|
||||
*/
|
||||
if (rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK) {
|
||||
if ((rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK) ||
|
||||
(IN_RECOVERY(tp->t_flags))) {
|
||||
if (rack->r_rr_config != 3)
|
||||
tp->t_flags2 |= TF2_DONT_SACK_QUEUE;
|
||||
else if (rack->rc_pace_dnd) {
|
||||
if (IN_RECOVERY(tp->t_flags)) {
|
||||
/*
|
||||
* When DND is on, we only let a sack
|
||||
* interrupt us if we are not in recovery.
|
||||
*
|
||||
* If DND is off, then we never hit here
|
||||
* and let all sacks wake us up.
|
||||
*
|
||||
*/
|
||||
tp->t_flags2 |= TF2_DONT_SACK_QUEUE;
|
||||
}
|
||||
/*
|
||||
* When DND is on, we only let a sack
|
||||
* interrupt us if we are not in recovery.
|
||||
*
|
||||
* If DND is off, then we never hit here
|
||||
* and let all sacks wake us up.
|
||||
*
|
||||
*/
|
||||
tp->t_flags2 |= TF2_DONT_SACK_QUEUE;
|
||||
}
|
||||
}
|
||||
/* For sack attackers we want to ignore sack */
|
||||
|
@ -10357,7 +10461,7 @@ rack_process_to_cumack(struct tcpcb *tp, struct tcp_rack *rack, register uint32_
|
|||
|
||||
rack->r_wanted_output = 1;
|
||||
if (SEQ_GT(th_ack, tp->snd_una))
|
||||
rack->r_ctl.last_cumack_advance = acktime;
|
||||
rack->r_ctl.last_cumack_advance = acktime;
|
||||
|
||||
/* Tend any TLP that has been marked for 1/2 the seq space (its old) */
|
||||
if ((rack->rc_last_tlp_acked_set == 1)&&
|
||||
|
@ -10484,6 +10588,7 @@ rack_process_to_cumack(struct tcpcb *tp, struct tcp_rack *rack, register uint32_
|
|||
}
|
||||
}
|
||||
/* Now do we consume the whole thing? */
|
||||
rack->r_ctl.last_tmit_time_acked = rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)];
|
||||
if (SEQ_GEQ(th_ack, rsm->r_end)) {
|
||||
/* Its all consumed. */
|
||||
uint32_t left;
|
||||
|
@ -10619,16 +10724,43 @@ rack_process_to_cumack(struct tcpcb *tp, struct tcp_rack *rack, register uint32_
|
|||
/* The trim will move th_ack into r_start for us */
|
||||
tqhash_trim(rack->r_ctl.tqh, th_ack);
|
||||
/* Now do we need to move the mbuf fwd too? */
|
||||
if (rsm->m) {
|
||||
while (rsm->soff >= rsm->m->m_len) {
|
||||
rsm->soff -= rsm->m->m_len;
|
||||
rsm->m = rsm->m->m_next;
|
||||
KASSERT((rsm->m != NULL),
|
||||
(" nrsm:%p hit at soff:%u null m",
|
||||
rsm, rsm->soff));
|
||||
{
|
||||
struct mbuf *m;
|
||||
uint32_t soff;
|
||||
|
||||
m = rsm->m;
|
||||
soff = rsm->soff;
|
||||
if (m) {
|
||||
while (soff >= m->m_len) {
|
||||
soff -= m->m_len;
|
||||
KASSERT((m->m_next != NULL),
|
||||
(" rsm:%p off:%u soff:%u m:%p",
|
||||
rsm, rsm->soff, soff, m));
|
||||
m = m->m_next;
|
||||
if (m == NULL) {
|
||||
/*
|
||||
* This is a fall-back that prevents a panic. In reality
|
||||
* we should be able to walk the mbuf's and find our place.
|
||||
* At this point snd_una has not been updated with the sbcut() yet
|
||||
* but tqhash_trim did update rsm->r_start so the offset calculation
|
||||
* should work fine. This is undesirable since we will take cache
|
||||
* hits to access the socket buffer. And even more puzzling is that
|
||||
* it happens occasionally. It should not :(
|
||||
*/
|
||||
m = sbsndmbuf(&rack->rc_inp->inp_socket->so_snd,
|
||||
(rsm->r_start - tp->snd_una),
|
||||
&soff);
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Now save in our updated values.
|
||||
*/
|
||||
rsm->m = m;
|
||||
rsm->soff = soff;
|
||||
rsm->orig_m_len = rsm->m->m_len;
|
||||
rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
|
||||
}
|
||||
rsm->orig_m_len = rsm->m->m_len;
|
||||
rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
|
||||
}
|
||||
if (rack->app_limited_needs_set &&
|
||||
SEQ_GEQ(th_ack, tp->gput_seq))
|
||||
|
@ -11516,7 +11648,7 @@ rack_check_bottom_drag(struct tcpcb *tp,
|
|||
(rack->r_ctl.num_measurements >= rack->r_ctl.req_measurements)) {
|
||||
/* We have enough measurements now */
|
||||
rack->gp_ready = 1;
|
||||
if ((rack->rc_always_pace && (rack->use_fixed_rate == 0)) ||
|
||||
if (rack->dgp_on ||
|
||||
rack->rack_hibeta)
|
||||
rack_set_cc_pacing(rack);
|
||||
if (rack->defer_options)
|
||||
|
@ -11557,7 +11689,7 @@ rack_log_hybrid(struct tcp_rack *rack, uint32_t seq,
|
|||
int do_log;
|
||||
|
||||
do_log = tcp_bblogging_on(rack->rc_tp);
|
||||
if (do_log == 0) {
|
||||
if (do_log == 0) {
|
||||
if ((do_log = tcp_bblogging_point_on(rack->rc_tp, TCP_BBPOINT_REQ_LEVEL_LOGGING) )== 0)
|
||||
return;
|
||||
/* We only allow the three below with point logging on */
|
||||
|
@ -11565,7 +11697,7 @@ rack_log_hybrid(struct tcp_rack *rack, uint32_t seq,
|
|||
(mod != HYBRID_LOG_RULES_SET) &&
|
||||
(mod != HYBRID_LOG_REQ_COMP))
|
||||
return;
|
||||
|
||||
|
||||
}
|
||||
if (do_log) {
|
||||
union tcp_log_stackspecific log;
|
||||
|
@ -11593,8 +11725,10 @@ rack_log_hybrid(struct tcp_rack *rack, uint32_t seq,
|
|||
log.u_bbr.epoch = (uint32_t)(cur->deadline & 0x00000000ffffffff);
|
||||
log.u_bbr.lt_epoch = (uint32_t)((cur->deadline >> 32) & 0x00000000ffffffff) ;
|
||||
log.u_bbr.bbr_state = 1;
|
||||
#ifdef TCP_REQUEST_TRK
|
||||
off = (uint64_t)(cur) - (uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
|
||||
log.u_bbr.use_lt_bw = (uint8_t)(off / sizeof(struct tcp_sendfile_track));
|
||||
#endif
|
||||
} else {
|
||||
log.u_bbr.flex2 = err;
|
||||
}
|
||||
|
@ -11633,7 +11767,8 @@ rack_set_dgp_hybrid_mode(struct tcp_rack *rack, tcp_seq seq, uint32_t len)
|
|||
rc_cur = tcp_req_find_req_for_seq(rack->rc_tp, seq);
|
||||
if (rc_cur == NULL) {
|
||||
/* If not in the beginning what about the end piece */
|
||||
rack_log_hybrid(rack, seq, NULL, HYBRID_LOG_NO_RANGE, __LINE__, err);
|
||||
if (rack->rc_hybrid_mode)
|
||||
rack_log_hybrid(rack, seq, NULL, HYBRID_LOG_NO_RANGE, __LINE__, err);
|
||||
rc_cur = tcp_req_find_req_for_seq(rack->rc_tp, (seq + len - 1));
|
||||
} else {
|
||||
err = 12345;
|
||||
|
@ -11644,12 +11779,17 @@ rack_set_dgp_hybrid_mode(struct tcp_rack *rack, tcp_seq seq, uint32_t len)
|
|||
rack->r_ctl.client_suggested_maxseg = 0;
|
||||
rack->rc_catch_up = 0;
|
||||
rack->r_ctl.bw_rate_cap = 0;
|
||||
rack_log_hybrid(rack, (seq + len - 1), NULL, HYBRID_LOG_NO_RANGE, __LINE__, err);
|
||||
if (rack->rc_hybrid_mode)
|
||||
rack_log_hybrid(rack, (seq + len - 1), NULL, HYBRID_LOG_NO_RANGE, __LINE__, err);
|
||||
if (rack->r_ctl.rc_last_sft) {
|
||||
rack->r_ctl.rc_last_sft = NULL;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if ((rc_cur->hybrid_flags & TCP_HYBRID_PACING_WASSET) == 0) {
|
||||
/* This entry was never setup for hybrid pacing on/off etc */
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* Ok if we have a new entry *or* have never
|
||||
* set up an entry we need to proceed. If
|
||||
|
@ -11661,7 +11801,8 @@ rack_set_dgp_hybrid_mode(struct tcp_rack *rack, tcp_seq seq, uint32_t len)
|
|||
if ((rack->r_ctl.rc_last_sft != NULL) &&
|
||||
(rack->r_ctl.rc_last_sft == rc_cur)) {
|
||||
/* Its already in place */
|
||||
rack_log_hybrid(rack, seq, rc_cur, HYBRID_LOG_ISSAME, __LINE__, 0);
|
||||
if (rack->rc_hybrid_mode)
|
||||
rack_log_hybrid(rack, seq, rc_cur, HYBRID_LOG_ISSAME, __LINE__, 0);
|
||||
return;
|
||||
}
|
||||
if (rack->rc_hybrid_mode == 0) {
|
||||
|
@ -11757,7 +11898,8 @@ rack_chk_req_and_hybrid_on_out(struct tcp_rack *rack, tcp_seq seq, uint32_t len,
|
|||
* way it will complete when all of it is acked.
|
||||
*/
|
||||
ent->end_seq = (seq + len);
|
||||
rack_log_hybrid_bw(rack, seq, len, 0, 0, HYBRID_LOG_EXTEND, 0, ent);
|
||||
if (rack->rc_hybrid_mode)
|
||||
rack_log_hybrid_bw(rack, seq, len, 0, 0, HYBRID_LOG_EXTEND, 0, ent, __LINE__);
|
||||
}
|
||||
/* Now validate we have set the send time of this one */
|
||||
if ((ent->flags & TCP_TRK_TRACK_FLG_FSND) == 0) {
|
||||
|
@ -11941,6 +12083,7 @@ rack_req_check_for_comp(struct tcp_rack *rack, tcp_seq th_ack)
|
|||
/* Ok this ack frees it */
|
||||
rack_log_hybrid(rack, th_ack,
|
||||
ent, HYBRID_LOG_REQ_COMP, __LINE__, 0);
|
||||
rack_log_hybrid_sends(rack, ent, __LINE__);
|
||||
/* calculate the time based on the ack arrival */
|
||||
data = ent->end - ent->start;
|
||||
laa = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time);
|
||||
|
@ -11962,7 +12105,7 @@ rack_req_check_for_comp(struct tcp_rack *rack, tcp_seq th_ack)
|
|||
cbw /= tim;
|
||||
else
|
||||
cbw = 0;
|
||||
rack_log_hybrid_bw(rack, th_ack, cbw, tim, data, HYBRID_LOG_BW_MEASURE, 0, ent);
|
||||
rack_log_hybrid_bw(rack, th_ack, cbw, tim, data, HYBRID_LOG_BW_MEASURE, 0, ent, __LINE__);
|
||||
/*
|
||||
* Check to see if we are freeing what we are pointing to send wise
|
||||
* if so be sure to NULL the pointer so we know we are no longer
|
||||
|
@ -14254,7 +14397,7 @@ rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line, u
|
|||
if (fill_override)
|
||||
rate_wanted = *fill_override;
|
||||
else
|
||||
rate_wanted = rack_get_output_bw(rack, bw_est, NULL, NULL);
|
||||
rate_wanted = rack_get_gp_est(rack);
|
||||
if (rate_wanted) {
|
||||
/* We have something */
|
||||
rack->r_ctl.rc_pace_max_segs = rack_get_pacing_len(rack,
|
||||
|
@ -14885,8 +15028,6 @@ rack_init(struct tcpcb *tp, void **ptr)
|
|||
*/
|
||||
rack->rc_new_rnd_needed = 1;
|
||||
rack->r_ctl.rc_split_limit = V_tcp_map_split_limit;
|
||||
rack->r_ctl.rc_saved_beta.beta = V_newreno_beta_ecn;
|
||||
rack->r_ctl.rc_saved_beta.beta_ecn = V_newreno_beta_ecn;
|
||||
/* We want abe like behavior as well */
|
||||
rack->r_ctl.rc_saved_beta.newreno_flags |= CC_NEWRENO_BETA_ECN_ENABLED;
|
||||
rack->r_ctl.rc_reorder_fade = rack_reorder_fade;
|
||||
|
@ -14924,8 +15065,18 @@ rack_init(struct tcpcb *tp, void **ptr)
|
|||
rack->rc_user_set_max_segs = rack_hptsi_segments;
|
||||
rack->rc_force_max_seg = 0;
|
||||
TAILQ_INIT(&rack->r_ctl.opt_list);
|
||||
if (rack_hibeta_setting)
|
||||
rack->r_ctl.rc_saved_beta.beta = V_newreno_beta_ecn;
|
||||
rack->r_ctl.rc_saved_beta.beta_ecn = V_newreno_beta_ecn;
|
||||
if (rack_hibeta_setting) {
|
||||
rack->rack_hibeta = 1;
|
||||
if ((rack_hibeta_setting >= 50) &&
|
||||
(rack_hibeta_setting <= 100)) {
|
||||
rack->r_ctl.rc_saved_beta.beta = rack_hibeta_setting;
|
||||
rack->r_ctl.saved_hibeta = rack_hibeta_setting;
|
||||
}
|
||||
} else {
|
||||
rack->r_ctl.saved_hibeta = 50;
|
||||
}
|
||||
rack->r_ctl.rc_reorder_shift = rack_reorder_thresh;
|
||||
rack->r_ctl.rc_pkt_delay = rack_pkt_delay;
|
||||
rack->r_ctl.rc_tlp_cwnd_reduce = rack_lower_cwnd_at_tlp;
|
||||
|
@ -14941,7 +15092,7 @@ rack_init(struct tcpcb *tp, void **ptr)
|
|||
rack->rc_gp_no_rec_chg = 1;
|
||||
if (rack_pace_every_seg && tcp_can_enable_pacing()) {
|
||||
rack->rc_always_pace = 1;
|
||||
if ((rack->gp_ready) && (rack->rc_always_pace && (rack->use_fixed_rate == 0)))
|
||||
if (rack->rack_hibeta)
|
||||
rack_set_cc_pacing(rack);
|
||||
} else
|
||||
rack->rc_always_pace = 0;
|
||||
|
@ -17204,6 +17355,19 @@ rack_log_pacing_delay_calc(struct tcp_rack *rack, uint32_t len, uint32_t slot,
|
|||
union tcp_log_stackspecific log;
|
||||
struct timeval tv;
|
||||
|
||||
if (rack_verbose_logging == 0) {
|
||||
/*
|
||||
* We are not verbose screen out all but
|
||||
* ones we always want.
|
||||
*/
|
||||
if ((method != 2) &&
|
||||
(method != 3) &&
|
||||
(method != 7) &&
|
||||
(method != 14) &&
|
||||
(method != 20)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
memset(&log, 0, sizeof(log));
|
||||
log.u_bbr.flex1 = slot;
|
||||
log.u_bbr.flex2 = len;
|
||||
|
@ -17307,6 +17471,60 @@ rack_get_pacing_len(struct tcp_rack *rack, uint64_t bw, uint32_t mss)
|
|||
return (new_tso);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
rack_arrive_at_discounted_rate(struct tcp_rack *rack, uint64_t window_input, uint32_t *rate_set, uint32_t *gain_b)
|
||||
{
|
||||
uint64_t reduced_win;
|
||||
uint32_t gain;
|
||||
|
||||
if (window_input < rc_init_window(rack)) {
|
||||
/*
|
||||
* The cwnd is collapsed to
|
||||
* nearly zero, maybe because of a time-out?
|
||||
* Lets drop back to the lt-bw.
|
||||
*/
|
||||
reduced_win = rack_get_lt_bw(rack);
|
||||
/* Set the flag so the caller knows its a rate and not a reduced window */
|
||||
*rate_set = 1;
|
||||
gain = 100;
|
||||
} else if (IN_RECOVERY(rack->rc_tp->t_flags)) {
|
||||
/*
|
||||
* If we are in recover our cwnd needs to be less for
|
||||
* our pacing consideration.
|
||||
*/
|
||||
if (rack->rack_hibeta == 0) {
|
||||
reduced_win = window_input / 2;
|
||||
gain = 50;
|
||||
} else {
|
||||
reduced_win = window_input * rack->r_ctl.saved_hibeta;
|
||||
reduced_win /= 100;
|
||||
gain = rack->r_ctl.saved_hibeta;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Apply Timely factor to increase/decrease the
|
||||
* amount we are pacing at.
|
||||
*/
|
||||
gain = rack_get_output_gain(rack, NULL);
|
||||
if (gain > rack_gain_p5_ub) {
|
||||
gain = rack_gain_p5_ub;
|
||||
}
|
||||
reduced_win = window_input * gain;
|
||||
reduced_win /= 100;
|
||||
}
|
||||
if (gain_b != NULL)
|
||||
*gain_b = gain;
|
||||
/*
|
||||
* What is being returned here is a trimmed down
|
||||
* window values in all cases where rate_set is left
|
||||
* at 0. In one case we actually return the rate (lt_bw).
|
||||
* the "reduced_win" is returned as a slimmed down cwnd that
|
||||
* is then calculated by the caller into a rate when rate_set
|
||||
* is 0.
|
||||
*/
|
||||
return (reduced_win);
|
||||
}
|
||||
|
||||
static int32_t
|
||||
pace_to_fill_cwnd(struct tcp_rack *rack, int32_t slot, uint32_t len, uint32_t segsiz, int *capped, uint64_t *rate_wanted, uint8_t non_paced)
|
||||
{
|
||||
|
@ -17331,12 +17549,21 @@ pace_to_fill_cwnd(struct tcp_rack *rack, int32_t slot, uint32_t len, uint32_t se
|
|||
* and the smallest send window.
|
||||
*/
|
||||
fill_bw = min(rack->rc_tp->snd_cwnd, rack->r_ctl.cwnd_to_use);
|
||||
if (rack->rc_fillcw_apply_discount) {
|
||||
uint32_t rate_set = 0;
|
||||
|
||||
fill_bw = rack_arrive_at_discounted_rate(rack, fill_bw, &rate_set, NULL);
|
||||
if (rate_set) {
|
||||
goto at_lt_bw;
|
||||
}
|
||||
}
|
||||
/* Take the rwnd if its smaller */
|
||||
if (fill_bw > rack->rc_tp->snd_wnd)
|
||||
fill_bw = rack->rc_tp->snd_wnd;
|
||||
/* Now lets make it into a b/w */
|
||||
fill_bw *= (uint64_t)HPTS_USEC_IN_SEC;
|
||||
fill_bw /= (uint64_t)rack->r_ctl.rc_last_us_rtt;
|
||||
at_lt_bw:
|
||||
if (rack->r_fill_less_agg) {
|
||||
/*
|
||||
* We want the average of the rate_wanted
|
||||
|
@ -17404,8 +17631,9 @@ pace_to_fill_cwnd(struct tcp_rack *rack, int32_t slot, uint32_t len, uint32_t se
|
|||
}
|
||||
}
|
||||
if (rack->r_ctl.bw_rate_cap && (fill_bw > rack->r_ctl.bw_rate_cap)) {
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
fill_bw, 0, 0, HYBRID_LOG_RATE_CAP, 2, NULL);
|
||||
if (rack->rc_hybrid_mode)
|
||||
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
|
||||
fill_bw, 0, 0, HYBRID_LOG_RATE_CAP, 2, NULL, __LINE__);
|
||||
fill_bw = rack->r_ctl.bw_rate_cap;
|
||||
}
|
||||
/*
|
||||
|
@ -17513,9 +17741,27 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
|
|||
(rack->r_ctl.gp_bw == 0)) {
|
||||
/* no way to do an estimate yet */
|
||||
bw_est = rate_wanted = 0;
|
||||
} else {
|
||||
} else if (rack->dgp_on) {
|
||||
bw_est = rack_get_bw(rack);
|
||||
rate_wanted = rack_get_output_bw(rack, bw_est, rsm, &capped);
|
||||
} else {
|
||||
uint32_t gain, rate_set = 0;
|
||||
|
||||
rate_wanted = min(rack->rc_tp->snd_cwnd, rack->r_ctl.cwnd_to_use);
|
||||
rate_wanted = rack_arrive_at_discounted_rate(rack, rate_wanted, &rate_set, &gain);
|
||||
if (rate_set == 0) {
|
||||
if (rate_wanted > rack->rc_tp->snd_wnd)
|
||||
rate_wanted = rack->rc_tp->snd_wnd;
|
||||
/* Now let's make it into a b/w */
|
||||
rate_wanted *= (uint64_t)HPTS_USEC_IN_SEC;
|
||||
rate_wanted /= (uint64_t)rack->r_ctl.rc_last_us_rtt;
|
||||
}
|
||||
bw_est = rate_wanted;
|
||||
rack_log_pacing_delay_calc(rack, rack->rc_tp->snd_cwnd,
|
||||
rack->r_ctl.cwnd_to_use,
|
||||
rate_wanted, bw_est,
|
||||
rack->r_ctl.rc_last_us_rtt,
|
||||
88, __LINE__, NULL, gain);
|
||||
}
|
||||
if ((bw_est == 0) || (rate_wanted == 0) ||
|
||||
((rack->gp_ready == 0) && (rack->use_fixed_rate == 0))) {
|
||||
|
@ -17534,16 +17780,16 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
|
|||
* means we may be off if we are larger than 1500 bytes
|
||||
* or smaller. But this just makes us more conservative.
|
||||
*/
|
||||
|
||||
|
||||
oh = (tp->t_maxseg - segsiz) + sizeof(struct tcphdr);
|
||||
if (rack->r_is_v6) {
|
||||
#ifdef INET6
|
||||
oh += sizeof(struct ip6_hdr);
|
||||
#endif
|
||||
#endif
|
||||
} else {
|
||||
#ifdef INET
|
||||
oh += sizeof(struct ip);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
/* We add a fixed 14 for the ethernet header */
|
||||
oh += 14;
|
||||
|
@ -17602,6 +17848,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
|
|||
prev_fill = rack->r_via_fill_cw;
|
||||
if ((rack->rc_pace_to_cwnd) &&
|
||||
(capped == 0) &&
|
||||
(rack->dgp_on == 1) &&
|
||||
(rack->use_fixed_rate == 0) &&
|
||||
(rack->in_probe_rtt == 0) &&
|
||||
(IN_FASTRECOVERY(rack->rc_tp->t_flags) == 0)) {
|
||||
|
@ -17652,8 +17899,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
|
|||
if (rack->r_ctl.crte) {
|
||||
rack->rack_hdrw_pacing = 1;
|
||||
rack->r_ctl.rc_pace_max_segs = tcp_get_pacing_burst_size_w_divisor(tp, rate_wanted, segsiz,
|
||||
pace_one, rack->r_ctl.crte,
|
||||
NULL, rack->r_ctl.pace_len_divisor);
|
||||
pace_one, rack->r_ctl.crte,
|
||||
NULL, rack->r_ctl.pace_len_divisor);
|
||||
rack_log_hdwr_pacing(rack,
|
||||
rate_wanted, rack->r_ctl.crte->rate, __LINE__,
|
||||
err, 0);
|
||||
|
@ -17695,8 +17942,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
|
|||
* do allow hardware pacing to be restarted.
|
||||
*/
|
||||
rack_log_hdwr_pacing(rack,
|
||||
bw_est, rack->r_ctl.crte->rate, __LINE__,
|
||||
0, 5);
|
||||
bw_est, rack->r_ctl.crte->rate, __LINE__,
|
||||
0, 5);
|
||||
tcp_rel_pacing_rate(rack->r_ctl.crte, rack->rc_tp);
|
||||
rack->r_ctl.crte = NULL;
|
||||
rack->rack_attempt_hdwr_pace = 0;
|
||||
|
@ -17705,11 +17952,11 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
|
|||
goto done_w_hdwr;
|
||||
}
|
||||
nrte = tcp_chg_pacing_rate(rack->r_ctl.crte,
|
||||
rack->rc_tp,
|
||||
rack->rc_inp->inp_route.ro_nh->nh_ifp,
|
||||
rate_wanted,
|
||||
RS_PACING_GEQ,
|
||||
&err, &rack->r_ctl.crte_prev_rate);
|
||||
rack->rc_tp,
|
||||
rack->rc_inp->inp_route.ro_nh->nh_ifp,
|
||||
rate_wanted,
|
||||
RS_PACING_GEQ,
|
||||
&err, &rack->r_ctl.crte_prev_rate);
|
||||
if (nrte == NULL) {
|
||||
/*
|
||||
* Lost the rate, let's drop hardware pacing
|
||||
|
@ -17725,8 +17972,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
|
|||
} else if (nrte != rack->r_ctl.crte) {
|
||||
rack->r_ctl.crte = nrte;
|
||||
rack->r_ctl.rc_pace_max_segs = tcp_get_pacing_burst_size_w_divisor(tp, rate_wanted,
|
||||
segsiz, pace_one, rack->r_ctl.crte,
|
||||
NULL, rack->r_ctl.pace_len_divisor);
|
||||
segsiz, pace_one, rack->r_ctl.crte,
|
||||
NULL, rack->r_ctl.pace_len_divisor);
|
||||
rack_log_hdwr_pacing(rack,
|
||||
rate_wanted, rack->r_ctl.crte->rate, __LINE__,
|
||||
err, 2);
|
||||
|
@ -17747,7 +17994,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
|
|||
98, __LINE__, NULL, 0);
|
||||
slot = minslot;
|
||||
}
|
||||
done_w_hdwr:
|
||||
done_w_hdwr:
|
||||
if (rack_limit_time_with_srtt &&
|
||||
(rack->use_fixed_rate == 0) &&
|
||||
(rack->rack_hdrw_pacing == 0)) {
|
||||
|
@ -18070,7 +18317,7 @@ rack_log_fsb(struct tcp_rack *rack, struct tcpcb *tp, struct socket *so, uint32_
|
|||
unsigned ipoptlen, int32_t orig_len, int32_t len, int error,
|
||||
int rsm_is_null, int optlen, int line, uint16_t mode)
|
||||
{
|
||||
if (tcp_bblogging_on(rack->rc_tp)) {
|
||||
if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
|
||||
union tcp_log_stackspecific log;
|
||||
struct timeval tv;
|
||||
|
||||
|
@ -18869,6 +19116,10 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
|
|||
rack->r_ctl.last_sent_tlp_seq = rsm->r_start;
|
||||
rack->r_ctl.last_sent_tlp_len = rsm->r_end - rsm->r_start;
|
||||
}
|
||||
if (rack->r_ctl.rc_prr_sndcnt >= len)
|
||||
rack->r_ctl.rc_prr_sndcnt -= len;
|
||||
else
|
||||
rack->r_ctl.rc_prr_sndcnt = 0;
|
||||
}
|
||||
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
|
||||
rack->forced_ack = 0; /* If we send something zap the FA flag */
|
||||
|
@ -19049,6 +19300,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val,
|
|||
m = NULL;
|
||||
goto failed;
|
||||
}
|
||||
rack->r_ctl.cwnd_to_use = tp->snd_cwnd;
|
||||
startseq = tp->snd_max;
|
||||
segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
|
||||
inp = rack->rc_inp;
|
||||
|
@ -22402,6 +22654,7 @@ rack_set_dgp(struct tcp_rack *rack)
|
|||
if (tcp_can_enable_pacing() == 0)
|
||||
return (EBUSY);
|
||||
}
|
||||
rack->rc_fillcw_apply_discount = 0;
|
||||
rack->dgp_on = 1;
|
||||
rack->rc_always_pace = 1;
|
||||
rack->use_fixed_rate = 0;
|
||||
|
@ -22490,6 +22743,26 @@ rack_set_profile(struct tcp_rack *rack, int prof)
|
|||
err = rack_set_dgp(rack);
|
||||
if (err)
|
||||
return (err);
|
||||
} else if (prof == 5) {
|
||||
err = rack_set_dgp(rack);
|
||||
if (err)
|
||||
return (err);
|
||||
/*
|
||||
* By turning DGP off we change the rate
|
||||
* picked to be only the one the cwnd and rtt
|
||||
* get us.
|
||||
*/
|
||||
rack->dgp_on = 0;
|
||||
} else if (prof == 6) {
|
||||
err = rack_set_dgp(rack);
|
||||
if (err)
|
||||
return (err);
|
||||
/*
|
||||
* Profile 6 tweaks DGP so that it will apply to
|
||||
* fill-cw the same settings that profile5 does
|
||||
* to replace DGP. It then gets the max(dgp-rate, fillcw(discounted)).
|
||||
*/
|
||||
rack->rc_fillcw_apply_discount = 1;
|
||||
} else if (prof == 0) {
|
||||
/* This changes things back to the default settings */
|
||||
rack->dgp_on = 0;
|
||||
|
@ -22506,7 +22779,7 @@ rack_set_profile(struct tcp_rack *rack, int prof)
|
|||
}
|
||||
if (rack_pace_every_seg && tcp_can_enable_pacing()) {
|
||||
rack->rc_always_pace = 1;
|
||||
if ((rack->gp_ready) && (rack->use_fixed_rate == 0))
|
||||
if (rack->rack_hibeta)
|
||||
rack_set_cc_pacing(rack);
|
||||
} else
|
||||
rack->rc_always_pace = 0;
|
||||
|
@ -22658,7 +22931,7 @@ process_hybrid_pacing(struct tcp_rack *rack, struct tcp_hybrid_req *hybrid)
|
|||
}
|
||||
}
|
||||
/* Now set in our flags */
|
||||
sft->hybrid_flags = hybrid->hybrid_flags;
|
||||
sft->hybrid_flags = hybrid->hybrid_flags | TCP_HYBRID_PACING_WASSET;
|
||||
if (hybrid->hybrid_flags & TCP_HYBRID_PACING_CSPR)
|
||||
sft->cspr = hybrid->cspr;
|
||||
else
|
||||
|
@ -22727,10 +23000,25 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name,
|
|||
break;
|
||||
case TCP_RACK_HI_BETA:
|
||||
RACK_OPTS_INC(tcp_rack_hi_beta);
|
||||
if (optval)
|
||||
if (optval > 0) {
|
||||
rack->rack_hibeta = 1;
|
||||
else
|
||||
if ((optval >= 50) &&
|
||||
(optval <= 100)) {
|
||||
/*
|
||||
* User wants to set a custom beta.
|
||||
*/
|
||||
rack->r_ctl.saved_hibeta = optval;
|
||||
if (rack->rc_pacing_cc_set)
|
||||
rack_undo_cc_pacing(rack);
|
||||
rack->r_ctl.rc_saved_beta.beta = optval;
|
||||
}
|
||||
if (rack->rc_pacing_cc_set == 0)
|
||||
rack_set_cc_pacing(rack);
|
||||
} else {
|
||||
rack->rack_hibeta = 0;
|
||||
if (rack->rc_pacing_cc_set)
|
||||
rack_undo_cc_pacing(rack);
|
||||
}
|
||||
break;
|
||||
case TCP_RACK_PACING_BETA:
|
||||
RACK_OPTS_INC(tcp_rack_beta);
|
||||
|
@ -23003,7 +23291,7 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name,
|
|||
break;
|
||||
} else if (tcp_can_enable_pacing()) {
|
||||
rack->rc_always_pace = 1;
|
||||
if ((rack->gp_ready) && (rack->use_fixed_rate == 0))
|
||||
if (rack->rack_hibeta)
|
||||
rack_set_cc_pacing(rack);
|
||||
}
|
||||
else {
|
||||
|
@ -23099,7 +23387,10 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name,
|
|||
case TCP_RACK_PACE_MAX_SEG:
|
||||
/* Max segments size in a pace in bytes */
|
||||
RACK_OPTS_INC(tcp_rack_max_seg);
|
||||
rack->rc_user_set_max_segs = optval;
|
||||
if (optval <= MAX_USER_SET_SEG)
|
||||
rack->rc_user_set_max_segs = optval;
|
||||
else
|
||||
rack->rc_user_set_max_segs = MAX_USER_SET_SEG;
|
||||
rack_set_pace_segments(tp, rack, __LINE__, NULL);
|
||||
break;
|
||||
case TCP_RACK_PACE_RATE_REC:
|
||||
|
@ -23111,7 +23402,7 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name,
|
|||
if (rack->r_ctl.rc_fixed_pacing_rate_ss == 0)
|
||||
rack->r_ctl.rc_fixed_pacing_rate_ss = optval;
|
||||
rack->use_fixed_rate = 1;
|
||||
if (rack->rc_always_pace && rack->gp_ready && rack->rack_hibeta)
|
||||
if (rack->rack_hibeta)
|
||||
rack_set_cc_pacing(rack);
|
||||
rack_log_pacing_delay_calc(rack,
|
||||
rack->r_ctl.rc_fixed_pacing_rate_ss,
|
||||
|
@ -23129,7 +23420,7 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name,
|
|||
if (rack->r_ctl.rc_fixed_pacing_rate_rec == 0)
|
||||
rack->r_ctl.rc_fixed_pacing_rate_rec = optval;
|
||||
rack->use_fixed_rate = 1;
|
||||
if (rack->rc_always_pace && rack->gp_ready && rack->rack_hibeta)
|
||||
if (rack->rack_hibeta)
|
||||
rack_set_cc_pacing(rack);
|
||||
rack_log_pacing_delay_calc(rack,
|
||||
rack->r_ctl.rc_fixed_pacing_rate_ss,
|
||||
|
@ -23147,7 +23438,7 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name,
|
|||
if (rack->r_ctl.rc_fixed_pacing_rate_rec == 0)
|
||||
rack->r_ctl.rc_fixed_pacing_rate_rec = optval;
|
||||
rack->use_fixed_rate = 1;
|
||||
if (rack->rc_always_pace && rack->gp_ready && rack->rack_hibeta)
|
||||
if (rack->rack_hibeta)
|
||||
rack_set_cc_pacing(rack);
|
||||
rack_log_pacing_delay_calc(rack,
|
||||
rack->r_ctl.rc_fixed_pacing_rate_ss,
|
||||
|
|
|
@ -458,6 +458,7 @@ struct rack_control {
|
|||
uint64_t last_sndbytes;
|
||||
uint64_t last_snd_rxt_bytes;
|
||||
uint64_t rxt_threshold;
|
||||
uint64_t last_tmit_time_acked; /* Holds the last cumack point's last send time */
|
||||
uint32_t last_rnd_rxt_clamped;
|
||||
uint32_t num_of_clamps_applied;
|
||||
uint32_t clamp_options;
|
||||
|
@ -526,6 +527,7 @@ struct rack_control {
|
|||
uint8_t rc_no_push_at_mrtt; /* No push when we exceed max rtt */
|
||||
uint8_t num_measurements; /* Number of measurements (up to 0xff, we freeze at 0xff) */
|
||||
uint8_t req_measurements; /* How many measurements are required? */
|
||||
uint8_t saved_hibeta;
|
||||
uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */
|
||||
uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */
|
||||
uint8_t rc_rate_sample_method;
|
||||
|
@ -577,6 +579,7 @@ struct rack_control {
|
|||
#define HYBRID_LOG_OUTOFTIME 12 /* We are past the deadline DGP */
|
||||
#define HYBRID_LOG_CAPERROR 13 /* Hit one of the TSNH cases */
|
||||
#define HYBRID_LOG_EXTEND 14 /* We extended the end */
|
||||
#define HYBRID_LOG_SENT_LOST 15 /* A closing sent/lost report */
|
||||
|
||||
#define RACK_TIMELY_CNT_BOOST 5 /* At 5th increase boost */
|
||||
#define RACK_MINRTT_FILTER_TIM 10 /* Seconds */
|
||||
|
@ -589,6 +592,8 @@ struct rack_control {
|
|||
* +Slam cwnd
|
||||
*/
|
||||
|
||||
#define MAX_USER_SET_SEG 0x3f /* The max we can set is 63 which is probably too many */
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
struct tcp_rack {
|
||||
|
@ -659,7 +664,8 @@ struct tcp_rack {
|
|||
r_via_fill_cw : 1,
|
||||
r_fill_less_agg : 1;
|
||||
|
||||
uint8_t rc_user_set_max_segs; /* Socket option value Lock(a) */
|
||||
uint8_t rc_user_set_max_segs : 7, /* Socket option value Lock(a) */
|
||||
rc_fillcw_apply_discount;
|
||||
uint8_t rc_labc; /* Appropriate Byte Counting Value */
|
||||
uint16_t forced_ack : 1,
|
||||
rc_gp_incr : 1,
|
||||
|
|
Loading…
Reference in a new issue