Revert "Update to bring the rack stack with all its fixes in."

The reverted commit was incomplete and breaks LINT kernels. The tree has
been broken for 8+ hours.

This reverts commit f6d489f402.
Brooks Davis 2024-03-11 20:15:20 +00:00
parent 16f8d88294
commit c112243f6b
14 changed files with 1163 additions and 3579 deletions


@@ -5,7 +5,7 @@
STACKNAME= rack
KMOD= tcp_${STACKNAME}
SRCS= rack.c sack_filter.c rack_bbr_common.c tailq_hash.c rack_pcm.c
SRCS= rack.c sack_filter.c rack_bbr_common.c tailq_hash.c
SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h
SRCS+= opt_kern_tls.h


@@ -334,22 +334,9 @@ __tcp_set_flags(struct tcphdr *th, uint16_t flags)
#define TCP_RACK_PACING_DIVISOR 1146 /* Pacing divisor given to rate-limit code for burst sizing */
#define TCP_RACK_PACE_MIN_SEG 1147 /* Pacing min seg size rack will use */
#define TCP_RACK_DGP_IN_REC 1148 /* Do we use full DGP in recovery? */
#define TCP_POLICER_DETECT 1149 /* Do we apply a thresholds to rack to detect and compensate for policers? */
#define TCP_RXT_CLAMP TCP_POLICER_DETECT
#define TCP_RXT_CLAMP 1149 /* Do we apply a threshold to rack so if excess rxt clamp cwnd? */
#define TCP_HYBRID_PACING 1150 /* Hybrid pacing enablement */
#define TCP_PACING_DND 1151 /* When pacing with rr_config=3 can sacks disturb us */
#define TCP_SS_EEXIT 1152 /* Do we do early exit from slowtart if no b/w growth */
#define TCP_DGP_UPPER_BOUNDS 1153 /* SS and CA upper bound in percentage */
#define TCP_NO_TIMELY 1154 /* Disable/enable Timely */
#define TCP_HONOR_HPTS_MIN 1155 /* Do we honor hpts min to */
#define TCP_REC_IS_DYN 1156 /* Do we allow timely to change recovery multiplier? */
#define TCP_SIDECHAN_DIS 1157 /* Disable/enable the side-channel */
#define TCP_FILLCW_RATE_CAP 1158 /* Set a cap for DGP's fillcw */
#define TCP_POLICER_MSS 1159 /* Policer MSS requirement */
#define TCP_STACK_SPEC_INFO 1160 /* Get stack specific information (if present) */
#define RACK_CSPR_IS_FCC 1161
#define TCP_GP_USE_LTBW 1162 /* how we use lt_bw 0=not, 1=min, 2=max */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
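
The numbers above are ordinary TCP-level socket options, consumed through
setsockopt(2) once a connection has been handed to the rack stack. A minimal
sketch, assuming a connected socket fd and a kernel that carries the option in
question (several of the values above exist on only one side of this revert);
error handling is reduced to pass/fail:

	/*
	 * Sketch: select the rack stack, then set one of its options.
	 * TCP_RACK_PACE_MIN_SEG is taken from the context lines above,
	 * so it exists on both sides of the revert.
	 */
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <string.h>

	static int
	set_rack_min_seg(int fd, int min_seg)
	{
		struct tcp_function_set fs;

		memset(&fs, 0, sizeof(fs));
		strlcpy(fs.function_set_name, "rack",
		    sizeof(fs.function_set_name));
		if (setsockopt(fd, IPPROTO_TCP, TCP_FUNCTION_BLK,
		    &fs, sizeof(fs)) == -1)
			return (-1);
		return (setsockopt(fd, IPPROTO_TCP, TCP_RACK_PACE_MIN_SEG,
		    &min_seg, sizeof(min_seg)));
	}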
@@ -460,7 +447,6 @@ struct tcp_info {
u_int32_t tcpi_rcv_adv; /* Peer advertised window */
u_int32_t tcpi_dupacks; /* Consecutive dup ACKs recvd */
u_int32_t tcpi_rttmin; /* Min observed RTT */
/* Padding to grow without breaking ABI. */
u_int32_t __tcpi_pad[14]; /* Padding. */
};
@@ -477,20 +463,6 @@ struct tcp_fastopen {
#define TCP_FUNCTION_NAME_LEN_MAX 32
struct stack_specific_info {
char stack_name[TCP_FUNCTION_NAME_LEN_MAX];
uint64_t policer_last_bw; /* Only valid if detection enabled and policer detected */
uint64_t bytes_transmitted;
uint64_t bytes_retransmitted;
uint32_t policer_detection_enabled: 1,
policer_detected : 1, /* transport thinks a policer is on path */
highly_buffered : 1, /* transport considers the path highly buffered */
spare : 29;
uint32_t policer_bucket_size; /* Only valid if detection enabled and policer detected */
uint32_t current_round;
uint32_t _rack_i_pad[18];
};
struct tcp_function_set {
char function_set_name[TCP_FUNCTION_NAME_LEN_MAX];
uint32_t pcbcnt;
@@ -516,7 +488,6 @@ struct tcp_snd_req {
uint64_t start;
uint64_t end;
uint32_t flags;
uint32_t playout_ms;
};
union tcp_log_userdata {
@@ -547,12 +518,9 @@ struct tcp_log_user {
#define TCP_HYBRID_PACING_H_MS 0x0008 /* A client hint for maxseg is present */
#define TCP_HYBRID_PACING_ENABLE 0x0010 /* We are enabling hybrid pacing else disable */
#define TCP_HYBRID_PACING_S_MSS 0x0020 /* Clent wants us to set the mss overriding gp est in CU */
#define TCP_HAS_PLAYOUT_MS 0x0040 /* The client included the chunk playout milliseconds: deprecate */
/* the below are internal only flags */
#define TCP_HYBRID_PACING_USER_MASK 0x0FFF /* Non-internal flags mask */
#define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tells us we set the mss on this entry */
#define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tellsus we set the mss on this entry */
#define TCP_HYBRID_PACING_WASSET 0x2000 /* We init to this to know if a hybrid command was issued */
#define TCP_HYBRID_PACING_SENDTIME 0x4000 /* Duplicate tm to last, use sendtime for catch up mode */
struct tcp_hybrid_req {
struct tcp_snd_req req;

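Those flag bits travel to the stack inside the tcp_hybrid_req whose
declaration begins above. A hedged sketch of issuing one request; the struct
is cut off by the diff context, so treating hybrid_flags as the flag carrier
is an assumption:

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <stdint.h>
	#include <string.h>

	static int
	request_hybrid_pacing(int fd, uint64_t ts, uint64_t start, uint64_t end)
	{
		struct tcp_hybrid_req hr;

		memset(&hr, 0, sizeof(hr));
		hr.req.timestamp = ts;	/* identifies the tracked request */
		hr.req.start = start;
		hr.req.end = end;
		/* Assumed field name; the full struct is not visible above. */
		hr.hybrid_flags = TCP_HYBRID_PACING_ENABLE;
		return (setsockopt(fd, IPPROTO_TCP, TCP_HYBRID_PACING,
		    &hr, sizeof(hr)));
	}
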

@@ -267,9 +267,7 @@ enum tcp_log_events {
TCP_RACK_TP_TRIGGERED, /* A rack tracepoint is triggered 68 */
TCP_HYBRID_PACING_LOG, /* Hybrid pacing log 69 */
TCP_LOG_PRU, /* TCP protocol user request 70 */
TCP_POLICER_DET, /* TCP Policer detectionn 71 */
TCP_PCM_MEASURE, /* TCP Path Capacity Measurement 72 */
TCP_LOG_END /* End (keep at end) 72 */
TCP_LOG_END /* End (keep at end) 71 */
};
enum tcp_log_states {
@@ -373,11 +371,10 @@ struct tcp_log_dev_log_queue {
#define TCP_TP_COLLAPSED_RXT 0x00000004 /* When we actually retransmit a collapsed window rsm */
#define TCP_TP_REQ_LOG_FAIL 0x00000005 /* We tried to allocate a Request log but had no space */
#define TCP_TP_RESET_RCV 0x00000006 /* Triggers when we receive a RST */
#define TCP_TP_POLICER_DET 0x00000007 /* When we detect a policer */
#define TCP_TP_EXCESS_RXT TCP_TP_POLICER_DET /* alias */
#define TCP_TP_EXCESS_RXT 0x00000007 /* When we get excess RXT's clamping the cwnd */
#define TCP_TP_SAD_TRIGGERED 0x00000008 /* Sack Attack Detection triggers */
#define TCP_TP_SAD_SUSPECT 0x0000000a /* A sack has supicious information in it */
#define TCP_TP_PACED_BOTTOM 0x0000000b /* We have paced at the bottom */
#ifdef _KERNEL


@@ -11529,9 +11529,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
bbr_set_pktepoch(bbr, cts, __LINE__);
bbr_check_bbr_for_state(bbr, cts, __LINE__, (bbr->r_ctl.rc_lost - lost));
if (nxt_pkt == 0) {
if ((bbr->r_wanted_output != 0) ||
(tp->t_flags & TF_ACKNOW)) {
if (bbr->r_wanted_output != 0) {
bbr->rc_output_starts_timer = 0;
did_out = 1;
if (tcp_output(tp) < 0)

[File diff suppressed because it is too large.]


@@ -51,10 +51,5 @@ void sack_filter_clear(struct sack_filter *sf, tcp_seq seq);
int sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks,
tcp_seq th_ack);
void sack_filter_reject(struct sack_filter *sf, struct sackblk *in);
static inline uint8_t sack_filter_blks_used(struct sack_filter *sf)
{
return (sf->sf_used);
}
#endif
#endif


@@ -65,6 +65,7 @@
#include <netinet/tcp_log_buf.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_hpts.h>
#include <netinet/tcp_ratelimit.h>
#include <netinet/tcp_accounting.h>
#include <netinet/tcpip.h>
#include <netinet/cc/cc.h>
@@ -99,7 +100,6 @@
#include "sack_filter.h"
#include "tcp_rack.h"
#include "tailq_hash.h"
#include "opt_global.h"
struct rack_sendmap *
@@ -107,7 +107,7 @@ tqhash_min(struct tailq_hash *hs)
{
struct rack_sendmap *rsm;
rsm = hs->rsm_min;
rsm = tqhash_find(hs, hs->min);
return(rsm);
}
@@ -116,7 +116,7 @@ tqhash_max(struct tailq_hash *hs)
{
struct rack_sendmap *rsm;
rsm = hs->rsm_max;
rsm = tqhash_find(hs, (hs->max - 1));
return (rsm);
}
@@ -224,19 +224,13 @@ tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm)
void
tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type)
{
TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next);
hs->count--;
if (hs->count == 0) {
hs->min = hs->max;
hs->rsm_max = hs->rsm_min = NULL;
} else if (type == REMOVE_TYPE_CUMACK) {
hs->min = rsm->r_end;
hs->rsm_min = tqhash_next(hs, rsm);
} else if (rsm == hs->rsm_max) {
hs->rsm_max = tqhash_prev(hs, rsm);
hs->max = hs->rsm_max->r_end;
}
TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next);
}
int
@@ -246,7 +240,6 @@ tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm)
int inserted = 0;
uint32_t ebucket;
#ifdef INVARIANTS
if (hs->count > 0) {
if ((rsm->r_end - hs->min) > MAX_ALLOWED_SEQ_RANGE) {
return (-1);
@@ -256,7 +249,6 @@ tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm)
return (-2);
}
}
#endif
rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE;
rsm->bindex %= MAX_HASH_ENTRIES;
ebucket = rsm->r_end / SEQ_BUCKET_SIZE;
@@ -271,17 +263,13 @@ tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm)
/* Special case */
hs->min = rsm->r_start;
hs->max = rsm->r_end;
hs->rsm_min = hs->rsm_max = rsm;
hs->count = 1;
} else {
hs->count++;
if (SEQ_GEQ(rsm->r_end, hs->max)) {
if (SEQ_GT(rsm->r_end, hs->max))
hs->max = rsm->r_end;
hs->rsm_max = rsm;
} if (SEQ_LEQ(rsm->r_start, hs->min)) {
if (SEQ_LT(rsm->r_start, hs->min))
hs->min = rsm->r_start;
hs->rsm_min = rsm;
}
}
/* Check the common case of inserting at the end */
l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head);
@@ -311,7 +299,6 @@ tqhash_init(struct tailq_hash *hs)
TAILQ_INIT(&hs->ht[i]);
}
hs->min = hs->max = 0;
hs->rsm_min = hs->rsm_max = NULL;
hs->count = 0;
}
@@ -352,11 +339,3 @@ tqhash_trim(struct tailq_hash *hs, uint32_t th_ack)
return (0);
}
void
tqhash_update_end(struct tailq_hash *hs, struct rack_sendmap *rsm,
uint32_t th_ack)
{
if (hs->max == rsm->r_end)
hs->max = th_ack;
rsm->r_end = th_ack;
}
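
Two things are worth noting in the hunks above. First, the reverted change had
cached rsm_min/rsm_max pointers so that tqhash_min() and tqhash_max() were
plain loads; after the revert they go back to a tqhash_find() lookup keyed by
hs->min and hs->max. Second, both versions share the same bucketing scheme: an
entry lands in the bucket of its starting sequence number, with buckets
SEQ_BUCKET_SIZE wide and the table wrapping at MAX_HASH_ENTRIES. A
one-function sketch of that mapping, using only names visible in these hunks:

	/* Sketch: the bucket computation used by tqhash_insert() above. */
	static inline uint32_t
	seq_to_bucket(uint32_t seq)
	{
		return ((seq / SEQ_BUCKET_SIZE) % MAX_HASH_ENTRIES);
	}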


@@ -13,12 +13,10 @@
#define MAX_ALLOWED_SEQ_RANGE (SEQ_BUCKET_SIZE * (MAX_HASH_ENTRIES-1))
struct tailq_hash {
struct rack_head ht[MAX_HASH_ENTRIES];
uint32_t min;
uint32_t max;
uint32_t count;
struct rack_sendmap *rsm_min;
struct rack_sendmap *rsm_max;
struct rack_head ht[MAX_HASH_ENTRIES];
};
struct rack_sendmap *
@@ -55,10 +53,6 @@ tqhash_init(struct tailq_hash *hs);
int
tqhash_trim(struct tailq_hash *hs, uint32_t th_ack);
void
tqhash_update_end(struct tailq_hash *hs, struct rack_sendmap *rsm,
uint32_t th_ack);
#define TQHASH_FOREACH(var, head) \
for ((var) = tqhash_min((head)); \

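The foreach macro above is truncated by the diff context, but it iterates from
tqhash_min() forward. A hedged usage sketch (the rack variable and r_ctl.tqh
come from the tcp_rack.h hunks below):

	struct rack_sendmap *rsm;

	TQHASH_FOREACH(rsm, rack->r_ctl.tqh) {
		/* inspect rsm->r_start .. rsm->r_end in sequence order */
	}
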

@@ -48,8 +48,6 @@
#define RACK_MERGED 0x080000/* The RSM was merged */
#define RACK_PMTU_CHG 0x100000/* The path mtu changed on this guy */
#define RACK_STRADDLE 0x200000/* The seq straddles the bucket line */
#define RACK_WAS_LOST 0x400000/* Is the rsm considered lost */
#define RACK_IS_PCM 0x800000/* A PCM measurement is being taken */
#define RACK_NUM_OF_RETRANS 3
#define RACK_INITIAL_RTO 1000000 /* 1 second in microseconds */
@@ -65,7 +63,6 @@ struct rack_sendmap {
uint32_t r_rtr_bytes; /* How many bytes have been retransmitted */
uint32_t r_flags : 24, /* Flags as defined above */
r_rtr_cnt : 8; /* Retran count, index this -1 to get time */
uint32_t r_act_rxt_cnt; /* The actual total count of transmits */
struct mbuf *m;
uint32_t soff;
uint32_t orig_m_len; /* The original mbuf len when we sent (can update) */
@@ -177,8 +174,6 @@ struct rack_rtt_sample {
#define RACK_TO_FRM_PERSIST 5
#define RACK_TO_FRM_DELACK 6
#define RCV_PATH_RTT_MS 10 /* How many ms between recv path RTT's */
struct rack_opts_stats {
uint64_t tcp_rack_tlp_reduce;
uint64_t tcp_rack_pace_always;
@@ -237,7 +232,7 @@ struct rack_opts_stats {
uint64_t tcp_rack_rtt_use;
uint64_t tcp_data_after_close;
uint64_t tcp_defer_opt;
uint64_t tcp_pol_detect;
uint64_t tcp_rxt_clamp;
uint64_t tcp_rack_beta;
uint64_t tcp_rack_beta_ecn;
uint64_t tcp_rack_timer_slop;
@@ -247,11 +242,6 @@ struct rack_opts_stats {
uint64_t tcp_rack_pacing_divisor;
uint64_t tcp_rack_min_seg;
uint64_t tcp_dgp_in_rec;
uint64_t tcp_notimely;
uint64_t tcp_honor_hpts;
uint64_t tcp_dyn_rec;
uint64_t tcp_fillcw_rate_cap;
uint64_t tcp_pol_mss;
};
/* RTT shrink reasons */
@@ -273,9 +263,6 @@ struct rack_opts_stats {
#define TLP_USE_TWO_TWO 3 /* Use 2.2 behavior */
#define RACK_MIN_BW 8000 /* 64kbps in Bps */
#define CCSP_DIS_MASK 0x0001
#define HYBRID_DIS_MASK 0x0002
/* Rack quality indicators for GPUT measurements */
#define RACK_QUALITY_NONE 0 /* No quality stated */
#define RACK_QUALITY_HIGH 1 /* A normal measurement of a GP RTT */
@@ -332,7 +319,6 @@ extern counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
*
*/
#define RACK_GP_HIST 4 /* How much goodput history do we maintain? */
#define RETRAN_CNT_SIZE 16
#define RACK_NUM_FSB_DEBUG 16
#ifdef _KERNEL
@@ -356,26 +342,6 @@ struct rack_fast_send_blk {
struct tailq_hash;
struct rack_pcm_info {
/* Base send time and s/e filled in by rack_log_output */
uint64_t send_time;
uint32_t sseq;
uint32_t eseq;
/* Ack's fill in the rest of the data */
uint16_t cnt;
/* Maximum acks present */
uint16_t cnt_alloc;
};
#define RACK_DEFAULT_PCM_ARRAY 16
struct rack_pcm_stats {
uint32_t sseq;
uint32_t eseq;
uint64_t ack_time;
};
struct rack_control {
/* Second cache line 0x40 from tcp_rack */
struct tailq_hash *tqh; /* Tree of all segments Lock(a) */
@@ -436,7 +402,6 @@ struct rack_control {
uint32_t rc_rcvtime; /* When we last received data */
uint32_t rc_num_split_allocs; /* num split map entries allocated */
uint32_t rc_split_limit; /* Limit from control var can be set by socket opt */
uint32_t rack_avg_rec_sends;
uint32_t rc_last_output_to;
uint32_t rc_went_idle_time;
@@ -487,45 +452,19 @@ struct rack_control {
struct tcp_sendfile_track *rc_last_sft;
uint32_t lt_seq; /* Seq at start of lt_bw gauge */
int32_t rc_rtt_diff; /* Timely style rtt diff of our gp_srtt */
uint64_t last_sndbytes;
uint64_t last_snd_rxt_bytes;
uint64_t rxt_threshold;
uint64_t last_tmit_time_acked; /* Holds the last cumack point's last send time */
/* Recovery stats */
uint64_t time_entered_recovery;
uint64_t bytes_acked_in_recovery;
/* Policer Detection */
uint64_t last_policer_sndbytes;
uint64_t last_policer_snd_rxt_bytes;
uint64_t policer_bw;
uint64_t last_sendtime;
uint64_t last_gpest;
uint64_t last_tm_mark; /* Last tm mark used */
uint64_t fillcw_cap; /* B/W cap on fill cw */
struct rack_pcm_info pcm_i;
struct rack_pcm_stats *pcm_s;
uint32_t gp_gain_req; /* Percent off gp gain req */
uint32_t last_rnd_of_gp_rise;
uint32_t gp_rnd_thresh;
uint32_t ss_hi_fs;
uint32_t gate_to_fs;
uint32_t policer_max_seg;
uint32_t pol_bw_comp;
uint16_t policer_rxt_threshold;
uint8_t policer_avg_threshold;
uint8_t policer_med_threshold;
uint32_t pcm_max_seg;
uint32_t last_pcm_round;
uint32_t pcm_idle_rounds;
uint32_t current_policer_bucket;
uint32_t policer_bucket_size;
uint32_t idle_snd_una;
uint32_t ack_for_idle;
uint32_t last_amount_before_rec;
uint32_t last_rnd_rxt_clamped;
uint32_t num_of_clamps_applied;
uint32_t clamp_options;
uint32_t max_clamps;
uint32_t rc_gp_srtt; /* Current GP srtt */
uint32_t rc_prev_gp_srtt; /* Previous RTT */
uint32_t rc_entry_gp_rtt; /* Entry to PRTT gp-rtt */
uint32_t rc_loss_at_start; /* At measurement window where was our lost value */
uint32_t rc_considered_lost; /* Count in recovery of non-retransmitted bytes considered lost */
uint32_t dsack_round_end; /* In a round of seeing a DSACK */
uint32_t current_round; /* Starting at zero */
@@ -552,8 +491,6 @@ struct rack_control {
uint32_t rc_snd_max_at_rto; /* For non-sack when the RTO occurred what was snd-max */
uint32_t rc_out_at_rto;
int32_t rc_scw_index;
uint32_t max_reduction;
uint32_t side_chan_dis_mask; /* Bit mask of socket opt's disabled */
uint32_t rc_tlp_threshold; /* Socket option value Lock(a) */
uint32_t rc_last_timeout_snduna;
uint32_t last_tlp_acked_start;
@@ -566,11 +503,7 @@ struct rack_control {
uint32_t ack_during_sd;
uint32_t input_pkt;
uint32_t saved_input_pkt;
uint32_t saved_policer_val; /* The encoded value we used to setup policer detection */
uint32_t cleared_app_ack_seq;
uint32_t last_rcv_tstmp_for_rtt;
uint32_t last_time_of_arm_rcv;
uint32_t rto_ssthresh;
uint32_t saved_rxt_clamp_val; /* The encoded value we used to setup clamping */
struct newreno rc_saved_beta; /*
* For newreno cc:
* rc_saved_cc are the values we have had
@@ -583,13 +516,10 @@ struct rack_control {
* we also set the flag (if ecn_beta is set) to make
* new_reno do less of a backoff for ecn (think abe).
*/
uint16_t rc_cnt_of_retran[RETRAN_CNT_SIZE];
uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */
uint16_t rc_reorder_shift; /* Socket option value Lock(a) */
uint8_t policer_del_mss; /* How many mss during recovery for policer detection */
uint8_t rack_per_upper_bound_ss;
uint8_t rack_per_upper_bound_ca;
uint8_t cleared_app_ack;
uint8_t dsack_persist;
uint8_t rc_no_push_at_mrtt; /* No push when we exceed max rtt */
uint8_t num_measurements; /* Number of measurements (up to 0xff, we freeze at 0xff) */
@@ -598,19 +528,17 @@ struct rack_control {
uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */
uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */
uint8_t rc_rate_sample_method;
uint8_t policer_alt_median; /* Alternate median for policer detection */
uint8_t rc_dgp_bl_agg; /* Buffer Level aggression during DGP */
uint8_t full_dgp_in_rec; /* Flag to say if we do full DGP in recovery */
uint8_t client_suggested_maxseg; /* Not sure what to do with this yet */
uint8_t use_gp_not_last;
uint8_t pacing_method; /* If pace_always, what type of pacing */
uint8_t pacing_discount_amm; /*
* This is a multipler to the base discount that
* can be used to increase the discount.
*/
uint8_t already_had_a_excess;
};
#endif
#define RACK_PACING_NONE 0x00
#define RACK_DGP_PACING 0x01
#define RACK_REG_PACING 0x02
/* DGP with no buffer level mitigations */
#define DGP_LEVEL0 0
@@ -650,10 +578,6 @@ struct rack_control {
#define HYBRID_LOG_EXTEND 14 /* We extended the end */
#define HYBRID_LOG_SENT_LOST 15 /* A closing sent/lost report */
#define LOST_ZERO 1 /* Zero it out */
#define LOST_ADD 2 /* Add to it */
#define LOST_SUB 3 /* Sub from it */
#define RACK_TIMELY_CNT_BOOST 5 /* At 5th increase boost */
#define RACK_MINRTT_FILTER_TIM 10 /* Seconds */
@@ -666,7 +590,6 @@
*/
#define MAX_USER_SET_SEG 0x3f /* The max we can set is 63 which is probably too many */
#define RACK_FREE_CNT_MAX 0x2f /* Max our counter can do */
#ifdef _KERNEL
@@ -678,9 +601,8 @@ struct tcp_rack {
int32_t, int32_t, uint32_t, int, int, uint8_t); /* Lock(a) */
struct tcpcb *rc_tp; /* The tcpcb Lock(a) */
struct inpcb *rc_inp; /* The inpcb Lock(a) */
uint8_t rc_free_cnt : 6,
rc_skip_timely : 1,
pcm_enabled : 1; /* Is PCM enabled */
uint8_t rc_free_cnt; /* Number of free entries on the rc_free list
* Lock(a) */
uint8_t client_bufferlvl : 3, /* Expected range [0,5]: 0=unset, 1=low/empty */
rack_deferred_inited : 1,
/* ******************************************************************** */
@@ -690,11 +612,11 @@ struct tcp_rack {
shape_rxt_to_pacing_min : 1,
/* ******************************************************************** */
rc_ack_required: 1,
r_use_hpts_min : 1;
r_pacing_discount : 1;
uint8_t no_prr_addback : 1,
gp_ready : 1,
defer_options: 1,
dis_lt_bw : 1,
excess_rxt_on: 1, /* Are actions on for excess retransmissions? */
rc_ack_can_sendout_data: 1, /*
* If set it will override pacing restrictions on not sending
* data when the pacing timer is running. I.e. you set this
@@ -737,7 +659,7 @@ struct tcp_rack {
r_rack_hw_rate_caps: 1,
r_up_only: 1,
r_via_fill_cw : 1,
r_rcvpath_rtt_up : 1;
r_fill_less_agg : 1;
uint8_t rc_user_set_max_segs : 7, /* Socket option value Lock(a) */
rc_fillcw_apply_discount;
@@ -751,7 +673,7 @@ struct tcp_rack {
rc_highly_buffered: 1, /* The path is highly buffered */
rc_dragged_bottom: 1,
rc_pace_dnd : 1, /* The pace do not disturb bit */
rc_initial_ss_comp : 1,
rc_avali2 : 1,
rc_gp_filled : 1,
rc_hw_nobuf : 1;
uint8_t r_state : 4, /* Current rack state Lock(a) */
@@ -774,8 +696,8 @@ struct tcp_rack {
uint8_t app_limited_needs_set : 1,
use_fixed_rate : 1,
rc_has_collapsed : 1,
use_lesser_lt_bw : 1,
cspr_is_fcc : 1,
r_cwnd_was_clamped : 1,
r_clamped_gets_lower : 1,
rack_hdrw_pacing : 1, /* We are doing Hardware pacing */
rack_hdw_pace_ena : 1, /* Is hardware pacing enabled? */
rack_attempt_hdwr_pace : 1; /* Did we attempt hdwr pacing (if allowed) */
@@ -800,14 +722,7 @@ struct tcp_rack {
r_persist_lt_bw_off : 1,
r_collapse_point_valid : 1,
dgp_on : 1;
uint16_t rto_from_rec: 1,
avail_bit: 1,
pcm_in_progress: 1,
pcm_needed: 1,
policer_detect_on: 1, /* Are we detecting policers? */
rc_policer_detected : 1, /* We are beiing policed */
rc_policer_should_pace : 1, /* The sizing algo thinks we should pace */
rc_sendvars_notset : 1, /* Inside rack_init send variables (snd_max/una etc) were not set */
uint16_t rc_init_win : 8,
rc_gp_rtt_set : 1,
rc_gp_dyn_mul : 1,
rc_gp_saw_rec : 1,
@@ -820,9 +735,5 @@ struct tcp_rack {
struct rack_control r_ctl;
} __aligned(CACHE_LINE_SIZE);
void rack_update_pcm_ack(struct tcp_rack *rack, int was_cumack,
uint32_t ss, uint32_t es);
#endif
#endif


@@ -287,29 +287,18 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, ts_offset_per_conn, CTLFLAG_VNET | CTLFLAG_R
static volatile uint32_t number_of_tcp_connections_pacing = 0;
static uint32_t shadow_num_connections = 0;
static counter_u64_t tcp_pacing_failures;
static counter_u64_t tcp_dgp_failures;
static uint32_t shadow_tcp_pacing_dgp = 0;
static volatile uint32_t number_of_dgp_connections = 0;
static int tcp_pacing_limit = 10000;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pacing_limit, CTLFLAG_RW,
&tcp_pacing_limit, 1000,
"If the TCP stack does pacing, is there a limit (-1 = no, 0 = no pacing N = number of connections)");
static int tcp_dgp_limit = -1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, dgp_limit, CTLFLAG_RW,
&tcp_dgp_limit, -1,
"If the TCP stack does DGP, is there a limit (-1 = no, 0 = no dgp N = number of connections)");
SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pacing_count, CTLFLAG_RD,
&shadow_num_connections, 0, "Number of TCP connections being paced");
SYSCTL_COUNTER_U64(_net_inet_tcp, OID_AUTO, pacing_failures, CTLFLAG_RD,
&tcp_pacing_failures, "Number of times we failed to enable pacing to avoid exceeding the limit");
SYSCTL_COUNTER_U64(_net_inet_tcp, OID_AUTO, dgp_failures, CTLFLAG_RD,
&tcp_dgp_failures, "Number of times we failed to enable dgp to avoid exceeding the limit");
static int tcp_log_debug = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
&tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
@@ -1582,7 +1571,6 @@ tcp_init(void *arg __unused)
tcp_uncomp_total = counter_u64_alloc(M_WAITOK);
tcp_bad_csums = counter_u64_alloc(M_WAITOK);
tcp_pacing_failures = counter_u64_alloc(M_WAITOK);
tcp_dgp_failures = counter_u64_alloc(M_WAITOK);
#ifdef TCPPCAP
tcp_pcap_init();
#endif
@@ -4034,43 +4022,6 @@ tcp_can_enable_pacing(void)
}
}
int
tcp_incr_dgp_pacing_cnt(void)
{
if ((tcp_dgp_limit == -1) ||
(tcp_dgp_limit > number_of_dgp_connections)) {
atomic_fetchadd_int(&number_of_dgp_connections, 1);
shadow_tcp_pacing_dgp = number_of_dgp_connections;
return (1);
} else {
counter_u64_add(tcp_dgp_failures, 1);
return (0);
}
}
static uint8_t tcp_dgp_warning = 0;
void
tcp_dec_dgp_pacing_cnt(void)
{
uint32_t ret;
ret = atomic_fetchadd_int(&number_of_dgp_connections, -1);
shadow_tcp_pacing_dgp = number_of_dgp_connections;
KASSERT(ret != 0, ("number_of_dgp_connections -1 would cause wrap?"));
if (ret == 0) {
if (tcp_dgp_limit != -1) {
printf("Warning all DGP is now disabled, count decrements invalidly!\n");
tcp_dgp_limit = 0;
tcp_dgp_warning = 1;
} else if (tcp_dgp_warning == 0) {
printf("Warning DGP pacing is invalid, invalid decrement\n");
tcp_dgp_warning = 1;
}
}
}
static uint8_t tcp_pacing_warning = 0;
void
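
The deleted pair above is the same guarded-counter pattern the file keeps for
plain pacing: an atomic increment that is refused once the sysctl limit is
reached, and a decrement that warns on underflow. A hedged sketch of how a
stack would have paired them (the caller-side helpers are illustrative, not
the rack code; dgp_on is the bit shown in the tcp_rack.h hunks above):

	/* Sketch: turn DGP on only if the global limit grants a slot. */
	static void
	example_enable_dgp(struct tcp_rack *rack)
	{
		if (tcp_incr_dgp_pacing_cnt())
			rack->dgp_on = 1;	/* slot granted */
		else
			rack->dgp_on = 0;	/* limit hit; regular pacing */
	}

	/* Sketch: release the slot exactly once on teardown. */
	static void
	example_disable_dgp(struct tcp_rack *rack)
	{
		if (rack->dgp_on) {
			tcp_dec_dgp_pacing_cnt();
			rack->dgp_on = 0;
		}
	}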
@@ -4590,7 +4541,7 @@ tcp_req_alloc_req_full(struct tcpcb *tp, struct tcp_snd_req *req, uint64_t ts, i
if (tp->t_tcpreq_req) {
for(i = 0, allocated = 0; i < MAX_TCP_TRK_REQ; i++) {
fil = &tp->t_tcpreq_info[i];
if ((fil->flags & TCP_TRK_TRACK_FLG_USED) == 0)
if (fil->flags != TCP_TRK_TRACK_FLG_USED)
continue;
if ((fil->timestamp == req->timestamp) &&
(fil->start == req->start) &&
@@ -4622,7 +4573,6 @@ tcp_req_alloc_req_full(struct tcpcb *tp, struct tcp_snd_req *req, uint64_t ts, i
allocated = 1;
fil->flags = TCP_TRK_TRACK_FLG_USED;
fil->timestamp = req->timestamp;
fil->playout_ms = req->playout_ms;
fil->localtime = ts;
fil->start = req->start;
if (req->flags & TCP_LOG_HTTPD_RANGE_END) {
@@ -4639,10 +4589,7 @@ tcp_req_alloc_req_full(struct tcpcb *tp, struct tcp_snd_req *req, uint64_t ts, i
fil->sbcc_at_s = tptosocket(tp)->so_snd.sb_ccc;
fil->start_seq = tp->snd_una +
tptosocket(tp)->so_snd.sb_ccc;
if (req->flags & TCP_LOG_HTTPD_RANGE_END)
fil->end_seq = (fil->start_seq + ((uint32_t)(fil->end - fil->start)));
else
fil->end_seq = 0;
fil->end_seq = (fil->start_seq + ((uint32_t)(fil->end - fil->start)));
if (tptosocket(tp)->so_snd.sb_tls_info) {
/*
* This session is doing TLS. Take a swag guess


@@ -1032,10 +1032,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (!solisten_enqueue(so, SS_ISCONNECTED))
tp->t_flags |= TF_SONOTCONN;
/* Can we inherit anything from the listener? */
if (tp->t_fb->tfb_inherit != NULL) {
(*tp->t_fb->tfb_inherit)(tp, sotoinpcb(lso));
}
return (so);
allocfail:
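
The block removed here, its twin in tcp_usr_attach() below, and the
tfb_inherit member deleted from struct tcp_function_block in the tcp_var.h
hunk all implement one mechanism: when a child connection is created from a
listener, the stack gets a chance to copy listener-level state into the new
tcpcb. A minimal sketch of the callback's shape; the body is illustrative, not
the rack implementation:

	/* Sketch: a tfb_inherit callback copying listener defaults. */
	static void
	example_inherit(struct tcpcb *tp, struct inpcb *h_inp)
	{
		struct tcpcb *ltp = intotcpcb(h_inp);	/* listener's tcpcb */

		/*
		 * Carry over whatever per-listener tuning the stack keeps,
		 * e.g. a configured maximum segment size.
		 */
		tp->t_maxseg = ltp->t_maxseg;
	}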


@@ -179,12 +179,6 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td)
goto out;
}
tp->t_state = TCPS_CLOSED;
/* Can we inherit anything from the listener? */
if ((so->so_listen != NULL) &&
(so->so_listen->so_pcb != NULL) &&
(tp->t_fb->tfb_inherit != NULL)) {
(*tp->t_fb->tfb_inherit)(tp, sotoinpcb(so->so_listen));
}
tcp_bblog_pru(tp, PRU_ATTACH, error);
INP_WUNLOCK(inp);
TCPSTATES_INC(TCPS_CLOSED);
@@ -1607,7 +1601,6 @@ tcp_fill_info(const struct tcpcb *tp, struct tcp_info *ti)
ti->tcpi_rcv_numsacks = tp->rcv_numsacks;
ti->tcpi_rcv_adv = tp->rcv_adv;
ti->tcpi_dupacks = tp->t_dupacks;
ti->tcpi_rttmin = tp->t_rttlow;
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {
ti->tcpi_options |= TCPI_OPT_TOE;


@@ -138,8 +138,7 @@ STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
#define TCP_TRK_TRACK_FLG_OPEN 0x02 /* End is not valid (open range request) */
#define TCP_TRK_TRACK_FLG_SEQV 0x04 /* We had a sendfile that touched it */
#define TCP_TRK_TRACK_FLG_COMP 0x08 /* Sendfile as placed the last bits (range req only) */
#define TCP_TRK_TRACK_FLG_FSND 0x10 /* First send has been done into the seq space */
#define TCP_TRK_TRACK_FLG_LSND 0x20 /* We were able to set the Last Sent */
#define TCP_TRK_TRACK_FLG_FSND 0x10 /* First send has been done into the seq space */
#define MAX_TCP_TRK_REQ 5 /* Max we will have at once */
struct tcp_sendfile_track {
@@ -152,14 +151,11 @@ struct tcp_sendfile_track {
uint64_t cspr; /* Client suggested pace rate */
uint64_t sent_at_fs; /* What was t_sndbytes as we begun sending */
uint64_t rxt_at_fs; /* What was t_snd_rxt_bytes as we begun sending */
uint64_t sent_at_ls; /* Sent value at the last send */
uint64_t rxt_at_ls; /* Retransmit value at the last send */
tcp_seq start_seq; /* First TCP Seq assigned */
tcp_seq end_seq; /* If range req last seq */
uint32_t flags; /* Type of request open etc */
uint32_t sbcc_at_s; /* When we allocate what is the sb_cc */
uint32_t hint_maxseg; /* Client hinted maxseg */
uint32_t playout_ms; /* Client playout ms */
uint32_t hybrid_flags; /* Hybrid flags on this request */
};
@@ -627,8 +623,6 @@ struct tcp_function_block {
void (*tfb_switch_failed)(struct tcpcb *);
bool (*tfb_early_wake_check)(struct tcpcb *);
int (*tfb_compute_pipe)(struct tcpcb *tp);
int (*tfb_stack_info)(struct tcpcb *tp, struct stack_specific_info *);
void (*tfb_inherit)(struct tcpcb *tp, struct inpcb *h_inp);
volatile uint32_t tfb_refcnt;
uint32_t tfb_flags;
uint8_t tfb_id;
@@ -794,7 +788,7 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack)
#define TF_TSO 0x01000000 /* TSO enabled on this connection */
#define TF_TOE 0x02000000 /* this connection is offloaded */
#define TF_CLOSED 0x04000000 /* close(2) called on socket */
#define TF_SENTSYN 0x08000000 /* At least one syn has been sent */
#define TF_UNUSED1 0x08000000 /* unused */
#define TF_LRD 0x10000000 /* Lost Retransmission Detection */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
@@ -1507,8 +1501,6 @@ void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
size_t seed_len);
int tcp_can_enable_pacing(void);
int tcp_incr_dgp_pacing_cnt(void);
void tcp_dec_dgp_pacing_cnt(void);
void tcp_decrement_paced_conn(void);
void tcp_change_time_units(struct tcpcb *, int);
void tcp_handle_orphaned_packets(struct tcpcb *);