From b0a231a26d56265521abbb6db1748accd6bb036a Mon Sep 17 00:00:00 2001
From: Paolo Abeni
Date: Wed, 10 Apr 2019 14:32:37 +0200
Subject: [PATCH 1/5] net: caif: avoid using qdisc_qlen()

This helper does not cope correctly with NOLOCK qdiscs. In the
following patches we will move qlen back to per-CPU values for such
qdiscs, so qdisc_qlen_sum() is not an option either.

Instead, use qlen only for locked qdiscs, and always set flow off for
NOLOCK qdiscs with a non-empty tx queue.

Signed-off-by: Paolo Abeni
Signed-off-by: David S. Miller
---
 net/caif/caif_dev.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 711d7156efd8..6c6e01963aac 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -186,15 +186,19 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt)
         goto noxoff;
 
     if (likely(!netif_queue_stopped(caifd->netdev))) {
+        struct Qdisc *sch;
+
         /* If we run with a TX queue, check if the queue is too long*/
         txq = netdev_get_tx_queue(skb->dev, 0);
-        qlen = qdisc_qlen(rcu_dereference_bh(txq->qdisc));
-
-        if (likely(qlen == 0))
+        sch = rcu_dereference_bh(txq->qdisc);
+        if (likely(qdisc_is_empty(sch)))
             goto noxoff;
 
+        /* can check for explicit qdisc len value only !NOLOCK,
+         * always set flow off otherwise
+         */
         high = (caifd->netdev->tx_queue_len * q_high) / 100;
-        if (likely(qlen < high))
+        if (!(sch->flags & TCQ_F_NOLOCK) && likely(sch->q.qlen < high))
             goto noxoff;
     }
 
From 1f5e6fdd6aec7929e67afad1e42e35d894a119ae Mon Sep 17 00:00:00 2001
From: Paolo Abeni
Date: Wed, 10 Apr 2019 14:32:38 +0200
Subject: [PATCH 2/5] net: sched: prefer qdisc_is_empty() over direct qlen
 access

When checking the root qdisc queue length, do not access q.qlen
directly. In the following patches we will move qlen accounting back
to per-CPU values for NOLOCK qdiscs.

Instead, prefer the qdisc_is_empty() helper.

Signed-off-by: Paolo Abeni
Signed-off-by: David S. Miller
---
 include/net/sch_generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 0aea0e262452..7ecb6127e980 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -747,7 +747,7 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev)
         struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
         const struct Qdisc *q = rcu_dereference(txq->qdisc);
 
-        if (q->q.qlen) {
+        if (!qdisc_is_empty(q)) {
             rcu_read_unlock();
             return false;
         }
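Taken together, the first two patches establish a simple rule for callers:
outside the qdisc's own locks, ask whether the qdisc is empty instead of
reading q.qlen, and treat any non-empty NOLOCK qdisc as busy. The sketch
below restates that caller-side rule in one place. It is illustrative only
(the function name and the 'high' threshold are made up here), it uses just
the helpers visible in the diffs above, it assumes the usual
<net/sch_generic.h> / <linux/netdevice.h> context, and the caller is assumed
to run in a BH/RCU read-side section, as the caif path does:

/* Illustrative sketch, not part of the series: should a TX path throttle?
 * NOLOCK qdiscs expose no cheap exact qlen, so "not empty" means "busy".
 */
static bool txq_looks_busy(struct netdev_queue *txq, unsigned int high)
{
        struct Qdisc *sch = rcu_dereference_bh(txq->qdisc);

        if (qdisc_is_empty(sch))
                return false;           /* nothing queued */

        if (sch->flags & TCQ_F_NOLOCK)
                return true;            /* qlen unreliable: back off */

        return sch->q.qlen >= high;     /* locked qdisc: qlen is exact */
}

This mirrors the caif transmit() change above and is the same pattern any
flow-control user of the root qdisc is expected to follow.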
From 9c01c9f1f2a3ddbddbf3b233cc6bfa86f5a59af0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni
Date: Wed, 10 Apr 2019 14:32:39 +0200
Subject: [PATCH 3/5] net: sched: always do stats accounting according to
 TCQ_F_CPUSTATS

In many places the core sched implementation checks the NOLOCK flag to
acquire/release the root spin lock and, independently, checks
qdisc_is_percpu_stats() to account per-CPU values.

This change updates the last few places still checking TCQ_F_NOLOCK so
that they do per-CPU stats accounting according to the
qdisc_is_percpu_stats() value.

The above allows cleaning up the dev_requeue_skb() implementation a bit
and makes stats updates always consistent with a single flag.

v1 -> v2:
 - do not move the qdisc_is_empty() definition, fix build issue

Signed-off-by: Paolo Abeni
Signed-off-by: David S. Miller
---
 include/net/sch_generic.h | 23 +++++++++++-------
 net/sched/sch_generic.c  | 50 +++++++++++++--------------------------
 2 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7ecb6127e980..ed56474cfe3b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -146,9 +146,14 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
     return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
 }
 
+static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
+{
+    return q->flags & TCQ_F_CPUSTATS;
+}
+
 static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
 {
-    if (qdisc->flags & TCQ_F_NOLOCK)
+    if (qdisc_is_percpu_stats(qdisc))
         return qdisc->empty;
     return !qdisc->q.qlen;
 }
@@ -490,7 +495,7 @@ static inline u32 qdisc_qlen_sum(const struct Qdisc *q)
 {
     u32 qlen = q->qstats.qlen;
 
-    if (q->flags & TCQ_F_NOLOCK)
+    if (qdisc_is_percpu_stats(q))
         qlen += atomic_read(&q->q.atomic_qlen);
     else
         qlen += q->q.qlen;
@@ -817,11 +822,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
     return sch->enqueue(skb, sch, to_free);
 }
 
-static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
-{
-    return q->flags & TCQ_F_CPUSTATS;
-}
-
 static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
                                   __u64 bytes, __u32 packets)
 {
@@ -1113,8 +1113,13 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
 
     if (skb) {
         skb = __skb_dequeue(&sch->gso_skb);
-        qdisc_qstats_backlog_dec(sch, skb);
-        sch->q.qlen--;
+        if (qdisc_is_percpu_stats(sch)) {
+            qdisc_qstats_cpu_backlog_dec(sch, skb);
+            qdisc_qstats_atomic_qlen_dec(sch);
+        } else {
+            qdisc_qstats_backlog_dec(sch, skb);
+            sch->q.qlen--;
+        }
     } else {
         skb = sch->dequeue(sch);
     }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 81356ef38d1d..ddff2952be87 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -118,52 +118,36 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
     spin_unlock(lock);
 }
 
-static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-    while (skb) {
-        struct sk_buff *next = skb->next;
+    spinlock_t *lock = NULL;
 
-        __skb_queue_tail(&q->gso_skb, skb);
-        q->qstats.requeues++;
-        qdisc_qstats_backlog_inc(q, skb);
-        q->q.qlen++;    /* it's still part of the queue */
-
-        skb = next;
+    if (q->flags & TCQ_F_NOLOCK) {
+        lock = qdisc_lock(q);
+        spin_lock(lock);
     }
 
-    __netif_schedule(q);
-    return 0;
-}
-
-static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
-{
-    spinlock_t *lock = qdisc_lock(q);
-
-    spin_lock(lock);
     while (skb) {
         struct sk_buff *next = skb->next;
 
         __skb_queue_tail(&q->gso_skb, skb);
 
-        qdisc_qstats_cpu_requeues_inc(q);
-        qdisc_qstats_cpu_backlog_inc(q, skb);
-        qdisc_qstats_atomic_qlen_inc(q);
+        /* it's still part of the queue */
+        if (qdisc_is_percpu_stats(q)) {
+            qdisc_qstats_cpu_requeues_inc(q);
+            qdisc_qstats_cpu_backlog_inc(q, skb);
+            qdisc_qstats_atomic_qlen_inc(q);
+        } else {
+            q->qstats.requeues++;
+            qdisc_qstats_backlog_inc(q, skb);
+            q->q.qlen++;
+        }
 
         skb = next;
     }
-    spin_unlock(lock);
-
+    if (lock)
+        spin_unlock(lock);
     __netif_schedule(q);
-
-    return 0;
-}
-
-static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
-{
-    if (q->flags & TCQ_F_NOLOCK)
-        return dev_requeue_skb_locked(skb, q);
-    else
-        return __dev_requeue_skb(skb, q);
 }
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
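The end state this patch aims for can be condensed into one pattern: every
datapath stats update picks per-CPU or global counters based solely on
qdisc_is_percpu_stats(), while TCQ_F_NOLOCK only governs locking. A minimal,
illustrative restatement of that pattern (the helper name is made up; every
call in it is a helper that already appears in the diff above):

/* Illustrative only: the accounting shape every datapath site should have
 * after this patch. Locking (TCQ_F_NOLOCK) is decided separately.
 */
static inline void account_enqueued_skb(struct Qdisc *q, struct sk_buff *skb)
{
        if (qdisc_is_percpu_stats(q)) {
                qdisc_qstats_cpu_backlog_inc(q, skb);
                qdisc_qstats_atomic_qlen_inc(q);
        } else {
                qdisc_qstats_backlog_inc(q, skb);
                q->q.qlen++;
        }
}

(The atomic qlen helper still exists at this point in the series; patch 5
later replaces it with a per-CPU counter.)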
From 8a53e616de294873fec1a75ddb77ecb3d225cee0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni
Date: Wed, 10 Apr 2019 14:32:40 +0200
Subject: [PATCH 4/5] net: sched: when clearing NOLOCK, clear TCQ_F_CPUSTATS,
 too

Since stats updating is always consistent with the TCQ_F_CPUSTATS flag,
we can disable it at qdisc creation time by flipping that bit. In my
experiments, if the NOLOCK flag is cleared, per-CPU stats accounting
does not give any measurable performance gain, but it wastes some
memory.

Let's clear TCQ_F_CPUSTATS together with NOLOCK when enslaving a NOLOCK
qdisc to a 'locked' one.

Use the stats update helpers inside pfifo_fast to cope correctly with
the TCQ_F_CPUSTATS flag change.

As a side effect, the q.qlen value of any child qdisc is always
consistent for all locked classful qdiscs.

Signed-off-by: Paolo Abeni
Signed-off-by: David S. Miller
---
 include/net/sch_generic.h | 26 ++++++++++++++++++++++++++
 net/sched/sch_api.c       | 15 ++++++++++++++-
 net/sched/sch_generic.c   | 10 ++--------
 3 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ed56474cfe3b..f069011524ba 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1106,6 +1106,32 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
     return skb;
 }
 
+static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
+                                                 struct sk_buff *skb)
+{
+    if (qdisc_is_percpu_stats(sch)) {
+        qdisc_qstats_cpu_backlog_dec(sch, skb);
+        qdisc_bstats_cpu_update(sch, skb);
+        qdisc_qstats_atomic_qlen_dec(sch);
+    } else {
+        qdisc_qstats_backlog_dec(sch, skb);
+        qdisc_bstats_update(sch, skb);
+        sch->q.qlen--;
+    }
+}
+
+static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
+                                                 unsigned int pkt_len)
+{
+    if (qdisc_is_percpu_stats(sch)) {
+        qdisc_qstats_atomic_qlen_inc(sch);
+        this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
+    } else {
+        sch->qstats.backlog += pkt_len;
+        sch->q.qlen++;
+    }
+}
+
 /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
 static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
 {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fb8f138b9776..c126b9f78d6e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -998,6 +998,19 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb,
         qdisc_put(old);
 }
 
+static void qdisc_clear_nolock(struct Qdisc *sch)
+{
+    sch->flags &= ~TCQ_F_NOLOCK;
+    if (!(sch->flags & TCQ_F_CPUSTATS))
+        return;
+
+    free_percpu(sch->cpu_bstats);
+    free_percpu(sch->cpu_qstats);
+    sch->cpu_bstats = NULL;
+    sch->cpu_qstats = NULL;
+    sch->flags &= ~TCQ_F_CPUSTATS;
+}
+
 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
  * to device "dev".
  *
@@ -1076,7 +1089,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
         /* Only support running class lockless if parent is lockless */
         if (new && (new->flags & TCQ_F_NOLOCK) &&
             parent && !(parent->flags & TCQ_F_NOLOCK))
-            new->flags &= ~TCQ_F_NOLOCK;
+            qdisc_clear_nolock(new);
 
         if (!cops || !cops->graft)
             return -EOPNOTSUPP;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ddff2952be87..12a6e1a39fa0 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -629,11 +629,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
     if (unlikely(err))
         return qdisc_drop_cpu(skb, qdisc, to_free);
 
-    qdisc_qstats_atomic_qlen_inc(qdisc);
-    /* Note: skb can not be used after skb_array_produce(),
-     * so we better not use qdisc_qstats_cpu_backlog_inc()
-     */
-    this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
+    qdisc_update_stats_at_enqueue(qdisc, pkt_len);
     return NET_XMIT_SUCCESS;
 }
 
@@ -652,9 +648,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
         skb = __skb_array_consume(q);
     }
     if (likely(skb)) {
-        qdisc_qstats_cpu_backlog_dec(qdisc, skb);
-        qdisc_bstats_cpu_update(qdisc, skb);
-        qdisc_qstats_atomic_qlen_dec(qdisc);
+        qdisc_update_stats_at_dequeue(qdisc, skb);
     } else {
         qdisc->empty = true;
     }

From 73eb628ddfd3884d1e58a8022de2e78de7807fc6 Mon Sep 17 00:00:00 2001
From: Paolo Abeni
Date: Wed, 10 Apr 2019 14:32:41 +0200
Subject: [PATCH 5/5] Revert: "net: sched: put back q.qlen into a single
 location"

This reverts commit 46b1c18f9deb ("net: sched: put back q.qlen into a
single location").

After the previous patch, when a NOLOCK qdisc is enslaved to a locking
qdisc it switches to global stats accounting. As a consequence, when a
classful qdisc accesses a child qdisc's qlen directly, such a qdisc is
not doing per-CPU accounting and the qlen value is consistent.

In the control path nobody has used qlen directly since commit
e5f0e8f8e45 ("net: sched: introduce and use qdisc tree flush/purge
helpers"), so we can remove the contended atomic ops from the datapath.

v1 -> v2:
 - complete the qdisc_qstats_atomic_qlen_dec() ->
   qdisc_qstats_cpu_qlen_dec() replacement, fix build issue
 - more descriptive commit message

Signed-off-by: Paolo Abeni
Signed-off-by: David S. Miller
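Once qdisc_clear_nolock() has run, the child qdisc carries neither
TCQ_F_NOLOCK nor TCQ_F_CPUSTATS, so its q.qlen is again a single, exact
value. A hedged sketch of what that buys a classful parent (the function
name is hypothetical; the helpers are the ones defined in sch_generic.h
above):

/* Illustrative only: after grafting under a locked parent, the child's
 * qlen can be read directly; the per-CPU sum is only needed if the child
 * somehow kept TCQ_F_CPUSTATS.
 */
static unsigned int child_qlen(const struct Qdisc *child)
{
        if (qdisc_is_percpu_stats(child))
                return qdisc_qlen_sum(child);   /* slow path: per-CPU sum */

        return qdisc_qlen(child);               /* exact, single location */
}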
---
 include/net/sch_generic.h | 37 +++++++++++++++++++++----------------
 net/core/gen_stats.c      |  2 ++
 net/sched/sch_generic.c   |  9 +++++----
 3 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f069011524ba..e8f85cd2afce 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -52,10 +52,7 @@ struct qdisc_size_table {
 struct qdisc_skb_head {
     struct sk_buff *head;
     struct sk_buff *tail;
-    union {
-        u32 qlen;
-        atomic_t atomic_qlen;
-    };
+    __u32 qlen;
     spinlock_t lock;
 };
 
@@ -486,19 +483,27 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
     BUILD_BUG_ON(sizeof(qcb->data) < sz);
 }
 
+static inline int qdisc_qlen_cpu(const struct Qdisc *q)
+{
+    return this_cpu_ptr(q->cpu_qstats)->qlen;
+}
+
 static inline int qdisc_qlen(const struct Qdisc *q)
 {
     return q->q.qlen;
 }
 
-static inline u32 qdisc_qlen_sum(const struct Qdisc *q)
+static inline int qdisc_qlen_sum(const struct Qdisc *q)
 {
-    u32 qlen = q->qstats.qlen;
+    __u32 qlen = q->qstats.qlen;
+    int i;
 
-    if (qdisc_is_percpu_stats(q))
-        qlen += atomic_read(&q->q.atomic_qlen);
-    else
+    if (qdisc_is_percpu_stats(q)) {
+        for_each_possible_cpu(i)
+            qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
+    } else {
         qlen += q->q.qlen;
+    }
 
     return qlen;
 }
@@ -889,14 +894,14 @@ static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
     this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
 }
 
-static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
 {
-    atomic_inc(&sch->q.atomic_qlen);
+    this_cpu_inc(sch->cpu_qstats->qlen);
 }
 
-static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
 {
-    atomic_dec(&sch->q.atomic_qlen);
+    this_cpu_dec(sch->cpu_qstats->qlen);
 }
 
 static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
@@ -1112,7 +1117,7 @@ static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
     if (qdisc_is_percpu_stats(sch)) {
         qdisc_qstats_cpu_backlog_dec(sch, skb);
         qdisc_bstats_cpu_update(sch, skb);
-        qdisc_qstats_atomic_qlen_dec(sch);
+        qdisc_qstats_cpu_qlen_dec(sch);
     } else {
         qdisc_qstats_backlog_dec(sch, skb);
         qdisc_bstats_update(sch, skb);
@@ -1124,7 +1129,7 @@ static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
                                                  unsigned int pkt_len)
 {
     if (qdisc_is_percpu_stats(sch)) {
-        qdisc_qstats_atomic_qlen_inc(sch);
+        qdisc_qstats_cpu_qlen_inc(sch);
         this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
     } else {
         sch->qstats.backlog += pkt_len;
         sch->q.qlen++;
@@ -1141,7 +1146,7 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
         skb = __skb_dequeue(&sch->gso_skb);
         if (qdisc_is_percpu_stats(sch)) {
             qdisc_qstats_cpu_backlog_dec(sch, skb);
-            qdisc_qstats_atomic_qlen_dec(sch);
+            qdisc_qstats_cpu_qlen_dec(sch);
         } else {
             qdisc_qstats_backlog_dec(sch, skb);
             sch->q.qlen--;
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index ac679f74ba47..9bf1b9ad1780 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -291,6 +291,7 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
     for_each_possible_cpu(i) {
         const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
 
+        qstats->qlen = 0;
         qstats->backlog += qcpu->backlog;
         qstats->drops += qcpu->drops;
         qstats->requeues += qcpu->requeues;
@@ -306,6 +307,7 @@ void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
     if (cpu) {
         __gnet_stats_copy_queue_cpu(qstats, cpu);
     } else {
+        qstats->qlen = q->qlen;
         qstats->backlog = q->backlog;
         qstats->drops = q->drops;
         qstats->requeues = q->requeues;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 12a6e1a39fa0..848aab3693bd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
         skb = __skb_dequeue(&q->skb_bad_txq);
         if (qdisc_is_percpu_stats(q)) {
             qdisc_qstats_cpu_backlog_dec(q, skb);
-            qdisc_qstats_atomic_qlen_dec(q);
+            qdisc_qstats_cpu_qlen_dec(q);
         } else {
             qdisc_qstats_backlog_dec(q, skb);
             q->q.qlen--;
@@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
 
     if (qdisc_is_percpu_stats(q)) {
         qdisc_qstats_cpu_backlog_inc(q, skb);
-        qdisc_qstats_atomic_qlen_inc(q);
+        qdisc_qstats_cpu_qlen_inc(q);
     } else {
         qdisc_qstats_backlog_inc(q, skb);
         q->q.qlen++;
@@ -136,7 +136,7 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
         if (qdisc_is_percpu_stats(q)) {
             qdisc_qstats_cpu_requeues_inc(q);
             qdisc_qstats_cpu_backlog_inc(q, skb);
-            qdisc_qstats_atomic_qlen_inc(q);
+            qdisc_qstats_cpu_qlen_inc(q);
         } else {
             q->qstats.requeues++;
             qdisc_qstats_backlog_inc(q, skb);
@@ -236,7 +236,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
             skb = __skb_dequeue(&q->gso_skb);
             if (qdisc_is_percpu_stats(q)) {
                 qdisc_qstats_cpu_backlog_dec(q, skb);
-                qdisc_qstats_atomic_qlen_dec(q);
+                qdisc_qstats_cpu_qlen_dec(q);
             } else {
                 qdisc_qstats_backlog_dec(q, skb);
                 q->q.qlen--;
@@ -694,6 +694,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
         struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
 
         q->backlog = 0;
+        q->qlen = 0;
     }
 }
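The revert trades a single contended atomic in the datapath for a per-CPU
counter that only the control path has to sum, exactly as qdisc_qlen_sum()
and __gnet_stats_copy_queue_cpu() do above. A small standalone C model of
that trade-off (plain userspace code, not kernel code; NR_CPUS and the
function names are made up for the example):

#include <stdio.h>

#define NR_CPUS 4

struct pcpu_qstats { unsigned int qlen; };

static struct pcpu_qstats stats[NR_CPUS];       /* one slot per CPU */

/* datapath: each CPU touches only its own slot, no shared cacheline */
static void enqueue_on(int cpu) { stats[cpu].qlen++; }
static void dequeue_on(int cpu) { stats[cpu].qlen--; }

/* control path: pays the full scan, like qdisc_qlen_sum() */
static unsigned int qlen_sum(void)
{
        unsigned int qlen = 0;
        int i;

        for (i = 0; i < NR_CPUS; i++)
                qlen += stats[i].qlen;
        return qlen;
}

int main(void)
{
        enqueue_on(0);
        enqueue_on(2);
        dequeue_on(0);
        printf("qlen = %u\n", qlen_sum());      /* prints 1 */
        return 0;
}

In the kernel the per-CPU slots live in q->cpu_qstats and the datapath
updates them with this_cpu_inc()/this_cpu_dec(), so even the
read-modify-write stays CPU-local.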