linux/net/sched/em_ipset.c
Toke Høiland-Jørgensen d7bf2ebebc sched: consistently handle layer3 header accesses in the presence of VLANs
There are a couple of places in net/sched/ that check skb->protocol and act
on the value there. However, in the presence of VLAN tags, the value stored
in skb->protocol can be inconsistent based on whether VLAN acceleration is
enabled. The commit quoted in the Fixes tag below fixed the users of
skb->protocol to use a helper that will always see the VLAN ethertype.

However, most of the callers don't actually handle the VLAN ethertype, but
expect to find the IP header type in the protocol field. This means that
things like changing the ECN field, or parsing diffserv values, stops
working if there's a VLAN tag, or if there are multiple nested VLAN
tags (QinQ).

To fix this, change the helper to take an argument that indicates whether
the caller wants to skip the VLAN tags or not. When skipping VLAN tags, we
make sure to skip all of them, so behaviour is consistent even in QinQ
mode.

To make the helper usable from the ECN code, move it to if_vlan.h instead
of pkt_sched.h.

v3:
- Remove empty lines
- Move vlan variable definitions inside loop in skb_protocol()
- Also use skb_protocol() helper in IP{,6}_ECN_decapsulate() and
  bpf_skb_ecn_set_ce()

v2:
- Use eth_type_vlan() helper in skb_protocol()
- Also fix code that reads skb->protocol directly
- Change a couple of 'if/else if' statements to switch constructs to avoid
  calling the helper twice

Reported-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
Fixes: d8b9605d26 ("net: sched: fix skb->protocol use in case of accelerated vlan path")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-03 14:34:53 -07:00

135 lines
2.9 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* net/sched/em_ipset.c ipset ematch
*
* Copyright (c) 2012 Florian Westphal <fw@strlen.de>
*/
#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/netfilter/xt_set.h>
#include <linux/ipv6.h>
#include <net/ip.h>
#include <net/pkt_cls.h>
static int em_ipset_change(struct net *net, void *data, int data_len,
struct tcf_ematch *em)
{
struct xt_set_info *set = data;
ip_set_id_t index;
if (data_len != sizeof(*set))
return -EINVAL;
index = ip_set_nfnl_get_byindex(net, set->index);
if (index == IPSET_INVALID_ID)
return -ENOENT;
em->datalen = sizeof(*set);
em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL);
if (em->data)
return 0;
ip_set_nfnl_put(net, index);
return -ENOMEM;
}
static void em_ipset_destroy(struct tcf_ematch *em)
{
const struct xt_set_info *set = (const void *) em->data;
if (set) {
ip_set_nfnl_put(em->net, set->index);
kfree((void *) em->data);
}
}
static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
struct tcf_pkt_info *info)
{
struct ip_set_adt_opt opt;
struct xt_action_param acpar;
const struct xt_set_info *set = (const void *) em->data;
struct net_device *dev, *indev = NULL;
struct nf_hook_state state = {
.net = em->net,
};
int ret, network_offset;
switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
state.pf = NFPROTO_IPV4;
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
return 0;
acpar.thoff = ip_hdrlen(skb);
break;
case htons(ETH_P_IPV6):
state.pf = NFPROTO_IPV6;
if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
return 0;
/* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */
acpar.thoff = sizeof(struct ipv6hdr);
break;
default:
return 0;
}
opt.family = state.pf;
opt.dim = set->dim;
opt.flags = set->flags;
opt.cmdflags = 0;
opt.ext.timeout = ~0u;
network_offset = skb_network_offset(skb);
skb_pull(skb, network_offset);
dev = skb->dev;
rcu_read_lock();
if (skb->skb_iif)
indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
state.in = indev ? indev : dev;
state.out = dev;
acpar.state = &state;
ret = ip_set_test(set->index, skb, &acpar, &opt);
rcu_read_unlock();
skb_push(skb, network_offset);
return ret;
}
static struct tcf_ematch_ops em_ipset_ops = {
.kind = TCF_EM_IPSET,
.change = em_ipset_change,
.destroy = em_ipset_destroy,
.match = em_ipset_match,
.owner = THIS_MODULE,
.link = LIST_HEAD_INIT(em_ipset_ops.link)
};
static int __init init_em_ipset(void)
{
return tcf_em_register(&em_ipset_ops);
}
static void __exit exit_em_ipset(void)
{
tcf_em_unregister(&em_ipset_ops);
}
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_DESCRIPTION("TC extended match for IP sets");
module_init(init_em_ipset);
module_exit(exit_em_ipset);
MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPSET);