linux/include/net/flow_dissector.h

474 lines
12 KiB
C
Raw Normal View History

License cleanup: add SPDX GPL-2.0 license identifier to files with no license Many source files in the tree are missing licensing information, which makes it harder for compliance tools to determine the correct license. By default all files without license information are under the default license of the kernel, which is GPL version 2. Update the files which contain no license information with the 'GPL-2.0' SPDX license identifier. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This patch is based on work done by Thomas Gleixner and Kate Stewart and Philippe Ombredanne. How this work was done: Patches were generated and checked against linux-4.14-rc6 for a subset of the use cases: - file had no licensing information it it. - file was a */uapi/* one with no licensing information in it, - file was a */uapi/* one with existing licensing information, Further patches will be generated in subsequent months to fix up cases where non-standard license headers were used, and references to license had to be inferred by heuristics based on keywords. The analysis to determine which SPDX License Identifier to be applied to a file was done in a spreadsheet of side by side results from of the output of two independent scanners (ScanCode & Windriver) producing SPDX tag:value files created by Philippe Ombredanne. Philippe prepared the base worksheet, and did an initial spot review of a few 1000 files. The 4.13 kernel was the starting point of the analysis with 60,537 files assessed. Kate Stewart did a file by file comparison of the scanner results in the spreadsheet to determine which SPDX license identifier(s) to be applied to the file. She confirmed any determination that was not immediately clear with lawyers working with the Linux Foundation. Criteria used to select files for SPDX license identifier tagging was: - Files considered eligible had to be source code files. - Make and config files were included as candidates if they contained >5 lines of source - File already had some variant of a license header in it (even if <5 lines). All documentation files were explicitly excluded. The following heuristics were used to determine which SPDX license identifiers to apply. - when both scanners couldn't find any license traces, file was considered to have no license information in it, and the top level COPYING file license applied. For non */uapi/* files that summary was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 11139 and resulted in the first patch in this series. If that file was a */uapi/* path one, it was "GPL-2.0 WITH Linux-syscall-note" otherwise it was "GPL-2.0". Results of that was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 WITH Linux-syscall-note 930 and resulted in the second patch in this series. - if a file had some form of licensing information in it, and was one of the */uapi/* ones, it was denoted with the Linux-syscall-note if any GPL family license was found in the file or had no licensing in it (per prior point). Results summary: SPDX license identifier # files ---------------------------------------------------|------ GPL-2.0 WITH Linux-syscall-note 270 GPL-2.0+ WITH Linux-syscall-note 169 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) 21 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) 17 LGPL-2.1+ WITH Linux-syscall-note 15 GPL-1.0+ WITH Linux-syscall-note 14 ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) 5 LGPL-2.0+ WITH Linux-syscall-note 4 LGPL-2.1 WITH Linux-syscall-note 3 ((GPL-2.0 WITH Linux-syscall-note) OR MIT) 3 ((GPL-2.0 WITH Linux-syscall-note) AND MIT) 1 and that resulted in the third patch in this series. - when the two scanners agreed on the detected license(s), that became the concluded license(s). - when there was disagreement between the two scanners (one detected a license but the other didn't, or they both detected different licenses) a manual inspection of the file occurred. - In most cases a manual inspection of the information in the file resulted in a clear resolution of the license that should apply (and which scanner probably needed to revisit its heuristics). - When it was not immediately clear, the license identifier was confirmed with lawyers working with the Linux Foundation. - If there was any question as to the appropriate license identifier, the file was flagged for further research and to be revisited later in time. In total, over 70 hours of logged manual review was done on the spreadsheet to determine the SPDX license identifiers to apply to the source files by Kate, Philippe, Thomas and, in some cases, confirmation by lawyers working with the Linux Foundation. Kate also obtained a third independent scan of the 4.13 code base from FOSSology, and compared selected files where the other two scanners disagreed against that SPDX file, to see if there was new insights. The Windriver scanner is based on an older version of FOSSology in part, so they are related. Thomas did random spot checks in about 500 files from the spreadsheets for the uapi headers and agreed with SPDX license identifier in the files he inspected. For the non-uapi files Thomas did random spot checks in about 15000 files. In initial set of patches against 4.14-rc6, 3 files were found to have copy/paste license identifier errors, and have been fixed to reflect the correct identifier. Additionally Philippe spent 10 hours this week doing a detailed manual inspection and review of the 12,461 patched files from the initial patch version early this week with: - a full scancode scan run, collecting the matched texts, detected license ids and scores - reviewing anything where there was a license detected (about 500+ files) to ensure that the applied SPDX license was correct - reviewing anything where there was no detection but the patch license was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied SPDX license was correct This produced a worksheet with 20 files needing minor correction. This worksheet was then exported into 3 different .csv files for the different types of files to be modified. These .csv files were then reviewed by Greg. Thomas wrote a script to parse the csv files and add the proper SPDX tag to the file, in the format that the file expected. This script was further refined by Greg based on the output to detect more types of files automatically and to distinguish between header and source .c files (which need different comment types.) Finally Greg ran the script using the .csv files to generate the patches. Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org> Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-11-01 14:07:57 +00:00
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NET_FLOW_DISSECTOR_H
#define _NET_FLOW_DISSECTOR_H
#include <linux/types.h>
#include <linux/in6.h>
net/flow_dissector: switch to siphash UDP IPv6 packets auto flowlabels are using a 32bit secret (static u32 hashrnd in net/core/flow_dissector.c) and apply jhash() over fields known by the receivers. Attackers can easily infer the 32bit secret and use this information to identify a device and/or user, since this 32bit secret is only set at boot time. Really, using jhash() to generate cookies sent on the wire is a serious security concern. Trying to change the rol32(hash, 16) in ip6_make_flowlabel() would be a dead end. Trying to periodically change the secret (like in sch_sfq.c) could change paths taken in the network for long lived flows. Let's switch to siphash, as we did in commit df453700e8d8 ("inet: switch IP ID generator to siphash") Using a cryptographically strong pseudo random function will solve this privacy issue and more generally remove other weak points in the stack. Packet schedulers using skb_get_hash_perturb() benefit from this change. Fixes: b56774163f99 ("ipv6: Enable auto flow labels by default") Fixes: 42240901f7c4 ("ipv6: Implement different admin modes for automatic flow labels") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Fixes: cb1ce2ef387b ("ipv6: Implement automatic flow label generation on transmit") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Jonathan Berger <jonathann1@walla.com> Reported-by: Amit Klein <aksecurity@gmail.com> Reported-by: Benny Pinkas <benny@pinkas.net> Cc: Tom Herbert <tom@herbertland.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-22 14:57:46 +00:00
#include <linux/siphash.h>
#include <linux/string.h>
#include <uapi/linux/if_ether.h>
struct bpf_prog;
struct net;
struct sk_buff;
/**
* struct flow_dissector_key_control:
* @thoff: Transport header offset
* @addr_type: Type of key. One of FLOW_DISSECTOR_KEY_*
* @flags: Key flags. Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAGENCAPSULATION)
*/
struct flow_dissector_key_control {
u16 thoff;
u16 addr_type;
u32 flags;
};
#define FLOW_DIS_IS_FRAGMENT BIT(0)
#define FLOW_DIS_FIRST_FRAG BIT(1)
#define FLOW_DIS_ENCAPSULATION BIT(2)
enum flow_dissect_ret {
FLOW_DISSECT_RET_OUT_GOOD,
FLOW_DISSECT_RET_OUT_BAD,
FLOW_DISSECT_RET_PROTO_AGAIN,
FLOW_DISSECT_RET_IPPROTO_AGAIN,
FLOW_DISSECT_RET_CONTINUE,
};
/**
* struct flow_dissector_key_basic:
* @n_proto: Network header protocol (eg. IPv4/IPv6)
* @ip_proto: Transport header protocol (eg. TCP/UDP)
* @padding: Unused
*/
struct flow_dissector_key_basic {
__be16 n_proto;
u8 ip_proto;
u8 padding;
};
struct flow_dissector_key_tags {
u32 flow_label;
};
struct flow_dissector_key_vlan {
union {
struct {
u16 vlan_id:12,
vlan_dei:1,
vlan_priority:3;
};
__be16 vlan_tci;
};
__be16 vlan_tpid;
net/sched: flower: fix parsing of ethertype following VLAN header A tc flower filter matching TCA_FLOWER_KEY_VLAN_ETH_TYPE is expected to match the L2 ethertype following the first VLAN header, as confirmed by linked discussion with the maintainer. However, such rule also matches packets that have additional second VLAN header, even though filter has both eth_type and vlan_ethtype set to "ipv4". Looking at the code this seems to be mostly an artifact of the way flower uses flow dissector. First, even though looking at the uAPI eth_type and vlan_ethtype appear like a distinct fields, in flower they are all mapped to the same key->basic.n_proto. Second, flow dissector skips following VLAN header as no keys for FLOW_DISSECTOR_KEY_CVLAN are set and eventually assigns the value of n_proto to last parsed header. With these, such filters ignore any headers present between first VLAN header and first "non magic" header (ipv4 in this case) that doesn't result FLOW_DISSECT_RET_PROTO_AGAIN. Fix the issue by extending flow dissector VLAN key structure with new 'vlan_eth_type' field that matches first ethertype following previously parsed VLAN header. Modify flower classifier to set the new flow_dissector_key_vlan->vlan_eth_type with value obtained from TCA_FLOWER_KEY_VLAN_ETH_TYPE/TCA_FLOWER_KEY_CVLAN_ETH_TYPE uAPIs. Link: https://lore.kernel.org/all/Yjhgi48BpTGh6dig@nanopsycho/ Fixes: 9399ae9a6cb2 ("net_sched: flower: Add vlan support") Fixes: d64efd0926ba ("net/sched: flower: Add supprt for matching on QinQ vlan headers") Signed-off-by: Vlad Buslov <vladbu@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2022-04-06 11:22:41 +00:00
__be16 vlan_eth_type;
u16 padding;
};
flow_dissector: Parse multiple MPLS Label Stack Entries The current MPLS dissector only parses the first MPLS Label Stack Entry (second LSE can be parsed too, but only to set a key_id). This patch adds the possibility to parse several LSEs by making __skb_flow_dissect_mpls() return FLOW_DISSECT_RET_PROTO_AGAIN as long as the Bottom Of Stack bit hasn't been seen, up to a maximum of FLOW_DIS_MPLS_MAX entries. FLOW_DIS_MPLS_MAX is arbitrarily set to 7. This should be enough for many practical purposes, without wasting too much space. To record the parsed values, flow_dissector_key_mpls is modified to store an array of stack entries, instead of just the values of the first one. A bit field, "used_lses", is also added to keep track of the LSEs that have been set. The objective is to avoid defining a new FLOW_DISSECTOR_KEY_MPLS_XX for each level of the MPLS stack. TC flower is adapted for the new struct flow_dissector_key_mpls layout. Matching on several MPLS Label Stack Entries will be added in the next patch. The NFP and MLX5 drivers are also adapted: nfp_flower_compile_mac() and mlx5's parse_tunnel() now verify that the rule only uses the first LSE and fail if it doesn't. Finally, the behaviour of the FLOW_DISSECTOR_KEY_MPLS_ENTROPY key is slightly modified. Instead of recording the first Entropy Label, it now records the last one. This shouldn't have any consequences since there doesn't seem to have any user of FLOW_DISSECTOR_KEY_MPLS_ENTROPY in the tree. We'd probably better do a hash of all parsed MPLS labels instead (excluding reserved labels) anyway. That'd give better entropy and would probably also simplify the code. But that's not the purpose of this patch, so I'm keeping that as a future possible improvement. Signed-off-by: Guillaume Nault <gnault@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-26 12:29:00 +00:00
struct flow_dissector_mpls_lse {
u32 mpls_ttl:8,
mpls_bos:1,
mpls_tc:3,
mpls_label:20;
};
flow_dissector: Parse multiple MPLS Label Stack Entries The current MPLS dissector only parses the first MPLS Label Stack Entry (second LSE can be parsed too, but only to set a key_id). This patch adds the possibility to parse several LSEs by making __skb_flow_dissect_mpls() return FLOW_DISSECT_RET_PROTO_AGAIN as long as the Bottom Of Stack bit hasn't been seen, up to a maximum of FLOW_DIS_MPLS_MAX entries. FLOW_DIS_MPLS_MAX is arbitrarily set to 7. This should be enough for many practical purposes, without wasting too much space. To record the parsed values, flow_dissector_key_mpls is modified to store an array of stack entries, instead of just the values of the first one. A bit field, "used_lses", is also added to keep track of the LSEs that have been set. The objective is to avoid defining a new FLOW_DISSECTOR_KEY_MPLS_XX for each level of the MPLS stack. TC flower is adapted for the new struct flow_dissector_key_mpls layout. Matching on several MPLS Label Stack Entries will be added in the next patch. The NFP and MLX5 drivers are also adapted: nfp_flower_compile_mac() and mlx5's parse_tunnel() now verify that the rule only uses the first LSE and fail if it doesn't. Finally, the behaviour of the FLOW_DISSECTOR_KEY_MPLS_ENTROPY key is slightly modified. Instead of recording the first Entropy Label, it now records the last one. This shouldn't have any consequences since there doesn't seem to have any user of FLOW_DISSECTOR_KEY_MPLS_ENTROPY in the tree. We'd probably better do a hash of all parsed MPLS labels instead (excluding reserved labels) anyway. That'd give better entropy and would probably also simplify the code. But that's not the purpose of this patch, so I'm keeping that as a future possible improvement. Signed-off-by: Guillaume Nault <gnault@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-05-26 12:29:00 +00:00
#define FLOW_DIS_MPLS_MAX 7
struct flow_dissector_key_mpls {
struct flow_dissector_mpls_lse ls[FLOW_DIS_MPLS_MAX]; /* Label Stack */
u8 used_lses; /* One bit set for each Label Stack Entry in use */
};
static inline void dissector_set_mpls_lse(struct flow_dissector_key_mpls *mpls,
int lse_index)
{
mpls->used_lses |= 1 << lse_index;
}
#define FLOW_DIS_TUN_OPTS_MAX 255
/**
* struct flow_dissector_key_enc_opts:
* @data: tunnel option data
* @len: length of tunnel option data
* @dst_opt_type: tunnel option type
*/
struct flow_dissector_key_enc_opts {
u8 data[FLOW_DIS_TUN_OPTS_MAX]; /* Using IP_TUNNEL_OPTS_MAX is desired
* here but seems difficult to #include
*/
u8 len;
ip_tunnel: convert __be16 tunnel flags to bitmaps Historically, tunnel flags like TUNNEL_CSUM or TUNNEL_ERSPAN_OPT have been defined as __be16. Now all of those 16 bits are occupied and there's no more free space for new flags. It can't be simply switched to a bigger container with no adjustments to the values, since it's an explicit Endian storage, and on LE systems (__be16)0x0001 equals to (__be64)0x0001000000000000. We could probably define new 64-bit flags depending on the Endianness, i.e. (__be64)0x0001 on BE and (__be64)0x00010000... on LE, but that would introduce an Endianness dependency and spawn a ton of Sparse warnings. To mitigate them, all of those places which were adjusted with this change would be touched anyway, so why not define stuff properly if there's no choice. Define IP_TUNNEL_*_BIT counterparts as a bit number instead of the value already coded and a fistful of <16 <-> bitmap> converters and helpers. The two flags which have a different bit position are SIT_ISATAP_BIT and VTI_ISVTI_BIT, as they were defined not as __cpu_to_be16(), but as (__force __be16), i.e. had different positions on LE and BE. Now they both have strongly defined places. Change all __be16 fields which were used to store those flags, to IP_TUNNEL_DECLARE_FLAGS() -> DECLARE_BITMAP(__IP_TUNNEL_FLAG_NUM) -> unsigned long[1] for now, and replace all TUNNEL_* occurrences to their bitmap counterparts. Use the converters in the places which talk to the userspace, hardware (NFP) or other hosts (GRE header). The rest must explicitly use the new flags only. This must be done at once, otherwise there will be too many conversions throughout the code in the intermediate commits. Finally, disable the old __be16 flags for use in the kernel code (except for the two 'irregular' flags mentioned above), to prevent any accidental (mis)use of them. For the userspace, nothing is changed, only additions were made. Most noticeable bloat-o-meter difference (.text): vmlinux: 307/-1 (306) gre.ko: 62/0 (62) ip_gre.ko: 941/-217 (724) [*] ip_tunnel.ko: 390/-900 (-510) [**] ip_vti.ko: 138/0 (138) ip6_gre.ko: 534/-18 (516) [*] ip6_tunnel.ko: 118/-10 (108) [*] gre_flags_to_tnl_flags() grew, but still is inlined [**] ip_tunnel_find() got uninlined, hence such decrease The average code size increase in non-extreme case is 100-200 bytes per module, mostly due to sizeof(long) > sizeof(__be16), as %__IP_TUNNEL_FLAG_NUM is less than %BITS_PER_LONG and the compilers are able to expand the majority of bitmap_*() calls here into direct operations on scalars. Reviewed-by: Simon Horman <horms@kernel.org> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2024-03-27 15:23:53 +00:00
u32 dst_opt_type;
};
struct flow_dissector_key_keyid {
__be32 keyid;
};
/**
* struct flow_dissector_key_ipv4_addrs:
* @src: source ip address
* @dst: destination ip address
*/
struct flow_dissector_key_ipv4_addrs {
/* (src,dst) must be grouped, in the same way than in IP header */
__be32 src;
__be32 dst;
};
/**
* struct flow_dissector_key_ipv6_addrs:
* @src: source ip address
* @dst: destination ip address
*/
struct flow_dissector_key_ipv6_addrs {
/* (src,dst) must be grouped, in the same way than in IP header */
struct in6_addr src;
struct in6_addr dst;
};
/**
* struct flow_dissector_key_tipc:
* @key: source node address combined with selector
*/
struct flow_dissector_key_tipc {
__be32 key;
};
/**
* struct flow_dissector_key_addrs:
* @v4addrs: IPv4 addresses
* @v6addrs: IPv6 addresses
* @tipckey: TIPC key
*/
struct flow_dissector_key_addrs {
union {
struct flow_dissector_key_ipv4_addrs v4addrs;
struct flow_dissector_key_ipv6_addrs v6addrs;
struct flow_dissector_key_tipc tipckey;
};
};
/**
* struct flow_dissector_key_arp:
* @sip: Sender IP address
* @tip: Target IP address
* @op: Operation
* @sha: Sender hardware address
* @tha: Target hardware address
*/
struct flow_dissector_key_arp {
__u32 sip;
__u32 tip;
__u8 op;
unsigned char sha[ETH_ALEN];
unsigned char tha[ETH_ALEN];
};
/**
* struct flow_dissector_key_ports:
* @ports: port numbers of Transport header
* @src: source port number
* @dst: destination port number
*/
struct flow_dissector_key_ports {
union {
__be32 ports;
struct {
__be16 src;
__be16 dst;
};
};
};
/**
* struct flow_dissector_key_ports_range
* @tp: port number from packet
* @tp_min: min port number in range
* @tp_max: max port number in range
*/
struct flow_dissector_key_ports_range {
union {
struct flow_dissector_key_ports tp;
struct {
struct flow_dissector_key_ports tp_min;
struct flow_dissector_key_ports tp_max;
};
};
};
/**
* struct flow_dissector_key_icmp:
* @type: ICMP type
* @code: ICMP code
* @id: Session identifier
*/
struct flow_dissector_key_icmp {
struct {
u8 type;
u8 code;
};
u16 id;
};
/**
* struct flow_dissector_key_eth_addrs:
* @src: source Ethernet address
* @dst: destination Ethernet address
*/
struct flow_dissector_key_eth_addrs {
/* (dst,src) must be grouped, in the same way than in ETH header */
unsigned char dst[ETH_ALEN];
unsigned char src[ETH_ALEN];
};
/**
* struct flow_dissector_key_tcp:
* @flags: flags
*/
struct flow_dissector_key_tcp {
__be16 flags;
};
/**
* struct flow_dissector_key_ip:
* @tos: tos
* @ttl: ttl
*/
struct flow_dissector_key_ip {
__u8 tos;
__u8 ttl;
};
/**
* struct flow_dissector_key_meta:
* @ingress_ifindex: ingress ifindex
* @ingress_iftype: ingress interface type
* @l2_miss: packet did not match an L2 entry during forwarding
*/
struct flow_dissector_key_meta {
int ingress_ifindex;
u16 ingress_iftype;
u8 l2_miss;
};
/**
* struct flow_dissector_key_ct:
* @ct_state: conntrack state after converting with map
* @ct_mark: conttrack mark
* @ct_zone: conntrack zone
* @ct_labels: conntrack labels
*/
struct flow_dissector_key_ct {
u16 ct_state;
u16 ct_zone;
u32 ct_mark;
u32 ct_labels[4];
};
/**
* struct flow_dissector_key_hash:
* @hash: hash value
*/
struct flow_dissector_key_hash {
u32 hash;
};
/**
* struct flow_dissector_key_num_of_vlans:
* @num_of_vlans: num_of_vlans value
*/
struct flow_dissector_key_num_of_vlans {
u8 num_of_vlans;
};
/**
* struct flow_dissector_key_pppoe:
* @session_id: pppoe session id
* @ppp_proto: ppp protocol
* @type: pppoe eth type
*/
struct flow_dissector_key_pppoe {
__be16 session_id;
__be16 ppp_proto;
__be16 type;
};
/**
* struct flow_dissector_key_l2tpv3:
* @session_id: identifier for a l2tp session
*/
struct flow_dissector_key_l2tpv3 {
__be32 session_id;
};
/**
* struct flow_dissector_key_ipsec:
* @spi: identifier for a ipsec connection
*/
struct flow_dissector_key_ipsec {
__be32 spi;
};
/**
* struct flow_dissector_key_cfm
* @mdl_ver: maintenance domain level (mdl) and cfm protocol version
* @opcode: code specifying a type of cfm protocol packet
*
* See 802.1ag, ITU-T G.8013/Y.1731
* 1 2
* |7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0|
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | mdl | version | opcode |
* +-----+---------+-+-+-+-+-+-+-+-+
*/
struct flow_dissector_key_cfm {
u8 mdl_ver;
u8 opcode;
};
#define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5)
#define FLOW_DIS_CFM_MDL_MAX 7
enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */
FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
cls_flower: Fix the behavior using port ranges with hw-offload The recent commit 5c72299fba9d ("net: sched: cls_flower: Classify packets using port ranges") had added filtering based on port ranges to tc flower. However the commit missed necessary changes in hw-offload code, so the feature gave rise to generating incorrect offloaded flow keys in NIC. One more detailed example is below: $ tc qdisc add dev eth0 ingress $ tc filter add dev eth0 ingress protocol ip flower ip_proto tcp \ dst_port 100-200 action drop With the setup above, an exact match filter with dst_port == 0 will be installed in NIC by hw-offload. IOW, the NIC will have a rule which is equivalent to the following one. $ tc qdisc add dev eth0 ingress $ tc filter add dev eth0 ingress protocol ip flower ip_proto tcp \ dst_port 0 action drop The behavior was caused by the flow dissector which extracts packet data into the flow key in the tc flower. More specifically, regardless of exact match or specified port ranges, fl_init_dissector() set the FLOW_DISSECTOR_KEY_PORTS flag in struct flow_dissector to extract port numbers from skb in skb_flow_dissect() called by fl_classify(). Note that device drivers received the same struct flow_dissector object as used in skb_flow_dissect(). Thus, offloaded drivers could not identify which of these is used because the FLOW_DISSECTOR_KEY_PORTS flag was set to struct flow_dissector in either case. This patch adds the new FLOW_DISSECTOR_KEY_PORTS_RANGE flag and the new tp_range field in struct fl_flow_key to recognize which filters are applied to offloaded drivers. At this point, when filters based on port ranges passed to drivers, drivers return the EOPNOTSUPP error because they do not support the feature (the newly created FLOW_DISSECTOR_KEY_PORTS_RANGE flag). Fixes: 5c72299fba9d ("net: sched: cls_flower: Classify packets using port ranges") Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-03 10:40:12 +00:00
FLOW_DISSECTOR_KEY_PORTS_RANGE, /* struct flow_dissector_key_ports */
FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */
FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */
FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */
FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_vlan */
FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_tags */
FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */
FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */
FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */
FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */
FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */
FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */
FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */
FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */
FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */
FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */
FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */
FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */
FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */
FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */
FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */
FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */
FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */
FLOW_DISSECTOR_KEY_MAX,
};
#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0)
#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1)
#define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2)
cls_flower: Fix inability to match GRE/IPIP packets When a packet of a new flow arrives in openvswitch kernel module, it dissects the packet and passes the extracted flow key to ovs-vswtichd daemon. If hw- offload configuration is enabled, the daemon creates a new TC flower entry to bypass openvswitch kernel module for the flow (TC flower can also offload flows to NICs but this time that does not matter). In this processing flow, I found the following issue in cases of GRE/IPIP packets. When ovs_flow_key_extract() in openvswitch module parses a packet of a new GRE (or IPIP) flow received on non-tunneling vports, it extracts information of the outer IP header for ip_proto/src_ip/dst_ip match keys. This means ovs-vswitchd creates a TC flower entry with IP protocol/addresses match keys whose values are those of the outer IP header. OTOH, TC flower, which uses flow_dissector (different parser from openvswitch module), extracts information of the inner IP header. The following flow is an example to describe the issue in more detail. <----------- Outer IP -----------------> <---------- Inner IP ----------> +----------+--------------+--------------+----------+----------+----------+ | ip_proto | src_ip | dst_ip | ip_proto | src_ip | dst_ip | | 47 (GRE) | 192.168.10.1 | 192.168.10.2 | 6 (TCP) | 10.0.0.1 | 10.0.0.2 | +----------+--------------+--------------+----------+----------+----------+ In this case, TC flower entry and extracted information are shown as below: - ovs-vswitchd creates TC flower entry with: - ip_proto: 47 - src_ip: 192.168.10.1 - dst_ip: 192.168.10.2 - TC flower extracts below for IP header matches: - ip_proto: 6 - src_ip: 10.0.0.1 - dst_ip: 10.0.0.2 Thus, GRE or IPIP packets never match the TC flower entry, as each dissector behaves differently. IMHO, the behavior of TC flower (flow dissector) does not look correct, as ip_proto/src_ip/dst_ip in TC flower match means the outermost IP header information except for GRE/IPIP cases. This patch adds a new flow_dissector flag FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP which skips dissection of the encapsulated inner GRE/IPIP header in TC flower classifier. Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-29 09:21:41 +00:00
#define FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP BIT(3)
struct flow_dissector_key {
enum flow_dissector_key_id key_id;
size_t offset; /* offset of struct flow_dissector_key_*
in target the struct */
};
struct flow_dissector {
unsigned long long used_keys;
/* each bit represents presence of one key id */
unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
};
struct flow_keys_basic {
struct flow_dissector_key_control control;
struct flow_dissector_key_basic basic;
};
struct flow_keys {
struct flow_dissector_key_control control;
#define FLOW_KEYS_HASH_START_FIELD basic
net/flow_dissector: switch to siphash UDP IPv6 packets auto flowlabels are using a 32bit secret (static u32 hashrnd in net/core/flow_dissector.c) and apply jhash() over fields known by the receivers. Attackers can easily infer the 32bit secret and use this information to identify a device and/or user, since this 32bit secret is only set at boot time. Really, using jhash() to generate cookies sent on the wire is a serious security concern. Trying to change the rol32(hash, 16) in ip6_make_flowlabel() would be a dead end. Trying to periodically change the secret (like in sch_sfq.c) could change paths taken in the network for long lived flows. Let's switch to siphash, as we did in commit df453700e8d8 ("inet: switch IP ID generator to siphash") Using a cryptographically strong pseudo random function will solve this privacy issue and more generally remove other weak points in the stack. Packet schedulers using skb_get_hash_perturb() benefit from this change. Fixes: b56774163f99 ("ipv6: Enable auto flow labels by default") Fixes: 42240901f7c4 ("ipv6: Implement different admin modes for automatic flow labels") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Fixes: cb1ce2ef387b ("ipv6: Implement automatic flow label generation on transmit") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Jonathan Berger <jonathann1@walla.com> Reported-by: Amit Klein <aksecurity@gmail.com> Reported-by: Benny Pinkas <benny@pinkas.net> Cc: Tom Herbert <tom@herbertland.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-22 14:57:46 +00:00
struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT);
struct flow_dissector_key_tags tags;
struct flow_dissector_key_vlan vlan;
struct flow_dissector_key_vlan cvlan;
struct flow_dissector_key_keyid keyid;
struct flow_dissector_key_ports ports;
struct flow_dissector_key_icmp icmp;
/* 'addrs' must be the last member */
struct flow_dissector_key_addrs addrs;
};
#define FLOW_KEYS_HASH_OFFSET \
offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD)
__be32 flow_get_u32_src(const struct flow_keys *flow);
__be32 flow_get_u32_dst(const struct flow_keys *flow);
extern struct flow_dissector flow_keys_dissector;
extern struct flow_dissector flow_keys_basic_dissector;
/* struct flow_keys_digest:
*
* This structure is used to hold a digest of the full flow keys. This is a
* larger "hash" of a flow to allow definitively matching specific flows where
* the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so
* that it can be used in CB of skb (see sch_choke for an example).
*/
#define FLOW_KEYS_DIGEST_LEN 16
struct flow_keys_digest {
u8 data[FLOW_KEYS_DIGEST_LEN];
};
void make_flow_keys_digest(struct flow_keys_digest *digest,
const struct flow_keys *flow);
static inline bool flow_keys_have_l4(const struct flow_keys *keys)
{
return (keys->ports.ports || keys->tags.flow_label);
}
u32 flow_hash_from_keys(struct flow_keys *keys);
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp,
const void *data, int thoff, int hlen);
static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
{
return flow_dissector->used_keys & (1ULL << key_id);
}
static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id,
void *target_container)
{
return ((char *)target_container) + flow_dissector->offset[key_id];
}
struct bpf_flow_dissector {
struct bpf_flow_keys *flow_keys;
const struct sk_buff *skb;
const void *data;
const void *data_end;
};
static inline void
flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
struct flow_dissector_key_basic *key_basic)
{
memset(key_control, 0, sizeof(*key_control));
memset(key_basic, 0, sizeof(*key_basic));
}
#ifdef CONFIG_BPF_SYSCALL
int flow_dissector_bpf_prog_attach_check(struct net *net,
struct bpf_prog *prog);
#endif /* CONFIG_BPF_SYSCALL */
#endif