Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter/IPVS fixes for net

The following patchset contains Netfilter fixes for net:

1) Crash due to missing initialization of timer data in
   xt_IDLETIMER, from Juhee Kang.

2) NF_CONNTRACK_SECMARK should be bool in Kconfig, from Vegard Nossum.

3) Skip netdev events on netns removal, from Florian Westphal.

4) Add testcase to show port shadowing via UDP, also from Florian.

5) Remove pr_debug() code in ip6t_rt, this fixes a crash due to
   unsafe access to non-linear skbuff, from Xin Long.

6) Make net/ipv4/vs/debug_level read-only from non-init netns,
   from Antoine Tenart.

7) Remove bogus invocation to bash in selftests/netfilter/nft_flowtable.sh
   also from Florian.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-10-21 12:32:41 +01:00
commit 1439caa1d9
8 changed files with 164 additions and 52 deletions

View file

@ -926,7 +926,9 @@ static int translate_table(struct net *net, const char *name,
return -ENOMEM;
for_each_possible_cpu(i) {
newinfo->chainstack[i] =
vmalloc(array_size(udc_cnt, sizeof(*(newinfo->chainstack[0]))));
vmalloc_node(array_size(udc_cnt,
sizeof(*(newinfo->chainstack[0]))),
cpu_to_node(i));
if (!newinfo->chainstack[i]) {
while (i)
vfree(newinfo->chainstack[--i]);

View file

@ -25,12 +25,7 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
static inline bool
segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
{
bool r;
pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n",
invert ? '!' : ' ', min, id, max);
r = (id >= min && id <= max) ^ invert;
pr_debug(" result %s\n", r ? "PASS" : "FAILED");
return r;
return (id >= min && id <= max) ^ invert;
}
static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
@ -65,30 +60,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
pr_debug("TYPE %04X ", rh->type);
pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
pr_debug("IPv6 RT segsleft %02X ",
segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
rh->segments_left,
!!(rtinfo->invflags & IP6T_RT_INV_SGS)));
pr_debug("type %02X %02X %02X ",
rtinfo->rt_type, rh->type,
(!(rtinfo->flags & IP6T_RT_TYP) ||
((rtinfo->rt_type == rh->type) ^
!!(rtinfo->invflags & IP6T_RT_INV_TYP))));
pr_debug("len %02X %04X %02X ",
rtinfo->hdrlen, hdrlen,
!(rtinfo->flags & IP6T_RT_LEN) ||
((rtinfo->hdrlen == hdrlen) ^
!!(rtinfo->invflags & IP6T_RT_INV_LEN)));
pr_debug("res %02X %02X %02X ",
rtinfo->flags & IP6T_RT_RES,
((const struct rt0_hdr *)rh)->reserved,
!((rtinfo->flags & IP6T_RT_RES) &&
(((const struct rt0_hdr *)rh)->reserved)));
ret = (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
rh->segments_left,
!!(rtinfo->invflags & IP6T_RT_INV_SGS))) &&
@ -107,22 +78,22 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
reserved),
sizeof(_reserved),
&_reserved);
if (!rp) {
par->hotdrop = true;
return false;
}
ret = (*rp == 0);
}
pr_debug("#%d ", rtinfo->addrnr);
if (!(rtinfo->flags & IP6T_RT_FST)) {
return ret;
} else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) {
pr_debug("Not strict ");
if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
pr_debug("There isn't enough space\n");
return false;
} else {
unsigned int i = 0;
pr_debug("#%d ", rtinfo->addrnr);
for (temp = 0;
temp < (unsigned int)((hdrlen - 8) / 16);
temp++) {
@ -138,26 +109,20 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
pr_debug("i=%d temp=%d;\n", i, temp);
if (ipv6_addr_equal(ap, &rtinfo->addrs[i]))
i++;
}
if (i == rtinfo->addrnr)
break;
}
pr_debug("i=%d #%d\n", i, rtinfo->addrnr);
if (i == rtinfo->addrnr)
return ret;
else
return false;
}
} else {
pr_debug("Strict ");
if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
pr_debug("There isn't enough space\n");
return false;
} else {
pr_debug("#%d ", rtinfo->addrnr);
for (temp = 0; temp < rtinfo->addrnr; temp++) {
ap = skb_header_pointer(skb,
ptr
@ -173,7 +138,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
break;
}
pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr);
if (temp == rtinfo->addrnr &&
temp == (unsigned int)((hdrlen - 8) / 16))
return ret;

View file

@ -109,7 +109,7 @@ config NF_CONNTRACK_MARK
config NF_CONNTRACK_SECMARK
bool 'Connection tracking security mark support'
depends on NETWORK_SECMARK
default m if NETFILTER_ADVANCED=n
default y if NETFILTER_ADVANCED=n
help
This option enables security markings to be applied to
connections. Typically they are copied to connections from

View file

@ -4090,6 +4090,11 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
#ifdef CONFIG_IP_VS_DEBUG
/* Global sysctls must be ro in non-init netns */
if (!net_eq(net, &init_net))
tbl[idx++].mode = 0444;
#endif
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {

View file

@ -342,12 +342,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
return;
}
/* UNREGISTER events are also happening on netns exit.
*
* Although nf_tables core releases all tables/chains, only this event
* handler provides guarantee that hook->ops.dev is still accessible,
* so we cannot skip exiting net namespaces.
*/
__nft_release_basechain(ctx);
}
@ -366,6 +360,9 @@ static int nf_tables_netdev_event(struct notifier_block *this,
event != NETDEV_CHANGENAME)
return NOTIFY_DONE;
if (!check_net(ctx.net))
return NOTIFY_DONE;
nft_net = nft_pernet(ctx.net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {

View file

@ -137,7 +137,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
{
int ret;
info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
info->timer = kzalloc(sizeof(*info->timer), GFP_KERNEL);
if (!info->timer) {
ret = -ENOMEM;
goto out;

View file

@ -199,7 +199,6 @@ fi
# test basic connectivity
if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
echo "ERROR: ns1 cannot reach ns2" 1>&2
bash
exit 1
fi

View file

@ -741,6 +741,149 @@ EOF
return $lret
}
# test port shadowing.
# create two listening services, one on router (ns0), one
# on client (ns2), which is masqueraded from ns1 point of view.
# ns2 sends udp packet coming from service port to ns1, on a highport.
# Later, if n1 uses same highport to connect to ns0:service, packet
# might be port-forwarded to ns2 instead.
# second argument tells if we expect the 'fake-entry' to take effect
# (CLIENT) or not (ROUTER).
test_port_shadow()
{
local test=$1
local expect=$2
local daddrc="10.0.1.99"
local daddrs="10.0.1.1"
local result=""
local logmsg=""
echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
nc_r=$!
echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
nc_c=$!
# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
# ns1 tries to connect to ns0:1405. With default settings this should connect
# to client, it matches the conntrack entry created above.
result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
if [ "$result" = "$expect" ] ;then
echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
else
echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended"
ret=1
fi
kill $nc_r $nc_c 2>/dev/null
# flush udp entries for next test round, if any
ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
}
# This prevents port shadow of router service via packet filter,
# packets claiming to originate from service port from internal
# network are dropped.
test_port_shadow_filter()
{
local family=$1
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family filter {
chain forward {
type filter hook forward priority 0; policy accept;
meta iif veth1 udp sport 1405 drop
}
}
EOF
test_port_shadow "port-filter" "ROUTER"
ip netns exec "$ns0" nft delete table $family filter
}
# This prevents port shadow of router service via notrack.
test_port_shadow_notrack()
{
local family=$1
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family raw {
chain prerouting {
type filter hook prerouting priority -300; policy accept;
meta iif veth0 udp dport 1405 notrack
udp dport 1405 notrack
}
chain output {
type filter hook output priority -300; policy accept;
udp sport 1405 notrack
}
}
EOF
test_port_shadow "port-notrack" "ROUTER"
ip netns exec "$ns0" nft delete table $family raw
}
# This prevents port shadow of router service via sport remap.
test_port_shadow_pat()
{
local family=$1
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family pat {
chain postrouting {
type nat hook postrouting priority -1; policy accept;
meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random
}
}
EOF
test_port_shadow "pat" "ROUTER"
ip netns exec "$ns0" nft delete table $family pat
}
test_port_shadowing()
{
local family="ip"
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
meta oif veth0 masquerade
}
}
EOF
if [ $? -ne 0 ]; then
echo "SKIP: Could not add add $family masquerade hook"
return $ksft_skip
fi
# test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
test_port_shadow "default" "CLIENT"
# test packet filter based mitigation: prevent forwarding of
# packets claiming to come from the service port.
test_port_shadow_filter "$family"
# test conntrack based mitigation: connections going or coming
# from router:service bypass connection tracking.
test_port_shadow_notrack "$family"
# test nat based mitigation: fowarded packets coming from service port
# are masqueraded with random highport.
test_port_shadow_pat "$family"
ip netns exec "$ns0" nft delete table $family nat
}
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in 0 1 2; do
@ -861,6 +1004,8 @@ reset_counters
$test_inet_nat && test_redirect inet
$test_inet_nat && test_redirect6 inet
test_port_shadowing
if [ $ret -ne 0 ];then
echo -n "FAIL: "
nft --version