ipv6: remove hard coded limitation on ipv6_pinfo

IPv6 inet sockets are supposed to have a "struct ipv6_pinfo"
field at the end of their definition, so that inet6_sk_generic()
can derive from socket size the offset of the "struct ipv6_pinfo".

This is very fragile, and prevents adding bigger alignment
in sockets, because inet6_sk_generic() does not work
if the compiler adds padding after the ipv6_pinfo component.

We are currently working on a patch series to reorganize
TCP structures for better data locality and found issues
similar to the one fixed in commit f5d547676c
("tcp: fix tcp_inet6_sk() for 32bit kernels")

Alternative would be to force an alignment on "struct ipv6_pinfo",
greater or equal to __alignof__(any ipv6 sock) to ensure there is
no padding. This does not look great.

v2: fix typo in mptcp_proto_v6_init() (Paolo)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Chao Wu <wwchao@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Coco Li <lixiaoyan@google.com>
Cc: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2023-07-20 11:09:01 +00:00 committed by David S. Miller
parent 1671bcfd76
commit f5f80e32de
13 changed files with 16 additions and 20 deletions

View file

@ -200,14 +200,7 @@ struct inet6_cork {
u8 tclass;
};
/**
* struct ipv6_pinfo - ipv6 private area
*
* In the struct sock hierarchy (tcp6_sock, upd6_sock, etc)
* this _must_ be the last member, so that inet6_sk_generic
* is able to calculate its offset from the base struct sock
* by using the struct proto->slab_obj_size member. -acme
*/
/* struct ipv6_pinfo - ipv6 private area */
struct ipv6_pinfo {
struct in6_addr saddr;
struct in6_pktinfo sticky_pktinfo;
@ -307,19 +300,19 @@ struct raw6_sock {
__u32 offset; /* checksum offset */
struct icmp6_filter filter;
__u32 ip6mr_table;
/* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */
struct ipv6_pinfo inet6;
};
struct udp6_sock {
struct udp_sock udp;
/* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */
struct ipv6_pinfo inet6;
};
struct tcp6_sock {
struct tcp_sock tcp;
/* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */
struct ipv6_pinfo inet6;
};

View file

@ -1339,6 +1339,7 @@ struct proto {
struct kmem_cache *slab;
unsigned int obj_size;
unsigned int ipv6_pinfo_offset;
slab_flags_t slab_flags;
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */

View file

@ -1056,6 +1056,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
.ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,

View file

@ -13,10 +13,6 @@
struct dccp6_sock {
struct dccp_sock dccp;
/*
* ipv6_pinfo has to be the last member of dccp6_sock,
* see inet6_sk_generic.
*/
struct ipv6_pinfo inet6;
};

View file

@ -102,9 +102,9 @@ bool ipv6_mod_enabled(void)
}
EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
const int offset = sk->sk_prot->ipv6_pinfo_offset;
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

View file

@ -215,6 +215,7 @@ struct proto pingv6_prot = {
.get_port = ping_get_port,
.put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
.ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
};
EXPORT_SYMBOL_GPL(pingv6_prot);

View file

@ -1216,6 +1216,7 @@ struct proto rawv6_prot = {
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
.ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
.useroffset = offsetof(struct raw6_sock, filter),
.usersize = sizeof_field(struct raw6_sock, filter),
.h.raw_hash = &raw_v6_hashinfo,

View file

@ -2175,6 +2175,7 @@ struct proto tcpv6_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
.ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,

View file

@ -1798,6 +1798,7 @@ struct proto udpv6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
.ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = NULL,
.diag_destroy = udp_abort,
};

View file

@ -67,6 +67,7 @@ struct proto udplitev6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
.ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = &udplite_table,
};

View file

@ -36,9 +36,6 @@ struct l2tp_ip6_sock {
u32 conn_id;
u32 peer_conn_id;
/* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see
* inet6_sk_generic
*/
struct ipv6_pinfo inet6;
};
@ -730,6 +727,7 @@ static struct proto l2tp_ip6_prot = {
.hash = l2tp_ip6_hash,
.unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
.ipv6_pinfo_offset = offsetof(struct l2tp_ip6_sock, inet6),
};
static const struct proto_ops l2tp_ip6_ops = {

View file

@ -3988,6 +3988,7 @@ int __init mptcp_proto_v6_init(void)
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
err = proto_register(&mptcp_v6_prot, 1);
if (err)

View file

@ -9732,6 +9732,7 @@ struct proto sctpv6_prot = {
.unhash = sctp_unhash,
.no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
.ipv6_pinfo_offset = offsetof(struct sctp6_sock, inet6),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
offsetof(struct sctp6_sock, sctp.subscribe) +