all: use siphash24 for hashing

siphash24() is wildly used by projects nowadays.

It's certainly slower then our djb hashing that we used before.
But quite likely it's fast enough for us, given how wildly it is
used. I think it would be hard to profile NetworkManager to show
that the performance of hash tables is the issue, be it with
djb or siphash24.

Certainly with siphash24() it's much harder to exploit the hashing
algorithm to cause worst case hash operations (provided that the
seed is kept private). Does this better resistance against a denial
of service matter for us? Probably not, but let's better be safe then
sorry.

Note that systemd's implementation uses a different seed for each hash
table (at least, after the hash table grows to a certain size).
We don't do that and use only one global seed.
This commit is contained in:
Thomas Haller 2017-10-13 17:16:23 +02:00
parent 3434261811
commit ee76b0979f
10 changed files with 147 additions and 252 deletions

View file

@ -425,6 +425,7 @@ libnm_core_lib_h_priv = \
shared/nm-utils/nm-shared-utils.h \
shared/nm-utils/nm-random-utils.h \
shared/nm-utils/nm-udev-utils.h \
shared/nm-utils/siphash24.h \
shared/nm-meta-setting.h \
libnm-core/crypto.h \
libnm-core/nm-connection-private.h \
@ -443,6 +444,7 @@ libnm_core_lib_c_real = \
shared/nm-utils/nm-shared-utils.c \
shared/nm-utils/nm-random-utils.c \
shared/nm-utils/nm-udev-utils.c \
shared/nm-utils/siphash24.c \
shared/nm-meta-setting.c \
libnm-core/crypto.c \
libnm-core/nm-connection.c \
@ -1127,6 +1129,7 @@ src_libsystemd_nm_la_cppflags = \
-I$(builddir)/libnm-core \
-I$(srcdir)/src \
-I$(srcdir)/src/systemd/sd-adapt \
-I$(srcdir)/shared/nm-utils \
-I$(srcdir)/src/systemd/src/systemd \
-I$(srcdir)/src/systemd/src/basic \
-I$(srcdir)/src/systemd/src/shared \
@ -1213,8 +1216,6 @@ src_libsystemd_nm_la_SOURCES = \
src/systemd/src/basic/refcnt.h \
src/systemd/src/basic/set.h \
src/systemd/src/basic/signal-util.h \
src/systemd/src/basic/siphash24.c \
src/systemd/src/basic/siphash24.h \
src/systemd/src/basic/socket-util.c \
src/systemd/src/basic/socket-util.h \
src/systemd/src/basic/sparse-endian.h \
@ -1228,7 +1229,6 @@ src_libsystemd_nm_la_SOURCES = \
src/systemd/src/basic/time-util.c \
src/systemd/src/basic/time-util.h \
src/systemd/src/basic/umask-util.h \
src/systemd/src/basic/unaligned.h \
src/systemd/src/basic/utf8.c \
src/systemd/src/basic/utf8.h \
src/systemd/src/basic/util.c \
@ -3009,6 +3009,9 @@ $(src_tests_test_wired_defname_OBJECTS): $(libnm_core_lib_h_pub_mkenums)
$(src_tests_test_utils_OBJECTS): $(libnm_core_lib_h_pub_mkenums)
src_tests_test_systemd_CPPFLAGS = $(src_libsystemd_nm_la_cppflags)
src_tests_test_systemd_SOURCES = \
shared/nm-utils/siphash24.c \
src/tests/test-systemd.c
src_tests_test_systemd_LDADD = \
src/libsystemd-nm.la \
$(src_libsystemd_nm_la_libadd)
@ -3200,6 +3203,8 @@ clients_common_libnmc_base_la_SOURCES = \
shared/nm-utils/nm-random-utils.h \
shared/nm-utils/nm-shared-utils.c \
shared/nm-utils/nm-shared-utils.h \
shared/nm-utils/siphash24.c \
shared/nm-utils/siphash24.h \
\
clients/common/nm-secret-agent-simple.c \
clients/common/nm-secret-agent-simple.h \

View file

@ -87,18 +87,17 @@ _test_hash_str (const char *str)
const guint SEED = 10;
nm_hash_init (&h, SEED);
nm_hash_update_str (&h, str);
nm_hash_update_str0 (&h, str);
v = nm_hash_complete (&h);
{
/* assert that hashing a string and a buffer yields the
* same result.
*
* I think that is a desirable property. */
nm_hash_init (&h, SEED);
nm_hash_update_mem (&h, str, str ? strlen (str) : 0);
v2 = nm_hash_complete (&h);
}
/* assert that hashing a string and a buffer yields the
* same result.
*
* I think that is a desirable property. */
nm_hash_init (&h, SEED);
nm_hash_update_mem (&h, str, strlen (str));
v2 = nm_hash_complete (&h);
g_assert (v == v2);
return v;
}
@ -106,35 +105,10 @@ _test_hash_str (const char *str)
static void
test_nm_hash (void)
{
NMHashState h;
_test_hash_str ("");
_test_hash_str ("a");
_test_hash_str ("aa");
_test_hash_str ("diceros bicornis longipes");
memset (&h, 0, sizeof (h));
g_assert_cmpint (nm_hash_complete (&h), ==, 1396707757u);
/* note how two different string still always hash the same,
* although we use a global seed that we initialize each time
* differently.
*
* The aim would be that two collisions depend on the seed value,
* which they currently don't. */
g_assert_cmpint (nm_hash_str ("BA"), ==, nm_hash_str ("Ab"));
/* with the current hasing algorighm, once we know two words that hash
* the same, we can trivally find more collions by concatenating
* them (which is bad). */
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("AbAbAb"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("AbAbBA"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("AbBAAb"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("AbBABA"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("BAAbAb"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("BAAbBA"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("BABAAb"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("BABABA"));
}
/*****************************************************************************/

View file

@ -23,33 +23,59 @@
#include "nm-hash-utils.h"
#include <stdint.h>
#include "nm-shared-utils.h"
#include "nm-random-utils.h"
/*****************************************************************************/
#define HASH_KEY_SIZE 16u
#define HASH_KEY_SIZE_GUINT ((HASH_KEY_SIZE + sizeof (guint) - 1) / sizeof (guint))
G_STATIC_ASSERT (sizeof (guint) * HASH_KEY_SIZE_GUINT >= HASH_KEY_SIZE);
static const guint8 *
_get_hash_key (void)
{
static const guint8 *volatile global_seed = NULL;
const guint8 *g;
g = global_seed;
if (G_UNLIKELY (g == NULL)) {
/* the returned hash is aligned to guin64, hence, it is save
* to use it as guint* or guint64* pointer. */
static union {
guint8 v8[HASH_KEY_SIZE];
} g_arr _nm_alignas (guint64);
static gsize g_lock;
if (g_once_init_enter (&g_lock)) {
nm_utils_random_bytes (g_arr.v8, sizeof (g_arr.v8));
g_atomic_pointer_compare_and_exchange (&global_seed, NULL, g_arr.v8);
g = g_arr.v8;
g_once_init_leave (&g_lock, 1);
} else {
g = global_seed;
nm_assert (g);
}
}
return g;
}
void
nm_hash_init (NMHashState *state, guint static_seed)
{
static volatile guint global_seed = 0;
guint g, s;
const guint8 *g;
guint seed[HASH_KEY_SIZE_GUINT];
nm_assert (state);
/* we xor @seed with a random @global_seed. This is to make the hashing behavior
* less predictable and harder to exploit collisions. */
g = global_seed;
if (G_UNLIKELY (g == 0)) {
nm_utils_random_bytes (&s, sizeof (s));
if (s == 0)
s = 42;
g_atomic_int_compare_and_exchange ((int *) &global_seed, 0, s);
g = global_seed;
nm_assert (g);
}
s = g ^ static_seed;
state->hash = s;
g = _get_hash_key ();
memcpy (seed, g, HASH_KEY_SIZE);
seed[0] ^= static_seed;
siphash24_init (&state->_state, (const guint8 *) seed);
}
guint
@ -57,8 +83,11 @@ nm_hash_str (const char *str)
{
NMHashState h;
nm_hash_init (&h, 1867854211u);
nm_hash_update_str (&h, str);
if (str) {
nm_hash_init (&h, 1867854211u);
nm_hash_update_str (&h, str);
} else
nm_hash_init (&h, 842995561u);
return nm_hash_complete (&h);
}
@ -68,6 +97,21 @@ nm_str_hash (gconstpointer str)
return nm_hash_str (str);
}
guint
nm_hash_ptr (gconstpointer ptr)
{
guint h;
h = ((const guint *) _get_hash_key ())[0];
if (sizeof (ptr) <= sizeof (guint))
h = h ^ ((guint) ((uintptr_t) ptr));
else
h = h ^ ((guint) (((uintptr_t) ptr) >> 32)) ^ ((guint) ((uintptr_t) ptr));
return h ?: 2907677551u;
}
guint
nm_direct_hash (gconstpointer ptr)
{

View file

@ -22,10 +22,10 @@
#ifndef __NM_HASH_UTILS_H__
#define __NM_HASH_UTILS_H__
#include <stdint.h>
#include "siphash24.h"
typedef struct {
guint hash;
struct siphash _state;
} NMHashState;
void nm_hash_init (NMHashState *state, guint static_seed);
@ -33,93 +33,94 @@ void nm_hash_init (NMHashState *state, guint static_seed);
static inline guint
nm_hash_complete (NMHashState *state)
{
guint64 h;
nm_assert (state);
h = siphash24_finalize (&state->_state);
/* we don't ever want to return a zero hash.
*
* NMPObject requires that in _idx_obj_part(), and it's just a good idea. */
return state->hash ?: 1396707757u;
return (((guint) (h >> 32)) ^ ((guint) h)) ?: 1396707757u;
}
static inline void
nm_hash_update (NMHashState *state, const void *ptr, gsize n)
{
nm_assert (state);
nm_assert (ptr);
nm_assert (n > 0);
siphash24_compress (ptr, n, &state->_state);
}
static inline void
nm_hash_update_uint (NMHashState *state, guint val)
{
guint h;
nm_assert (state);
h = state->hash;
h = (h << 5) + h + val;
state->hash = h;
nm_hash_update (state, &val, sizeof (val));
}
static inline void
nm_hash_update_uint64 (NMHashState *state, guint64 val)
{
guint h;
nm_assert (state);
h = state->hash;
h = (h << 5) + h + ((guint) val);
h = (h << 5) + h + ((guint) (val >> 32));
state->hash = h;
nm_hash_update (state, &val, sizeof (val));
}
static inline void
nm_hash_update_ptr (NMHashState *state, gconstpointer ptr)
{
if (sizeof (ptr) <= sizeof (guint))
nm_hash_update_uint (state, ((guint) ((uintptr_t) ptr)));
else
nm_hash_update_uint64 (state, (guint64) ((uintptr_t) ptr));
nm_hash_update (state, &ptr, sizeof (ptr));
}
static inline void
nm_hash_update_mem (NMHashState *state, const void *ptr, gsize n)
{
gsize i;
guint h;
/* This also hashes the length of the data. That means,
* hashing two consecutive binary fields (of arbitrary
* length), will hash differently. That is,
* [[1,1], []] differs from [[1],[1]].
*
* If you have a constant length (sizeof), use nm_hash_update()
* instead. */
nm_hash_update (state, &n, sizeof (n));
if (n > 0)
siphash24_compress (ptr, n, &state->_state);
}
nm_assert (state);
static inline void
nm_hash_update_str0 (NMHashState *state, const char *str)
{
if (str)
nm_hash_update_mem (state, str, strlen (str));
else {
gsize n = G_MAXSIZE;
/* use the same hash seed as nm_hash_update_str().
* That way, nm_hash_update_str(&h, s) is identical to
* nm_hash_update_mem(&h, s, strlen(s)). */
h = state->hash;
for (i = 0; i < n; i++)
h = (h << 5) + h + ((guint) ((const guint8 *) ptr)[i]);
h = (h << 5) + h + 1774132687u;
state->hash = h;
nm_hash_update (state, &n, sizeof (n));
}
}
static inline void
nm_hash_update_str (NMHashState *state, const char *str)
{
const guint8 *p = (const guint8 *) str;
guint8 c;
guint h;
nm_assert (state);
/* Note that NULL hashes differently from "". */
h = state->hash;
if (str) {
while ((c = *p++))
h = (h << 5) + h + ((guint) c);
h = (h << 5) + h + 1774132687u;
} else
h = (h << 5) + h + 2967906233u;
state->hash = h;
nm_assert (str);
nm_hash_update (state, str, strlen (str) + 1);
}
static inline guint
nm_hash_ptr (gconstpointer ptr)
{
if (sizeof (ptr) <= sizeof (guint))
return (guint) ((uintptr_t) ptr);
else
return ((guint) (((uintptr_t) ptr) >> 32)) ^ ((guint) ((uintptr_t) ptr));
}
#if _NM_CC_SUPPORT_GENERIC
/* Like nm_hash_update_str(), but restricted to arrays only. nm_hash_update_str() only works
* with a @str argument that cannot be NULL. If you have a string pointer, that is never NULL, use
* nm_hash_update() instead. */
#define nm_hash_update_strarr(state, str) \
(_Generic (&(str), \
const char (*) [sizeof (str)]: nm_hash_update_str ((state), (str)), \
char (*) [sizeof (str)]: nm_hash_update_str ((state), (str))) \
)
#else
#define nm_hash_update_strarr(state, str) nm_hash_update_str ((state), (str))
#endif
guint nm_hash_ptr (gconstpointer ptr);
guint nm_direct_hash (gconstpointer str);
guint nm_hash_str (const char *str);

View file

@ -17,11 +17,12 @@
coding style)
*/
#include "nm-sd-adapt.h"
#include "nm-default.h"
#define assert(cond) nm_assert (cond)
#include <stdio.h>
#include "macro.h"
#include "siphash24.h"
#include "unaligned.h"

View file

@ -277,8 +277,8 @@ lldp_neighbor_id_hash (gconstpointer ptr)
NMHashState h;
nm_hash_init (&h, 23423423u);
nm_hash_update_str (&h, neigh->chassis_id);
nm_hash_update_str (&h, neigh->port_id);
nm_hash_update_str0 (&h, neigh->chassis_id);
nm_hash_update_str0 (&h, neigh->port_id);
nm_hash_update_uint (&h, neigh->chassis_id_type);
nm_hash_update_uint (&h, neigh->port_id_type);
return nm_hash_complete (&h);

View file

@ -5131,7 +5131,7 @@ nm_platform_link_hash (const NMPlatformLink *obj)
nm_hash_init (&h, 99413953u);
nm_hash_update_uint (&h, obj->ifindex);
nm_hash_update_uint (&h, obj->type);
nm_hash_update_str (&h, obj->name);
nm_hash_update_strarr (&h, obj->name);
nm_hash_update_uint (&h, obj->master);
nm_hash_update_uint (&h, obj->parent);
nm_hash_update_uint (&h, obj->n_ifi_flags);
@ -5141,8 +5141,8 @@ nm_platform_link_hash (const NMPlatformLink *obj)
nm_hash_update_uint (&h, obj->arptype);
nm_hash_update_uint (&h, obj->addr.len);
nm_hash_update_uint (&h, obj->inet6_addr_gen_mode_inv);
nm_hash_update_str (&h, obj->kind);
nm_hash_update_str (&h, obj->driver);
nm_hash_update_str0 (&h, obj->kind);
nm_hash_update_str0 (&h, obj->driver);
nm_hash_update_mem (&h, obj->addr.data, obj->addr.len);
nm_hash_update_mem (&h, &obj->inet6_token, sizeof (obj->inet6_token));
nm_hash_update_uint (&h, obj->rx_packets);
@ -5223,8 +5223,7 @@ nm_platform_lnk_infiniband_hash (const NMPlatformLnkInfiniband *obj)
nm_hash_init (&h, 1748638583u);
nm_hash_update_uint (&h, obj->p_key);
if (obj->mode)
nm_hash_update_str (&h, obj->mode);
nm_hash_update_str0 (&h, obj->mode);
return nm_hash_complete (&h);
}
@ -5481,7 +5480,7 @@ nm_platform_ip4_address_hash (const NMPlatformIP4Address *obj)
nm_hash_update_uint (&h, obj->lifetime);
nm_hash_update_uint (&h, obj->preferred);
nm_hash_update_uint (&h, obj->n_ifa_flags);
nm_hash_update_str (&h, obj->label);
nm_hash_update_strarr (&h, obj->label);
}
return nm_hash_complete (&h);
}

View file

@ -186,7 +186,7 @@ _idx_obj_part (const DedupMultiIdxType *idx_type,
/* we request a hash from obj_a. Hash the relevant parts. */
nm_hash_init (&h, 2126752699u);
nm_hash_update_uint (&h, idx_type->cache_id_type);
nm_hash_update_str (&h, obj_a->link.name);
nm_hash_update_strarr (&h, obj_a->link.name);
return _HASH_NON_ZERO (&h);
}
/* just return 1, to indicate that obj_a is partitionable by this idx_type. */

View file

@ -1,129 +0,0 @@
#pragma once
/***
This file is part of systemd.
Copyright 2014 Tom Gundersen
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <endian.h>
#include <stdint.h>
/* BE */
static inline uint16_t unaligned_read_be16(const void *_u) {
const uint8_t *u = _u;
return (((uint16_t) u[0]) << 8) |
((uint16_t) u[1]);
}
static inline uint32_t unaligned_read_be32(const void *_u) {
const uint8_t *u = _u;
return (((uint32_t) unaligned_read_be16(u)) << 16) |
((uint32_t) unaligned_read_be16(u + 2));
}
static inline uint64_t unaligned_read_be64(const void *_u) {
const uint8_t *u = _u;
return (((uint64_t) unaligned_read_be32(u)) << 32) |
((uint64_t) unaligned_read_be32(u + 4));
}
static inline void unaligned_write_be16(void *_u, uint16_t a) {
uint8_t *u = _u;
u[0] = (uint8_t) (a >> 8);
u[1] = (uint8_t) a;
}
static inline void unaligned_write_be32(void *_u, uint32_t a) {
uint8_t *u = _u;
unaligned_write_be16(u, (uint16_t) (a >> 16));
unaligned_write_be16(u + 2, (uint16_t) a);
}
static inline void unaligned_write_be64(void *_u, uint64_t a) {
uint8_t *u = _u;
unaligned_write_be32(u, (uint32_t) (a >> 32));
unaligned_write_be32(u + 4, (uint32_t) a);
}
/* LE */
static inline uint16_t unaligned_read_le16(const void *_u) {
const uint8_t *u = _u;
return (((uint16_t) u[1]) << 8) |
((uint16_t) u[0]);
}
static inline uint32_t unaligned_read_le32(const void *_u) {
const uint8_t *u = _u;
return (((uint32_t) unaligned_read_le16(u + 2)) << 16) |
((uint32_t) unaligned_read_le16(u));
}
static inline uint64_t unaligned_read_le64(const void *_u) {
const uint8_t *u = _u;
return (((uint64_t) unaligned_read_le32(u + 4)) << 32) |
((uint64_t) unaligned_read_le32(u));
}
static inline void unaligned_write_le16(void *_u, uint16_t a) {
uint8_t *u = _u;
u[0] = (uint8_t) a;
u[1] = (uint8_t) (a >> 8);
}
static inline void unaligned_write_le32(void *_u, uint32_t a) {
uint8_t *u = _u;
unaligned_write_le16(u, (uint16_t) a);
unaligned_write_le16(u + 2, (uint16_t) (a >> 16));
}
static inline void unaligned_write_le64(void *_u, uint64_t a) {
uint8_t *u = _u;
unaligned_write_le32(u, (uint32_t) a);
unaligned_write_le32(u + 4, (uint32_t) (a >> 32));
}
#if __BYTE_ORDER == __BIG_ENDIAN
#define unaligned_read_ne16 unaligned_read_be16
#define unaligned_read_ne32 unaligned_read_be32
#define unaligned_read_ne64 unaligned_read_be64
#define unaligned_write_ne16 unaligned_write_be16
#define unaligned_write_ne32 unaligned_write_be32
#define unaligned_write_ne64 unaligned_write_be64
#else
#define unaligned_read_ne16 unaligned_read_le16
#define unaligned_read_ne32 unaligned_read_le32
#define unaligned_read_ne64 unaligned_read_le64
#define unaligned_write_ne16 unaligned_write_le16
#define unaligned_write_ne32 unaligned_write_le32
#define unaligned_write_ne64 unaligned_write_le64
#endif