shared: add NMRefString

I'd like to refactor libnm's caching. Note that cached D-Bus objects
have repeated strings all over the place. For example every object will
have a set of D-Bus interfaces (strings) and properties (strings) and an
object path (which is referenced by other objects). We can save a lot of
redundant strings by deduplicating/interning them. Also, by interning
them, we can compare them using pointer equality.

Add a NMRefString implementation for this.

An alternative name might be NMInternedString or NMDedupString, because
such a string is always interned. There is no way to create an NMRefString
that is not interned. Still, the name NMRefString sounds better. It is
ref-counted after all.

Notes:

 - glib has GQuark and g_intern_string(). However, such strings cannot
   be unreferenced and are leaked indefinitely. They are thus unsuited for
   anything but a fixed set of well-known strings.

 - glib 2.58 adds GRefString, but we cannot use that because we
   currently still use glib 2.40.
   There are some differences:

     - GRefString is just a typedef to char. That means the glib API
       exposes GRefString like regular character strings.
       NMRefString intentionally does not do that. This makes it slightly
       less convenient to pass it to API that expects "const char *".
       But it makes it clear to the reader that an instance is in fact
       an NMRefString, which indicates that the string is
       interned and can be referenced without an additional copy.

     - GRefString can be optionally interned. That means you can
       only use pointer equality for comparing values if you know
       that the GRefString was created with g_ref_string_new_intern().
       So, GRefString looks like a "const char *" pointer and even if
       you know it's a GRefString, you might not know whether it is
       interned. NMRefString is always interned, and you can always
       compare it using pointer equality.

  - In the past I already proposed a different implementation for a
    ref-string. That made different choices. For example NMRefString
    then was a typedef to "const char *", it did not support interning
    but deduplication (without a global cache), ref/unref was not
    thread safe (but then there was no global cache so that two threads
    could still use the API independently).

The point is, there are various choices to make. GRefString, the
previous NMRefString implementation and the one here, all have pros and
cons. I think for the purpose where I intend NMRefString (dedup and
efficient comparison), it is a preferable implementation.

Ah, and of course NMRefString is an immutable string, which is a nice
property.
This commit is contained in:
Thomas Haller 2019-09-02 07:54:28 +02:00
parent dd33b3a14e
commit 908fadec96
4 changed files with 242 additions and 0 deletions

View file

@ -373,6 +373,8 @@ shared_nm_glib_aux_libnm_glib_aux_la_SOURCES = \
shared/nm-glib-aux/nm-obj.h \
shared/nm-glib-aux/nm-random-utils.c \
shared/nm-glib-aux/nm-random-utils.h \
shared/nm-glib-aux/nm-ref-string.c \
shared/nm-glib-aux/nm-ref-string.h \
shared/nm-glib-aux/nm-secret-utils.c \
shared/nm-glib-aux/nm-secret-utils.h \
shared/nm-glib-aux/nm-shared-utils.c \

View file

@ -185,6 +185,7 @@ shared_nm_glib_aux = static_library(
'nm-glib-aux/nm-json-aux.c',
'nm-glib-aux/nm-keyfile-aux.c',
'nm-glib-aux/nm-random-utils.c',
'nm-glib-aux/nm-ref-string.c',
'nm-glib-aux/nm-secret-utils.c',
'nm-glib-aux/nm-shared-utils.c',
'nm-glib-aux/nm-time-utils.c'),

View file

@ -0,0 +1,187 @@
// SPDX-License-Identifier: LGPL-2.1+
#include "nm-default.h"
#include "nm-ref-string.h"
/*****************************************************************************/
/* Internal representation of an interned string. The public NMRefString
 * handle is the first member (checked by the static assertions on the field
 * offsets), so the code in this file casts between NMRefString pointers and
 * RefString pointers. */
typedef struct {
/* public part that is handed out to callers */
NMRefString r;
/* reference count; accessed with the g_atomic_int_*() functions */
volatile int ref_count;
/* inline storage; nm_ref_string_new_len() copies the string bytes here and
 * appends a terminating NUL */
char str_data[];
} RefString;
/* lock that is held around every access to gl_hash in this file */
G_LOCK_DEFINE_STATIC (gl_lock);
/* table of all live RefString instances. It is created on the first call to
 * nm_ref_string_new_len() and uses g_free() as key destroy function, so
 * removing an entry releases the instance. */
static GHashTable *gl_hash;
/* the first field of NMRefString is a pointer to the NUL terminated string.
* This also allows to compare strings with nm_pstr_equal(), although, pointer
* equality might be better. */
G_STATIC_ASSERT (G_STRUCT_OFFSET (NMRefString, str) == 0);
G_STATIC_ASSERT (G_STRUCT_OFFSET (RefString, r) == 0);
G_STATIC_ASSERT (G_STRUCT_OFFSET (RefString, r.str) == 0);
/*****************************************************************************/
/* Hash function for gl_hash: hashes the r.len bytes found at r.str.
 * Only the embedded NMRefString fields of @ptr are read. */
static guint
_ref_string_hash (gconstpointer ptr)
{
    const RefString *const rstr = ptr;
    NMHashState state;

    nm_hash_init (&state, 1463435489u);
    nm_hash_update (&state, rstr->r.str, rstr->r.len);

    return nm_hash_complete (&state);
}
/* Equality function for gl_hash: two entries are equal when they have the
 * same length and the same bytes (compared as binary data, so embedded NUL
 * characters are significant). Only the embedded NMRefString fields are read. */
static gboolean
_ref_string_equal (gconstpointer pa, gconstpointer pb)
{
    const RefString *a = pa;
    const RefString *b = pb;

    if (a->r.len != b->r.len)
        return FALSE;

    /* The lookup key built by nm_ref_string_new_len() carries the caller's
     * pointer, which may be NULL when the length is zero. Passing NULL to
     * memcmp() is undefined even for zero bytes, so do not call it then. */
    if (a->r.len == 0)
        return TRUE;

    return memcmp (a->r.str, b->r.str, a->r.len) == 0;
}
/*****************************************************************************/
/* Debug check for a RefString that the caller holds a reference to: the
 * reference count must be positive and below G_MAXINT, and the instance must
 * be the one registered in gl_hash for its content. */
static void
_ASSERT (const RefString *rstr0)
{
    int refs;

    nm_assert (rstr0);

    G_LOCK (gl_lock);

    refs = g_atomic_int_get (&rstr0->ref_count);
    nm_assert (refs > 0);
    nm_assert (refs < G_MAXINT);

    nm_assert (rstr0 == g_hash_table_lookup (gl_hash, rstr0));

    G_UNLOCK (gl_lock);
}
/**
 * nm_ref_string_new_len:
 * @cstr: the string to intern. Must contain @len bytes.
 *   If @len is zero, @cstr may be %NULL. Note that it is
 *   acceptable that the string contains a NUL character
 *   within the first @len bytes. That is, the string is
 *   not treated as a NUL terminated string, but as binary.
 *   Also, contrary to strncpy(), this will read all the
 *   first @len bytes. It won't stop at the first NUL.
 * @len: the length of the string (usually there is no NUL character
 *   within the first @len bytes, but that would be acceptable as well
 *   to add binary data).
 *
 * Note that the resulting NMRefString instance will always be NUL terminated
 * (at position @len).
 *
 * Note that NMRefString are always interned/deduplicated. If such a string
 * already exists, the existing instance will be referenced and returned.
 *
 *
 * Since all NMRefString are shared and interned, you may use
 * pointer equality to compare them. Note that if a NMRefString contains
 * a NUL character (meaning, if
 *
 *    strlen (nm_ref_string_get_str (str)) != nm_ref_string_get_len (str)
 *
 * ), then pointer in-equality does not mean that the NUL terminated strings
 * are also unequal. In other words, for strings that contain NUL characters,
 *
 *    if (str1 != str2)
 *       assert (!nm_streq0 (nm_ref_string_get_str (str1), nm_ref_string_get_str (str2)));
 *
 * might not hold!
 *
 *
 * NMRefString is thread-safe.
 *
 * Returns: (transfer full): the interned string. This is
 *   never %NULL, but note that %NULL is also a valid NMRefString.
 *   The result must be unrefed with nm_ref_string_unref().
 */
NMRefString *
nm_ref_string_new_len (const char *cstr, gsize len)
{
    RefString *rstr0;

    G_LOCK (gl_lock);

    if (G_UNLIKELY (!gl_hash)) {
        /* first use: create the table. It owns the instances and frees
         * them with g_free() when they are removed. */
        gl_hash = g_hash_table_new_full (_ref_string_hash, _ref_string_equal, g_free, NULL);
        rstr0 = NULL;
    } else {
        /* the hash and equal functions only read the NMRefString part,
         * so a stack-allocated NMRefString is sufficient as lookup key. */
        NMRefString rr_lookup = {
            .len = len,
            .str = cstr,
        };

        rstr0 = g_hash_table_lookup (gl_hash, &rr_lookup);
    }

    if (rstr0) {
        /* a count of zero is accepted here: an unref may already have
         * dropped the last reference without having removed the entry yet. */
        nm_assert (({
                        int r = g_atomic_int_get (&rstr0->ref_count);

                        (r >= 0 && r < G_MAXINT);
                    }));
        g_atomic_int_inc (&rstr0->ref_count);
    } else {
        /* header, @len bytes of data and the terminating NUL in one allocation. */
        rstr0 = g_malloc (sizeof (RefString) + 1 + len);
        rstr0->ref_count = 1;

        /* The public fields are declared const, so they are initialized
         * through casted pointers. The cast must be applied to the ADDRESS
         * of the field, not to its (still uninitialized) value. */
        *((gsize *) &rstr0->r.len) = len;
        *((const char **) &rstr0->r.str) = rstr0->str_data;

        if (len > 0)
            memcpy (rstr0->str_data, cstr, len);
        rstr0->str_data[len] = '\0';

        if (!g_hash_table_add (gl_hash, rstr0))
            nm_assert_not_reached ();
    }

    G_UNLOCK (gl_lock);

    return &rstr0->r;
}
/* Takes an additional reference on @rstr and returns it.
 * A %NULL argument is accepted and yields %NULL. */
NMRefString *
nm_ref_string_ref (NMRefString *rstr)
{
    RefString *rstr0;

    if (!rstr)
        return NULL;

    rstr0 = (RefString *) rstr;

    _ASSERT (rstr0);

    g_atomic_int_inc (&rstr0->ref_count);

    return &rstr0->r;
}
/* Drops one reference from @rstr, which must not be %NULL. When the count
 * is still zero after taking the lock, the instance is removed from gl_hash;
 * the table was created with g_free() as key destroy function, so the
 * removal releases the instance. */
void
_nm_ref_string_unref_non_null (NMRefString *rstr)
{
RefString *const rstr0 = (RefString *) rstr;
_ASSERT (rstr0);
/* fast path: this was not the last reference, nothing to clean up. */
if (G_LIKELY (!g_atomic_int_dec_and_test (&rstr0->ref_count)))
return;
G_LOCK (gl_lock);
/* In the fast path above, we already decremented the ref-count to zero
 * without holding the lock. In the meantime nm_ref_string_new_len() may have
 * found the entry in gl_hash and incremented the count again, so we need to
 * recheck that the ref-count is still zero before removing the entry.
 *
 * NOTE(review): if another thread re-references the instance in that window
 * and then drops it to zero again, that thread removes (and frees) the entry
 * first, and the read below would access freed memory -- verify whether this
 * sequence is possible. */
if (g_atomic_int_get (&rstr0->ref_count) == 0)
g_hash_table_remove (gl_hash, rstr0);
G_UNLOCK (gl_lock);
}
/*****************************************************************************/

View file

@ -0,0 +1,52 @@
// SPDX-License-Identifier: LGPL-2.1+
#ifndef __NM_REF_STRING_H__
#define __NM_REF_STRING_H__
/*****************************************************************************/
/* Public handle of an interned, reference-counted string. Both fields are
 * declared const; read them directly or via nm_ref_string_get_str() and
 * nm_ref_string_get_len(). */
typedef struct {
/* the string data; the implementation places a NUL byte at position len */
const char *const str;
/* number of bytes in str, not counting the terminating NUL */
const gsize len;
} NMRefString;
/*****************************************************************************/
/* Interns the first @len bytes of @cstr and returns a reference to the
 * shared instance. */
NMRefString *nm_ref_string_new_len (const char *cstr, gsize len);
/* Interns the NUL terminated string @cstr via nm_ref_string_new_len().
 * A %NULL @cstr yields %NULL. */
static inline NMRefString *
nm_ref_string_new (const char *cstr)
{
    if (!cstr)
        return NULL;

    return nm_ref_string_new_len (cstr, strlen (cstr));
}
/* Takes an additional reference on @rstr and returns it; %NULL is passed through. */
NMRefString *nm_ref_string_ref (NMRefString *rstr);
/* Drops one reference from @rstr, which must not be %NULL. Use
 * nm_ref_string_unref() when the pointer may be %NULL. */
void _nm_ref_string_unref_non_null (NMRefString *rstr);
/* Drops one reference from @rstr. Does nothing if @rstr is %NULL. */
static inline void
nm_ref_string_unref (NMRefString *rstr)
{
    if (!rstr)
        return;

    _nm_ref_string_unref_non_null (rstr);
}
/* Automatic-cleanup support for NMRefString pointers, built on
 * _nm_ref_string_unref_non_null(). Presumably the generated helper skips
 * %NULL pointers -- confirm against the definition of NM_AUTO_DEFINE_FCN_VOID0.
 * Declare a variable with the nm_auto_ref_string attribute to use it. */
NM_AUTO_DEFINE_FCN_VOID0 (NMRefString *, _nm_auto_ref_string, _nm_ref_string_unref_non_null)
#define nm_auto_ref_string nm_auto(_nm_auto_ref_string)
/*****************************************************************************/
/* Returns the string data of @rstr, or %NULL if @rstr is %NULL. */
static inline const char *
nm_ref_string_get_str (NMRefString *rstr)
{
    if (!rstr)
        return NULL;

    return rstr->str;
}
/* Returns the length in bytes of @rstr, or 0 if @rstr is %NULL. */
static inline gsize
nm_ref_string_get_len (NMRefString *rstr)
{
    if (!rstr)
        return 0u;

    return rstr->len;
}
#endif /* __NM_REF_STRING_H__ */