journal: Store offsets to tail entry array objects in chain

Previously, we'd iterate an entry array from start to end every time
we added an entry offset to it. To speed up this operation, we cache
the last entry array in the chain and how many items it contains.
This allows the addition of an entry to the chain to be done in
constant time instead of linear time as we don't have to iterate
the entire chain anymore every time we add an entry.
This commit is contained in:
Daan De Meyer 2022-01-21 18:29:41 +00:00
parent 0e35afff1d
commit e81710d3d0
6 changed files with 117 additions and 47 deletions

View file

@ -177,6 +177,9 @@ _packed_ struct Header {
/* Added in 246 */
le64_t data_hash_chain_depth;
le64_t field_hash_chain_depth;
/* Added in 252 */
le32_t tail_entry_array_offset;
le32_t tail_entry_array_n_entries;
};
```
@ -231,6 +234,8 @@ became too frequent.
Similarly, **field_hash_chain_depth** is a counter of the deepest chain in the
field hash table, minus one.
**tail_entry_array_offset** and **tail_entry_array_n_entries** allow immediate
access to the last entry array in the global entry array chain.
## Extensibility
@ -397,7 +402,16 @@ _packed_ struct DataObject {
le64_t entry_offset; /* the first array entry we store inline */
le64_t entry_array_offset;
le64_t n_entries;
uint8_t payload[];
union {
struct {
uint8_t payload[];
} regular;
struct {
le32_t tail_entry_array_offset;
le32_t tail_entry_array_n_entries;
uint8_t payload[];
} compact;
};
};
```
@ -430,6 +444,9 @@ OBJECT_COMPRESSED_XZ/OBJECT_COMPRESSED_LZ4/OBJECT_COMPRESSED_ZSTD is set in the
`ObjectHeader`, in which case the payload is compressed with the indicated
compression algorithm.
If the `HEADER_INCOMPATIBLE_COMPACT` flag is set, two extra fields are stored to
allow immediate access to the tail entry array in the DATA object's entry array
chain.
## Field Objects

View file

@ -248,7 +248,7 @@ int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uin
case OBJECT_DATA:
/* All but hash and payload are mutable */
gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
gcry_md_write(f->hmac, journal_file_data_payload_field(f, o), le64toh(o->object.size) - journal_file_data_payload_offset(f));
break;
case OBJECT_FIELD:

View file

@ -65,8 +65,17 @@ struct ObjectHeader {
le64_t entry_offset; /* the first array entry we store inline */ \
le64_t entry_array_offset; \
le64_t n_entries; \
uint8_t payload[]; \
}
union { \
struct { \
uint8_t payload[0]; \
} regular; \
struct { \
le32_t tail_entry_array_offset; \
le32_t tail_entry_array_n_entries; \
uint8_t payload[0]; \
} compact; \
}; \
}
struct DataObject DataObject__contents;
struct DataObject__packed DataObject__contents _packed_;
@ -222,12 +231,15 @@ enum {
/* Added in 246 */ \
le64_t data_hash_chain_depth; \
le64_t field_hash_chain_depth; \
/* Added in 252 */ \
le32_t tail_entry_array_offset; \
le32_t tail_entry_array_n_entries; \
}
struct Header struct_Header__contents;
struct Header__packed struct_Header__contents _packed_;
assert_cc(sizeof(struct Header) == sizeof(struct Header__packed));
assert_cc(sizeof(struct Header) == 256);
assert_cc(sizeof(struct Header) == 264);
#define FSS_HEADER_SIGNATURE \
((const char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' })

View file

@ -662,7 +662,7 @@ static int journal_file_move_to(
return mmap_cache_fd_get(f->cache_fd, type_to_context(type), keep_always, offset, size, &f->last_stat, ret);
}
static uint64_t minimum_header_size(Object *o) {
static uint64_t minimum_header_size(JournalFile *f, Object *o) {
static const uint64_t table[] = {
[OBJECT_DATA] = sizeof(DataObject),
@ -674,15 +674,22 @@ static uint64_t minimum_header_size(Object *o) {
[OBJECT_TAG] = sizeof(TagObject),
};
assert(f);
assert(o);
if (o->object.type == OBJECT_DATA)
return journal_file_data_payload_offset(f);
if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
return sizeof(ObjectHeader);
return table[o->object.type];
}
static int check_object_header(Object *o, ObjectType type, uint64_t offset) {
static int check_object_header(JournalFile *f, Object *o, ObjectType type, uint64_t offset) {
uint64_t s;
assert(f);
assert(o);
s = le64toh(READ_NOW(o->object.size));
@ -706,7 +713,7 @@ static int check_object_header(Object *o, ObjectType type, uint64_t offset) {
"Attempt to move to object of unexpected type: %" PRIu64,
offset);
if (s < minimum_header_size(o))
if (s < minimum_header_size(f, o))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Attempt to move to truncated object: %" PRIu64,
offset);
@ -728,10 +735,10 @@ static int check_object(JournalFile *f, Object *o, uint64_t offset) {
le64toh(o->data.n_entries),
offset);
if (le64toh(o->object.size) <= offsetof(Object, data.payload))
if (le64toh(o->object.size) <= journal_file_data_payload_offset(f))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(Object, data.payload),
journal_file_data_payload_offset(f),
le64toh(o->object.size),
offset);
@ -883,7 +890,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
if (r < 0)
return r;
r = check_object_header(o, type, offset);
r = check_object_header(f, o, type, offset);
if (r < 0)
return r;
@ -891,7 +898,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
if (r < 0)
return r;
r = check_object_header(o, type, offset);
r = check_object_header(f, o, type, offset);
if (r < 0)
return r;
@ -935,11 +942,11 @@ int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t of
"Failed to read short object at offset: %" PRIu64,
offset);
r = check_object_header(&o, type, offset);
r = check_object_header(f, &o, type, offset);
if (r < 0)
return r;
if ((size_t) n < minimum_header_size(&o))
if ((size_t) n < minimum_header_size(f, &o))
return log_debug_errno(SYNTHETIC_ERRNO(EIO),
"Short read while reading object: %" PRIu64,
offset);
@ -1541,15 +1548,35 @@ static int journal_file_append_field(
return 0;
}
/* Attempts to compress the 'size' bytes at 'src' into 'dst'.
 *
 * Compression is only tried when the build has HAVE_COMPRESSION enabled, JOURNAL_FILE_COMPRESS() holds
 * for 'f', and the input is at least f->compress_threshold_bytes long. compress_blob() is given an output
 * budget of size - 1 bytes (presumably so that a result is only accepted when it is smaller than the
 * input), and reports the compressed length through *rsize.
 *
 * Returns the positive value handed back by compress_blob() on success. In every other case, including
 * any compress_blob() failure, COMPRESSION_NONE is returned and the caller is expected to store the
 * payload uncompressed. */
static Compression maybe_compress_payload(JournalFile *f, uint8_t *dst, const uint8_t *src, uint64_t size, size_t *rsize) {
#if HAVE_COMPRESSION
        Compression c;

        /* Compression disabled for this file, or payload too small to bother. */
        if (!JOURNAL_FILE_COMPRESS(f) || size < f->compress_threshold_bytes)
                return COMPRESSION_NONE;

        c = compress_blob(src, size, dst, size - 1, rsize);
        if (c <= 0)
                /* Compression didn't work, we don't really care why, let's continue without compression */
                return COMPRESSION_NONE;

        log_debug("Compressed data object %"PRIu64" -> %zu using %s",
                  size, *rsize, compression_to_string(c));

        return c;
#else
        return COMPRESSION_NONE;
#endif
}
static int journal_file_append_data(
JournalFile *f,
const void *data, uint64_t size,
Object **ret, uint64_t *ret_offset) {
uint64_t hash, p, fp, osize;
uint64_t hash, p, osize;
Object *o, *fo;
int r, compression = 0;
size_t rsize = 0;
Compression c;
const void *eq;
int r;
assert(f);
@ -1568,32 +1595,20 @@ static int journal_file_append_data(
if (!eq)
return -EINVAL;
osize = offsetof(Object, data.payload) + size;
osize = journal_file_data_payload_offset(f) + size;
r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
if (r < 0)
return r;
o->data.hash = htole64(hash);
#if HAVE_COMPRESSION
if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) {
size_t rsize = 0;
c = maybe_compress_payload(f, journal_file_data_payload_field(f, o), data, size, &rsize);
compression = compress_blob(data, size, o->data.payload, size - 1, &rsize);
if (compression > COMPRESSION_NONE) {
o->object.size = htole64(offsetof(Object, data.payload) + rsize);
o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(compression);
log_debug("Compressed data object %"PRIu64" -> %zu using %s",
size, rsize, compression_to_string(compression));
} else
/* Compression didn't work, we don't really care why, let's continue without compression */
compression = COMPRESSION_NONE;
}
#endif
if (compression == 0)
memcpy_safe(o->data.payload, data, size);
if (c != COMPRESSION_NONE) {
o->object.size = htole64(journal_file_data_payload_offset(f) + rsize);
o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(c);
} else
memcpy_safe(journal_file_data_payload_field(f, o), data, size);
r = journal_file_link_data(f, o, p, hash);
if (r < 0)
@ -1611,7 +1626,7 @@ static int journal_file_append_data(
#endif
/* Create field object ... */
r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, NULL);
if (r < 0)
return r;
@ -1715,17 +1730,17 @@ int journal_file_data_payload(
}
size = le64toh(READ_NOW(o->object.size));
if (size < offsetof(Object, data.payload))
if (size < journal_file_data_payload_offset(f))
return -EBADMSG;
size -= offsetof(Object, data.payload);
size -= journal_file_data_payload_offset(f);
c = COMPRESSION_FROM_OBJECT(o);
if (c < 0)
return -EPROTONOSUPPORT;
return maybe_decompress_payload(f, o->data.payload, size, c, field, field_length, data_threshold,
ret_data, ret_size);
return maybe_decompress_payload(f, journal_file_data_payload_field(f, o), size, c, field,
field_length, data_threshold, ret_data, ret_size);
}
uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) {
@ -1788,6 +1803,8 @@ static void write_entry_array_item(JournalFile *f, Object *o, uint64_t i, uint64
static int link_entry_into_array(JournalFile *f,
le64_t *first,
le64_t *idx,
le32_t *tail,
le32_t *tidx,
uint64_t p) {
int r;
uint64_t n = 0, ap = 0, q, i, a, hidx;
@ -1799,8 +1816,9 @@ static int link_entry_into_array(JournalFile *f,
assert(idx);
assert(p > 0);
a = le64toh(*first);
i = hidx = le64toh(READ_NOW(*idx));
a = tail ? le32toh(*tail) : le64toh(*first);
hidx = le64toh(READ_NOW(*idx));
i = tidx ? le32toh(READ_NOW(*tidx)) : hidx;
while (a > 0) {
r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
@ -1811,6 +1829,8 @@ static int link_entry_into_array(JournalFile *f,
if (i < n) {
write_entry_array_item(f, o, i, p);
*idx = htole64(hidx + 1);
if (tidx)
*tidx = htole32(le32toh(*tidx) + 1);
return 0;
}
@ -1851,10 +1871,15 @@ static int link_entry_into_array(JournalFile *f,
o->entry_array.next_entry_array_offset = htole64(q);
}
if (tail)
*tail = htole32(q);
if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
*idx = htole64(hidx + 1);
if (tidx)
*tidx = htole32(1);
return 0;
}
@ -1863,6 +1888,8 @@ static int link_entry_into_array_plus_one(JournalFile *f,
le64_t *extra,
le64_t *first,
le64_t *idx,
le32_t *tail,
le32_t *tidx,
uint64_t p) {
uint64_t hidx;
@ -1883,7 +1910,7 @@ static int link_entry_into_array_plus_one(JournalFile *f,
le64_t i;
i = htole64(hidx - 1);
r = link_entry_into_array(f, first, &i, p);
r = link_entry_into_array(f, first, &i, tail, tidx, p);
if (r < 0)
return r;
}
@ -1907,6 +1934,8 @@ static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offs
&o->data.entry_offset,
&o->data.entry_array_offset,
&o->data.n_entries,
JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_offset : NULL,
JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_n_entries : NULL,
offset);
}
@ -1933,6 +1962,8 @@ static int journal_file_link_entry(
r = link_entry_into_array(f,
&f->header->entry_array_offset,
&f->header->n_entries,
JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_offset) ? &f->header->tail_entry_array_offset : NULL,
JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_n_entries) ? &f->header->tail_entry_array_n_entries : NULL,
offset);
if (r < 0)
return r;

View file

@ -223,6 +223,16 @@ int journal_file_data_payload(
void **ret_data,
size_t *ret_size);
/* Returns the offset of the DATA object payload, measured from the start of Object.
 *
 * The result depends on the file's layout: when JOURNAL_HEADER_COMPACT() holds for the file header the
 * payload of the "compact" variant is used, otherwise the payload of the "regular" variant. */
static inline size_t journal_file_data_payload_offset(JournalFile *f) {
        if (JOURNAL_HEADER_COMPACT(f->header))
                return offsetof(Object, data.compact.payload);

        return offsetof(Object, data.regular.payload);
}
/* Returns a pointer to the first payload byte of the DATA object 'o'.
 *
 * Picks the "compact" or the "regular" payload member of the object, depending on whether
 * JOURNAL_HEADER_COMPACT() holds for the header of 'f'. */
static inline uint8_t* journal_file_data_payload_field(JournalFile *f, Object *o) {
        if (JOURNAL_HEADER_COMPACT(f->header))
                return o->data.compact.payload;

        return o->data.regular.payload;
}
uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) _pure_;
static inline uint64_t journal_file_entry_array_item(JournalFile *f, Object *o, size_t i) {

View file

@ -170,16 +170,16 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
return -EBADMSG;
}
if (le64toh(o->object.size) - offsetof(Object, data.payload) <= 0) {
if (le64toh(o->object.size) - journal_file_data_payload_offset(f) <= 0) {
error(offset, "Bad object size (<= %zu): %"PRIu64,
offsetof(Object, data.payload),
journal_file_data_payload_offset(f),
le64toh(o->object.size));
return -EBADMSG;
}
h1 = le64toh(o->data.hash);
r = hash_payload(f, o, offset, o->data.payload,
le64toh(o->object.size) - offsetof(Object, data.payload),
r = hash_payload(f, o, offset, journal_file_data_payload_field(f, o),
le64toh(o->object.size) - journal_file_data_payload_offset(f),
&h2);
if (r < 0)
return r;