journal: Use 32-bit entry item object offsets in compact mode

To do this, we move EntryItem out of journal-def.h and turn it into
a host-only struct in native endianness, so we can still use it to
pass the necessary info around.

Aside from that, the changes are pretty simple: we introduce some
extra functions to access the right field depending on the mode and
convert all the other code to use those functions instead of
accessing the raw fields.

We also drop the unused entry item hash field in compact mode. We
already stopped doing anything with this field a while ago, now we
actually drop it from the format in compact mode.
This commit is contained in:
Daan De Meyer 2021-11-02 20:50:39 +00:00
parent 99daf3ce03
commit a9089a6604
6 changed files with 107 additions and 62 deletions

View file

@ -461,11 +461,6 @@ field name. It is the head of a singly linked list using DATA's
## Entry Objects
```
_packed_ struct EntryItem {
le64_t object_offset;
le64_t hash;
};
_packed_ struct EntryObject {
ObjectHeader object;
le64_t seqnum;
@ -473,7 +468,15 @@ _packed_ struct EntryObject {
le64_t monotonic;
sd_id128_t boot_id;
le64_t xor_hash;
EntryItem items[];
union {
struct {
le64_t object_offset;
le64_t hash;
} regular[];
struct {
le32_t object_offset;
} compact[];
} items;
};
```
@ -499,6 +502,10 @@ The **items[]** array contains references to all DATA objects of this entry,
plus their respective hashes (which are calculated the same way as in the DATA
objects, i.e. keyed by the file ID).
If the `HEADER_INCOMPATIBLE_COMPACT` flag is set, DATA object offsets are stored
as 32-bit integers instead of 64-bit ones, and the unused hash field per DATA
object is no longer stored.
In the file ENTRY objects are written ordered monotonically by sequence
number. For continuous parts of the file written during the same boot
(i.e. with the same boot_id) the monotonic timestamp is monotonic too. Modulo

View file

@ -24,7 +24,6 @@ typedef struct HashTableObject HashTableObject;
typedef struct EntryArrayObject EntryArrayObject;
typedef struct TagObject TagObject;
typedef struct EntryItem EntryItem;
typedef struct HashItem HashItem;
typedef struct FSSHeader FSSHeader;
@ -85,20 +84,23 @@ struct FieldObject FieldObject__contents;
struct FieldObject__packed FieldObject__contents _packed_;
assert_cc(sizeof(struct FieldObject) == sizeof(struct FieldObject__packed));
struct EntryItem {
le64_t object_offset;
le64_t hash;
} _packed_;
#define EntryObject__contents { \
ObjectHeader object; \
le64_t seqnum; \
le64_t realtime; \
le64_t monotonic; \
sd_id128_t boot_id; \
le64_t xor_hash; \
EntryItem items[]; \
}
/* Field list shared by the EntryObject struct declarations that expand this macro.
 *
 * The trailing "items" union describes two alternative per-item layouts:
 *   - regular[]: a little-endian 64-bit object offset followed by a little-endian 64-bit hash
 *   - compact[]: a little-endian 32-bit object offset only
 *
 * Both arrays are declared with zero length, so they add nothing to the size of the struct itself;
 * the items are laid out directly after the fixed fields. */
#define EntryObject__contents { \
ObjectHeader object; \
le64_t seqnum; \
le64_t realtime; \
le64_t monotonic; \
sd_id128_t boot_id; \
le64_t xor_hash; \
union { \
struct { \
le64_t object_offset; \
le64_t hash; \
} regular[0]; \
struct { \
le32_t object_offset; \
} compact[0]; \
} items; \
}
struct EntryObject EntryObject__contents;
struct EntryObject__packed EntryObject__contents _packed_;

View file

@ -771,17 +771,17 @@ static int check_object(JournalFile *f, Object *o, uint64_t offset) {
sz = le64toh(READ_NOW(o->object.size));
if (sz < offsetof(Object, entry.items) ||
(sz - offsetof(Object, entry.items)) % sizeof(EntryItem) != 0)
(sz - offsetof(Object, entry.items)) % journal_file_entry_item_size(f) != 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(Object, entry.items),
sz,
offset);
if ((sz - offsetof(Object, entry.items)) / sizeof(EntryItem) <= 0)
if ((sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid number items in entry: %" PRIu64 ": %" PRIu64,
(sz - offsetof(Object, entry.items)) / sizeof(EntryItem),
(sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f),
offset);
if (le64toh(o->entry.seqnum) <= 0)
@ -1658,8 +1658,10 @@ static int journal_file_append_data(
return 0;
}
uint64_t journal_file_entry_n_items(Object *o) {
uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) {
uint64_t sz;
assert(f);
assert(o);
if (o->object.type != OBJECT_ENTRY)
@ -1669,7 +1671,7 @@ uint64_t journal_file_entry_n_items(Object *o) {
if (sz < offsetof(Object, entry.items))
return 0;
return (sz - offsetof(Object, entry.items)) / sizeof(EntryItem);
return (sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f);
}
uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) {
@ -1820,15 +1822,13 @@ static int link_entry_into_array_plus_one(JournalFile *f,
return 0;
}
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
uint64_t p;
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t p) {
int r;
assert(f);
assert(o);
assert(offset > 0);
p = le64toh(o->entry.items[i].object_offset);
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
@ -1840,8 +1840,13 @@ static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offs
offset);
}
static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
uint64_t n;
static int journal_file_link_entry(
JournalFile *f,
Object *o,
uint64_t offset,
const EntryItem items[],
size_t n_items) {
int r;
assert(f);
@ -1871,15 +1876,14 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
f->header->tail_entry_monotonic = o->entry.monotonic;
/* Link up the items */
n = journal_file_entry_n_items(o);
for (uint64_t i = 0; i < n; i++) {
for (uint64_t i = 0; i < n_items; i++) {
int k;
/* If we fail to link an entry item because we can't allocate a new entry array, don't fail
* immediately but try to link the other entry items since it might still be possible to link
* those if they don't require a new entry array to be allocated. */
k = journal_file_link_entry_item(f, o, offset, i);
k = journal_file_link_entry_item(f, o, offset, items[i].object_offset);
if (k == -E2BIG)
r = k;
else if (k < 0)
@ -1889,12 +1893,26 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
return r;
}
/* Stores the entry item described by `item` into slot `i` of the entry object `o`.
 *
 * `item` holds its values in host byte order; they are converted to little endian while being written.
 * When the header of `f` has the compact flag set, only the object offset is stored, as a 32-bit value
 * (it is asserted to fit). Otherwise both the 64-bit object offset and the 64-bit hash are stored. */
static void write_entry_item(JournalFile *f, Object *o, uint64_t i, const EntryItem *item) {
        assert(f);
        assert(o);
        assert(item);

        if (!JOURNAL_HEADER_COMPACT(f->header)) {
                /* Regular layout: offset and hash, 64 bits each. */
                o->entry.items.regular[i].object_offset = htole64(item->object_offset);
                o->entry.items.regular[i].hash = htole64(item->hash);
                return;
        }

        /* Compact layout: the offset must be representable in 32 bits, and no hash is stored. */
        assert(item->object_offset <= UINT32_MAX);
        o->entry.items.compact[i].object_offset = htole32(item->object_offset);
}
static int journal_file_append_entry_internal(
JournalFile *f,
const dual_timestamp *ts,
const sd_id128_t *boot_id,
uint64_t xor_hash,
const EntryItem items[], unsigned n_items,
const EntryItem items[], size_t n_items,
uint64_t *seqnum,
Object **ret, uint64_t *ret_offset) {
uint64_t np;
@ -1907,14 +1925,13 @@ static int journal_file_append_entry_internal(
assert(items || n_items == 0);
assert(ts);
osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f));
r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
if (r < 0)
return r;
o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
memcpy_safe(o->entry.items, items, n_items * sizeof(EntryItem));
o->entry.realtime = htole64(ts->realtime);
o->entry.monotonic = htole64(ts->monotonic);
o->entry.xor_hash = htole64(xor_hash);
@ -1922,13 +1939,16 @@ static int journal_file_append_entry_internal(
f->header->boot_id = *boot_id;
o->entry.boot_id = f->header->boot_id;
for (size_t i = 0; i < n_items; i++)
write_entry_item(f, o, i, &items[i]);
#if HAVE_GCRYPT
r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
if (r < 0)
return r;
#endif
r = journal_file_link_entry(f, o, np);
r = journal_file_link_entry(f, o, np, items, n_items);
if (r < 0)
return r;
@ -2031,12 +2051,10 @@ int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t)
}
static int entry_item_cmp(const EntryItem *a, const EntryItem *b) {
return CMP(le64toh(a->object_offset), le64toh(b->object_offset));
return CMP(a->object_offset, b->object_offset);
}
static size_t remove_duplicate_entry_items(EntryItem items[], size_t n) {
/* This function relies on the items array being sorted. */
size_t j = 1;
if (n <= 1)
@ -2111,8 +2129,8 @@ int journal_file_append_entry(
xor_hash ^= le64toh(o->data.hash);
items[i] = (EntryItem) {
.object_offset = htole64(p),
.hash = o->data.hash,
.object_offset = p,
.hash = le64toh(o->data.hash),
};
}
@ -3785,7 +3803,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
};
boot_id = &o->entry.boot_id;
n = journal_file_entry_n_items(o);
n = journal_file_entry_n_items(from, o);
items = newa(EntryItem, n);
for (uint64_t i = 0; i < n; i++) {
@ -3795,7 +3813,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
void *data;
Object *u;
q = le64toh(o->entry.items[i].object_offset);
q = journal_file_entry_item_object_offset(from, o, i);
r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
if (r < 0)
@ -3848,8 +3866,8 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
xor_hash ^= le64toh(u->data.hash);
items[i] = (EntryItem) {
.object_offset = htole64(h),
.hash = u->data.hash,
.object_offset = h,
.hash = le64toh(u->data.hash),
};
r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);

View file

@ -127,6 +127,11 @@ typedef enum JournalFileFlags {
JOURNAL_SEAL = 1 << 1,
} JournalFileFlags;
/* In-memory description of one entry item, used to pass item data between functions.
 * Both fields are plain host-byte-order integers; conversion to the little-endian on-disk
 * representation happens when an item is written into an entry object. */
typedef struct {
uint64_t object_offset; /* file offset of the DATA object this item refers to */
uint64_t hash;          /* hash of that DATA object; only written to disk in the non-compact layout */
} EntryItem;
int journal_file_open(
int fd,
const char *fname,
@ -193,7 +198,20 @@ int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t of
int journal_file_tail_end_by_pread(JournalFile *f, uint64_t *ret_offset);
int journal_file_tail_end_by_mmap(JournalFile *f, uint64_t *ret_offset);
uint64_t journal_file_entry_n_items(Object *o) _pure_;
/* Returns the object offset stored in item `i` of the entry object `o`, converted to host byte order.
 * The compact flag in the header of `f` decides whether the 32-bit (compact) or the 64-bit (regular)
 * item layout is read. The index `i` is not range-checked here. */
static inline uint64_t journal_file_entry_item_object_offset(JournalFile *f, Object *o, size_t i) {
        assert(f);
        assert(o);

        if (JOURNAL_HEADER_COMPACT(f->header))
                return le32toh(o->entry.items.compact[i].object_offset);

        return le64toh(o->entry.items.regular[i].object_offset);
}
/* Returns the size in bytes of a single entry item as stored in `f`: the size of one compact item
 * when the header has the compact flag set, the size of one regular item otherwise. */
static inline size_t journal_file_entry_item_size(JournalFile *f) {
        assert(f);

        if (JOURNAL_HEADER_COMPACT(f->header))
                return sizeof_field(Object, entry.items.compact[0]);

        return sizeof_field(Object, entry.items.regular[0]);
}
uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) _pure_;
uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) _pure_;
static inline uint64_t journal_file_entry_array_item(JournalFile *f, Object *o, size_t i) {

View file

@ -240,7 +240,7 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
}
case OBJECT_ENTRY:
if ((le64toh(o->object.size) - offsetof(Object, entry.items)) % sizeof(EntryItem) != 0) {
if ((le64toh(o->object.size) - offsetof(Object, entry.items)) % journal_file_entry_item_size(f) != 0) {
error(offset,
"Bad entry size (<= %zu): %"PRIu64,
offsetof(Object, entry.items),
@ -248,10 +248,10 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
return -EBADMSG;
}
if ((le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem) <= 0) {
if ((le64toh(o->object.size) - offsetof(Object, entry.items)) / journal_file_entry_item_size(f) <= 0) {
error(offset,
"Invalid number items in entry: %"PRIu64,
(le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem));
(le64toh(o->object.size) - offsetof(Object, entry.items)) / journal_file_entry_item_size(f));
return -EBADMSG;
}
@ -276,13 +276,13 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
return -EBADMSG;
}
for (uint64_t i = 0; i < journal_file_entry_n_items(o); i++) {
if (le64toh(o->entry.items[i].object_offset) == 0 ||
!VALID64(le64toh(o->entry.items[i].object_offset))) {
for (uint64_t i = 0; i < journal_file_entry_n_items(f, o); i++) {
if (journal_file_entry_item_object_offset(f, o, i) == 0 ||
!VALID64(journal_file_entry_item_object_offset(f, o, i))) {
error(offset,
"Invalid entry item (%"PRIu64"/%"PRIu64") offset: "OFSfmt,
i, journal_file_entry_n_items(o),
le64toh(o->entry.items[i].object_offset));
i, journal_file_entry_n_items(f, o),
journal_file_entry_item_object_offset(f, o, i));
return -EBADMSG;
}
}
@ -646,12 +646,12 @@ static int verify_entry(
assert(o);
assert(cache_data_fd);
n = journal_file_entry_n_items(o);
n = journal_file_entry_n_items(f, o);
for (i = 0; i < n; i++) {
uint64_t q;
Object *u;
q = le64toh(o->entry.items[i].object_offset);
q = journal_file_entry_item_object_offset(f, o, i);
if (!contains_uint64(cache_data_fd, n_data, q)) {
error(p, "Invalid data object of entry");

View file

@ -2287,14 +2287,14 @@ _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **
field_length = strlen(field);
uint64_t n = journal_file_entry_n_items(o);
uint64_t n = journal_file_entry_n_items(f, o);
for (uint64_t i = 0; i < n; i++) {
Object *d;
uint64_t p, l;
size_t t;
Compression c;
p = le64toh(o->entry.items[i].object_offset);
p = journal_file_entry_item_object_offset(f, o, i);
r = journal_file_move_to_object(f, OBJECT_DATA, p, &d);
if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i);
@ -2435,10 +2435,10 @@ _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t
if (r < 0)
return r;
for (uint64_t n = journal_file_entry_n_items(o); j->current_field < n; j->current_field++) {
for (uint64_t n = journal_file_entry_n_items(f, o); j->current_field < n; j->current_field++) {
uint64_t p;
p = le64toh(o->entry.items[j->current_field].object_offset);
p = journal_file_entry_item_object_offset(f, o, j->current_field);
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", j->current_field);