test: append to corrupted journals

Introduce a manual test tool that creates a journal, corrupts it by
flipping bits at given offsets, and then attempts to write to the journal.
Ideally, we should handle this gracefully, without any crash or
memory corruption.
This commit is contained in:
Frantisek Sumsal 2023-05-13 17:39:35 +02:00
parent 6bbfa9c214
commit 29bdeb5cb3
3 changed files with 323 additions and 0 deletions

View file

@ -111,6 +111,11 @@ tests += [
'sources' : files('test-journal.c'),
'base' : test_journal_base,
},
{
'sources' : files('test-journal-append.c'),
'base' : test_journal_base,
'type' : 'manual',
},
]
fuzzer_journald_base = {

View file

@ -0,0 +1,272 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <unistd.h>
#include "chattr-util.h"
#include "fd-util.h"
#include "fs-util.h"
#include "io-util.h"
#include "log.h"
#include "managed-journal-file.h"
#include "mmap-cache.h"
#include "parse-util.h"
#include "random-util.h"
#include "rm-rf.h"
#include "strv.h"
#include "terminal-util.h"
#include "tests.h"
#include "tmpfile-util.h"
/* Appends one entry made of the single field 'message' to the journal file wrapped by 'mj'.
 * The entry carries a timestamp obtained from dual_timestamp_get(); boot ID, sequence number
 * and all output parameters are left unset (NULL).
 *
 * Returns the result of journal_file_append_entry() unchanged. */
static int journal_append_message(ManagedJournalFile *mj, const char *message) {
        struct dual_timestamp now;
        struct iovec field;

        assert(mj);
        assert(message);

        /* The payload and the timestamp are independent of each other */
        field = IOVEC_MAKE_STRING(message);
        dual_timestamp_get(&now);

        return journal_file_append_entry(
                        mj->file,
                        &now,
                        /* boot_id= */ NULL,
                        &field,
                        /* n_iovec= */ 1,
                        /* seqnum= */ NULL,
                        /* seqnum_id= */ NULL,
                        /* ret_object= */ NULL,
                        /* ret_offset= */ NULL);
}
static int journal_corrupt_and_append(uint64_t start_offset, uint64_t step) {
_cleanup_(mmap_cache_unrefp) MMapCache *mmap_cache = NULL;
_cleanup_(rm_rf_physical_and_freep) char *tempdir = NULL;
_cleanup_(managed_journal_file_closep) ManagedJournalFile *mj = NULL;
uint64_t start, end;
int r;
mmap_cache = mmap_cache_new();
assert_se(mmap_cache);
/* managed_journal_file_open() requires a valid machine id */
if (sd_id128_get_machine(NULL) < 0)
return log_tests_skipped("No valid machine ID found");
assert_se(mkdtemp_malloc("/tmp/journal-append-XXXXXX", &tempdir) >= 0);
assert_se(chdir(tempdir) >= 0);
(void) chattr_path(tempdir, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
log_debug("Opening journal %s/system.journal", tempdir);
r = managed_journal_file_open(
/* fd= */ -1,
"system.journal",
O_RDWR|O_CREAT,
JOURNAL_COMPRESS,
0644,
/* compress_threshold_bytes= */ UINT64_MAX,
/* metrics= */ NULL,
mmap_cache,
/* deferred_closes= */ NULL,
/* template= */ NULL,
&mj);
if (r < 0)
return log_error_errno(r, "Failed to open the journal: %m");
assert_se(mj);
assert_se(mj->file);
/* Add a couple of initial messages */
for (int i = 0; i < 10; i++) {
_cleanup_free_ char *message = NULL;
assert_se(asprintf(&message, "MESSAGE=Initial message %d", i) >= 0);
r = journal_append_message(mj, message);
if (r < 0)
return log_error_errno(r, "Failed to write to the journal: %m");
}
start = start_offset == UINT64_MAX ? random_u64() % mj->file->last_stat.st_size : start_offset;
end = (uint64_t) mj->file->last_stat.st_size;
/* Print the initial offset at which we start flipping bits, which can be
* later used to reproduce a potential fail */
log_info("Start offset: %" PRIu64 ", corrupt-step: %" PRIu64, start, step);
fflush(stdout);
if (start >= end)
return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
"Start offset >= journal size, can't continue");
for (uint64_t offset = start; offset < end; offset += step) {
_cleanup_free_ char *message = NULL;
uint8_t b;
/* Flip a bit in the journal file */
r = pread(mj->file->fd, &b, 1, offset);
assert_se(r == 1);
b |= 0x1;
r = pwrite(mj->file->fd, &b, 1, offset);
assert_se(r == 1);
/* Close and reopen the journal to flush all caches and remap
* the corrupted journal */
mj = managed_journal_file_close(mj);
r = managed_journal_file_open(
/* fd= */ -1,
"system.journal",
O_RDWR|O_CREAT,
JOURNAL_COMPRESS,
0644,
/* compress_threshold_bytes= */ UINT64_MAX,
/* metrics= */ NULL,
mmap_cache,
/* deferred_closes= */ NULL,
/* template= */ NULL,
&mj);
if (r < 0) {
/* The corrupted journal might get rejected during reopening
* if it's corrupted enough (especially its header), so
* treat this as a success if it doesn't crash */
log_info_errno(r, "Failed to reopen the journal: %m");
break;
}
/* Try to write something to the (possibly corrupted) journal */
assert_se(asprintf(&message, "MESSAGE=Hello world %" PRIu64, offset) >= 0);
r = journal_append_message(mj, message);
if (r < 0) {
/* We care only about crashes or sanitizer errors,
* failed write without any crash is a success */
log_info_errno(r, "Failed to write to the journal: %m");
break;
}
}
return 0;
}
/* Entry point of the manual test tool: parses the command line and runs
 * journal_corrupt_and_append() either once (--run-one=) or --iterations= times.
 *
 * In sequential mode iteration i starts corrupting at (start offset or 0) + i * iteration step;
 * otherwise UINT64_MAX is passed down, which makes every iteration pick a random start offset.
 *
 * Exit status: EXIT_SUCCESS on success, EXIT_FAILURE on invalid arguments or if an iteration
 * reported an error. In --run-one= mode a positive result of journal_corrupt_and_append() is
 * passed through unchanged as the exit status. */
int main(int argc, char *argv[]) {
        uint64_t start_offset = UINT64_MAX;
        uint64_t iterations = 100;
        uint64_t iteration_step = 1;
        uint64_t corrupt_step = 31;
        bool sequential = false, run_one = false;
        int c, r;

        test_setup_logging(LOG_DEBUG);

        enum {
                ARG_START_OFFSET = 0x1000,
                ARG_ITERATIONS,
                ARG_ITERATION_STEP,
                ARG_CORRUPT_STEP,
                ARG_SEQUENTIAL,
                ARG_RUN_ONE,
        };

        static const struct option options[] = {
                { "help",           no_argument,       NULL, 'h'                },
                { "start-offset",   required_argument, NULL, ARG_START_OFFSET   },
                { "iterations",     required_argument, NULL, ARG_ITERATIONS     },
                { "iteration-step", required_argument, NULL, ARG_ITERATION_STEP },
                { "corrupt-step",   required_argument, NULL, ARG_CORRUPT_STEP   },
                { "sequential",     no_argument,       NULL, ARG_SEQUENTIAL     },
                { "run-one",        required_argument, NULL, ARG_RUN_ONE        },
                {}
        };

        assert_se(argc >= 0);
        assert_se(argv);

        /* Note: main() must return a valid exit status, hence parse errors are logged and
         * mapped to EXIT_FAILURE instead of returning a negative errno value. */
        while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
                switch (c) {

                case 'h':
                        printf("Syntax:\n"
                               " %s [OPTION...]\n"
                               "Options:\n"
                               " --start-offset=OFFSET Offset at which to start corrupting the journal\n"
                               " (default: random offset is picked, unless\n"
                               " --sequential is used - in that case we use 0 + iteration)\n"
                               " --iterations=ITER Number of iterations to perform before exiting\n"
                               " (default: 100)\n"
                               " --iteration-step=STEP Iteration step (default: 1)\n"
                               " --corrupt-step=STEP Corrupt every n-th byte starting from OFFSET (default: 31)\n"
                               " --sequential Go through offsets sequentially instead of picking\n"
                               " a random one on each iteration. If set, we go through\n"
                               " offsets <0; ITER), or <OFFSET, ITER) if --start-offset=\n"
                               " is set (default: false)\n"
                               " --run-one=OFFSET Single shot mode for reproducing issues. Takes the same\n"
                               " offset as --start-offset= and does only one iteration\n"
                               , program_invocation_short_name);
                        return 0;

                case ARG_START_OFFSET:
                        r = safe_atou64(optarg, &start_offset);
                        if (r < 0) {
                                log_error_errno(r, "Invalid starting offset: %m");
                                return EXIT_FAILURE;
                        }
                        break;

                case ARG_ITERATIONS:
                        r = safe_atou64(optarg, &iterations);
                        if (r < 0) {
                                log_error_errno(r, "Invalid value for iterations: %m");
                                return EXIT_FAILURE;
                        }
                        break;

                case ARG_CORRUPT_STEP:
                        r = safe_atou64(optarg, &corrupt_step);
                        /* A step of zero would make the corruption loop never advance */
                        if (r >= 0 && corrupt_step == 0)
                                r = -EINVAL;
                        if (r < 0) {
                                log_error_errno(r, "Invalid value for corrupt-step: %m");
                                return EXIT_FAILURE;
                        }
                        break;

                case ARG_ITERATION_STEP:
                        r = safe_atou64(optarg, &iteration_step);
                        if (r < 0) {
                                log_error_errno(r, "Invalid value for iteration-step: %m");
                                return EXIT_FAILURE;
                        }
                        break;

                case ARG_SEQUENTIAL:
                        sequential = true;
                        break;

                case ARG_RUN_ONE:
                        r = safe_atou64(optarg, &start_offset);
                        if (r < 0) {
                                log_error_errno(r, "Invalid offset: %m");
                                return EXIT_FAILURE;
                        }
                        run_one = true;
                        break;

                case '?':
                        /* getopt_long() has already printed a diagnostic */
                        return EXIT_FAILURE;

                default:
                        assert_not_reached();
                }

        if (run_one) {
                /* Reproducer mode: a single run at the given offset. Negative errors become
                 * EXIT_FAILURE, any other result is used as the exit status as-is. */
                r = journal_corrupt_and_append(start_offset, corrupt_step);
                return r < 0 ? EXIT_FAILURE : r;
        }

        for (uint64_t i = 0; i < iterations; i++) {
                /* UINT64_MAX asks journal_corrupt_and_append() to pick a random offset */
                uint64_t offset = UINT64_MAX;

                log_info("Iteration #%" PRIu64 ", step: %" PRIu64, i, iteration_step);

                if (sequential)
                        offset = (start_offset == UINT64_MAX ? 0 : start_offset) + i * iteration_step;

                r = journal_corrupt_and_append(offset, corrupt_step);
                if (r < 0)
                        return EXIT_FAILURE;
                if (r > 0)
                        /* A positive result means there is nothing more to do; stop iterating.
                         * NOTE(review): in the visible code only the "test skipped" path looks
                         * like it can return a positive value - confirm. */
                        break;
        }

        return EXIT_SUCCESS;
}

View file

@ -0,0 +1,46 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
set -eux
set -o pipefail

# test-journal-append corrupts the journal file by flipping a bit at a given offset and
# following it by a write to check if we handle appending messages to corrupted journals
# gracefully

TEST_JOURNAL_APPEND=/usr/lib/systemd/tests/unit-tests/manual/test-journal-append

# Fail early (via set -e) if the test binary is not installed
[[ -x "$TEST_JOURNAL_APPEND" ]]

# Corrupt the first ~1024 bytes (350 iterations with a step of 3), this should be pretty quick
"$TEST_JOURNAL_APPEND" --sequential --start-offset=0 --iterations=350 --iteration-step=3

# Skip most of the test when running without acceleration, as it's excruciatingly slow
# (this shouldn't be an issue, as it should run in nspawn as well)
if [[ "$(systemd-detect-virt -v)" != "qemu" ]]; then
    # Corrupt the beginning of every 1K block between 1K - 32K
    for ((i = 1024; i <= (32 * 1024); i += 1024)); do
        "$TEST_JOURNAL_APPEND" --sequential --start-offset="$i" --iterations=5 --iteration-step=13
    done

    # Corrupt the beginning of every 16K block between 32K - 256K
    for ((i = (32 * 1024); i <= (256 * 1024); i += (16 * 1024))); do
        "$TEST_JOURNAL_APPEND" --sequential --start-offset="$i" --iterations=5 --iteration-step=13
    done

    # Corrupt the beginning of every 128K block between 128K - 1M
    for ((i = (128 * 1024); i <= (1 * 1024 * 1024); i += (128 * 1024))); do
        "$TEST_JOURNAL_APPEND" --sequential --start-offset="$i" --iterations=5 --iteration-step=13
    done

    # And finally the beginning of every 1M block between 1M and 8M
    for ((i = (1 * 1024 * 1024); i < (8 * 1024 * 1024); i += (1 * 1024 * 1024))); do
        "$TEST_JOURNAL_APPEND" --sequential --start-offset="$i" --iterations=5 --iteration-step=13
    done

    if [[ "$(nproc)" -ge 2 ]]; then
        # Try to corrupt random bytes throughout the journal
        "$TEST_JOURNAL_APPEND" --iterations=25
    fi
else
    # Slow environment: only a handful of iterations at random offsets
    "$TEST_JOURNAL_APPEND" --iterations=10
fi