journal: Move more pattern matching logic into pcre2-util

To avoid having "#if HAVE_PCRE2" all throughout the code, let's
confine the pcre2 header specific stuff to pcre2-util.c. Instead of
exposing all the individual symbols from pcre2, let's only expose
three high-level functions that do all we need:

- pattern_compile_and_log(): Compile the regex, logging on failure
- pattern_matches_and_log(): Check if the compiled regex matches a message
- pattern_free(): Free the compiled regex

We keep using pcre2_code * as the type of compiled patterns. So that
code working with compiled patterns still builds without including
pcre2.h, the accompanying header typedefs pcre2_code to an empty struct
when PCRE2 support is not compiled in.
This commit is contained in:
Daan De Meyer 2022-07-22 14:49:42 +02:00
parent 05abe85033
commit 75db32dcd8
3 changed files with 148 additions and 113 deletions

View file

@ -13,11 +13,6 @@
#include <sys/stat.h>
#include <unistd.h>
#if HAVE_PCRE2
# define PCRE2_CODE_UNIT_WIDTH 8
# include <pcre2.h>
#endif
#include "sd-bus.h"
#include "sd-device.h"
#include "sd-journal.h"
@ -133,11 +128,9 @@ static uint64_t arg_vacuum_size = 0;
static uint64_t arg_vacuum_n_files = 0;
static usec_t arg_vacuum_time = 0;
static char **arg_output_fields = NULL;
#if HAVE_PCRE2
static const char *arg_pattern = NULL;
static pcre2_code *arg_compiled_pattern = NULL;
static int arg_case_sensitive = -1; /* -1 means be smart */
#endif
static PatternCompileCase arg_case = PATTERN_COMPILE_CASE_AUTO;
STATIC_DESTRUCTOR_REGISTER(arg_file, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_facilities, set_freep);
@ -148,9 +141,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_user_units, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
STATIC_DESTRUCTOR_REGISTER(arg_output_fields, strv_freep);
#if HAVE_PCRE2
STATIC_DESTRUCTOR_REGISTER(arg_compiled_pattern, sym_pcre2_code_freep);
#endif
STATIC_DESTRUCTOR_REGISTER(arg_compiled_pattern, pattern_freep);
static enum {
ACTION_SHOW,
@ -180,29 +171,6 @@ typedef struct BootId {
LIST_FIELDS(struct BootId, boot_list);
} BootId;
#if HAVE_PCRE2
/* Compile 'pattern' with the given PCRE2 option 'flags'.
 *
 * On success the compiled pattern is stored in *out and 0 is returned. On failure the PCRE2 error
 * text (or "unknown error" if it cannot be obtained) is logged and the result of log_error_errno()
 * for a synthetic EINVAL is returned; *out is left untouched in that case. */
static int pattern_compile(const char *pattern, unsigned flags, pcre2_code **out) {
        PCRE2_SIZE error_offset;
        pcre2_code *compiled;
        int error_code;

        compiled = sym_pcre2_compile((PCRE2_SPTR8) pattern,
                                     PCRE2_ZERO_TERMINATED, flags, &error_code, &error_offset, NULL);
        if (compiled) {
                *out = compiled;
                return 0;
        }

        /* Compilation failed: translate the PCRE2 error code into readable text for the log. */
        unsigned char text[LINE_MAX];
        int k = sym_pcre2_get_error_message(error_code, text, sizeof text);

        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                               "Bad pattern \"%s\": %s", pattern,
                               k < 0 ? "unknown error" : (char *)text);
}
#endif
static int add_matches_for_device(sd_journal *j, const char *devpath) {
_cleanup_(sd_device_unrefp) sd_device *device = NULL;
sd_device *d = NULL;
@ -918,7 +886,6 @@ static int parse_argv(int argc, char *argv[]) {
break;
}
#if HAVE_PCRE2
case 'g':
arg_pattern = optarg;
break;
@ -928,16 +895,11 @@ static int parse_argv(int argc, char *argv[]) {
r = parse_boolean(optarg);
if (r < 0)
return log_error_errno(r, "Bad --case-sensitive= argument \"%s\": %m", optarg);
arg_case_sensitive = r;
arg_case = r ? PATTERN_COMPILE_CASE_SENSITIVE : PATTERN_COMPILE_CASE_INSENSITIVE;
} else
arg_case_sensitive = true;
arg_case = PATTERN_COMPILE_CASE_SENSITIVE;
break;
#else
case 'g':
case ARG_CASE_SENSITIVE:
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Compiled without pattern matching support");
#endif
case 'S':
r = parse_timestamp(optarg, &arg_since);
@ -1114,44 +1076,11 @@ static int parse_argv(int argc, char *argv[]) {
arg_system_units = strv_free(arg_system_units);
}
#if HAVE_PCRE2
if (arg_pattern) {
unsigned flags;
r = dlopen_pcre2();
if (r < 0)
return r;
if (arg_case_sensitive >= 0)
flags = !arg_case_sensitive * PCRE2_CASELESS;
else {
_cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *md = NULL;
bool has_case;
_cleanup_(sym_pcre2_code_freep) pcre2_code *cs = NULL;
md = sym_pcre2_match_data_create(1, NULL);
if (!md)
return log_oom();
r = pattern_compile("[[:upper:]]", 0, &cs);
if (r < 0)
return r;
r = sym_pcre2_match(cs, (PCRE2_SPTR8) arg_pattern, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL);
has_case = r >= 0;
flags = !has_case * PCRE2_CASELESS;
}
log_debug("Doing case %s matching based on %s",
flags & PCRE2_CASELESS ? "insensitive" : "sensitive",
arg_case_sensitive >= 0 ? "request" : "pattern casing");
r = pattern_compile(arg_pattern, flags, &arg_compiled_pattern);
r = pattern_compile_and_log(arg_pattern, arg_case, &arg_compiled_pattern);
if (r < 0)
return r;
}
#endif
return 1;
}
@ -2703,16 +2632,9 @@ int main(int argc, char *argv[]) {
}
}
#if HAVE_PCRE2
if (arg_compiled_pattern) {
_cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *md = NULL;
const void *message;
size_t len;
PCRE2_SIZE *ovec;
md = sym_pcre2_match_data_create(1, NULL);
if (!md)
return log_oom();
r = sd_journal_get_data(j, "MESSAGE", &message, &len);
if (r < 0) {
@ -2727,33 +2649,15 @@ int main(int argc, char *argv[]) {
assert_se(message = startswith(message, "MESSAGE="));
r = sym_pcre2_match(arg_compiled_pattern,
message,
len - strlen("MESSAGE="),
0, /* start at offset 0 in the subject */
0, /* default options */
md,
NULL);
if (r == PCRE2_ERROR_NOMATCH) {
r = pattern_matches_and_log(arg_compiled_pattern, message,
len - strlen("MESSAGE="), highlight);
if (r < 0)
goto finish;
if (r == 0) {
need_seek = true;
continue;
}
if (r < 0) {
unsigned char buf[LINE_MAX];
int r2;
r2 = sym_pcre2_get_error_message(r, buf, sizeof buf);
log_error("Pattern matching failed: %s",
r2 < 0 ? "unknown error" : (char*) buf);
r = -EINVAL;
goto finish;
}
ovec = sym_pcre2_get_ovector_pointer(md);
highlight[0] = ovec[0];
highlight[1] = ovec[1];
}
#endif
flags =
arg_all * OUTPUT_SHOW_ALL |

View file

@ -14,8 +14,10 @@ pcre2_code* (*sym_pcre2_compile)(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_
int (*sym_pcre2_get_error_message)(int, PCRE2_UCHAR *, PCRE2_SIZE);
int (*sym_pcre2_match)(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, pcre2_match_data *, pcre2_match_context *);
PCRE2_SIZE* (*sym_pcre2_get_ovector_pointer)(pcre2_match_data *);
#endif
int dlopen_pcre2(void) {
#if HAVE_PCRE2
/* So here's something weird: PCRE2 actually renames the symbols exported by the library via C
* macros, so that the exported symbols carry a suffix "_8" but when used from C the suffix is
* gone. In the argument list below we ignore this mangling. Surprisingly (at least to me), we
@ -33,12 +35,123 @@ int dlopen_pcre2(void) {
DLSYM_ARG(pcre2_get_error_message),
DLSYM_ARG(pcre2_match),
DLSYM_ARG(pcre2_get_ovector_pointer));
}
#else
int dlopen_pcre2(void) {
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
"PCRE2 support is not compiled in.");
}
return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "PCRE2 support is not compiled in.");
#endif
}
/* Compile 'pattern' into a PCRE2 regex, logging any failure.
 *
 * 'case_' selects how letter case is handled:
 *   - PATTERN_COMPILE_CASE_INSENSITIVE: compile with PCRE2_CASELESS.
 *   - PATTERN_COMPILE_CASE_SENSITIVE:   compile without PCRE2_CASELESS.
 *   - PATTERN_COMPILE_CASE_AUTO:        compile with PCRE2_CASELESS only if nothing in 'pattern'
 *                                       matches "[[:upper:]]".
 *
 * This function loads PCRE2 itself via dlopen_pcre2(), so callers need not do so beforehand.
 *
 * On success 0 is returned. If 'ret' is non-NULL the compiled pattern is stored there and the
 * caller owns it (release with pattern_free()). If 'ret' is NULL the pattern is only validated and
 * the compiled object is released again before returning.
 *
 * On failure the value of the failing step is returned: the dlopen_pcre2() result, the log_oom()
 * result, or the log_error_errno() result for a synthetic EINVAL when the pattern does not compile.
 * When built without PCRE2 support the log_error_errno() result for a synthetic EOPNOTSUPP is
 * returned. */
int pattern_compile_and_log(const char *pattern, PatternCompileCase case_, pcre2_code **ret) {
#if HAVE_PCRE2
        PCRE2_SIZE erroroffset;
        pcre2_code *p;
        unsigned flags = 0;
        int errorcode, r;

        assert(pattern);

        r = dlopen_pcre2();
        if (r < 0)
                return r;

        if (case_ == PATTERN_COMPILE_CASE_INSENSITIVE)
                flags = PCRE2_CASELESS;
        else if (case_ == PATTERN_COMPILE_CASE_AUTO) {
                _cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *md = NULL;
                bool has_case;
                _cleanup_(sym_pcre2_code_freep) pcre2_code *cs = NULL;

                md = sym_pcre2_match_data_create(1, NULL);
                if (!md)
                        return log_oom();

                /* "Smart case": look for an uppercase character in the pattern text itself. */
                r = pattern_compile_and_log("[[:upper:]]", PATTERN_COMPILE_CASE_SENSITIVE, &cs);
                if (r < 0)
                        return r;

                r = sym_pcre2_match(cs, (PCRE2_SPTR8) pattern, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL);
                has_case = r >= 0;

                flags = !has_case * PCRE2_CASELESS;
        }

        log_debug("Doing case %s matching based on %s",
                  flags & PCRE2_CASELESS ? "insensitive" : "sensitive",
                  case_ != PATTERN_COMPILE_CASE_AUTO ? "request" : "pattern casing");

        p = sym_pcre2_compile((PCRE2_SPTR8) pattern,
                              PCRE2_ZERO_TERMINATED, flags, &errorcode, &erroroffset, NULL);
        if (!p) {
                unsigned char buf[LINE_MAX];

                r = sym_pcre2_get_error_message(errorcode, buf, sizeof buf);

                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                       "Bad pattern \"%s\": %s", pattern,
                                       r < 0 ? "unknown error" : (char *)buf);
        }

        if (ret)
                *ret = p;
        else
                /* Nobody takes ownership of the compiled pattern, so release it here rather than
                 * leaking it. */
                sym_pcre2_code_free(p);

        return 0;
#else
        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "PCRE2 support is not compiled in.");
#endif
}
/* Match the first 'size' bytes of 'message' against 'compiled_pattern'.
 *
 * Returns true on a match and false when PCRE2 reports PCRE2_ERROR_NOMATCH. Any other negative
 * PCRE2 result is logged and turned into the log_error_errno() result for a synthetic EINVAL; an
 * allocation failure yields the log_oom() result. If 'ret_ovec' is non-NULL, a successful match
 * stores the first two entries of the PCRE2 output vector in ret_ovec[0] and ret_ovec[1].
 *
 * When built without PCRE2 support the log_error_errno() result for a synthetic EOPNOTSUPP is
 * returned. */
int pattern_matches_and_log(pcre2_code *compiled_pattern, const char *message, size_t size, size_t *ret_ovec) {
#if HAVE_PCRE2
        _cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *match_data = NULL;
        int rc;

        assert(compiled_pattern);
        assert(message);
        /* A compiled pattern can only come from pattern_compile_and_log(), which dlopens pcre2
         * itself, so the library has to be loaded by the time we get here. */
        assert(pcre2_dl);

        match_data = sym_pcre2_match_data_create(1, NULL);
        if (!match_data)
                return log_oom();

        rc = sym_pcre2_match(compiled_pattern,
                             (const unsigned char *)message,
                             size,
                             0,      /* start at offset 0 in the subject */
                             0,      /* default options */
                             match_data,
                             NULL);
        if (rc < 0) {
                unsigned char text[LINE_MAX];
                int k;

                /* "No match" is a regular outcome, not an error. */
                if (rc == PCRE2_ERROR_NOMATCH)
                        return false;

                k = sym_pcre2_get_error_message(rc, text, sizeof(text));
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Pattern matching failed: %s",
                                       k < 0 ? "unknown error" : (char*) text);
        }

        if (ret_ovec) {
                PCRE2_SIZE *ovector = sym_pcre2_get_ovector_pointer(match_data);

                ret_ovec[0] = ovector[0];
                ret_ovec[1] = ovector[1];
        }

        return true;
#else
        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "PCRE2 support is not compiled in.");
#endif
}
/* Release a compiled pattern. Accepts NULL, in which case nothing is freed. Always returns NULL.
 *
 * With PCRE2 support, a non-NULL pattern is released through sym_pcre2_code_free(), which requires
 * the library to be loaded. Without PCRE2 support only NULL is a valid argument. */
void *pattern_free(pcre2_code *p) {
#if HAVE_PCRE2
        if (p) {
                assert(pcre2_dl);
                sym_pcre2_code_free(p);
        }
#else
        assert(p == NULL);
#endif
        return NULL;
}

View file

@ -18,6 +18,24 @@ extern PCRE2_SIZE* (*sym_pcre2_get_ovector_pointer)(pcre2_match_data *);
DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(pcre2_match_data*, sym_pcre2_match_data_free, NULL);
DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(pcre2_code*, sym_pcre2_code_free, NULL);
#else
typedef struct {} pcre2_code;
#endif
/* Selects how pattern_compile_and_log() treats letter case when compiling a pattern. */
typedef enum {
        /* Case-insensitive unless the pattern itself contains an uppercase character. */
        PATTERN_COMPILE_CASE_AUTO = 0,
        /* Always compile without PCRE2_CASELESS. */
        PATTERN_COMPILE_CASE_SENSITIVE,
        /* Always compile with PCRE2_CASELESS. */
        PATTERN_COMPILE_CASE_INSENSITIVE,
        /* Sentinel following the last regular value. */
        _PATTERN_COMPILE_CASE_MAX,
        /* Negative errno-style marker for an invalid value. */
        _PATTERN_COMPILE_CASE_INVALID = -EINVAL,
} PatternCompileCase;
int pattern_compile_and_log(const char *pattern, PatternCompileCase case_, pcre2_code **ret);
int pattern_matches_and_log(pcre2_code *compiled_pattern, const char *message, size_t size, size_t *ret_ovec);
void *pattern_free(pcre2_code *p);
DEFINE_TRIVIAL_CLEANUP_FUNC(pcre2_code*, pattern_free);
int dlopen_pcre2(void);