Skip to content

Commit

Permalink
Merge pull request #394 from bshifter/filterx-regx-subst
Browse files Browse the repository at this point in the history
Filterx regexp_subst supports match groups
  • Loading branch information
alltilla authored Dec 3, 2024
2 parents 7f7966b + a805102 commit e22db34
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 4 deletions.
61 changes: 58 additions & 3 deletions lib/filterx/expr-regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
FILTERX_FUNC_REGEXP_SUBST_FLAG_GLOBAL_NAME"=(boolean) " \
FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME"=(boolean) " \
FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME"=(boolean) " \
FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME"=(boolean))" \
FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME"=(boolean)" \
FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME"=(boolean))" \

/* Usage text for regexp_search(): it takes the string to search and the pattern. */
#define FILTERX_FUNC_REGEXP_SEARCH_USAGE "Usage: regexp_search(string, pattern)"

Expand All @@ -48,6 +49,7 @@ typedef struct FilterXReMatchState_
FilterXObject *lhs_obj;
const gchar *lhs_str;
gsize lhs_str_len;
gint rc;
} FilterXReMatchState;

static void
Expand Down Expand Up @@ -112,6 +114,7 @@ _match_inner(FilterXReMatchState *state, pcre2_code_8 *pattern, gint start_offse
gint rc = pcre2_match(pattern, (PCRE2_SPTR) state->lhs_str, (PCRE2_SIZE) state->lhs_str_len, (PCRE2_SIZE) start_offset,
0,
state->match_data, NULL);
state->rc = rc;
if (rc < 0)
{
switch (rc)
Expand Down Expand Up @@ -543,18 +546,67 @@ _is_zero_length_match(PCRE2_SIZE *ovector)
return ovector[0] == ovector[1];
}

/*
 * Expand "\1" .. "\9" back-references found in self->replacement, using the
 * capture groups of the match currently stored in state->match_data, and
 * write the result into replacement_string (its previous content is
 * discarded).
 *
 * A reference is expanded only when its index is below state->rc (the value
 * returned by the last pcre2_match() call) and the group actually took part
 * in the match.  Every other backslash sequence -- including references to
 * unset or nonexistent groups and a lone backslash at the very end of the
 * replacement -- is copied through unchanged.
 *
 * Always returns TRUE.
 */
static gboolean
_build_replacement_stirng_with_match_groups(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state,
                                            GString *replacement_string)
{
  PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(state->match_data);
  const gchar *rep_ptr = self->replacement;
  /* start of the literal text that has not been copied to the output yet */
  const gchar *last_ptr = rep_ptr;
  gint num_grps = state->rc;

  g_string_set_size(replacement_string, 0);

  while (*rep_ptr)
    {
      if (*rep_ptr != '\\')
        {
          rep_ptr++;
          continue;
        }

      /* step onto the character following the backslash */
      rep_ptr++;

      /* A trailing backslash has nothing after it: stop here so that we never
       * advance past the terminating NUL.  The backslash itself is emitted as
       * literal text by the final append below. */
      if (*rep_ptr == '\0')
        break;

      if (*rep_ptr >= '1' && *rep_ptr <= '9')
        {
          gint grp_idx = *rep_ptr - '0';
          if (grp_idx < num_grps)
            {
              PCRE2_SIZE start = ovector[2 * grp_idx];
              PCRE2_SIZE end = ovector[2 * grp_idx + 1];
              if (start != PCRE2_UNSET)
                {
                  /* flush the pending literal text, excluding the backslash */
                  g_string_append_len(replacement_string, last_ptr, rep_ptr - last_ptr - 1);
                  last_ptr = rep_ptr + 1;
                  g_string_append_len(replacement_string, state->lhs_str + start, end - start);
                }
            }
        }

      /* skip the escaped character, whether or not it was a group reference */
      rep_ptr++;
    }

  /* copy whatever literal text remains after the last expanded reference */
  g_string_append_len(replacement_string, last_ptr, rep_ptr - last_ptr);
  return TRUE;
}

static FilterXObject *
_replace_matches(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state)
{
GString *new_value = scratch_buffers_alloc();
PCRE2_SIZE *ovector = NULL;
gint pos = 0;
const gchar *replacement_string = self->replacement;

if (self->opts.groups)
{
GString *rep_str = scratch_buffers_alloc();
_build_replacement_stirng_with_match_groups(self, state, rep_str);
replacement_string = rep_str->str;
}

do
{
ovector = pcre2_get_ovector_pointer(state->match_data);

g_string_append_len(new_value, state->lhs_str + pos, _start_offset(ovector) - pos);
g_string_append(new_value, self->replacement);
g_string_append(new_value, replacement_string);

if (_is_zero_length_match(ovector))
{
Expand All @@ -574,7 +626,7 @@ _replace_matches(const FilterXFuncRegexpSubst *self, FilterXReMatchState *state)

// handle the very last of zero length matches
if (_is_zero_length_match(ovector))
g_string_append(new_value, self->replacement);
g_string_append(new_value, replacement_string);

return filterx_string_new(new_value->str, new_value->len);
}
Expand Down Expand Up @@ -689,6 +741,9 @@ _extract_optional_flags(FilterXFuncRegexpSubst *self, FilterXFunctionArgs *args,
if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME, &self->opts.utf8,
error))
return FALSE;
if (!_extract_literal_bool(args, FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME,
&self->opts.groups, error))
return FALSE;
return TRUE;
}

Expand Down
2 changes: 2 additions & 0 deletions lib/filterx/expr-regexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#define FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME "utf8"
#define FILTERX_FUNC_REGEXP_SUBST_FLAG_IGNORECASE_NAME "ignorecase"
#define FILTERX_FUNC_REGEXP_SUBST_FLAG_NEWLINE_NAME "newline"
#define FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME "groups"

typedef struct FilterXFuncRegexpSubstOpts_
{
Expand All @@ -41,6 +42,7 @@ typedef struct FilterXFuncRegexpSubstOpts_
gboolean utf8;
gboolean ignorecase;
gboolean newline;
gboolean groups;
} FilterXFuncRegexpSubstOpts;

FilterXExpr *filterx_expr_regexp_match_new(FilterXExpr *lhs, const gchar *pattern);
Expand Down
30 changes: 30 additions & 0 deletions lib/filterx/tests/test_expr_regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ _build_subst_func(const gchar *pattern, const gchar *repr, const gchar *str, Fil
if (opts.utf8)
args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_UTF8_NAME,
filterx_literal_new(filterx_boolean_new(TRUE))));
if (opts.groups)
args = g_list_append(args, filterx_function_arg_new(FILTERX_FUNC_REGEXP_SUBST_FLAG_GROUPS_NAME,
filterx_literal_new(filterx_boolean_new(TRUE))));

GError *err = NULL;
FilterXExpr *func = filterx_function_regexp_subst_new(filterx_function_args_new(args, NULL), &err);
Expand Down Expand Up @@ -527,6 +530,33 @@ Test(filterx_expr_regexp, regexp_subst_match_opt_ignorecase_nojit)
filterx_object_unref(result_alt);
}

/* The same pattern/replacement pair is run twice: without the "groups" flag
 * the back-references must be left untouched, with it they must be expanded. */
Test(filterx_expr_regexp, regexp_subst_group_subst)
{
  const gchar *date_pattern = "(\\d{2})-(\\d{2})-(\\d{4})";
  const gchar *reordering = "\\3-\\2-\\1";
  const gchar *input = "25-02-2022";

  /* groups disabled: replacement text is used verbatim */
  {
    FilterXFuncRegexpSubstOpts no_groups = {};
    FilterXObject *literal_obj = _sub(date_pattern, reordering, input, no_groups);

    cr_assert(filterx_object_is_type(literal_obj, &FILTERX_TYPE_NAME(string)));
    const gchar *literal_str = filterx_string_get_value_ref(literal_obj, NULL);
    cr_assert_str_eq(literal_str, "\\3-\\2-\\1");
    filterx_object_unref(literal_obj);
  }

  /* groups enabled: \1..\3 are substituted with the captured values */
  {
    FilterXFuncRegexpSubstOpts with_groups = {.groups = TRUE};
    FilterXObject *expanded_obj = _sub(date_pattern, reordering, input, with_groups);

    cr_assert(filterx_object_is_type(expanded_obj, &FILTERX_TYPE_NAME(string)));
    const gchar *expanded_str = filterx_string_get_value_ref(expanded_obj, NULL);
    cr_assert_str_eq(expanded_str, "2022-02-25");
    filterx_object_unref(expanded_obj);
  }
}

/* With "groups" enabled, a replacement that contains no back-reference must
 * come through unchanged. */
Test(filterx_expr_regexp, regexp_subst_group_subst_without_ref)
{
  const gchar *date_pattern = "(\\d{2})-(\\d{2})-(\\d{4})";
  const gchar *plain_replacement = "group without ref";
  FilterXFuncRegexpSubstOpts with_groups = {.groups = TRUE};

  FilterXObject *subst_obj = _sub(date_pattern, plain_replacement, "25-02-2022", with_groups);
  cr_assert(filterx_object_is_type(subst_obj, &FILTERX_TYPE_NAME(string)));

  const gchar *subst_str = filterx_string_get_value_ref(subst_obj, NULL);
  cr_assert_str_eq(subst_str, "group without ref");

  filterx_object_unref(subst_obj);
}

static void
setup(void)
{
Expand Down
8 changes: 7 additions & 1 deletion tests/light/functional_tests/filterx/test_filterx.py
Original file line number Diff line number Diff line change
Expand Up @@ -1964,6 +1964,9 @@ def test_regexp_subst(config, syslog_ng):
$MSG.orgrp_global = regexp_subst("foobarbaz", "(fo|az)", "!", global=true);
$MSG.ignore_case_control = regexp_subst("FoObArBaz", "(o|a)", "!", global=true);
$MSG.ignore_case = regexp_subst("FoObArBaz", "(o|a)", "!", ignorecase=true, global=true);
$MSG.groups_off = regexp_subst("25-02-2022", /(\d{2})-(\d{2})-(\d{4})/, "\\3-\\2-\\1");;
$MSG.groups_on = regexp_subst("25-02-2022", /(\d{2})-(\d{2})-(\d{4})/, "\\3-\\2-\\1", groups=true);
$MSG.mixed_grps = regexp_subst("25-02-2022", /(\d{2})-(\d{2})-(\d{4})/, "foo:\\3-\\2-\\1:bar:baz", groups=true);
""",
)
syslog_ng.start(config)
Expand All @@ -1982,7 +1985,10 @@ def test_regexp_subst(config, syslog_ng):
r""""zero_length_match_global":"!f!o!o!b!a!r!b!a!z!","""
r""""orgrp_global":"!obarb!","""
r""""ignore_case_control":"F!ObArB!z","""
r""""ignore_case":"F!!b!rB!z"}""" + "\n"
r""""ignore_case":"F!!b!rB!z","""
r""""groups_off":"\\3-\\2-\\1","""
r""""groups_on":"2022-02-25","""
r""""mixed_grps":"foo:2022-02-25:bar:baz"}""" + "\n"
)
assert file_true.read_log() == exp

Expand Down

0 comments on commit e22db34

Please sign in to comment.