about | summary | refs | log | tree | commit | diff
path: root/src/work.c
diff options
context:
space:
mode:
author: Thomas Voss <thomas.voss@humanwave.nl> 2026-03-19 16:37:58 +0100
committer: Thomas Voss <thomas.voss@humanwave.nl> 2026-03-19 16:37:58 +0100
commit: ac56f0167d0e26c35adc4639015e0cbbeecf6262 (patch)
tree: a376eb7a1bd3052737420e668c216a5ac5f77cf8 /src/work.c
parent: 19e2b9bc23e71ccafdd754686d4881058b8a1fb5 (diff)
Fix pattern matching on anchors (HEAD, master)
The previous code would find all matches by searching for a match, chopping it off the front of the string view, and then matching again in a loop. This caused bugs with anchors such as ^: because each iteration saw a freshly truncated view, x/^a/ would match *every* instance of ‘a’ instead of just an ‘a’ at the start of the matching context. This PR switches the matching engine to use the start-offset parameter of pcre2_jit_match() so that anchors are handled correctly.
Diffstat (limited to 'src/work.c')
-rw-r--r-- src/work.c 68
1 files changed, 43 insertions, 25 deletions
diff --git a/src/work.c b/src/work.c
index 7013726..3a138fb 100644
--- a/src/work.c
+++ b/src/work.c
@@ -228,10 +228,9 @@ DEFINE_OPERATOR(h)
pcre2_match_data *md =
pcre2_match_data_create_from_pattern(ops[opi].re, nullptr);
- u8view_t sv_save = sv;
ptrdiff_t origlen = array_len(*hl);
- for (;;) {
- int n = pcre2_jit_match(ops[opi].re, sv.p, sv.len, 0,
+ for (ptrdiff_t off = 0;;) {
+ int n = pcre2_jit_match(ops[opi].re, sv.p, sv.len, off,
PCRE2_NOTEMPTY, md, nullptr);
if (n == PCRE2_ERROR_NOMATCH)
break;
@@ -240,10 +239,10 @@ DEFINE_OPERATOR(h)
size_t *ov = pcre2_get_ovector_pointer(md);
array_push(hl, ((u8view_t){sv.p + ov[0], ov[1] - ov[0]}));
- VSHFT(&sv, ov[1]);
+ off = ov[1];
}
pcre2_match_data_free(md);
- operator_dispatch(opi + 1, sv_save, hl);
+ operator_dispatch(opi + 1, sv, hl);
array_hdr(*hl)->len = origlen;
}
@@ -256,22 +255,29 @@ DEFINE_OPERATOR(H)
pcre2_match_data *md =
pcre2_match_data_create_from_pattern(ops[opi].re, nullptr);
- u8view_t sv_save = sv;
- ptrdiff_t origlen = array_len(*hl);
- for (;;) {
- int n = pcre2_jit_match(ops[opi].re, sv.p, sv.len, 0, PCRE2_NOTEMPTY,
- md, nullptr);
+ ptrdiff_t prvend = 0, origlen = array_len(*hl);
+
+ for (ptrdiff_t off = 0;;) {
+ int n = pcre2_jit_match(ops[opi].re, sv.p, sv.len, off, 0, md, nullptr);
if (n == PCRE2_ERROR_NOMATCH)
break;
if (n < 0)
pcre2_bitch_and_die(n, "failed to match regex");
size_t *ov = pcre2_get_ovector_pointer(md);
- array_push(hl, ((u8view_t){sv.p, ov[0]}));
- VSHFT(&sv, ov[1]);
+ if (ov[0] - prvend != 0)
+ array_push(hl, ((u8view_t){sv.p + prvend, ov[0] - prvend}));
+
+ prvend = off = ov[1];
+ if (ov[0] == ov[1])
+ off++;
}
+
+ if (prvend < sv.len)
+ array_push(hl, ((u8view_t){sv.p + prvend, sv.len - prvend}));
+
pcre2_match_data_free(md);
- operator_dispatch(opi + 1, sv_save, hl);
+ operator_dispatch(opi + 1, sv, hl);
array_hdr(*hl)->len = origlen;
}
@@ -279,9 +285,9 @@ DEFINE_OPERATOR(x)
{
pcre2_match_data *md =
pcre2_match_data_create_from_pattern(ops[opi].re, nullptr);
- for (;;) {
- int n = pcre2_jit_match(ops[opi].re, sv.p, sv.len, 0, PCRE2_NOTEMPTY,
- md, nullptr);
+
+ for (ptrdiff_t off = 0;;) {
+ int n = pcre2_jit_match(ops[opi].re, sv.p, sv.len, off, 0, md, nullptr);
if (n == PCRE2_ERROR_NOMATCH)
break;
if (n < 0)
@@ -289,7 +295,9 @@ DEFINE_OPERATOR(x)
size_t *ov = pcre2_get_ovector_pointer(md);
operator_dispatch(opi + 1, (u8view_t){sv.p + ov[0], ov[1] - ov[0]}, hl);
- VSHFT(&sv, ov[1]);
+ off = ov[1];
+ if (ov[0] == ov[1])
+ off++;
}
pcre2_match_data_free(md);
}
@@ -298,21 +306,31 @@ DEFINE_OPERATOR(X)
{
pcre2_match_data *md =
pcre2_match_data_create_from_pattern(ops[opi].re, nullptr);
- for (;;) {
- int n = pcre2_jit_match(ops[opi].re, sv.p, sv.len, 0, PCRE2_NOTEMPTY,
- md, nullptr);
+
+ ptrdiff_t prvend = 0;
+
+ for (ptrdiff_t off = 0;;) {
+ int n = pcre2_jit_match(ops[opi].re, sv.p, sv.len, off, 0, md, nullptr);
if (n == PCRE2_ERROR_NOMATCH)
break;
if (n < 0)
pcre2_bitch_and_die(n, "failed to match regex");
size_t *ov = pcre2_get_ovector_pointer(md);
- if (ov[0] != 0)
- operator_dispatch(opi + 1, (u8view_t){sv.p, ov[0]}, hl);
- VSHFT(&sv, ov[1]);
+ if (ov[0] > (size_t)prvend) {
+ u8view_t sub = {sv.p + prvend, ov[0] - prvend};
+ operator_dispatch(opi + 1, sub, hl);
+ }
+
+ prvend = off = ov[1];
+ if (ov[0] == ov[1])
+ off++;
+ }
+
+ if (prvend < sv.len) {
+ u8view_t sub = {sv.p + prvend, sv.len - prvend};
+ operator_dispatch(opi + 1, sub, hl);
}
- if (sv.len != 0)
- operator_dispatch(opi + 1, sv, hl);
pcre2_match_data_free(md);
}