From 048050a0d8f7e283515bad35b411612e4b2af707 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Fri, 26 Jan 2024 19:37:33 +0100 Subject: Validate that input is well-formed UTF-8 --- src/grab.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/grab.c b/src/grab.c index 9f8137b..c16b368 100644 --- a/src/grab.c +++ b/src/grab.c @@ -349,22 +349,30 @@ grab(struct ops ops, FILE *stream, const char *filename) chars.len += n = fread(chars.buf + chars.len, 1, BUFSIZ, stream); } while (n == BUFSIZ); - if (ferror(stream)) + if (ferror(stream)) { warn("fread: %s", filename); - else { - struct sv sv = { - .p = chars.buf, - .len = chars.len, - }; - struct matches ms; + goto out; + } - dainit(&ms, 4); - pos.col = pos.row = 1; - pos.bp = pos.p = chars.buf; - op_table[(uchar)ops.buf[0].c](sv, &ms, ops, 0, filename); - free(ms.buf); + const char8_t *p; + struct sv sv = { + .p = chars.buf, + .len = chars.len, + }; + struct matches ms; + + if (p = u8chk(chars.buf, chars.len)) { + warnx("%s: Invalid UTF-8 near ‘%02X’", filename, *p); + goto out; } + dainit(&ms, 4); + pos.col = pos.row = 1; + pos.bp = pos.p = chars.buf; + op_table[(uchar)ops.buf[0].c](sv, &ms, ops, 0, filename); + free(ms.buf); + +out: free(chars.buf); } -- cgit v1.2.3