diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-01-26 19:37:33 +0100 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-01-26 19:37:33 +0100 |
commit | 048050a0d8f7e283515bad35b411612e4b2af707 (patch) | |
tree | 95ad0686f81c60dfb9b71a0a2c983d2deefdd453 | |
parent | ee83cd1274773e7d6ad04800c2717c2fa7441ce2 (diff) |
Validate that input is well-formed UTF-8
-rw-r--r-- | src/grab.c | 32 |
1 file changed, 20 insertions, 12 deletions
```diff
@@ -349,22 +349,30 @@ grab(struct ops ops, FILE *stream, const char *filename)
 		chars.len += n = fread(chars.buf + chars.len, 1, BUFSIZ, stream);
 	} while (n == BUFSIZ);
 
-	if (ferror(stream))
+	if (ferror(stream)) {
 		warn("fread: %s", filename);
-	else {
-		struct sv sv = {
-			.p = chars.buf,
-			.len = chars.len,
-		};
-		struct matches ms;
+		goto out;
+	}
 
-		dainit(&ms, 4);
-		pos.col = pos.row = 1;
-		pos.bp = pos.p = chars.buf;
-		op_table[(uchar)ops.buf[0].c](sv, &ms, ops, 0, filename);
-		free(ms.buf);
+	const char8_t *p;
+	struct sv sv = {
+		.p = chars.buf,
+		.len = chars.len,
+	};
+	struct matches ms;
+
+	if (p = u8chk(chars.buf, chars.len)) {
+		warnx("%s: Invalid UTF-8 near ‘%02X’", filename, *p);
+		goto out;
 	}
 
+	dainit(&ms, 4);
+	pos.col = pos.row = 1;
+	pos.bp = pos.p = chars.buf;
+	op_table[(uchar)ops.buf[0].c](sv, &ms, ops, 0, filename);
+	free(ms.buf);
+
+out:
 	free(chars.buf);
 }
 
```