From 6a3a62a41abc81a82d6cb6c5491d5ac778256150 Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Sun, 5 May 2024 15:35:27 +0200
Subject: Rename freadrune() to u8fgetr() + improvements

---
 lib/mbio/freadrune.c | 49 -------------------------------------------------
 lib/mbio/u8fgetr.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 49 deletions(-)
 delete mode 100644 lib/mbio/freadrune.c
 create mode 100644 lib/mbio/u8fgetr.c

/*
 * Review notes on u8fgetr() as added by this patch:
 *   - Reads the bytes of one UTF-8 sequence from `stream` with fgetc() and
 *     reports the result through *ch.
 *   - End-of-file before any byte was read, or a read error at any point:
 *     *ch = MBEOF and the function returns 0.
 *   - First byte matching none of u8byte1()..u8byte4(), or a following byte
 *     that is missing (EOF without ferror()) or rejected by u8bytec():
 *     *ch = RUNE_ERROR and the function returns ERR_WIDTH (3).
 *   - Otherwise the return value is whatever u8tor(ch, buf) returns
 *     (presumably the byte length of the decoded rune; confirm against
 *     mbstring.h).
 *   - Compared with the removed freadrune(): MBEOF is now stored in *ch with a
 *     0 return instead of being returned, read errors no longer return MBERR,
 *     and a one-byte sequence now goes through u8tor() instead of being
 *     returned directly.
 */

(limited to 'lib/mbio')

diff --git a/lib/mbio/freadrune.c b/lib/mbio/freadrune.c
deleted file mode 100644
index 5c24f52..0000000
--- a/lib/mbio/freadrune.c
+++ /dev/null
@@ -1,49 +0,0 @@
-#include
-#include
-
-#include "macros.h"
-#include "mbio.h"
-#include "mbstring.h"
-#include "rune.h"
-
-#define RETURN_INVAL \
-	do { \
-		*ch = RUNE_ERROR; \
-		return 3; \
-	} while (false)
-
-int
-freadrune(rune *ch, FILE *stream)
-{
-	int c, n = 0;
-	char8_t buf[U8_LEN_MAX];
-
-	if ((c = fgetc(stream)) == EOF)
-		goto eof_or_err;
-
-	buf[0] = (char8_t)c;
-	n = u8byte1(c) ? 0 : u8byte2(c) ? 1 : u8byte3(c) ? 2 : u8byte4(c) ? 3 : 4;
-
-	if (n == 0) {
-		*ch = buf[0];
-		return 1;
-	} else if (n == 4)
-		RETURN_INVAL;
-
-	for (int i = 0; i < n; i++) {
-		if ((c = fgetc(stream)) == EOF)
-			goto eof_or_err;
-		if (!u8bytec(c))
-			RETURN_INVAL;
-		buf[i + 1] = c;
-	}
-
-	return u8tor(ch, buf);
-
-eof_or_err:
-	if (ferror(stream))
-		return MBERR;
-	if (n == 0)
-		return MBEOF;
-	RETURN_INVAL;
-}
diff --git a/lib/mbio/u8fgetr.c b/lib/mbio/u8fgetr.c
new file mode 100644
index 0000000..f880a9e
--- /dev/null
+++ b/lib/mbio/u8fgetr.c
@@ -0,0 +1,44 @@
+#include /* NOTE(review): the header names on these two #include lines are missing in this copy; FILE, fgetc() and ferror() below need <stdio.h> */
+#include
+
+#include "macros.h"
+#include "mbio.h"
+#include "mbstring.h"
+#include "rune.h"
+
+/* Width in bytes of U+FFFD REPLACEMENT CHARACTER */
+constexpr int ERR_WIDTH = 3; /* returned whenever *ch is set to RUNE_ERROR */
+
+int
+u8fgetr(rune *ch, FILE *stream)
+{
+	int c, n = 0; /* c: most recent fgetc() result; n: bytes still to read after the first one */
+	char8_t buf[U8_LEN_MAX];
+
+	if ((c = fgetc(stream)) == EOF) /* nothing was read: end-of-file or read error, not told apart here */
+		goto eof;
+
+	n = u8byte1(c) ? 0 : u8byte2(c) ? 1 : u8byte3(c) ? 2 : u8byte4(c) ? 3 : -1;
+	if (n == -1) /* c matched none of u8byte1()..u8byte4() */
+		goto err;
+
+	buf[0] = (char8_t)c;
+	for (int i = 0; i < n; i++) {
+		/* If fgetc() returns EOF we must have malformed UTF-8 unless the EOF
+		   was the result of a reading error. */
+		if ((c = fgetc(stream)) == EOF || !u8bytec(c)) {
+			if (ferror(stream)) /* also consulted when c was a real byte that failed u8bytec() */
+				goto eof;
+			goto err; /* NOTE(review): a byte rejected by u8bytec() has already been consumed and is not pushed back, so the next call starts after it */
+		}
+		buf[i + 1] = c; /* implicit int -> char8_t conversion, unlike the explicit cast used for buf[0] */
+	}
+
+	return u8tor(ch, buf); /* u8tor()'s result is returned unchanged; only buf[0..n] have been written at this point */
+eof:
+	*ch = MBEOF; /* shared by end-of-file and read errors; both produce the same *ch and return value */
+	return 0;
+err:
+	*ch = RUNE_ERROR;
+	return ERR_WIDTH;
+}
-- 
cgit v1.2.3