diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-05-05 15:35:27 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-05-05 15:35:27 +0200 |
commit | 6a3a62a41abc81a82d6cb6c5491d5ac778256150 (patch) | |
tree | 42073dec9ea4ad0c747f32144b12948ac3fb1394 /lib | |
parent | 812dad01590185aa268ab452b2328e08c3725a55 (diff) |
Rename freadrune() to u8fgetr() + improvements
Diffstat (limited to 'lib')
-rw-r--r-- | lib/mbio/freadrune.c | 49 | ||||
-rw-r--r-- | lib/mbio/u8fgetr.c | 44 |
2 files changed, 44 insertions, 49 deletions
```diff
diff --git a/lib/mbio/freadrune.c b/lib/mbio/freadrune.c
deleted file mode 100644
index 5c24f52..0000000
--- a/lib/mbio/freadrune.c
+++ /dev/null
@@ -1,49 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-
-#include "macros.h"
-#include "mbio.h"
-#include "mbstring.h"
-#include "rune.h"
-
-#define RETURN_INVAL \
-	do { \
-		*ch = RUNE_ERROR; \
-		return 3; \
-	} while (false)
-
-int
-freadrune(rune *ch, FILE *stream)
-{
-	int c, n = 0;
-	char8_t buf[U8_LEN_MAX];
-
-	if ((c = fgetc(stream)) == EOF)
-		goto eof_or_err;
-
-	buf[0] = (char8_t)c;
-	n = u8byte1(c) ? 0 : u8byte2(c) ? 1 : u8byte3(c) ? 2 : u8byte4(c) ? 3 : 4;
-
-	if (n == 0) {
-		*ch = buf[0];
-		return 1;
-	} else if (n == 4)
-		RETURN_INVAL;
-
-	for (int i = 0; i < n; i++) {
-		if ((c = fgetc(stream)) == EOF)
-			goto eof_or_err;
-		if (!u8bytec(c))
-			RETURN_INVAL;
-		buf[i + 1] = c;
-	}
-
-	return u8tor(ch, buf);
-
-eof_or_err:
-	if (ferror(stream))
-		return MBERR;
-	if (n == 0)
-		return MBEOF;
-	RETURN_INVAL;
-}
diff --git a/lib/mbio/u8fgetr.c b/lib/mbio/u8fgetr.c
new file mode 100644
index 0000000..f880a9e
--- /dev/null
+++ b/lib/mbio/u8fgetr.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "macros.h"
+#include "mbio.h"
+#include "mbstring.h"
+#include "rune.h"
+
+/* Width in bytes of U+FFFD REPLACEMENT CHARACTER */
+constexpr int ERR_WIDTH = 3;
+
+int
+u8fgetr(rune *ch, FILE *stream)
+{
+	int c, n = 0;
+	char8_t buf[U8_LEN_MAX];
+
+	if ((c = fgetc(stream)) == EOF)
+		goto eof;
+
+	n = u8byte1(c) ? 0 : u8byte2(c) ? 1 : u8byte3(c) ? 2 : u8byte4(c) ? 3 : -1;
+	if (n == -1)
+		goto err;
+
+	buf[0] = (char8_t)c;
+	for (int i = 0; i < n; i++) {
+		/* If fgetc() returns EOF we must have malformed UTF-8 unless the EOF
+		   was the result of a reading error. */
+		if ((c = fgetc(stream)) == EOF || !u8bytec(c)) {
+			if (ferror(stream))
+				goto eof;
+			goto err;
+		}
+		buf[i + 1] = c;
+	}
+
+	return u8tor(ch, buf);
+eof:
+	*ch = MBEOF;
+	return 0;
+err:
+	*ch = RUNE_ERROR;
+	return ERR_WIDTH;
+}
```