diff options
-rw-r--r-- | include/mbio.h | 3 | ||||
-rw-r--r-- | lib/mbio/freadrune.c | 49 | ||||
-rw-r--r-- | lib/mbio/u8fgetr.c | 44 |
3 files changed, 45 insertions, 51 deletions
diff --git a/include/mbio.h b/include/mbio.h index 339c650..116231c 100644 --- a/include/mbio.h +++ b/include/mbio.h @@ -6,9 +6,8 @@ #include "_charN_t.h" #include "_rune.h" -int freadrune(rune *, FILE *); +int u8fgetr(rune *, FILE *); constexpr rune MBEOF = 0x110000; -constexpr rune MBERR = 0x110001; #endif /* !MLIB_MBIO_H */ diff --git a/lib/mbio/freadrune.c b/lib/mbio/freadrune.c deleted file mode 100644 index 5c24f52..0000000 --- a/lib/mbio/freadrune.c +++ /dev/null @@ -1,49 +0,0 @@ -#include <stdio.h> -#include <string.h> - -#include "macros.h" -#include "mbio.h" -#include "mbstring.h" -#include "rune.h" - -#define RETURN_INVAL \ - do { \ - *ch = RUNE_ERROR; \ - return 3; \ - } while (false) - -int -freadrune(rune *ch, FILE *stream) -{ - int c, n = 0; - char8_t buf[U8_LEN_MAX]; - - if ((c = fgetc(stream)) == EOF) - goto eof_or_err; - - buf[0] = (char8_t)c; - n = u8byte1(c) ? 0 : u8byte2(c) ? 1 : u8byte3(c) ? 2 : u8byte4(c) ? 3 : 4; - - if (n == 0) { - *ch = buf[0]; - return 1; - } else if (n == 4) - RETURN_INVAL; - - for (int i = 0; i < n; i++) { - if ((c = fgetc(stream)) == EOF) - goto eof_or_err; - if (!u8bytec(c)) - RETURN_INVAL; - buf[i + 1] = c; - } - - return u8tor(ch, buf); - -eof_or_err: - if (ferror(stream)) - return MBERR; - if (n == 0) - return MBEOF; - RETURN_INVAL; -} diff --git a/lib/mbio/u8fgetr.c b/lib/mbio/u8fgetr.c new file mode 100644 index 0000000..f880a9e --- /dev/null +++ b/lib/mbio/u8fgetr.c @@ -0,0 +1,44 @@ +#include <stdio.h> +#include <string.h> + +#include "macros.h" +#include "mbio.h" +#include "mbstring.h" +#include "rune.h" + +/* Width in bytes of U+FFFD REPLACEMENT CHARACTER */ +constexpr int ERR_WIDTH = 3; + +int +u8fgetr(rune *ch, FILE *stream) +{ + int c, n = 0; + char8_t buf[U8_LEN_MAX]; + + if ((c = fgetc(stream)) == EOF) + goto eof; + + n = u8byte1(c) ? 0 : u8byte2(c) ? 1 : u8byte3(c) ? 2 : u8byte4(c) ?
3 : -1; + if (n == -1) + goto err; + + buf[0] = (char8_t)c; + for (int i = 0; i < n; i++) { + /* If fgetc() returns EOF we must have malformed UTF-8 unless the EOF + was the result of a reading error. */ + if ((c = fgetc(stream)) == EOF || !u8bytec(c)) { + if (ferror(stream)) + goto eof; + goto err; + } + buf[i + 1] = c; + } + + return u8tor(ch, buf); +eof: + *ch = MBEOF; + return 0; +err: + *ch = RUNE_ERROR; + return ERR_WIDTH; +} |