about summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author: Thomas Voss <mail@thomasvoss.com> 2024-05-05 15:35:27 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-05-05 15:35:27 +0200
commit: 6a3a62a41abc81a82d6cb6c5491d5ac778256150 (patch)
tree: 42073dec9ea4ad0c747f32144b12948ac3fb1394 /lib
parent: 812dad01590185aa268ab452b2328e08c3725a55 (diff)
Rename freadrune() to u8fgetr() + improvements
Diffstat (limited to 'lib')
-rw-r--r-- lib/mbio/freadrune.c 49
-rw-r--r-- lib/mbio/u8fgetr.c 44
2 files changed, 44 insertions, 49 deletions
diff --git a/lib/mbio/freadrune.c b/lib/mbio/freadrune.c
deleted file mode 100644
index 5c24f52..0000000
--- a/lib/mbio/freadrune.c
+++ /dev/null
@@ -1,49 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-
-#include "macros.h"
-#include "mbio.h"
-#include "mbstring.h"
-#include "rune.h"
-/*
- * Store RUNE_ERROR in *ch and return 3 from the function the macro is used
- * in; it therefore relies on a pointer named `ch' being in scope.  The body is
- * wrapped in do/while so the macro behaves as one statement and must be
- * followed by a semicolon.
- * NOTE(review): 3 is presumably the UTF-8 encoded width of RUNE_ERROR; confirm.
- */
-#define RETURN_INVAL \
- do { \
- *ch = RUNE_ERROR; \
- return 3; \
- } while (false)
-/*
- * Read one UTF-8 encoded rune from stream, one byte at a time with fgetc(),
- * and store the result in *ch.
- *
- * The first byte is classified with u8byte1()..u8byte4() to obtain n, the
- * number of further bytes to read (0 to 3); n == 4 marks a first byte that
- * none of the classifiers accept.
- *
- * Returns:
- *   1      when n == 0; *ch is set to the first byte itself.
- *   3      with *ch = RUNE_ERROR (via RETURN_INVAL) when the first byte is
- *          not accepted, when a following byte fails u8bytec(), or when
- *          EOF is hit after the first byte without ferror() being set.
- *   MBERR  when fgetc() returns EOF and ferror(stream) is set.
- *   MBEOF  when the very first fgetc() returns EOF without an error.
- *   otherwise whatever u8tor(ch, buf) returns.
- *
- * NOTE(review): a following byte that fails u8bytec() has already been
- * consumed from the stream and is not pushed back.
- */
-int
-freadrune(rune *ch, FILE *stream)
-{
- int c, n = 0;
- char8_t buf[U8_LEN_MAX];
-
- if ((c = fgetc(stream)) == EOF)
- goto eof_or_err;
-
- buf[0] = (char8_t)c;
- n = u8byte1(c) ? 0 : u8byte2(c) ? 1 : u8byte3(c) ? 2 : u8byte4(c) ? 3 : 4;
-
- if (n == 0) {
- *ch = buf[0];
- return 1;
- } else if (n == 4)
- RETURN_INVAL;
-
- for (int i = 0; i < n; i++) {
- if ((c = fgetc(stream)) == EOF)
- goto eof_or_err;
- if (!u8bytec(c))
- RETURN_INVAL;
- buf[i + 1] = c;
- }
-
- return u8tor(ch, buf);
-
-eof_or_err:
- if (ferror(stream))
- return MBERR;
- if (n == 0) /* n is still 0 only when the first fgetc() hit EOF */
- return MBEOF;
- RETURN_INVAL; /* EOF in the middle of a multi-byte sequence */
-}
diff --git a/lib/mbio/u8fgetr.c b/lib/mbio/u8fgetr.c
new file mode 100644
index 0000000..f880a9e
--- /dev/null
+++ b/lib/mbio/u8fgetr.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "macros.h"
+#include "mbio.h"
+#include "mbstring.h"
+#include "rune.h"
+
+/* Width in bytes of U+FFFD REPLACEMENT CHARACTER */
+constexpr int ERR_WIDTH = 3;
+
+/*
+ * Read one UTF-8 encoded rune from STREAM and store it in *CH.
+ *
+ * Returns:
+ *   0          with *CH = MBEOF when end-of-file is reached before the first
+ *              byte, or when fgetc() fails with a read error at any point;
+ *              callers tell the two apart with feof()/ferror().
+ *   ERR_WIDTH  with *CH = RUNE_ERROR when the input is malformed: the lead
+ *              byte is not a valid UTF-8 lead byte, a continuation byte is
+ *              missing, or the stream ends in the middle of a sequence.
+ *   otherwise  whatever u8tor() returns for the bytes that were collected
+ *              (presumably the encoded width of the rune).
+ *
+ * When a sequence is cut short by a byte that is not a continuation byte,
+ * that byte is pushed back onto STREAM so that the next call decodes it
+ * instead of silently dropping it.
+ */
+int
+u8fgetr(rune *ch, FILE *stream)
+{
+	int c, n;
+	char8_t buf[U8_LEN_MAX];
+
+	if ((c = fgetc(stream)) == EOF)
+		goto eof;
+
+	/* Number of continuation bytes announced by the lead byte; -1 if the
+	   byte cannot start a sequence. */
+	n = u8byte1(c) ? 0 : u8byte2(c) ? 1 : u8byte3(c) ? 2 : u8byte4(c) ? 3 : -1;
+	if (n == -1)
+		goto err;
+
+	buf[0] = (char8_t)c;
+	for (int i = 0; i < n; i++) {
+		c = fgetc(stream);
+		if (c == EOF) {
+			/* If fgetc() returns EOF we must have malformed UTF-8 unless
+			   the EOF was the result of a reading error.  ferror() is only
+			   consulted here because the error indicator is sticky: a stale
+			   flag must not turn malformed input into an EOF report. */
+			if (ferror(stream))
+				goto eof;
+			goto err;
+		}
+		if (!u8bytec(c)) {
+			/* The byte belongs to the next rune, so hand it back.  One
+			   byte of pushback is guaranteed by the C standard. */
+			(void)ungetc(c, stream);
+			goto err;
+		}
+		buf[i + 1] = (char8_t)c;
+	}
+
+	return u8tor(ch, buf);
+eof:
+	*ch = MBEOF;
+	return 0;
+err:
+	*ch = RUNE_ERROR;
+	return ERR_WIDTH;
+}