aboutsummaryrefslogtreecommitdiff
path: root/vendor/librune/man/u8tor.3
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-01-21 03:03:58 +0100
committerThomas Voss <mail@thomasvoss.com> 2024-01-21 03:03:58 +0100
commit4f93f935dc7a981ca073a322425c3f5929ffb644 (patch)
tree4460586408ec7fdfcecf3ba4584f0435067125a6 /vendor/librune/man/u8tor.3
parent72ea25a4d73e3e026366d4165f5bc4ec9e7418cb (diff)
Support line- & column-based match locations
Diffstat (limited to 'vendor/librune/man/u8tor.3')
-rw-r--r--vendor/librune/man/u8tor.390
1 files changed, 90 insertions, 0 deletions
diff --git a/vendor/librune/man/u8tor.3 b/vendor/librune/man/u8tor.3
new file mode 100644
index 0000000..147f7c1
--- /dev/null
+++ b/vendor/librune/man/u8tor.3
@@ -0,0 +1,90 @@
+.Dd January 18 2024
+.Dt U8TOR 3
+.Os
+.Sh NAME
+.Nm u8tor ,
+.Nm u8tor_uc
+.Nd decode UTF-8 into a rune
+.Sh LIBRARY
+.Lb librune
+.Sh SYNOPSIS
+.In utf8.h
+.Ft int
+.Fn u8tor "rune *ch" "const char8_t *s"
+.Ft int
+.Fn u8tor_uc "rune *ch" "const char8_t *s"
+.Sh DESCRIPTION
+The
+.Fn u8tor
+and
+.Fn u8tor_uc
+functions decode the first rune in the UTF-8 buffer
+.Fa s ,
+storing the result in the rune pointed to by
+.Fa ch .
+Both functions return the number of bytes which compose the decoded
+UTF-8.
+.Pp
+The two functions are nearly identical,
+however
+.Fn u8tor_uc
+performs fewer range checks than
+.Fn u8tor
+allowing it to process data more efficiently.
+When provided with invalid UTF-8 however,
+.Fn u8tor_uc
+engages in undefined-behavior.
+The
+.Fn u8tor
+function on the other hand handles invalid UTF-8 by storing
+.Dv RUNE_ERROR
+in
+.Fa ch
+and returning 1.
+.Sh RETURN VALUES
+The
+.Fn u8tor
+and
+.Fn u8tor_uc
+functions return the number of bytes from
+.Fa s
+decoded into
+.Fa ch .
+.Pp
+The
+.Fn u8tor
+function returns 1 on invalid UTF-8.
+.Sh EXAMPLES
+The following call to
+.Fn u8tor
+attempts to decode the first UTF-8 codepoint in
+.Va buf .
+.Bd -literal -offset indent
+/* Implementation of read_codepoint() omitted */
+
+int w;
+rune ch;
+char8_t *buf = read_codepoint(stdin);
+
+w = u8tor(&ch, buf);
+if (ch == RUNE_ERROR) {
+ fputs("Got invalid UTF-8 codepoint", stderr);
+ exit(EXIT_FAILURE);
+}
+printf("Got rune ā€˜%.*sā€™\en", w, buf);
+.Ed
+.Sh SEE ALSO
+.Xr rtou8 3 ,
+.Xr u8chk 3 ,
+.Xr u8next 3 ,
+.Xr unicode 7 ,
+.Xr utf\-8 7
+.Sh STANDARDS
+.Rs
+.%A F. Yergeau
+.%D November 2003
+.%R RFC 3629
+.%T UTF-8, a transformation format of ISO 10646
+.Re
+.Sh AUTHORS
+.An Thomas Voss Aq Mt mail@thomasvoss.com