aboutsummaryrefslogtreecommitdiff
path: root/vendor/librune/man/u8wdth.3
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-01-21 03:03:58 +0100
committerThomas Voss <mail@thomasvoss.com> 2024-01-21 03:03:58 +0100
commit4f93f935dc7a981ca073a322425c3f5929ffb644 (patch)
tree4460586408ec7fdfcecf3ba4584f0435067125a6 /vendor/librune/man/u8wdth.3
parent72ea25a4d73e3e026366d4165f5bc4ec9e7418cb (diff)
Support line- & column-based match locations
Diffstat (limited to 'vendor/librune/man/u8wdth.3')
-rw-r--r--vendor/librune/man/u8wdth.369
1 files changed, 69 insertions, 0 deletions
diff --git a/vendor/librune/man/u8wdth.3 b/vendor/librune/man/u8wdth.3
new file mode 100644
index 0000000..60fcada
--- /dev/null
+++ b/vendor/librune/man/u8wdth.3
@@ -0,0 +1,69 @@
+.Dd January 16, 2024
+.Dt U8WDTH 3
+.Os
+.Sh NAME
+.Nm u8wdth
+.Nd Unicode codepoint width
+.Sh LIBRARY
+.Lb librune
+.Sh SYNOPSIS
+.In utf8.h
+.Ft int
+.Fn u8wdth "rune ch"
+.Sh DESCRIPTION
+The
+.Fn u8wdth
+function returns the number of bytes that would be occupied by the
+Unicode codepoint
+.Fa ch
+if it were encoded as UTF-8.
+If
+.Fa ch
+is greater than
+.Dv RUNE_MAX ,
+a width of 0 is returned.
+.Pp
+If the exact UTF-8 encoded size of a codepoint is not relevant and you
+simply wish to allocate a buffer capable of holding a given number of
+UTF-8 encoded codepoints,
+the
+.Dv U8_LEN_MAX
+macro may be preferable.
+.Pp
+This function treats invalid codepoints smaller than
+.Dv RUNE_MAX ,
+such as UTF-16 surrogates, as valid.
+.Sh RETURN VALUES
+.Fn u8wdth
+returns the number of bytes required to UTF-8 encode the codepoint
+.Fa ch ,
+or 0 if it is greater than
+.Dv RUNE_MAX .
+.Sh EXAMPLES
+The following example allocates a buffer which is exactly large enough to
+hold the given UTF-32 string once it is converted to UTF-8.
+.Bd -literal -offset indent
+#define lengthof(a) (sizeof(a) / sizeof(*(a)))
+
+size_t bufsiz = 0;
+char8_t *buf;
+char32_t s[] = U\(dqĲsselmeer\(dq; /* ‘Ĳ’ (U+0132) takes 2 bytes */
+
+for (size_t i = 0; i < lengthof(s) - 1; i++)
+ bufsiz += u8wdth(s[i]);
+buf = malloc(bufsiz);
+.Ed
+.Sh SEE ALSO
+.Xr u8glen 3 ,
+.Xr u8len 3 ,
+.Xr unicode 7 ,
+.Xr utf-8 7
+.Sh STANDARDS
+.Rs
+.%A F. Yergeau
+.%D November 2003
+.%R RFC 3629
+.%T UTF-8, a transformation format of ISO 10646
+.Re
+.Sh AUTHORS
+.An Thomas Voss Aq Mt mail@thomasvoss.com