aboutsummaryrefslogtreecommitdiff
path: root/man
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-05-04 04:01:45 +0200
committerThomas Voss <mail@thomasvoss.com> 2024-05-04 04:01:45 +0200
commitac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (patch)
tree90250966629653f0462cf17bc0b6f2476fb6d1fc /man
parent8b923ba5e5bb37ea26350b4c1c688b8697706609 (diff)
Go all in on string views, and fix manuals
Diffstat (limited to 'man')
-rw-r--r--man/u8len.337
-rw-r--r--man/u8next.345
-rw-r--r--man/u8tor.314
-rw-r--r--man/usage.38
4 files changed, 48 insertions, 56 deletions
diff --git a/man/u8len.3 b/man/u8len.3
index f4d152f..5b51cd0 100644
--- a/man/u8len.3
+++ b/man/u8len.3
@@ -1,4 +1,4 @@
-.Dd 27 April 2024
+.Dd 4 May 2024
.Dt U8LEN 3
.Os
.Sh NAME
@@ -9,38 +9,38 @@
.Sh SYNOPSIS
.In mbstring.h
.Ft size_t
-.Fn u8len "const char8_t *s" "size_t n"
+.Fn u8len "struct u8view sv"
.Sh DESCRIPTION
The
.Fn u8len
function returns the number of UTF-8 encoded Unicode codepoints in the
-buffer
-.Fa s
-of length
-.Fa n
-bytes.
+string view
+.Fa sv .
.Pp
Invalid bytes are interpreted as having a length of 1 byte.
.Sh RETURN VALUES
The
.Fn u8len
-function returns the number of codepoints in the buffer
-.Fa s .
+function returns the number of codepoints in the string view
+.Fa sv .
.Sh EXAMPLES
The following call to
.Fn u8len
will return 17 while the call to
.Fn strlen
will return 22 as a result of use of multibyte-characters in
-.Fa s .
+.Fa sv .
.Bd -literal -offset indent
-struct u8view sv = U8(u8\(dq„Der Große Duden“\(dq);
-size_t blen = strlen((char *)sv.p);
-size_t cplen = u8len(U8_ARGS(sv));
+size_t n;
+struct u8view sv = U8(\(dq„Der Große Duden“\(dq);
+
+n = u8len(sv); /* 17 */
+n = strlen((char *)sv.p); /* 22 */
.Ed
.Sh SEE ALSO
-.Xr u8gcnt 3 ,
.Xr U8 3 ,
+.Xr u8gcnt 3 ,
+.Xr u8view 3type ,
.Xr unicode 7 ,
.Xr utf\-8 7
.Sh STANDARDS
@@ -56,10 +56,11 @@ size_t cplen = u8len(U8_ARGS(sv));
The return value of
.Fn u8len
does not necessarily represent the number of human-preceived characters
-in the given buffer;
-multiple codepoints may combine to form one human-preceived character
-that spans a single column.
-To count user-preceived codepoints
+in the given string view;
+multiple codepoints may combine to form one human-perceived character.
+These human-perceived characters may even take up multiple columns in a
+monospaced environment such as in a terminal emulator.
+To count user-perceived characters
.Pq also known as graphemes ,
you may want to use the
.Xr u8gcnt 3
diff --git a/man/u8next.3 b/man/u8next.3
index 1ba39f0..68079f1 100644
--- a/man/u8next.3
+++ b/man/u8next.3
@@ -1,4 +1,4 @@
-.Dd 20 February 2024
+.Dd 4 May 2024
.Dt U8NEXT 3
.Os
.Sh NAME
@@ -10,30 +10,25 @@
.Sh SYNOPSIS
.In mbstring.h
.Ft int
-.Fn u8next "rune *ch" "const char8_t **s" "size_t *n"
+.Fn u8next "rune *ch" "struct u8view *sv"
.Ft int
.Fn u8prev "rune *ch" "const char8_t **s" "const char8_t *start"
.Sh DESCRIPTION
The
.Fn u8next
-function decodes the first rune in the UTF-8 encoded string pointed to by
-.Fa s
-of length
-.Fa n
+function decodes the first rune in the UTF-8 encoded string view
+.Fa sv
and stores the result in
.Fa ch .
-It then updates
-.Fa s
-to point to the next codepoint in the buffer and updates the length
-.Fa n
-accordingly.
+It then shrinks
+.Fa sv
+so that the decoded rune is removed.
.Pp
The
.Fn u8prev
function takes a pointer
.Fa start
-which points to the start of the string instead of a length,
-and updates
+which points to the start of the string and updates
.Fa s
to point to the previous codepoint in the buffer.
The rune
@@ -59,19 +54,16 @@ or 0 at the end of iteration.
The following calls to
.Fn u8next
iterate over and print all the codepoints in
-.Va s .
+.Va sv .
.Bd -literal -offset indent
#include <rune.h> /* For PRIXRUNE; see rune(3) */
-#define STRING u8"Ta’ Ħaġrat"
-
int w;
rune ch;
-const char8_t *s = STRING;
-size_t n = sizeof(STRING) - 1;
+struct u8view sv = U8("Ta’ Ħaġrat");
-while (w = u8next(&ch, &s, &n))
- printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s - w);
+while (w = u8next(&ch, &sv))
+ printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, sv.p - w);
.Ed
.Pp
The following example is the same as the previous,
@@ -81,23 +73,20 @@ function to iterate backwards.
.Bd -literal -offset indent
#include <rune.h> /* For PRIXRUNE; see rune(3) */
-#define STRING u8"Ta’ Ħaġrat"
-
int w;
rune ch;
-const char8_t *s, *start;
-size_t n = sizeof(STRING) - 1;
-
-start = STRING;
-s = start + n;
+struct u8view sv = U8("Ta’ Ħaġrat");
+const char8_t *s = sv.p + sv.len;
-while (w = u8prev(&ch, &s, start))
+while (w = u8prev(&ch, &s, sv.p))
printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s);
.Ed
.Sh SEE ALSO
.Xr rune 3 ,
+.Xr U8 3 ,
.Xr u8gnext 3 ,
.Xr u8tor 3 ,
+.Xr u8view 3type ,
.Xr RUNE_ERROR 3const ,
.Xr unicode 7 ,
.Xr utf\-8 7
diff --git a/man/u8tor.3 b/man/u8tor.3
index 6e3511e..8886193 100644
--- a/man/u8tor.3
+++ b/man/u8tor.3
@@ -1,4 +1,4 @@
-.Dd 10 March 2024
+.Dd 4 May 2024
.Dt U8TOR 3
.Os
.Sh NAME
@@ -37,18 +37,20 @@ The following call to
attempts to decode the first UTF-8 codepoint in
.Va buf .
.Bd -literal -offset indent
-/* Implementation of read_codepoint() omitted */
+#include <errors.h> /* For err(); see errors(3) */
+#include <rune.h> /* For PRIXRUNE; see rune(3) */
rune ch;
-char8_t *buf = read_codepoint(stdin);
+char8_t *buf = u8"Γειά σου Κόσμε";
int w = u8tor(&ch, buf);
if (ch == RUNE_ERROR)
- errx("Got invalid UTF-8 codepoint");
-printf("Got rune ‘%.*s’\en", w, buf);
+ err("Got invalid UTF-8 codepoint");
+printf("Got rune ‘%.*s’ (U+%04" PRIXRUNE ")\en", w, buf, ch);
.Ed
.Sh SEE ALSO
-.Xr errx 3mlib ,
+.Xr errors 3 ,
.Xr rtou8 3 ,
+.Xr rune 3 ,
.Xr u8chk 3 ,
.Xr u8next 3 ,
.Xr RUNE_ERROR 3const ,
diff --git a/man/usage.3 b/man/usage.3
index 92b9b43..ead0f29 100644
--- a/man/usage.3
+++ b/man/usage.3
@@ -1,4 +1,4 @@
-.Dd 27 April 2024
+.Dd 4 May 2024
.Dt USAGE 3
.Os
.Sh NAME
@@ -34,9 +34,9 @@ be provided to the example executable.
#include <optparse.h>
static const struct op_option opts[] = {
- {'a', U8(nullptr), OPT_NONE},
- {'b', U8(nullptr), OPT_NONE},
- {'h', U8(nullptr), OPT_NONE},
+ {'a', U8(), OPT_NONE},
+ {'b', U8(), OPT_NONE},
+ {'h', U8("help"), OPT_NONE},
};
int