Go all in on string views, and fix manuals

author: Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:01:45 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2024-05-04 04:01:45 +0200
commit: ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (patch)
tree: 90250966629653f0462cf17bc0b6f2476fb6d1fc /man
parent: 8b923ba5e5bb37ea26350b4c1c688b8697706609 (diff)
4 files changed, 48 insertions, 56 deletions
diff --git a/man/u8len.3 b/man/u8len.3
index f4d152f..5b51cd0 100644
--- a/man/u8len.3
+++ b/man/u8len.3
@@ -1,4 +1,4 @@
-.Dd 27 April 2024
+.Dd 4 May 2024
 .Dt U8LEN 3
 .Os
 .Sh NAME
@@ -9,38 +9,38 @@
 .Sh SYNOPSIS
 .In mbstring.h
 .Ft size_t
-.Fn u8len "const char8_t *s" "size_t n"
+.Fn u8len "struct u8view sv"
 .Sh DESCRIPTION
 The
 .Fn u8len
 function returns the number of UTF-8 encoded Unicode codepoints in the
-buffer
-.Fa s
-of length
-.Fa n
-bytes.
+string view
+.Fa sv .
 .Pp
 Invalid bytes are interpreted as having a length of 1 byte.
 .Sh RETURN VALUES
 The
 .Fn u8len
-function returns the number of codepoints in the buffer
-.Fa s .
+function returns the number of codepoints in the string view
+.Fa sv .
 .Sh EXAMPLES
 The following call to
 .Fn u8len
 will return 17 while the call to
 .Fn strlen
 will return 22 as a result of the use of multibyte characters in
-.Fa s .
+.Fa sv .
 .Bd -literal -offset indent
-struct u8view sv = U8(u8\(dq„Der Große Duden“\(dq);
-size_t blen = strlen((char *)sv.p);
-size_t cplen = u8len(U8_ARGS(sv));
+size_t n;
+struct u8view sv = U8(\(dq„Der Große Duden“\(dq);
+
+n = u8len(sv);            /* 17 */
+n = strlen((char *)sv.p); /* 22 */
 .Ed
 .Sh SEE ALSO
-.Xr u8gcnt 3 ,
 .Xr U8 3 ,
+.Xr u8gcnt 3 ,
+.Xr u8view 3type ,
 .Xr unicode 7 ,
 .Xr utf\-8 7
 .Sh STANDARDS
@@ -56,10 +56,11 @@ size_t cplen = u8len(U8_ARGS(sv));
 The return value of
 .Fn u8len
 does not necessarily represent the number of human-perceived characters
-in the given buffer;
-multiple codepoints may combine to form one human-preceived character
-that spans a single column.
-To count user-preceived codepoints
+in the given string view;
+multiple codepoints may combine to form one human-perceived character.
+These human-perceived characters may even take up multiple columns in a
+monospaced environment such as in a terminal emulator.
+To count user-perceived characters
 .Pq also known as graphemes ,
 you may want to use the
 .Xr u8gcnt 3
diff --git a/man/u8next.3 b/man/u8next.3
index 1ba39f0..68079f1 100644
--- a/man/u8next.3
+++ b/man/u8next.3
@@ -1,4 +1,4 @@
-.Dd 20 February 2024
+.Dd 4 May 2024
 .Dt U8NEXT 3
 .Os
 .Sh NAME
@@ -10,30 +10,25 @@
 .Sh SYNOPSIS
 .In mbstring.h
 .Ft int
-.Fn u8next "rune *ch" "const char8_t **s" "size_t *n"
+.Fn u8next "rune *ch" "struct u8view *sv"
 .Ft int
 .Fn u8prev "rune *ch" "const char8_t **s" "const char8_t *start"
 .Sh DESCRIPTION
 The
 .Fn u8next
-function decodes the first rune in the UTF-8 encoded string pointed to by
-.Fa s
-of length
-.Fa n
+function decodes the first rune in the UTF-8 encoded string view
+.Fa sv
 and stores the result in
 .Fa ch .
-It then updates
-.Fa s
-to point to the next codepoint in the buffer and updates the length
-.Fa n
-accordingly.
+It then shrinks
+.Fa sv
+so that the decoded rune is removed.
 .Pp
 The
 .Fn u8prev
 function takes a pointer
 .Fa start
-which points to the start of the string instead of a length,
-and updates
+which points to the start of the string and updates
 .Fa s
 to point to the previous codepoint in the buffer.
 The rune
@@ -59,19 +54,16 @@ or 0 at the end of iteration.
 The following calls to
 .Fn u8next
 iterate over and print all the codepoints in
-.Va s .
+.Va sv .
 .Bd -literal -offset indent
 #include <rune.h> /* For PRIXRUNE; see rune(3) */
 
-#define STRING u8"Ta’ Ħaġrat"
-
 int w;
 rune ch;
-const char8_t *s = STRING;
-size_t n = sizeof(STRING) - 1;
+struct u8view sv = U8("Ta’ Ħaġrat");
 
-while (w = u8next(&ch, &s, &n))
-	printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s - w);
+while (w = u8next(&ch, &sv))
+	printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, sv.p - w);
 .Ed
 .Pp
 The following example is the same as the previous,
@@ -81,23 +73,20 @@ function to iterate backwards.
 .Bd -literal -offset indent
 #include <rune.h> /* For PRIXRUNE; see rune(3) */
 
-#define STRING u8"Ta’ Ħaġrat"
-
 int w;
 rune ch;
-const char8_t *s, *start;
-size_t n = sizeof(STRING) - 1;
-
-start = STRING;
-s = start + n;
+struct u8view sv = U8("Ta’ Ħaġrat");
+const char8_t *s = sv.p + sv.len;
 
-while (w = u8prev(&ch, &s, start))
+while (w = u8prev(&ch, &s, sv.p))
 	printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s);
 .Ed
 .Sh SEE ALSO
 .Xr rune 3 ,
+.Xr U8 3 ,
 .Xr u8gnext 3 ,
 .Xr u8tor 3 ,
+.Xr u8view 3type ,
 .Xr RUNE_ERROR 3const ,
 .Xr unicode 7 ,
 .Xr utf\-8 7
diff --git a/man/u8tor.3 b/man/u8tor.3
index 6e3511e..8886193 100644
--- a/man/u8tor.3
+++ b/man/u8tor.3
@@ -1,4 +1,4 @@
-.Dd 10 March 2024
+.Dd 4 May 2024
 .Dt U8TOR 3
 .Os
 .Sh NAME
@@ -37,18 +37,20 @@ The following call to
 attempts to decode the first UTF-8 codepoint in
 .Va buf .
 .Bd -literal -offset indent
-/* Implementation of read_codepoint() omitted */
+#include <errors.h> /* For err(); see errors(3) */
+#include <rune.h> /* For PRIXRUNE; see rune(3) */
 
 rune ch;
-char8_t *buf = read_codepoint(stdin);
+char8_t *buf = u8"Γειά σου Κόσμε";
 int w = u8tor(&ch, buf);
 if (ch == RUNE_ERROR)
-	errx("Got invalid UTF-8 codepoint");
-printf("Got rune ‘%.*s’\en", w, buf);
+	err("Got invalid UTF-8 codepoint");
+printf("Got rune ‘%.*s’ (U+%04" PRIXRUNE ")\en", w, buf, ch);
 .Ed
 .Sh SEE ALSO
-.Xr errx 3mlib ,
+.Xr errors 3 ,
 .Xr rtou8 3 ,
+.Xr rune 3 ,
 .Xr u8chk 3 ,
 .Xr u8next 3 ,
 .Xr RUNE_ERROR 3const ,
diff --git a/man/usage.3 b/man/usage.3
index 92b9b43..ead0f29 100644
--- a/man/usage.3
+++ b/man/usage.3
@@ -1,4 +1,4 @@
-.Dd 27 April 2024
+.Dd 4 May 2024
 .Dt USAGE 3
 .Os
 .Sh NAME
@@ -34,9 +34,9 @@ be provided to the example executable.
 #include <optparse.h>
  
 static const struct op_option opts[] = {
-	{'a', U8(nullptr), OPT_NONE},
-	{'b', U8(nullptr), OPT_NONE},
-	{'h', U8(nullptr), OPT_NONE},
+	{'a', U8(),       OPT_NONE},
+	{'b', U8(),       OPT_NONE},
+	{'h', U8("help"), OPT_NONE},
 };
 
 int
author	Thomas Voss <mail@thomasvoss.com>	2024-05-04 04:01:45 +0200
committer	Thomas Voss <mail@thomasvoss.com>	2024-05-04 04:01:45 +0200
commit	ac1b4bcbaeaee7d2ef9132dcdc254f2d08691650 (patch)
tree	90250966629653f0462cf17bc0b6f2476fb6d1fc /man
parent	8b923ba5e5bb37ea26350b4c1c688b8697706609 (diff)