1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
.Dd January 27, 2024
.Dt U8NEXT 3
.Os
.Sh NAME
.Nm u8next ,
.Nm u8prev
.Nd iterate over Unicode codepoints
.Sh LIBRARY
.Lb librune
.Sh SYNOPSIS
.In mbstring.h
.Ft int
.Fn u8next "rune *ch" "const char8_t **s" "size_t *n"
.Ft int
.Fn u8prev "rune *ch" "const char8_t **s" "const char8_t *start"
.Sh DESCRIPTION
The
.Fn u8next
function decodes the first rune in the UTF-8-encoded string pointed to by
.Fa s
of length
.Fa n
and stores the result in
.Fa ch .
It then updates
.Fa s
to point to the next codepoint in the buffer and updates the length
.Fa n
accordingly.
.Pp
The
.Fn u8prev
function takes a pointer
.Fa start
which points to the start of the string instead of a length,
and updates
.Fa s
to point to the previous codepoint in the buffer.
The rune
.Fa ch
is set to the UTF-8 codepoint pointed to by
.Fa s
after iteration.
.Pp
Both of these functions assume the input is valid UTF-8.
.Sh RETURN VALUES
The
.Fn u8next
and
.Fn u8prev
functions return the length of the UTF-8-encoded rune iterated over in
bytes,
or 0 at the end of iteration.
.Sh EXAMPLES
The following calls to
.Fn u8next
iterate over and print all the codepoints in
.Va s .
.Bd -literal -offset indent
#include <rune.h> /* For PRIXRUNE; see rune(3) */
#define STRING u8"Ta’ Ħaġrat"
int w;
rune ch;
const char8_t *s = STRING;
size_t n = sizeof(STRING) - 1;
while (w = u8next(&ch, &s, &n))
printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s - w);
.Ed
.Pp
The following example is the same as the previous,
but it uses the
.Fn u8prev
function to iterate backwards.
.Bd -literal -offset indent
#include <rune.h> /* For PRIXRUNE; see rune(3) */
#define STRING u8"Ta’ Ħaġrat"
int w;
rune ch;
const char8_t *s, *start;
size_t n = sizeof(STRING) - 1;
start = STRING;
s = start + n;
while (w = u8prev(&ch, &s, start))
printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s);
.Ed
.Sh SEE ALSO
.Xr rune 3 ,
.Xr u8gnext 3 ,
.Xr u8tor 3 ,
.Xr unicode 7 ,
.Xr utf\-8 7
.Sh STANDARDS
.Rs
.%A F. Yergeau
.%D November 2003
.%R RFC 3629
.%T UTF-8, a transformation format of ISO 10646
.Re
.Sh AUTHORS
.An Thomas Voss Aq Mt mail@thomasvoss.com
|