diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-01-21 03:03:58 +0100 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-01-21 03:03:58 +0100 |
commit | 4f93f935dc7a981ca073a322425c3f5929ffb644 (patch) | |
tree | 4460586408ec7fdfcecf3ba4584f0435067125a6 /vendor/librune/man/u8next.3 | |
parent | 72ea25a4d73e3e026366d4165f5bc4ec9e7418cb (diff) |
Support line- & column-based match locations
Diffstat (limited to 'vendor/librune/man/u8next.3')
-rw-r--r-- | vendor/librune/man/u8next.3 | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/vendor/librune/man/u8next.3 b/vendor/librune/man/u8next.3 new file mode 100644 index 0000000..93a4f5d --- /dev/null +++ b/vendor/librune/man/u8next.3 @@ -0,0 +1,111 @@ +.Dd January 18, 2024 +.Dt U8NEXT 3 +.Os +.Sh NAME +.Nm u8next , +.Nm u8prev +.Nd iterate over Unicode codepoints +.Sh LIBRARY +.Lb librune +.Sh SYNOPSIS +.In utf8.h +.Ft "const char8_t *" +.Fn u8next "rune *ch" "const char8_t **s" "size_t *n" +.Ft "const char8_t *" +.Fn u8prev "rune *ch" "const char8_t **s" "const char8_t *start" +.Sh DESCRIPTION +The +.Fn u8next +function decodes the first rune in the UTF-8 encoded string pointed to by +.Fa s +of length +.Fa n +and stores the result in +.Fa ch . +It then updates +.Fa s +to point to the next codepoint in the buffer and updates the length +.Fa n +accordingly. +.Pp +The +.Fn u8prev +function takes a pointer +.Fa start +which points to the start of the string instead of a length, +and updates +.Fa s +to point to the previous codepoint in the buffer. +The rune +.Fa ch +is set to the UTF-8 codepoint pointed to by +.Fa s +after iteration. +.Pp +Both of these functions assume the input is valid UTF-8. +.Sh RETURN VALUES +The +.Fn u8next +and +.Fn u8prev +functions return the updated value of +.Fa s +or +.Dv NULL +at the end of iteration. +.Sh EXAMPLES +The following calls to +.Fn u8next +iterate over and print all the codepoints in +.Va s . +.Bd -literal -offset indent +#include <rune.h> /* For PRIXRUNE; see rune(3) */ + +#define STRING u8"Ta’ Ħaġrat" + +rune ch; +const char8_t *s = STRING; +size_t n = sizeof(STRING) - 1; + +while (u8next(&ch, &s, &n)) { + int w = u8wdth(ch); + printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s - w); +} +.Ed +.Pp +The following example is the same as the previous, +but it uses the +.Fn u8prev +function to iterate backwards. 
+.Bd -literal -offset indent +#include <rune.h> /* For PRIXRUNE; see rune(3) */ + +#define STRING u8"Ta’ Ħaġrat" + +rune ch; +const char8_t *s, *start; +size_t n = sizeof(STRING) - 1; + +start = STRING; +s = start + n; + +while (u8prev(&ch, &s, start)) { + int w = u8wdth(ch); + printf("U+%04" PRIXRUNE ": ‘%.*s’\en", ch, w, s); +} +.Ed +.Sh SEE ALSO +.Xr rune 3 , +.Xr u8gnext 3 , +.Xr u8tor 3 , +.Xr unicode 7 , +.Xr utf\-8 7 +.Sh STANDARDS +.Rs +.%A F. Yergeau +.%D November 2003 +.%R RFC 3629 +.%T UTF-8, a transformation format of ISO 10646 +.Re +.Sh AUTHORS +.An Thomas Voss Aq Mt mail@thomasvoss.com |