aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-22 21:06:52 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-22 21:08:09 +0200
commit c0a983a29af17415ef29058d72f1a9cd99ddd83f (patch)
tree 0c77ccd6905491ab7c39c6c386ba0721e6f0d4e6
parent ff14e4801643f8c69b5d31e183bfb71943ee519f (diff)
Fix various bugs in word segmentation
-rw-r--r-- README 9
-rwxr-xr-x gen/string/wbrk 109
-rw-r--r-- include/unicode/_wbrk.h 1178
-rw-r--r-- lib/unicode/string/u8wnext.c 340
4 files changed, 1505 insertions, 131 deletions
diff --git a/README b/README
index fc6e5de..a158aac 100644
--- a/README
+++ b/README
@@ -105,7 +105,7 @@ FEATURES:
• Properties related to case-mapping are context-aware (via a
context-struct argument)
• unicode/string.h
- • Grapheme iteration and -counting
+ • Iteration and counting of graphemes and words in a string
• Unicode-aware case-mapping of strings with truncation checking
• Case-mapping supports optional language-specific quirks (Azeri,
Lithuanian, German, etc.)
@@ -113,10 +113,9 @@ FEATURES:
PLANNED FEATURES:
- • Missing Unicode Properties (unicode/prop.h)
- • String Case Conversions (unicode/string.h)
- • Unicode Normalization (unicode/string.h)
- • Word- and Line Segmentation (unicode/string.h)
+ • String Case Conversions (unicode/string.h)
+ • Unicode Normalization (unicode/string.h)
+ • Line- and Sentence Segmentation (unicode/string.h)
BUGS:
diff --git a/gen/string/wbrk b/gen/string/wbrk
new file mode 100755
index 0000000..1966356
--- /dev/null
+++ b/gen/string/wbrk
@@ -0,0 +1,109 @@
+#!/bin/sh
+
+set -e
+cd "${0%/*}/../.."
+exec >include/unicode/_wbrk.h
+
+gawk '
+BEGIN {
+ FS = " *(; *|#.*)"
+
+ map["ALetter"] = "LE"
+ map["CR"] = "CR"
+ map["Double_Quote"] = "DQ"
+ map["E_Base"] = "EB"
+ map["E_Base_GAZ"] = "EBG"
+ map["E_Modifier"] = "EM"
+ map["Extended_Pictographic"] = "EXTPICT"
+ map["Extend"] = "EXTEND"
+ map["ExtendNumLet"] = "EX"
+ map["Format"] = "FO"
+ map["Glue_After_Zwj"] = "GAZ"
+ map["Hebrew_Letter"] = "HL"
+ map["Katakana"] = "KA"
+ map["LF"] = "LF"
+ map["MidLetter"] = "ML"
+ map["MidNumLet"] = "MB"
+ map["MidNum"] = "MN"
+ map["Newline"] = "NL"
+ map["Numeric"] = "NU"
+ map["Other"] = "XX"
+ map["Regional_Indicator"] = "RI"
+ map["Single_Quote"] = "SQ"
+ map["WSegSpace"] = "WSEGSPACE"
+ map["ZWJ"] = "ZWJ"
+
+ print "/* This file is autogenerated by gen/string/wbrk; DO NOT EDIT. */"
+ print ""
+ print "#ifndef MLIB_UNICODE__WBRK_H"
+ print "#define MLIB_UNICODE__WBRK_H"
+ print ""
+ print "#include <inttypes.h>"
+ print ""
+ print "#include \"rune.h\""
+ print ""
+ print "enum wbrk_prop : uint_least8_t {"
+ print "\tWBRK_XX = 0, /* Other */"
+ print "\tWBRK_CR, /* CR */"
+ print "\tWBRK_DQ, /* Double Quote */"
+ print "\tWBRK_EB, /* E Base */"
+ print "\tWBRK_EBG, /* E Base GAZ */"
+ print "\tWBRK_EM, /* E Modifier */"
+ print "\tWBRK_EOT, /* End of Text */"
+ print "\tWBRK_EX, /* ExtendNumLet */"
+ print "\tWBRK_EXTEND, /* Extend */"
+ print "\tWBRK_EXTPICT, /* Extended Pictographic */"
+ print "\tWBRK_EXTPICT_LE, /* Extended Pictographic and ALetter */"
+ print "\tWBRK_FO, /* Format */"
+ print "\tWBRK_GAZ, /* Glue After Zwj */"
+ print "\tWBRK_HL, /* Hebrew Letter */"
+ print "\tWBRK_KA, /* Katakana */"
+ print "\tWBRK_LE, /* ALetter */"
+ print "\tWBRK_LF, /* LF */"
+ print "\tWBRK_MB, /* MidNumLet */"
+ print "\tWBRK_ML, /* MidLetter */"
+ print "\tWBRK_MN, /* MidNum */"
+ print "\tWBRK_NL, /* Newline */"
+ print "\tWBRK_NU, /* Numeric */"
+ print "\tWBRK_RI, /* Regional Indicator */"
+ print "\tWBRK_SQ, /* Single Quote */"
+ print "\tWBRK_WSEGSPACE, /* WSegSpace */"
+ print "\tWBRK_ZWJ, /* ZWJ */"
+ print "};"
+ print ""
+ print "const struct {"
+ print "\trune lo, hi;"
+ print "\tenum wbrk_prop val;"
+ print "} wbrk_lookup[] = {"
+}
+
+/^[A-F0-9]/ {
+ if (map[$2] == "")
+ next
+
+ n = split($1, a, /\.\./)
+ lo = strtonum("0X" a[1])
+ hi = strtonum("0X" a[n])
+
+ for (i = lo; i <= hi; i++) {
+ s = "WBRK_" map[$2]
+ if (props[i] == "WBRK_LE" && s == "WBRK_EXTPICT")
+ s = "WBRK_EXTPICT_LE"
+ props[i] = s
+ }
+}
+
+END {
+ for (i = 0; i <= 0x10FFFF; i++) {
+ if (!props[i])
+ continue
+ for (lo = i; props[i] == props[i + 1]; i++)
+ ;
+ printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i]
+ }
+
+ print "};"
+ print ""
+ print "#endif /* !MLIB_UNICODE__WBRK_H */"
+}
+' data/WordBreakProperty data/emoji-data | sed 's/\s*$//'
diff --git a/include/unicode/_wbrk.h b/include/unicode/_wbrk.h
new file mode 100644
index 0000000..625f571
--- /dev/null
+++ b/include/unicode/_wbrk.h
@@ -0,0 +1,1178 @@
+/* This file is autogenerated by gen/string/wbrk; DO NOT EDIT. */
+
+#ifndef MLIB_UNICODE__WBRK_H
+#define MLIB_UNICODE__WBRK_H
+
+#include <inttypes.h>
+
+#include "rune.h"
+
+enum wbrk_prop : uint_least8_t {
+ WBRK_XX = 0, /* Other */
+ WBRK_CR, /* CR */
+ WBRK_DQ, /* Double Quote */
+ WBRK_EB, /* E Base */
+ WBRK_EBG, /* E Base GAZ */
+ WBRK_EM, /* E Modifier */
+ WBRK_EOT, /* End of Text */
+ WBRK_EX, /* ExtendNumLet */
+ WBRK_EXTEND, /* Extend */
+ WBRK_EXTPICT, /* Extended Pictographic */
+ WBRK_EXTPICT_LE, /* Extended Pictographic and ALetter */
+ WBRK_FO, /* Format */
+ WBRK_GAZ, /* Glue After Zwj */
+ WBRK_HL, /* Hebrew Letter */
+ WBRK_KA, /* Katakana */
+ WBRK_LE, /* ALetter */
+ WBRK_LF, /* LF */
+ WBRK_MB, /* MidNumLet */
+ WBRK_ML, /* MidLetter */
+ WBRK_MN, /* MidNum */
+ WBRK_NL, /* Newline */
+ WBRK_NU, /* Numeric */
+ WBRK_RI, /* Regional Indicator */
+ WBRK_SQ, /* Single Quote */
+ WBRK_WSEGSPACE, /* WSegSpace */
+ WBRK_ZWJ, /* ZWJ */
+};
+
+const struct {
+ rune lo, hi;
+ enum wbrk_prop val;
+} wbrk_lookup[] = {
+ {RUNE_C(0x00000A), RUNE_C(0x00000A), WBRK_LF},
+ {RUNE_C(0x00000B), RUNE_C(0x00000C), WBRK_NL},
+ {RUNE_C(0x00000D), RUNE_C(0x00000D), WBRK_CR},
+ {RUNE_C(0x000020), RUNE_C(0x000020), WBRK_WSEGSPACE},
+ {RUNE_C(0x000022), RUNE_C(0x000022), WBRK_DQ},
+ {RUNE_C(0x000027), RUNE_C(0x000027), WBRK_SQ},
+ {RUNE_C(0x00002C), RUNE_C(0x00002C), WBRK_MN},
+ {RUNE_C(0x00002E), RUNE_C(0x00002E), WBRK_MB},
+ {RUNE_C(0x000030), RUNE_C(0x000039), WBRK_NU},
+ {RUNE_C(0x00003A), RUNE_C(0x00003A), WBRK_ML},
+ {RUNE_C(0x00003B), RUNE_C(0x00003B), WBRK_MN},
+ {RUNE_C(0x000041), RUNE_C(0x00005A), WBRK_LE},
+ {RUNE_C(0x00005F), RUNE_C(0x00005F), WBRK_EX},
+ {RUNE_C(0x000061), RUNE_C(0x00007A), WBRK_LE},
+ {RUNE_C(0x000085), RUNE_C(0x000085), WBRK_NL},
+ {RUNE_C(0x0000A9), RUNE_C(0x0000A9), WBRK_EXTPICT},
+ {RUNE_C(0x0000AA), RUNE_C(0x0000AA), WBRK_LE},
+ {RUNE_C(0x0000AD), RUNE_C(0x0000AD), WBRK_FO},
+ {RUNE_C(0x0000AE), RUNE_C(0x0000AE), WBRK_EXTPICT},
+ {RUNE_C(0x0000B5), RUNE_C(0x0000B5), WBRK_LE},
+ {RUNE_C(0x0000B7), RUNE_C(0x0000B7), WBRK_ML},
+ {RUNE_C(0x0000BA), RUNE_C(0x0000BA), WBRK_LE},
+ {RUNE_C(0x0000C0), RUNE_C(0x0000D6), WBRK_LE},
+ {RUNE_C(0x0000D8), RUNE_C(0x0000F6), WBRK_LE},
+ {RUNE_C(0x0000F8), RUNE_C(0x0002D7), WBRK_LE},
+ {RUNE_C(0x0002DE), RUNE_C(0x0002FF), WBRK_LE},
+ {RUNE_C(0x000300), RUNE_C(0x00036F), WBRK_EXTEND},
+ {RUNE_C(0x000370), RUNE_C(0x000374), WBRK_LE},
+ {RUNE_C(0x000376), RUNE_C(0x000377), WBRK_LE},
+ {RUNE_C(0x00037A), RUNE_C(0x00037D), WBRK_LE},
+ {RUNE_C(0x00037E), RUNE_C(0x00037E), WBRK_MN},
+ {RUNE_C(0x00037F), RUNE_C(0x00037F), WBRK_LE},
+ {RUNE_C(0x000386), RUNE_C(0x000386), WBRK_LE},
+ {RUNE_C(0x000387), RUNE_C(0x000387), WBRK_ML},
+ {RUNE_C(0x000388), RUNE_C(0x00038A), WBRK_LE},
+ {RUNE_C(0x00038C), RUNE_C(0x00038C), WBRK_LE},
+ {RUNE_C(0x00038E), RUNE_C(0x0003A1), WBRK_LE},
+ {RUNE_C(0x0003A3), RUNE_C(0x0003F5), WBRK_LE},
+ {RUNE_C(0x0003F7), RUNE_C(0x000481), WBRK_LE},
+ {RUNE_C(0x000483), RUNE_C(0x000489), WBRK_EXTEND},
+ {RUNE_C(0x00048A), RUNE_C(0x00052F), WBRK_LE},
+ {RUNE_C(0x000531), RUNE_C(0x000556), WBRK_LE},
+ {RUNE_C(0x000559), RUNE_C(0x00055C), WBRK_LE},
+ {RUNE_C(0x00055E), RUNE_C(0x00055E), WBRK_LE},
+ {RUNE_C(0x00055F), RUNE_C(0x00055F), WBRK_ML},
+ {RUNE_C(0x000560), RUNE_C(0x000588), WBRK_LE},
+ {RUNE_C(0x000589), RUNE_C(0x000589), WBRK_MN},
+ {RUNE_C(0x00058A), RUNE_C(0x00058A), WBRK_LE},
+ {RUNE_C(0x000591), RUNE_C(0x0005BD), WBRK_EXTEND},
+ {RUNE_C(0x0005BF), RUNE_C(0x0005BF), WBRK_EXTEND},
+ {RUNE_C(0x0005C1), RUNE_C(0x0005C2), WBRK_EXTEND},
+ {RUNE_C(0x0005C4), RUNE_C(0x0005C5), WBRK_EXTEND},
+ {RUNE_C(0x0005C7), RUNE_C(0x0005C7), WBRK_EXTEND},
+ {RUNE_C(0x0005D0), RUNE_C(0x0005EA), WBRK_HL},
+ {RUNE_C(0x0005EF), RUNE_C(0x0005F2), WBRK_HL},
+ {RUNE_C(0x0005F3), RUNE_C(0x0005F3), WBRK_LE},
+ {RUNE_C(0x0005F4), RUNE_C(0x0005F4), WBRK_ML},
+ {RUNE_C(0x000600), RUNE_C(0x000605), WBRK_NU},
+ {RUNE_C(0x00060C), RUNE_C(0x00060D), WBRK_MN},
+ {RUNE_C(0x000610), RUNE_C(0x00061A), WBRK_EXTEND},
+ {RUNE_C(0x00061C), RUNE_C(0x00061C), WBRK_FO},
+ {RUNE_C(0x000620), RUNE_C(0x00064A), WBRK_LE},
+ {RUNE_C(0x00064B), RUNE_C(0x00065F), WBRK_EXTEND},
+ {RUNE_C(0x000660), RUNE_C(0x000669), WBRK_NU},
+ {RUNE_C(0x00066B), RUNE_C(0x00066B), WBRK_NU},
+ {RUNE_C(0x00066C), RUNE_C(0x00066C), WBRK_MN},
+ {RUNE_C(0x00066E), RUNE_C(0x00066F), WBRK_LE},
+ {RUNE_C(0x000670), RUNE_C(0x000670), WBRK_EXTEND},
+ {RUNE_C(0x000671), RUNE_C(0x0006D3), WBRK_LE},
+ {RUNE_C(0x0006D5), RUNE_C(0x0006D5), WBRK_LE},
+ {RUNE_C(0x0006D6), RUNE_C(0x0006DC), WBRK_EXTEND},
+ {RUNE_C(0x0006DD), RUNE_C(0x0006DD), WBRK_NU},
+ {RUNE_C(0x0006DF), RUNE_C(0x0006E4), WBRK_EXTEND},
+ {RUNE_C(0x0006E5), RUNE_C(0x0006E6), WBRK_LE},
+ {RUNE_C(0x0006E7), RUNE_C(0x0006E8), WBRK_EXTEND},
+ {RUNE_C(0x0006EA), RUNE_C(0x0006ED), WBRK_EXTEND},
+ {RUNE_C(0x0006EE), RUNE_C(0x0006EF), WBRK_LE},
+ {RUNE_C(0x0006F0), RUNE_C(0x0006F9), WBRK_NU},
+ {RUNE_C(0x0006FA), RUNE_C(0x0006FC), WBRK_LE},
+ {RUNE_C(0x0006FF), RUNE_C(0x0006FF), WBRK_LE},
+ {RUNE_C(0x00070F), RUNE_C(0x000710), WBRK_LE},
+ {RUNE_C(0x000711), RUNE_C(0x000711), WBRK_EXTEND},
+ {RUNE_C(0x000712), RUNE_C(0x00072F), WBRK_LE},
+ {RUNE_C(0x000730), RUNE_C(0x00074A), WBRK_EXTEND},
+ {RUNE_C(0x00074D), RUNE_C(0x0007A5), WBRK_LE},
+ {RUNE_C(0x0007A6), RUNE_C(0x0007B0), WBRK_EXTEND},
+ {RUNE_C(0x0007B1), RUNE_C(0x0007B1), WBRK_LE},
+ {RUNE_C(0x0007C0), RUNE_C(0x0007C9), WBRK_NU},
+ {RUNE_C(0x0007CA), RUNE_C(0x0007EA), WBRK_LE},
+ {RUNE_C(0x0007EB), RUNE_C(0x0007F3), WBRK_EXTEND},
+ {RUNE_C(0x0007F4), RUNE_C(0x0007F5), WBRK_LE},
+ {RUNE_C(0x0007F8), RUNE_C(0x0007F8), WBRK_MN},
+ {RUNE_C(0x0007FA), RUNE_C(0x0007FA), WBRK_LE},
+ {RUNE_C(0x0007FD), RUNE_C(0x0007FD), WBRK_EXTEND},
+ {RUNE_C(0x000800), RUNE_C(0x000815), WBRK_LE},
+ {RUNE_C(0x000816), RUNE_C(0x000819), WBRK_EXTEND},
+ {RUNE_C(0x00081A), RUNE_C(0x00081A), WBRK_LE},
+ {RUNE_C(0x00081B), RUNE_C(0x000823), WBRK_EXTEND},
+ {RUNE_C(0x000824), RUNE_C(0x000824), WBRK_LE},
+ {RUNE_C(0x000825), RUNE_C(0x000827), WBRK_EXTEND},
+ {RUNE_C(0x000828), RUNE_C(0x000828), WBRK_LE},
+ {RUNE_C(0x000829), RUNE_C(0x00082D), WBRK_EXTEND},
+ {RUNE_C(0x000840), RUNE_C(0x000858), WBRK_LE},
+ {RUNE_C(0x000859), RUNE_C(0x00085B), WBRK_EXTEND},
+ {RUNE_C(0x000860), RUNE_C(0x00086A), WBRK_LE},
+ {RUNE_C(0x000870), RUNE_C(0x000887), WBRK_LE},
+ {RUNE_C(0x000889), RUNE_C(0x00088E), WBRK_LE},
+ {RUNE_C(0x000890), RUNE_C(0x000891), WBRK_NU},
+ {RUNE_C(0x000898), RUNE_C(0x00089F), WBRK_EXTEND},
+ {RUNE_C(0x0008A0), RUNE_C(0x0008C9), WBRK_LE},
+ {RUNE_C(0x0008CA), RUNE_C(0x0008E1), WBRK_EXTEND},
+ {RUNE_C(0x0008E2), RUNE_C(0x0008E2), WBRK_NU},
+ {RUNE_C(0x0008E3), RUNE_C(0x000903), WBRK_EXTEND},
+ {RUNE_C(0x000904), RUNE_C(0x000939), WBRK_LE},
+ {RUNE_C(0x00093A), RUNE_C(0x00093C), WBRK_EXTEND},
+ {RUNE_C(0x00093D), RUNE_C(0x00093D), WBRK_LE},
+ {RUNE_C(0x00093E), RUNE_C(0x00094F), WBRK_EXTEND},
+ {RUNE_C(0x000950), RUNE_C(0x000950), WBRK_LE},
+ {RUNE_C(0x000951), RUNE_C(0x000957), WBRK_EXTEND},
+ {RUNE_C(0x000958), RUNE_C(0x000961), WBRK_LE},
+ {RUNE_C(0x000962), RUNE_C(0x000963), WBRK_EXTEND},
+ {RUNE_C(0x000966), RUNE_C(0x00096F), WBRK_NU},
+ {RUNE_C(0x000971), RUNE_C(0x000980), WBRK_LE},
+ {RUNE_C(0x000981), RUNE_C(0x000983), WBRK_EXTEND},
+ {RUNE_C(0x000985), RUNE_C(0x00098C), WBRK_LE},
+ {RUNE_C(0x00098F), RUNE_C(0x000990), WBRK_LE},
+ {RUNE_C(0x000993), RUNE_C(0x0009A8), WBRK_LE},
+ {RUNE_C(0x0009AA), RUNE_C(0x0009B0), WBRK_LE},
+ {RUNE_C(0x0009B2), RUNE_C(0x0009B2), WBRK_LE},
+ {RUNE_C(0x0009B6), RUNE_C(0x0009B9), WBRK_LE},
+ {RUNE_C(0x0009BC), RUNE_C(0x0009BC), WBRK_EXTEND},
+ {RUNE_C(0x0009BD), RUNE_C(0x0009BD), WBRK_LE},
+ {RUNE_C(0x0009BE), RUNE_C(0x0009C4), WBRK_EXTEND},
+ {RUNE_C(0x0009C7), RUNE_C(0x0009C8), WBRK_EXTEND},
+ {RUNE_C(0x0009CB), RUNE_C(0x0009CD), WBRK_EXTEND},
+ {RUNE_C(0x0009CE), RUNE_C(0x0009CE), WBRK_LE},
+ {RUNE_C(0x0009D7), RUNE_C(0x0009D7), WBRK_EXTEND},
+ {RUNE_C(0x0009DC), RUNE_C(0x0009DD), WBRK_LE},
+ {RUNE_C(0x0009DF), RUNE_C(0x0009E1), WBRK_LE},
+ {RUNE_C(0x0009E2), RUNE_C(0x0009E3), WBRK_EXTEND},
+ {RUNE_C(0x0009E6), RUNE_C(0x0009EF), WBRK_NU},
+ {RUNE_C(0x0009F0), RUNE_C(0x0009F1), WBRK_LE},
+ {RUNE_C(0x0009FC), RUNE_C(0x0009FC), WBRK_LE},
+ {RUNE_C(0x0009FE), RUNE_C(0x0009FE), WBRK_EXTEND},
+ {RUNE_C(0x000A01), RUNE_C(0x000A03), WBRK_EXTEND},
+ {RUNE_C(0x000A05), RUNE_C(0x000A0A), WBRK_LE},
+ {RUNE_C(0x000A0F), RUNE_C(0x000A10), WBRK_LE},
+ {RUNE_C(0x000A13), RUNE_C(0x000A28), WBRK_LE},
+ {RUNE_C(0x000A2A), RUNE_C(0x000A30), WBRK_LE},
+ {RUNE_C(0x000A32), RUNE_C(0x000A33), WBRK_LE},
+ {RUNE_C(0x000A35), RUNE_C(0x000A36), WBRK_LE},
+ {RUNE_C(0x000A38), RUNE_C(0x000A39), WBRK_LE},
+ {RUNE_C(0x000A3C), RUNE_C(0x000A3C), WBRK_EXTEND},
+ {RUNE_C(0x000A3E), RUNE_C(0x000A42), WBRK_EXTEND},
+ {RUNE_C(0x000A47), RUNE_C(0x000A48), WBRK_EXTEND},
+ {RUNE_C(0x000A4B), RUNE_C(0x000A4D), WBRK_EXTEND},
+ {RUNE_C(0x000A51), RUNE_C(0x000A51), WBRK_EXTEND},
+ {RUNE_C(0x000A59), RUNE_C(0x000A5C), WBRK_LE},
+ {RUNE_C(0x000A5E), RUNE_C(0x000A5E), WBRK_LE},
+ {RUNE_C(0x000A66), RUNE_C(0x000A6F), WBRK_NU},
+ {RUNE_C(0x000A70), RUNE_C(0x000A71), WBRK_EXTEND},
+ {RUNE_C(0x000A72), RUNE_C(0x000A74), WBRK_LE},
+ {RUNE_C(0x000A75), RUNE_C(0x000A75), WBRK_EXTEND},
+ {RUNE_C(0x000A81), RUNE_C(0x000A83), WBRK_EXTEND},
+ {RUNE_C(0x000A85), RUNE_C(0x000A8D), WBRK_LE},
+ {RUNE_C(0x000A8F), RUNE_C(0x000A91), WBRK_LE},
+ {RUNE_C(0x000A93), RUNE_C(0x000AA8), WBRK_LE},
+ {RUNE_C(0x000AAA), RUNE_C(0x000AB0), WBRK_LE},
+ {RUNE_C(0x000AB2), RUNE_C(0x000AB3), WBRK_LE},
+ {RUNE_C(0x000AB5), RUNE_C(0x000AB9), WBRK_LE},
+ {RUNE_C(0x000ABC), RUNE_C(0x000ABC), WBRK_EXTEND},
+ {RUNE_C(0x000ABD), RUNE_C(0x000ABD), WBRK_LE},
+ {RUNE_C(0x000ABE), RUNE_C(0x000AC5), WBRK_EXTEND},
+ {RUNE_C(0x000AC7), RUNE_C(0x000AC9), WBRK_EXTEND},
+ {RUNE_C(0x000ACB), RUNE_C(0x000ACD), WBRK_EXTEND},
+ {RUNE_C(0x000AD0), RUNE_C(0x000AD0), WBRK_LE},
+ {RUNE_C(0x000AE0), RUNE_C(0x000AE1), WBRK_LE},
+ {RUNE_C(0x000AE2), RUNE_C(0x000AE3), WBRK_EXTEND},
+ {RUNE_C(0x000AE6), RUNE_C(0x000AEF), WBRK_NU},
+ {RUNE_C(0x000AF9), RUNE_C(0x000AF9), WBRK_LE},
+ {RUNE_C(0x000AFA), RUNE_C(0x000AFF), WBRK_EXTEND},
+ {RUNE_C(0x000B01), RUNE_C(0x000B03), WBRK_EXTEND},
+ {RUNE_C(0x000B05), RUNE_C(0x000B0C), WBRK_LE},
+ {RUNE_C(0x000B0F), RUNE_C(0x000B10), WBRK_LE},
+ {RUNE_C(0x000B13), RUNE_C(0x000B28), WBRK_LE},
+ {RUNE_C(0x000B2A), RUNE_C(0x000B30), WBRK_LE},
+ {RUNE_C(0x000B32), RUNE_C(0x000B33), WBRK_LE},
+ {RUNE_C(0x000B35), RUNE_C(0x000B39), WBRK_LE},
+ {RUNE_C(0x000B3C), RUNE_C(0x000B3C), WBRK_EXTEND},
+ {RUNE_C(0x000B3D), RUNE_C(0x000B3D), WBRK_LE},
+ {RUNE_C(0x000B3E), RUNE_C(0x000B44), WBRK_EXTEND},
+ {RUNE_C(0x000B47), RUNE_C(0x000B48), WBRK_EXTEND},
+ {RUNE_C(0x000B4B), RUNE_C(0x000B4D), WBRK_EXTEND},
+ {RUNE_C(0x000B55), RUNE_C(0x000B57), WBRK_EXTEND},
+ {RUNE_C(0x000B5C), RUNE_C(0x000B5D), WBRK_LE},
+ {RUNE_C(0x000B5F), RUNE_C(0x000B61), WBRK_LE},
+ {RUNE_C(0x000B62), RUNE_C(0x000B63), WBRK_EXTEND},
+ {RUNE_C(0x000B66), RUNE_C(0x000B6F), WBRK_NU},
+ {RUNE_C(0x000B71), RUNE_C(0x000B71), WBRK_LE},
+ {RUNE_C(0x000B82), RUNE_C(0x000B82), WBRK_EXTEND},
+ {RUNE_C(0x000B83), RUNE_C(0x000B83), WBRK_LE},
+ {RUNE_C(0x000B85), RUNE_C(0x000B8A), WBRK_LE},
+ {RUNE_C(0x000B8E), RUNE_C(0x000B90), WBRK_LE},
+ {RUNE_C(0x000B92), RUNE_C(0x000B95), WBRK_LE},
+ {RUNE_C(0x000B99), RUNE_C(0x000B9A), WBRK_LE},
+ {RUNE_C(0x000B9C), RUNE_C(0x000B9C), WBRK_LE},
+ {RUNE_C(0x000B9E), RUNE_C(0x000B9F), WBRK_LE},
+ {RUNE_C(0x000BA3), RUNE_C(0x000BA4), WBRK_LE},
+ {RUNE_C(0x000BA8), RUNE_C(0x000BAA), WBRK_LE},
+ {RUNE_C(0x000BAE), RUNE_C(0x000BB9), WBRK_LE},
+ {RUNE_C(0x000BBE), RUNE_C(0x000BC2), WBRK_EXTEND},
+ {RUNE_C(0x000BC6), RUNE_C(0x000BC8), WBRK_EXTEND},
+ {RUNE_C(0x000BCA), RUNE_C(0x000BCD), WBRK_EXTEND},
+ {RUNE_C(0x000BD0), RUNE_C(0x000BD0), WBRK_LE},
+ {RUNE_C(0x000BD7), RUNE_C(0x000BD7), WBRK_EXTEND},
+ {RUNE_C(0x000BE6), RUNE_C(0x000BEF), WBRK_NU},
+ {RUNE_C(0x000C00), RUNE_C(0x000C04), WBRK_EXTEND},
+ {RUNE_C(0x000C05), RUNE_C(0x000C0C), WBRK_LE},
+ {RUNE_C(0x000C0E), RUNE_C(0x000C10), WBRK_LE},
+ {RUNE_C(0x000C12), RUNE_C(0x000C28), WBRK_LE},
+ {RUNE_C(0x000C2A), RUNE_C(0x000C39), WBRK_LE},
+ {RUNE_C(0x000C3C), RUNE_C(0x000C3C), WBRK_EXTEND},
+ {RUNE_C(0x000C3D), RUNE_C(0x000C3D), WBRK_LE},
+ {RUNE_C(0x000C3E), RUNE_C(0x000C44), WBRK_EXTEND},
+ {RUNE_C(0x000C46), RUNE_C(0x000C48), WBRK_EXTEND},
+ {RUNE_C(0x000C4A), RUNE_C(0x000C4D), WBRK_EXTEND},
+ {RUNE_C(0x000C55), RUNE_C(0x000C56), WBRK_EXTEND},
+ {RUNE_C(0x000C58), RUNE_C(0x000C5A), WBRK_LE},
+ {RUNE_C(0x000C5D), RUNE_C(0x000C5D), WBRK_LE},
+ {RUNE_C(0x000C60), RUNE_C(0x000C61), WBRK_LE},
+ {RUNE_C(0x000C62), RUNE_C(0x000C63), WBRK_EXTEND},
+ {RUNE_C(0x000C66), RUNE_C(0x000C6F), WBRK_NU},
+ {RUNE_C(0x000C80), RUNE_C(0x000C80), WBRK_LE},
+ {RUNE_C(0x000C81), RUNE_C(0x000C83), WBRK_EXTEND},
+ {RUNE_C(0x000C85), RUNE_C(0x000C8C), WBRK_LE},
+ {RUNE_C(0x000C8E), RUNE_C(0x000C90), WBRK_LE},
+ {RUNE_C(0x000C92), RUNE_C(0x000CA8), WBRK_LE},
+ {RUNE_C(0x000CAA), RUNE_C(0x000CB3), WBRK_LE},
+ {RUNE_C(0x000CB5), RUNE_C(0x000CB9), WBRK_LE},
+ {RUNE_C(0x000CBC), RUNE_C(0x000CBC), WBRK_EXTEND},
+ {RUNE_C(0x000CBD), RUNE_C(0x000CBD), WBRK_LE},
+ {RUNE_C(0x000CBE), RUNE_C(0x000CC4), WBRK_EXTEND},
+ {RUNE_C(0x000CC6), RUNE_C(0x000CC8), WBRK_EXTEND},
+ {RUNE_C(0x000CCA), RUNE_C(0x000CCD), WBRK_EXTEND},
+ {RUNE_C(0x000CD5), RUNE_C(0x000CD6), WBRK_EXTEND},
+ {RUNE_C(0x000CDD), RUNE_C(0x000CDE), WBRK_LE},
+ {RUNE_C(0x000CE0), RUNE_C(0x000CE1), WBRK_LE},
+ {RUNE_C(0x000CE2), RUNE_C(0x000CE3), WBRK_EXTEND},
+ {RUNE_C(0x000CE6), RUNE_C(0x000CEF), WBRK_NU},
+ {RUNE_C(0x000CF1), RUNE_C(0x000CF2), WBRK_LE},
+ {RUNE_C(0x000CF3), RUNE_C(0x000CF3), WBRK_EXTEND},
+ {RUNE_C(0x000D00), RUNE_C(0x000D03), WBRK_EXTEND},
+ {RUNE_C(0x000D04), RUNE_C(0x000D0C), WBRK_LE},
+ {RUNE_C(0x000D0E), RUNE_C(0x000D10), WBRK_LE},
+ {RUNE_C(0x000D12), RUNE_C(0x000D3A), WBRK_LE},
+ {RUNE_C(0x000D3B), RUNE_C(0x000D3C), WBRK_EXTEND},
+ {RUNE_C(0x000D3D), RUNE_C(0x000D3D), WBRK_LE},
+ {RUNE_C(0x000D3E), RUNE_C(0x000D44), WBRK_EXTEND},
+ {RUNE_C(0x000D46), RUNE_C(0x000D48), WBRK_EXTEND},
+ {RUNE_C(0x000D4A), RUNE_C(0x000D4D), WBRK_EXTEND},
+ {RUNE_C(0x000D4E), RUNE_C(0x000D4E), WBRK_LE},
+ {RUNE_C(0x000D54), RUNE_C(0x000D56), WBRK_LE},
+ {RUNE_C(0x000D57), RUNE_C(0x000D57), WBRK_EXTEND},
+ {RUNE_C(0x000D5F), RUNE_C(0x000D61), WBRK_LE},
+ {RUNE_C(0x000D62), RUNE_C(0x000D63), WBRK_EXTEND},
+ {RUNE_C(0x000D66), RUNE_C(0x000D6F), WBRK_NU},
+ {RUNE_C(0x000D7A), RUNE_C(0x000D7F), WBRK_LE},
+ {RUNE_C(0x000D81), RUNE_C(0x000D83), WBRK_EXTEND},
+ {RUNE_C(0x000D85), RUNE_C(0x000D96), WBRK_LE},
+ {RUNE_C(0x000D9A), RUNE_C(0x000DB1), WBRK_LE},
+ {RUNE_C(0x000DB3), RUNE_C(0x000DBB), WBRK_LE},
+ {RUNE_C(0x000DBD), RUNE_C(0x000DBD), WBRK_LE},
+ {RUNE_C(0x000DC0), RUNE_C(0x000DC6), WBRK_LE},
+ {RUNE_C(0x000DCA), RUNE_C(0x000DCA), WBRK_EXTEND},
+ {RUNE_C(0x000DCF), RUNE_C(0x000DD4), WBRK_EXTEND},
+ {RUNE_C(0x000DD6), RUNE_C(0x000DD6), WBRK_EXTEND},
+ {RUNE_C(0x000DD8), RUNE_C(0x000DDF), WBRK_EXTEND},
+ {RUNE_C(0x000DE6), RUNE_C(0x000DEF), WBRK_NU},
+ {RUNE_C(0x000DF2), RUNE_C(0x000DF3), WBRK_EXTEND},
+ {RUNE_C(0x000E31), RUNE_C(0x000E31), WBRK_EXTEND},
+ {RUNE_C(0x000E34), RUNE_C(0x000E3A), WBRK_EXTEND},
+ {RUNE_C(0x000E47), RUNE_C(0x000E4E), WBRK_EXTEND},
+ {RUNE_C(0x000E50), RUNE_C(0x000E59), WBRK_NU},
+ {RUNE_C(0x000EB1), RUNE_C(0x000EB1), WBRK_EXTEND},
+ {RUNE_C(0x000EB4), RUNE_C(0x000EBC), WBRK_EXTEND},
+ {RUNE_C(0x000EC8), RUNE_C(0x000ECE), WBRK_EXTEND},
+ {RUNE_C(0x000ED0), RUNE_C(0x000ED9), WBRK_NU},
+ {RUNE_C(0x000F00), RUNE_C(0x000F00), WBRK_LE},
+ {RUNE_C(0x000F18), RUNE_C(0x000F19), WBRK_EXTEND},
+ {RUNE_C(0x000F20), RUNE_C(0x000F29), WBRK_NU},
+ {RUNE_C(0x000F35), RUNE_C(0x000F35), WBRK_EXTEND},
+ {RUNE_C(0x000F37), RUNE_C(0x000F37), WBRK_EXTEND},
+ {RUNE_C(0x000F39), RUNE_C(0x000F39), WBRK_EXTEND},
+ {RUNE_C(0x000F3E), RUNE_C(0x000F3F), WBRK_EXTEND},
+ {RUNE_C(0x000F40), RUNE_C(0x000F47), WBRK_LE},
+ {RUNE_C(0x000F49), RUNE_C(0x000F6C), WBRK_LE},
+ {RUNE_C(0x000F71), RUNE_C(0x000F84), WBRK_EXTEND},
+ {RUNE_C(0x000F86), RUNE_C(0x000F87), WBRK_EXTEND},
+ {RUNE_C(0x000F88), RUNE_C(0x000F8C), WBRK_LE},
+ {RUNE_C(0x000F8D), RUNE_C(0x000F97), WBRK_EXTEND},
+ {RUNE_C(0x000F99), RUNE_C(0x000FBC), WBRK_EXTEND},
+ {RUNE_C(0x000FC6), RUNE_C(0x000FC6), WBRK_EXTEND},
+ {RUNE_C(0x00102B), RUNE_C(0x00103E), WBRK_EXTEND},
+ {RUNE_C(0x001040), RUNE_C(0x001049), WBRK_NU},
+ {RUNE_C(0x001056), RUNE_C(0x001059), WBRK_EXTEND},
+ {RUNE_C(0x00105E), RUNE_C(0x001060), WBRK_EXTEND},
+ {RUNE_C(0x001062), RUNE_C(0x001064), WBRK_EXTEND},
+ {RUNE_C(0x001067), RUNE_C(0x00106D), WBRK_EXTEND},
+ {RUNE_C(0x001071), RUNE_C(0x001074), WBRK_EXTEND},
+ {RUNE_C(0x001082), RUNE_C(0x00108D), WBRK_EXTEND},
+ {RUNE_C(0x00108F), RUNE_C(0x00108F), WBRK_EXTEND},
+ {RUNE_C(0x001090), RUNE_C(0x001099), WBRK_NU},
+ {RUNE_C(0x00109A), RUNE_C(0x00109D), WBRK_EXTEND},
+ {RUNE_C(0x0010A0), RUNE_C(0x0010C5), WBRK_LE},
+ {RUNE_C(0x0010C7), RUNE_C(0x0010C7), WBRK_LE},
+ {RUNE_C(0x0010CD), RUNE_C(0x0010CD), WBRK_LE},
+ {RUNE_C(0x0010D0), RUNE_C(0x0010FA), WBRK_LE},
+ {RUNE_C(0x0010FC), RUNE_C(0x001248), WBRK_LE},
+ {RUNE_C(0x00124A), RUNE_C(0x00124D), WBRK_LE},
+ {RUNE_C(0x001250), RUNE_C(0x001256), WBRK_LE},
+ {RUNE_C(0x001258), RUNE_C(0x001258), WBRK_LE},
+ {RUNE_C(0x00125A), RUNE_C(0x00125D), WBRK_LE},
+ {RUNE_C(0x001260), RUNE_C(0x001288), WBRK_LE},
+ {RUNE_C(0x00128A), RUNE_C(0x00128D), WBRK_LE},
+ {RUNE_C(0x001290), RUNE_C(0x0012B0), WBRK_LE},
+ {RUNE_C(0x0012B2), RUNE_C(0x0012B5), WBRK_LE},
+ {RUNE_C(0x0012B8), RUNE_C(0x0012BE), WBRK_LE},
+ {RUNE_C(0x0012C0), RUNE_C(0x0012C0), WBRK_LE},
+ {RUNE_C(0x0012C2), RUNE_C(0x0012C5), WBRK_LE},
+ {RUNE_C(0x0012C8), RUNE_C(0x0012D6), WBRK_LE},
+ {RUNE_C(0x0012D8), RUNE_C(0x001310), WBRK_LE},
+ {RUNE_C(0x001312), RUNE_C(0x001315), WBRK_LE},
+ {RUNE_C(0x001318), RUNE_C(0x00135A), WBRK_LE},
+ {RUNE_C(0x00135D), RUNE_C(0x00135F), WBRK_EXTEND},
+ {RUNE_C(0x001380), RUNE_C(0x00138F), WBRK_LE},
+ {RUNE_C(0x0013A0), RUNE_C(0x0013F5), WBRK_LE},
+ {RUNE_C(0x0013F8), RUNE_C(0x0013FD), WBRK_LE},
+ {RUNE_C(0x001401), RUNE_C(0x00166C), WBRK_LE},
+ {RUNE_C(0x00166F), RUNE_C(0x00167F), WBRK_LE},
+ {RUNE_C(0x001680), RUNE_C(0x001680), WBRK_WSEGSPACE},
+ {RUNE_C(0x001681), RUNE_C(0x00169A), WBRK_LE},
+ {RUNE_C(0x0016A0), RUNE_C(0x0016EA), WBRK_LE},
+ {RUNE_C(0x0016EE), RUNE_C(0x0016F8), WBRK_LE},
+ {RUNE_C(0x001700), RUNE_C(0x001711), WBRK_LE},
+ {RUNE_C(0x001712), RUNE_C(0x001715), WBRK_EXTEND},
+ {RUNE_C(0x00171F), RUNE_C(0x001731), WBRK_LE},
+ {RUNE_C(0x001732), RUNE_C(0x001734), WBRK_EXTEND},
+ {RUNE_C(0x001740), RUNE_C(0x001751), WBRK_LE},
+ {RUNE_C(0x001752), RUNE_C(0x001753), WBRK_EXTEND},
+ {RUNE_C(0x001760), RUNE_C(0x00176C), WBRK_LE},
+ {RUNE_C(0x00176E), RUNE_C(0x001770), WBRK_LE},
+ {RUNE_C(0x001772), RUNE_C(0x001773), WBRK_EXTEND},
+ {RUNE_C(0x0017B4), RUNE_C(0x0017D3), WBRK_EXTEND},
+ {RUNE_C(0x0017DD), RUNE_C(0x0017DD), WBRK_EXTEND},
+ {RUNE_C(0x0017E0), RUNE_C(0x0017E9), WBRK_NU},
+ {RUNE_C(0x00180B), RUNE_C(0x00180D), WBRK_EXTEND},
+ {RUNE_C(0x00180E), RUNE_C(0x00180E), WBRK_FO},
+ {RUNE_C(0x00180F), RUNE_C(0x00180F), WBRK_EXTEND},
+ {RUNE_C(0x001810), RUNE_C(0x001819), WBRK_NU},
+ {RUNE_C(0x001820), RUNE_C(0x001878), WBRK_LE},
+ {RUNE_C(0x001880), RUNE_C(0x001884), WBRK_LE},
+ {RUNE_C(0x001885), RUNE_C(0x001886), WBRK_EXTEND},
+ {RUNE_C(0x001887), RUNE_C(0x0018A8), WBRK_LE},
+ {RUNE_C(0x0018A9), RUNE_C(0x0018A9), WBRK_EXTEND},
+ {RUNE_C(0x0018AA), RUNE_C(0x0018AA), WBRK_LE},
+ {RUNE_C(0x0018B0), RUNE_C(0x0018F5), WBRK_LE},
+ {RUNE_C(0x001900), RUNE_C(0x00191E), WBRK_LE},
+ {RUNE_C(0x001920), RUNE_C(0x00192B), WBRK_EXTEND},
+ {RUNE_C(0x001930), RUNE_C(0x00193B), WBRK_EXTEND},
+ {RUNE_C(0x001946), RUNE_C(0x00194F), WBRK_NU},
+ {RUNE_C(0x0019D0), RUNE_C(0x0019D9), WBRK_NU},
+ {RUNE_C(0x001A00), RUNE_C(0x001A16), WBRK_LE},
+ {RUNE_C(0x001A17), RUNE_C(0x001A1B), WBRK_EXTEND},
+ {RUNE_C(0x001A55), RUNE_C(0x001A5E), WBRK_EXTEND},
+ {RUNE_C(0x001A60), RUNE_C(0x001A7C), WBRK_EXTEND},
+ {RUNE_C(0x001A7F), RUNE_C(0x001A7F), WBRK_EXTEND},
+ {RUNE_C(0x001A80), RUNE_C(0x001A89), WBRK_NU},
+ {RUNE_C(0x001A90), RUNE_C(0x001A99), WBRK_NU},
+ {RUNE_C(0x001AB0), RUNE_C(0x001ACE), WBRK_EXTEND},
+ {RUNE_C(0x001B00), RUNE_C(0x001B04), WBRK_EXTEND},
+ {RUNE_C(0x001B05), RUNE_C(0x001B33), WBRK_LE},
+ {RUNE_C(0x001B34), RUNE_C(0x001B44), WBRK_EXTEND},
+ {RUNE_C(0x001B45), RUNE_C(0x001B4C), WBRK_LE},
+ {RUNE_C(0x001B50), RUNE_C(0x001B59), WBRK_NU},
+ {RUNE_C(0x001B6B), RUNE_C(0x001B73), WBRK_EXTEND},
+ {RUNE_C(0x001B80), RUNE_C(0x001B82), WBRK_EXTEND},
+ {RUNE_C(0x001B83), RUNE_C(0x001BA0), WBRK_LE},
+ {RUNE_C(0x001BA1), RUNE_C(0x001BAD), WBRK_EXTEND},
+ {RUNE_C(0x001BAE), RUNE_C(0x001BAF), WBRK_LE},
+ {RUNE_C(0x001BB0), RUNE_C(0x001BB9), WBRK_NU},
+ {RUNE_C(0x001BBA), RUNE_C(0x001BE5), WBRK_LE},
+ {RUNE_C(0x001BE6), RUNE_C(0x001BF3), WBRK_EXTEND},
+ {RUNE_C(0x001C00), RUNE_C(0x001C23), WBRK_LE},
+ {RUNE_C(0x001C24), RUNE_C(0x001C37), WBRK_EXTEND},
+ {RUNE_C(0x001C40), RUNE_C(0x001C49), WBRK_NU},
+ {RUNE_C(0x001C4D), RUNE_C(0x001C4F), WBRK_LE},
+ {RUNE_C(0x001C50), RUNE_C(0x001C59), WBRK_NU},
+ {RUNE_C(0x001C5A), RUNE_C(0x001C7D), WBRK_LE},
+ {RUNE_C(0x001C80), RUNE_C(0x001C88), WBRK_LE},
+ {RUNE_C(0x001C90), RUNE_C(0x001CBA), WBRK_LE},
+ {RUNE_C(0x001CBD), RUNE_C(0x001CBF), WBRK_LE},
+ {RUNE_C(0x001CD0), RUNE_C(0x001CD2), WBRK_EXTEND},
+ {RUNE_C(0x001CD4), RUNE_C(0x001CE8), WBRK_EXTEND},
+ {RUNE_C(0x001CE9), RUNE_C(0x001CEC), WBRK_LE},
+ {RUNE_C(0x001CED), RUNE_C(0x001CED), WBRK_EXTEND},
+ {RUNE_C(0x001CEE), RUNE_C(0x001CF3), WBRK_LE},
+ {RUNE_C(0x001CF4), RUNE_C(0x001CF4), WBRK_EXTEND},
+ {RUNE_C(0x001CF5), RUNE_C(0x001CF6), WBRK_LE},
+ {RUNE_C(0x001CF7), RUNE_C(0x001CF9), WBRK_EXTEND},
+ {RUNE_C(0x001CFA), RUNE_C(0x001CFA), WBRK_LE},
+ {RUNE_C(0x001D00), RUNE_C(0x001DBF), WBRK_LE},
+ {RUNE_C(0x001DC0), RUNE_C(0x001DFF), WBRK_EXTEND},
+ {RUNE_C(0x001E00), RUNE_C(0x001F15), WBRK_LE},
+ {RUNE_C(0x001F18), RUNE_C(0x001F1D), WBRK_LE},
+ {RUNE_C(0x001F20), RUNE_C(0x001F45), WBRK_LE},
+ {RUNE_C(0x001F48), RUNE_C(0x001F4D), WBRK_LE},
+ {RUNE_C(0x001F50), RUNE_C(0x001F57), WBRK_LE},
+ {RUNE_C(0x001F59), RUNE_C(0x001F59), WBRK_LE},
+ {RUNE_C(0x001F5B), RUNE_C(0x001F5B), WBRK_LE},
+ {RUNE_C(0x001F5D), RUNE_C(0x001F5D), WBRK_LE},
+ {RUNE_C(0x001F5F), RUNE_C(0x001F7D), WBRK_LE},
+ {RUNE_C(0x001F80), RUNE_C(0x001FB4), WBRK_LE},
+ {RUNE_C(0x001FB6), RUNE_C(0x001FBC), WBRK_LE},
+ {RUNE_C(0x001FBE), RUNE_C(0x001FBE), WBRK_LE},
+ {RUNE_C(0x001FC2), RUNE_C(0x001FC4), WBRK_LE},
+ {RUNE_C(0x001FC6), RUNE_C(0x001FCC), WBRK_LE},
+ {RUNE_C(0x001FD0), RUNE_C(0x001FD3), WBRK_LE},
+ {RUNE_C(0x001FD6), RUNE_C(0x001FDB), WBRK_LE},
+ {RUNE_C(0x001FE0), RUNE_C(0x001FEC), WBRK_LE},
+ {RUNE_C(0x001FF2), RUNE_C(0x001FF4), WBRK_LE},
+ {RUNE_C(0x001FF6), RUNE_C(0x001FFC), WBRK_LE},
+ {RUNE_C(0x002000), RUNE_C(0x002006), WBRK_WSEGSPACE},
+ {RUNE_C(0x002008), RUNE_C(0x00200A), WBRK_WSEGSPACE},
+ {RUNE_C(0x00200C), RUNE_C(0x00200C), WBRK_EXTEND},
+ {RUNE_C(0x00200D), RUNE_C(0x00200D), WBRK_ZWJ},
+ {RUNE_C(0x00200E), RUNE_C(0x00200F), WBRK_FO},
+ {RUNE_C(0x002018), RUNE_C(0x002019), WBRK_MB},
+ {RUNE_C(0x002024), RUNE_C(0x002024), WBRK_MB},
+ {RUNE_C(0x002027), RUNE_C(0x002027), WBRK_ML},
+ {RUNE_C(0x002028), RUNE_C(0x002029), WBRK_NL},
+ {RUNE_C(0x00202A), RUNE_C(0x00202E), WBRK_FO},
+ {RUNE_C(0x00202F), RUNE_C(0x00202F), WBRK_EX},
+ {RUNE_C(0x00203C), RUNE_C(0x00203C), WBRK_EXTPICT},
+ {RUNE_C(0x00203F), RUNE_C(0x002040), WBRK_EX},
+ {RUNE_C(0x002044), RUNE_C(0x002044), WBRK_MN},
+ {RUNE_C(0x002049), RUNE_C(0x002049), WBRK_EXTPICT},
+ {RUNE_C(0x002054), RUNE_C(0x002054), WBRK_EX},
+ {RUNE_C(0x00205F), RUNE_C(0x00205F), WBRK_WSEGSPACE},
+ {RUNE_C(0x002060), RUNE_C(0x002064), WBRK_FO},
+ {RUNE_C(0x002066), RUNE_C(0x00206F), WBRK_FO},
+ {RUNE_C(0x002071), RUNE_C(0x002071), WBRK_LE},
+ {RUNE_C(0x00207F), RUNE_C(0x00207F), WBRK_LE},
+ {RUNE_C(0x002090), RUNE_C(0x00209C), WBRK_LE},
+ {RUNE_C(0x0020D0), RUNE_C(0x0020F0), WBRK_EXTEND},
+ {RUNE_C(0x002102), RUNE_C(0x002102), WBRK_LE},
+ {RUNE_C(0x002107), RUNE_C(0x002107), WBRK_LE},
+ {RUNE_C(0x00210A), RUNE_C(0x002113), WBRK_LE},
+ {RUNE_C(0x002115), RUNE_C(0x002115), WBRK_LE},
+ {RUNE_C(0x002119), RUNE_C(0x00211D), WBRK_LE},
+ {RUNE_C(0x002122), RUNE_C(0x002122), WBRK_EXTPICT},
+ {RUNE_C(0x002124), RUNE_C(0x002124), WBRK_LE},
+ {RUNE_C(0x002126), RUNE_C(0x002126), WBRK_LE},
+ {RUNE_C(0x002128), RUNE_C(0x002128), WBRK_LE},
+ {RUNE_C(0x00212A), RUNE_C(0x00212D), WBRK_LE},
+ {RUNE_C(0x00212F), RUNE_C(0x002138), WBRK_LE},
+ {RUNE_C(0x002139), RUNE_C(0x002139), WBRK_EXTPICT_LE},
+ {RUNE_C(0x00213C), RUNE_C(0x00213F), WBRK_LE},
+ {RUNE_C(0x002145), RUNE_C(0x002149), WBRK_LE},
+ {RUNE_C(0x00214E), RUNE_C(0x00214E), WBRK_LE},
+ {RUNE_C(0x002160), RUNE_C(0x002188), WBRK_LE},
+ {RUNE_C(0x002194), RUNE_C(0x002199), WBRK_EXTPICT},
+ {RUNE_C(0x0021A9), RUNE_C(0x0021AA), WBRK_EXTPICT},
+ {RUNE_C(0x00231A), RUNE_C(0x00231B), WBRK_EXTPICT},
+ {RUNE_C(0x002328), RUNE_C(0x002328), WBRK_EXTPICT},
+ {RUNE_C(0x002388), RUNE_C(0x002388), WBRK_EXTPICT},
+ {RUNE_C(0x0023CF), RUNE_C(0x0023CF), WBRK_EXTPICT},
+ {RUNE_C(0x0023E9), RUNE_C(0x0023F3), WBRK_EXTPICT},
+ {RUNE_C(0x0023F8), RUNE_C(0x0023FA), WBRK_EXTPICT},
+ {RUNE_C(0x0024B6), RUNE_C(0x0024C1), WBRK_LE},
+ {RUNE_C(0x0024C2), RUNE_C(0x0024C2), WBRK_EXTPICT_LE},
+ {RUNE_C(0x0024C3), RUNE_C(0x0024E9), WBRK_LE},
+ {RUNE_C(0x0025AA), RUNE_C(0x0025AB), WBRK_EXTPICT},
+ {RUNE_C(0x0025B6), RUNE_C(0x0025B6), WBRK_EXTPICT},
+ {RUNE_C(0x0025C0), RUNE_C(0x0025C0), WBRK_EXTPICT},
+ {RUNE_C(0x0025FB), RUNE_C(0x0025FE), WBRK_EXTPICT},
+ {RUNE_C(0x002600), RUNE_C(0x002605), WBRK_EXTPICT},
+ {RUNE_C(0x002607), RUNE_C(0x002612), WBRK_EXTPICT},
+ {RUNE_C(0x002614), RUNE_C(0x002685), WBRK_EXTPICT},
+ {RUNE_C(0x002690), RUNE_C(0x002705), WBRK_EXTPICT},
+ {RUNE_C(0x002708), RUNE_C(0x002712), WBRK_EXTPICT},
+ {RUNE_C(0x002714), RUNE_C(0x002714), WBRK_EXTPICT},
+ {RUNE_C(0x002716), RUNE_C(0x002716), WBRK_EXTPICT},
+ {RUNE_C(0x00271D), RUNE_C(0x00271D), WBRK_EXTPICT},
+ {RUNE_C(0x002721), RUNE_C(0x002721), WBRK_EXTPICT},
+ {RUNE_C(0x002728), RUNE_C(0x002728), WBRK_EXTPICT},
+ {RUNE_C(0x002733), RUNE_C(0x002734), WBRK_EXTPICT},
+ {RUNE_C(0x002744), RUNE_C(0x002744), WBRK_EXTPICT},
+ {RUNE_C(0x002747), RUNE_C(0x002747), WBRK_EXTPICT},
+ {RUNE_C(0x00274C), RUNE_C(0x00274C), WBRK_EXTPICT},
+ {RUNE_C(0x00274E), RUNE_C(0x00274E), WBRK_EXTPICT},
+ {RUNE_C(0x002753), RUNE_C(0x002755), WBRK_EXTPICT},
+ {RUNE_C(0x002757), RUNE_C(0x002757), WBRK_EXTPICT},
+ {RUNE_C(0x002763), RUNE_C(0x002767), WBRK_EXTPICT},
+ {RUNE_C(0x002795), RUNE_C(0x002797), WBRK_EXTPICT},
+ {RUNE_C(0x0027A1), RUNE_C(0x0027A1), WBRK_EXTPICT},
+ {RUNE_C(0x0027B0), RUNE_C(0x0027B0), WBRK_EXTPICT},
+ {RUNE_C(0x0027BF), RUNE_C(0x0027BF), WBRK_EXTPICT},
+ {RUNE_C(0x002934), RUNE_C(0x002935), WBRK_EXTPICT},
+ {RUNE_C(0x002B05), RUNE_C(0x002B07), WBRK_EXTPICT},
+ {RUNE_C(0x002B1B), RUNE_C(0x002B1C), WBRK_EXTPICT},
+ {RUNE_C(0x002B50), RUNE_C(0x002B50), WBRK_EXTPICT},
+ {RUNE_C(0x002B55), RUNE_C(0x002B55), WBRK_EXTPICT},
+ {RUNE_C(0x002C00), RUNE_C(0x002CE4), WBRK_LE},
+ {RUNE_C(0x002CEB), RUNE_C(0x002CEE), WBRK_LE},
+ {RUNE_C(0x002CEF), RUNE_C(0x002CF1), WBRK_EXTEND},
+ {RUNE_C(0x002CF2), RUNE_C(0x002CF3), WBRK_LE},
+ {RUNE_C(0x002D00), RUNE_C(0x002D25), WBRK_LE},
+ {RUNE_C(0x002D27), RUNE_C(0x002D27), WBRK_LE},
+ {RUNE_C(0x002D2D), RUNE_C(0x002D2D), WBRK_LE},
+ {RUNE_C(0x002D30), RUNE_C(0x002D67), WBRK_LE},
+ {RUNE_C(0x002D6F), RUNE_C(0x002D6F), WBRK_LE},
+ {RUNE_C(0x002D7F), RUNE_C(0x002D7F), WBRK_EXTEND},
+ {RUNE_C(0x002D80), RUNE_C(0x002D96), WBRK_LE},
+ {RUNE_C(0x002DA0), RUNE_C(0x002DA6), WBRK_LE},
+ {RUNE_C(0x002DA8), RUNE_C(0x002DAE), WBRK_LE},
+ {RUNE_C(0x002DB0), RUNE_C(0x002DB6), WBRK_LE},
+ {RUNE_C(0x002DB8), RUNE_C(0x002DBE), WBRK_LE},
+ {RUNE_C(0x002DC0), RUNE_C(0x002DC6), WBRK_LE},
+ {RUNE_C(0x002DC8), RUNE_C(0x002DCE), WBRK_LE},
+ {RUNE_C(0x002DD0), RUNE_C(0x002DD6), WBRK_LE},
+ {RUNE_C(0x002DD8), RUNE_C(0x002DDE), WBRK_LE},
+ {RUNE_C(0x002DE0), RUNE_C(0x002DFF), WBRK_EXTEND},
+ {RUNE_C(0x002E2F), RUNE_C(0x002E2F), WBRK_LE},
+ {RUNE_C(0x003000), RUNE_C(0x003000), WBRK_WSEGSPACE},
+ {RUNE_C(0x003005), RUNE_C(0x003005), WBRK_LE},
+ {RUNE_C(0x00302A), RUNE_C(0x00302F), WBRK_EXTEND},
+ {RUNE_C(0x003030), RUNE_C(0x003030), WBRK_EXTPICT},
+ {RUNE_C(0x003031), RUNE_C(0x003035), WBRK_KA},
+ {RUNE_C(0x00303B), RUNE_C(0x00303C), WBRK_LE},
+ {RUNE_C(0x00303D), RUNE_C(0x00303D), WBRK_EXTPICT},
+ {RUNE_C(0x003099), RUNE_C(0x00309A), WBRK_EXTEND},
+ {RUNE_C(0x00309B), RUNE_C(0x00309C), WBRK_KA},
+ {RUNE_C(0x0030A0), RUNE_C(0x0030FA), WBRK_KA},
+ {RUNE_C(0x0030FC), RUNE_C(0x0030FF), WBRK_KA},
+ {RUNE_C(0x003105), RUNE_C(0x00312F), WBRK_LE},
+ {RUNE_C(0x003131), RUNE_C(0x00318E), WBRK_LE},
+ {RUNE_C(0x0031A0), RUNE_C(0x0031BF), WBRK_LE},
+ {RUNE_C(0x0031F0), RUNE_C(0x0031FF), WBRK_KA},
+ {RUNE_C(0x003297), RUNE_C(0x003297), WBRK_EXTPICT},
+ {RUNE_C(0x003299), RUNE_C(0x003299), WBRK_EXTPICT},
+ {RUNE_C(0x0032D0), RUNE_C(0x0032FE), WBRK_KA},
+ {RUNE_C(0x003300), RUNE_C(0x003357), WBRK_KA},
+ {RUNE_C(0x00A000), RUNE_C(0x00A48C), WBRK_LE},
+ {RUNE_C(0x00A4D0), RUNE_C(0x00A4FD), WBRK_LE},
+ {RUNE_C(0x00A500), RUNE_C(0x00A60C), WBRK_LE},
+ {RUNE_C(0x00A610), RUNE_C(0x00A61F), WBRK_LE},
+ {RUNE_C(0x00A620), RUNE_C(0x00A629), WBRK_NU},
+ {RUNE_C(0x00A62A), RUNE_C(0x00A62B), WBRK_LE},
+ {RUNE_C(0x00A640), RUNE_C(0x00A66E), WBRK_LE},
+ {RUNE_C(0x00A66F), RUNE_C(0x00A672), WBRK_EXTEND},
+ {RUNE_C(0x00A674), RUNE_C(0x00A67D), WBRK_EXTEND},
+ {RUNE_C(0x00A67F), RUNE_C(0x00A69D), WBRK_LE},
+ {RUNE_C(0x00A69E), RUNE_C(0x00A69F), WBRK_EXTEND},
+ {RUNE_C(0x00A6A0), RUNE_C(0x00A6EF), WBRK_LE},
+ {RUNE_C(0x00A6F0), RUNE_C(0x00A6F1), WBRK_EXTEND},
+ {RUNE_C(0x00A708), RUNE_C(0x00A7CA), WBRK_LE},
+ {RUNE_C(0x00A7D0), RUNE_C(0x00A7D1), WBRK_LE},
+ {RUNE_C(0x00A7D3), RUNE_C(0x00A7D3), WBRK_LE},
+ {RUNE_C(0x00A7D5), RUNE_C(0x00A7D9), WBRK_LE},
+ {RUNE_C(0x00A7F2), RUNE_C(0x00A801), WBRK_LE},
+ {RUNE_C(0x00A802), RUNE_C(0x00A802), WBRK_EXTEND},
+ {RUNE_C(0x00A803), RUNE_C(0x00A805), WBRK_LE},
+ {RUNE_C(0x00A806), RUNE_C(0x00A806), WBRK_EXTEND},
+ {RUNE_C(0x00A807), RUNE_C(0x00A80A), WBRK_LE},
+ {RUNE_C(0x00A80B), RUNE_C(0x00A80B), WBRK_EXTEND},
+ {RUNE_C(0x00A80C), RUNE_C(0x00A822), WBRK_LE},
+ {RUNE_C(0x00A823), RUNE_C(0x00A827), WBRK_EXTEND},
+ {RUNE_C(0x00A82C), RUNE_C(0x00A82C), WBRK_EXTEND},
+ {RUNE_C(0x00A840), RUNE_C(0x00A873), WBRK_LE},
+ {RUNE_C(0x00A880), RUNE_C(0x00A881), WBRK_EXTEND},
+ {RUNE_C(0x00A882), RUNE_C(0x00A8B3), WBRK_LE},
+ {RUNE_C(0x00A8B4), RUNE_C(0x00A8C5), WBRK_EXTEND},
+ {RUNE_C(0x00A8D0), RUNE_C(0x00A8D9), WBRK_NU},
+ {RUNE_C(0x00A8E0), RUNE_C(0x00A8F1), WBRK_EXTEND},
+ {RUNE_C(0x00A8F2), RUNE_C(0x00A8F7), WBRK_LE},
+ {RUNE_C(0x00A8FB), RUNE_C(0x00A8FB), WBRK_LE},
+ {RUNE_C(0x00A8FD), RUNE_C(0x00A8FE), WBRK_LE},
+ {RUNE_C(0x00A8FF), RUNE_C(0x00A8FF), WBRK_EXTEND},
+ {RUNE_C(0x00A900), RUNE_C(0x00A909), WBRK_NU},
+ {RUNE_C(0x00A90A), RUNE_C(0x00A925), WBRK_LE},
+ {RUNE_C(0x00A926), RUNE_C(0x00A92D), WBRK_EXTEND},
+ {RUNE_C(0x00A930), RUNE_C(0x00A946), WBRK_LE},
+ {RUNE_C(0x00A947), RUNE_C(0x00A953), WBRK_EXTEND},
+ {RUNE_C(0x00A960), RUNE_C(0x00A97C), WBRK_LE},
+ {RUNE_C(0x00A980), RUNE_C(0x00A983), WBRK_EXTEND},
+ {RUNE_C(0x00A984), RUNE_C(0x00A9B2), WBRK_LE},
+ {RUNE_C(0x00A9B3), RUNE_C(0x00A9C0), WBRK_EXTEND},
+ {RUNE_C(0x00A9CF), RUNE_C(0x00A9CF), WBRK_LE},
+ {RUNE_C(0x00A9D0), RUNE_C(0x00A9D9), WBRK_NU},
+ {RUNE_C(0x00A9E5), RUNE_C(0x00A9E5), WBRK_EXTEND},
+ {RUNE_C(0x00A9F0), RUNE_C(0x00A9F9), WBRK_NU},
+ {RUNE_C(0x00AA00), RUNE_C(0x00AA28), WBRK_LE},
+ {RUNE_C(0x00AA29), RUNE_C(0x00AA36), WBRK_EXTEND},
+ {RUNE_C(0x00AA40), RUNE_C(0x00AA42), WBRK_LE},
+ {RUNE_C(0x00AA43), RUNE_C(0x00AA43), WBRK_EXTEND},
+ {RUNE_C(0x00AA44), RUNE_C(0x00AA4B), WBRK_LE},
+ {RUNE_C(0x00AA4C), RUNE_C(0x00AA4D), WBRK_EXTEND},
+ {RUNE_C(0x00AA50), RUNE_C(0x00AA59), WBRK_NU},
+ {RUNE_C(0x00AA7B), RUNE_C(0x00AA7D), WBRK_EXTEND},
+ {RUNE_C(0x00AAB0), RUNE_C(0x00AAB0), WBRK_EXTEND},
+ {RUNE_C(0x00AAB2), RUNE_C(0x00AAB4), WBRK_EXTEND},
+ {RUNE_C(0x00AAB7), RUNE_C(0x00AAB8), WBRK_EXTEND},
+ {RUNE_C(0x00AABE), RUNE_C(0x00AABF), WBRK_EXTEND},
+ {RUNE_C(0x00AAC1), RUNE_C(0x00AAC1), WBRK_EXTEND},
+ {RUNE_C(0x00AAE0), RUNE_C(0x00AAEA), WBRK_LE},
+ {RUNE_C(0x00AAEB), RUNE_C(0x00AAEF), WBRK_EXTEND},
+ {RUNE_C(0x00AAF2), RUNE_C(0x00AAF4), WBRK_LE},
+ {RUNE_C(0x00AAF5), RUNE_C(0x00AAF6), WBRK_EXTEND},
+ {RUNE_C(0x00AB01), RUNE_C(0x00AB06), WBRK_LE},
+ {RUNE_C(0x00AB09), RUNE_C(0x00AB0E), WBRK_LE},
+ {RUNE_C(0x00AB11), RUNE_C(0x00AB16), WBRK_LE},
+ {RUNE_C(0x00AB20), RUNE_C(0x00AB26), WBRK_LE},
+ {RUNE_C(0x00AB28), RUNE_C(0x00AB2E), WBRK_LE},
+ {RUNE_C(0x00AB30), RUNE_C(0x00AB69), WBRK_LE},
+ {RUNE_C(0x00AB70), RUNE_C(0x00ABE2), WBRK_LE},
+ {RUNE_C(0x00ABE3), RUNE_C(0x00ABEA), WBRK_EXTEND},
+ {RUNE_C(0x00ABEC), RUNE_C(0x00ABED), WBRK_EXTEND},
+ {RUNE_C(0x00ABF0), RUNE_C(0x00ABF9), WBRK_NU},
+ {RUNE_C(0x00AC00), RUNE_C(0x00D7A3), WBRK_LE},
+ {RUNE_C(0x00D7B0), RUNE_C(0x00D7C6), WBRK_LE},
+ {RUNE_C(0x00D7CB), RUNE_C(0x00D7FB), WBRK_LE},
+ {RUNE_C(0x00FB00), RUNE_C(0x00FB06), WBRK_LE},
+ {RUNE_C(0x00FB13), RUNE_C(0x00FB17), WBRK_LE},
+ {RUNE_C(0x00FB1D), RUNE_C(0x00FB1D), WBRK_HL},
+ {RUNE_C(0x00FB1E), RUNE_C(0x00FB1E), WBRK_EXTEND},
+ {RUNE_C(0x00FB1F), RUNE_C(0x00FB28), WBRK_HL},
+ {RUNE_C(0x00FB2A), RUNE_C(0x00FB36), WBRK_HL},
+ {RUNE_C(0x00FB38), RUNE_C(0x00FB3C), WBRK_HL},
+ {RUNE_C(0x00FB3E), RUNE_C(0x00FB3E), WBRK_HL},
+ {RUNE_C(0x00FB40), RUNE_C(0x00FB41), WBRK_HL},
+ {RUNE_C(0x00FB43), RUNE_C(0x00FB44), WBRK_HL},
+ {RUNE_C(0x00FB46), RUNE_C(0x00FB4F), WBRK_HL},
+ {RUNE_C(0x00FB50), RUNE_C(0x00FBB1), WBRK_LE},
+ {RUNE_C(0x00FBD3), RUNE_C(0x00FD3D), WBRK_LE},
+ {RUNE_C(0x00FD50), RUNE_C(0x00FD8F), WBRK_LE},
+ {RUNE_C(0x00FD92), RUNE_C(0x00FDC7), WBRK_LE},
+ {RUNE_C(0x00FDF0), RUNE_C(0x00FDFB), WBRK_LE},
+ {RUNE_C(0x00FE00), RUNE_C(0x00FE0F), WBRK_EXTEND},
+ {RUNE_C(0x00FE10), RUNE_C(0x00FE10), WBRK_MN},
+ {RUNE_C(0x00FE13), RUNE_C(0x00FE13), WBRK_ML},
+ {RUNE_C(0x00FE14), RUNE_C(0x00FE14), WBRK_MN},
+ {RUNE_C(0x00FE20), RUNE_C(0x00FE2F), WBRK_EXTEND},
+ {RUNE_C(0x00FE33), RUNE_C(0x00FE34), WBRK_EX},
+ {RUNE_C(0x00FE4D), RUNE_C(0x00FE4F), WBRK_EX},
+ {RUNE_C(0x00FE50), RUNE_C(0x00FE50), WBRK_MN},
+ {RUNE_C(0x00FE52), RUNE_C(0x00FE52), WBRK_MB},
+ {RUNE_C(0x00FE54), RUNE_C(0x00FE54), WBRK_MN},
+ {RUNE_C(0x00FE55), RUNE_C(0x00FE55), WBRK_ML},
+ {RUNE_C(0x00FE70), RUNE_C(0x00FE74), WBRK_LE},
+ {RUNE_C(0x00FE76), RUNE_C(0x00FEFC), WBRK_LE},
+ {RUNE_C(0x00FEFF), RUNE_C(0x00FEFF), WBRK_FO},
+ {RUNE_C(0x00FF07), RUNE_C(0x00FF07), WBRK_MB},
+ {RUNE_C(0x00FF0C), RUNE_C(0x00FF0C), WBRK_MN},
+ {RUNE_C(0x00FF0E), RUNE_C(0x00FF0E), WBRK_MB},
+ {RUNE_C(0x00FF10), RUNE_C(0x00FF19), WBRK_NU},
+ {RUNE_C(0x00FF1A), RUNE_C(0x00FF1A), WBRK_ML},
+ {RUNE_C(0x00FF1B), RUNE_C(0x00FF1B), WBRK_MN},
+ {RUNE_C(0x00FF21), RUNE_C(0x00FF3A), WBRK_LE},
+ {RUNE_C(0x00FF3F), RUNE_C(0x00FF3F), WBRK_EX},
+ {RUNE_C(0x00FF41), RUNE_C(0x00FF5A), WBRK_LE},
+ {RUNE_C(0x00FF66), RUNE_C(0x00FF9D), WBRK_KA},
+ {RUNE_C(0x00FF9E), RUNE_C(0x00FF9F), WBRK_EXTEND},
+ {RUNE_C(0x00FFA0), RUNE_C(0x00FFBE), WBRK_LE},
+ {RUNE_C(0x00FFC2), RUNE_C(0x00FFC7), WBRK_LE},
+ {RUNE_C(0x00FFCA), RUNE_C(0x00FFCF), WBRK_LE},
+ {RUNE_C(0x00FFD2), RUNE_C(0x00FFD7), WBRK_LE},
+ {RUNE_C(0x00FFDA), RUNE_C(0x00FFDC), WBRK_LE},
+ {RUNE_C(0x00FFF9), RUNE_C(0x00FFFB), WBRK_FO},
+ {RUNE_C(0x010000), RUNE_C(0x01000B), WBRK_LE},
+ {RUNE_C(0x01000D), RUNE_C(0x010026), WBRK_LE},
+ {RUNE_C(0x010028), RUNE_C(0x01003A), WBRK_LE},
+ {RUNE_C(0x01003C), RUNE_C(0x01003D), WBRK_LE},
+ {RUNE_C(0x01003F), RUNE_C(0x01004D), WBRK_LE},
+ {RUNE_C(0x010050), RUNE_C(0x01005D), WBRK_LE},
+ {RUNE_C(0x010080), RUNE_C(0x0100FA), WBRK_LE},
+ {RUNE_C(0x010140), RUNE_C(0x010174), WBRK_LE},
+ {RUNE_C(0x0101FD), RUNE_C(0x0101FD), WBRK_EXTEND},
+ {RUNE_C(0x010280), RUNE_C(0x01029C), WBRK_LE},
+ {RUNE_C(0x0102A0), RUNE_C(0x0102D0), WBRK_LE},
+ {RUNE_C(0x0102E0), RUNE_C(0x0102E0), WBRK_EXTEND},
+ {RUNE_C(0x010300), RUNE_C(0x01031F), WBRK_LE},
+ {RUNE_C(0x01032D), RUNE_C(0x01034A), WBRK_LE},
+ {RUNE_C(0x010350), RUNE_C(0x010375), WBRK_LE},
+ {RUNE_C(0x010376), RUNE_C(0x01037A), WBRK_EXTEND},
+ {RUNE_C(0x010380), RUNE_C(0x01039D), WBRK_LE},
+ {RUNE_C(0x0103A0), RUNE_C(0x0103C3), WBRK_LE},
+ {RUNE_C(0x0103C8), RUNE_C(0x0103CF), WBRK_LE},
+ {RUNE_C(0x0103D1), RUNE_C(0x0103D5), WBRK_LE},
+ {RUNE_C(0x010400), RUNE_C(0x01049D), WBRK_LE},
+ {RUNE_C(0x0104A0), RUNE_C(0x0104A9), WBRK_NU},
+ {RUNE_C(0x0104B0), RUNE_C(0x0104D3), WBRK_LE},
+ {RUNE_C(0x0104D8), RUNE_C(0x0104FB), WBRK_LE},
+ {RUNE_C(0x010500), RUNE_C(0x010527), WBRK_LE},
+ {RUNE_C(0x010530), RUNE_C(0x010563), WBRK_LE},
+ {RUNE_C(0x010570), RUNE_C(0x01057A), WBRK_LE},
+ {RUNE_C(0x01057C), RUNE_C(0x01058A), WBRK_LE},
+ {RUNE_C(0x01058C), RUNE_C(0x010592), WBRK_LE},
+ {RUNE_C(0x010594), RUNE_C(0x010595), WBRK_LE},
+ {RUNE_C(0x010597), RUNE_C(0x0105A1), WBRK_LE},
+ {RUNE_C(0x0105A3), RUNE_C(0x0105B1), WBRK_LE},
+ {RUNE_C(0x0105B3), RUNE_C(0x0105B9), WBRK_LE},
+ {RUNE_C(0x0105BB), RUNE_C(0x0105BC), WBRK_LE},
+ {RUNE_C(0x010600), RUNE_C(0x010736), WBRK_LE},
+ {RUNE_C(0x010740), RUNE_C(0x010755), WBRK_LE},
+ {RUNE_C(0x010760), RUNE_C(0x010767), WBRK_LE},
+ {RUNE_C(0x010780), RUNE_C(0x010785), WBRK_LE},
+ {RUNE_C(0x010787), RUNE_C(0x0107B0), WBRK_LE},
+ {RUNE_C(0x0107B2), RUNE_C(0x0107BA), WBRK_LE},
+ {RUNE_C(0x010800), RUNE_C(0x010805), WBRK_LE},
+ {RUNE_C(0x010808), RUNE_C(0x010808), WBRK_LE},
+ {RUNE_C(0x01080A), RUNE_C(0x010835), WBRK_LE},
+ {RUNE_C(0x010837), RUNE_C(0x010838), WBRK_LE},
+ {RUNE_C(0x01083C), RUNE_C(0x01083C), WBRK_LE},
+ {RUNE_C(0x01083F), RUNE_C(0x010855), WBRK_LE},
+ {RUNE_C(0x010860), RUNE_C(0x010876), WBRK_LE},
+ {RUNE_C(0x010880), RUNE_C(0x01089E), WBRK_LE},
+ {RUNE_C(0x0108E0), RUNE_C(0x0108F2), WBRK_LE},
+ {RUNE_C(0x0108F4), RUNE_C(0x0108F5), WBRK_LE},
+ {RUNE_C(0x010900), RUNE_C(0x010915), WBRK_LE},
+ {RUNE_C(0x010920), RUNE_C(0x010939), WBRK_LE},
+ {RUNE_C(0x010980), RUNE_C(0x0109B7), WBRK_LE},
+ {RUNE_C(0x0109BE), RUNE_C(0x0109BF), WBRK_LE},
+ {RUNE_C(0x010A00), RUNE_C(0x010A00), WBRK_LE},
+ {RUNE_C(0x010A01), RUNE_C(0x010A03), WBRK_EXTEND},
+ {RUNE_C(0x010A05), RUNE_C(0x010A06), WBRK_EXTEND},
+ {RUNE_C(0x010A0C), RUNE_C(0x010A0F), WBRK_EXTEND},
+ {RUNE_C(0x010A10), RUNE_C(0x010A13), WBRK_LE},
+ {RUNE_C(0x010A15), RUNE_C(0x010A17), WBRK_LE},
+ {RUNE_C(0x010A19), RUNE_C(0x010A35), WBRK_LE},
+ {RUNE_C(0x010A38), RUNE_C(0x010A3A), WBRK_EXTEND},
+ {RUNE_C(0x010A3F), RUNE_C(0x010A3F), WBRK_EXTEND},
+ {RUNE_C(0x010A60), RUNE_C(0x010A7C), WBRK_LE},
+ {RUNE_C(0x010A80), RUNE_C(0x010A9C), WBRK_LE},
+ {RUNE_C(0x010AC0), RUNE_C(0x010AC7), WBRK_LE},
+ {RUNE_C(0x010AC9), RUNE_C(0x010AE4), WBRK_LE},
+ {RUNE_C(0x010AE5), RUNE_C(0x010AE6), WBRK_EXTEND},
+ {RUNE_C(0x010B00), RUNE_C(0x010B35), WBRK_LE},
+ {RUNE_C(0x010B40), RUNE_C(0x010B55), WBRK_LE},
+ {RUNE_C(0x010B60), RUNE_C(0x010B72), WBRK_LE},
+ {RUNE_C(0x010B80), RUNE_C(0x010B91), WBRK_LE},
+ {RUNE_C(0x010C00), RUNE_C(0x010C48), WBRK_LE},
+ {RUNE_C(0x010C80), RUNE_C(0x010CB2), WBRK_LE},
+ {RUNE_C(0x010CC0), RUNE_C(0x010CF2), WBRK_LE},
+ {RUNE_C(0x010D00), RUNE_C(0x010D23), WBRK_LE},
+ {RUNE_C(0x010D24), RUNE_C(0x010D27), WBRK_EXTEND},
+ {RUNE_C(0x010D30), RUNE_C(0x010D39), WBRK_NU},
+ {RUNE_C(0x010E80), RUNE_C(0x010EA9), WBRK_LE},
+ {RUNE_C(0x010EAB), RUNE_C(0x010EAC), WBRK_EXTEND},
+ {RUNE_C(0x010EB0), RUNE_C(0x010EB1), WBRK_LE},
+ {RUNE_C(0x010EFD), RUNE_C(0x010EFF), WBRK_EXTEND},
+ {RUNE_C(0x010F00), RUNE_C(0x010F1C), WBRK_LE},
+ {RUNE_C(0x010F27), RUNE_C(0x010F27), WBRK_LE},
+ {RUNE_C(0x010F30), RUNE_C(0x010F45), WBRK_LE},
+ {RUNE_C(0x010F46), RUNE_C(0x010F50), WBRK_EXTEND},
+ {RUNE_C(0x010F70), RUNE_C(0x010F81), WBRK_LE},
+ {RUNE_C(0x010F82), RUNE_C(0x010F85), WBRK_EXTEND},
+ {RUNE_C(0x010FB0), RUNE_C(0x010FC4), WBRK_LE},
+ {RUNE_C(0x010FE0), RUNE_C(0x010FF6), WBRK_LE},
+ {RUNE_C(0x011000), RUNE_C(0x011002), WBRK_EXTEND},
+ {RUNE_C(0x011003), RUNE_C(0x011037), WBRK_LE},
+ {RUNE_C(0x011038), RUNE_C(0x011046), WBRK_EXTEND},
+ {RUNE_C(0x011066), RUNE_C(0x01106F), WBRK_NU},
+ {RUNE_C(0x011070), RUNE_C(0x011070), WBRK_EXTEND},
+ {RUNE_C(0x011071), RUNE_C(0x011072), WBRK_LE},
+ {RUNE_C(0x011073), RUNE_C(0x011074), WBRK_EXTEND},
+ {RUNE_C(0x011075), RUNE_C(0x011075), WBRK_LE},
+ {RUNE_C(0x01107F), RUNE_C(0x011082), WBRK_EXTEND},
+ {RUNE_C(0x011083), RUNE_C(0x0110AF), WBRK_LE},
+ {RUNE_C(0x0110B0), RUNE_C(0x0110BA), WBRK_EXTEND},
+ {RUNE_C(0x0110BD), RUNE_C(0x0110BD), WBRK_NU},
+ {RUNE_C(0x0110C2), RUNE_C(0x0110C2), WBRK_EXTEND},
+ {RUNE_C(0x0110CD), RUNE_C(0x0110CD), WBRK_NU},
+ {RUNE_C(0x0110D0), RUNE_C(0x0110E8), WBRK_LE},
+ {RUNE_C(0x0110F0), RUNE_C(0x0110F9), WBRK_NU},
+ {RUNE_C(0x011100), RUNE_C(0x011102), WBRK_EXTEND},
+ {RUNE_C(0x011103), RUNE_C(0x011126), WBRK_LE},
+ {RUNE_C(0x011127), RUNE_C(0x011134), WBRK_EXTEND},
+ {RUNE_C(0x011136), RUNE_C(0x01113F), WBRK_NU},
+ {RUNE_C(0x011144), RUNE_C(0x011144), WBRK_LE},
+ {RUNE_C(0x011145), RUNE_C(0x011146), WBRK_EXTEND},
+ {RUNE_C(0x011147), RUNE_C(0x011147), WBRK_LE},
+ {RUNE_C(0x011150), RUNE_C(0x011172), WBRK_LE},
+ {RUNE_C(0x011173), RUNE_C(0x011173), WBRK_EXTEND},
+ {RUNE_C(0x011176), RUNE_C(0x011176), WBRK_LE},
+ {RUNE_C(0x011180), RUNE_C(0x011182), WBRK_EXTEND},
+ {RUNE_C(0x011183), RUNE_C(0x0111B2), WBRK_LE},
+ {RUNE_C(0x0111B3), RUNE_C(0x0111C0), WBRK_EXTEND},
+ {RUNE_C(0x0111C1), RUNE_C(0x0111C4), WBRK_LE},
+ {RUNE_C(0x0111C9), RUNE_C(0x0111CC), WBRK_EXTEND},
+ {RUNE_C(0x0111CE), RUNE_C(0x0111CF), WBRK_EXTEND},
+ {RUNE_C(0x0111D0), RUNE_C(0x0111D9), WBRK_NU},
+ {RUNE_C(0x0111DA), RUNE_C(0x0111DA), WBRK_LE},
+ {RUNE_C(0x0111DC), RUNE_C(0x0111DC), WBRK_LE},
+ {RUNE_C(0x011200), RUNE_C(0x011211), WBRK_LE},
+ {RUNE_C(0x011213), RUNE_C(0x01122B), WBRK_LE},
+ {RUNE_C(0x01122C), RUNE_C(0x011237), WBRK_EXTEND},
+ {RUNE_C(0x01123E), RUNE_C(0x01123E), WBRK_EXTEND},
+ {RUNE_C(0x01123F), RUNE_C(0x011240), WBRK_LE},
+ {RUNE_C(0x011241), RUNE_C(0x011241), WBRK_EXTEND},
+ {RUNE_C(0x011280), RUNE_C(0x011286), WBRK_LE},
+ {RUNE_C(0x011288), RUNE_C(0x011288), WBRK_LE},
+ {RUNE_C(0x01128A), RUNE_C(0x01128D), WBRK_LE},
+ {RUNE_C(0x01128F), RUNE_C(0x01129D), WBRK_LE},
+ {RUNE_C(0x01129F), RUNE_C(0x0112A8), WBRK_LE},
+ {RUNE_C(0x0112B0), RUNE_C(0x0112DE), WBRK_LE},
+ {RUNE_C(0x0112DF), RUNE_C(0x0112EA), WBRK_EXTEND},
+ {RUNE_C(0x0112F0), RUNE_C(0x0112F9), WBRK_NU},
+ {RUNE_C(0x011300), RUNE_C(0x011303), WBRK_EXTEND},
+ {RUNE_C(0x011305), RUNE_C(0x01130C), WBRK_LE},
+ {RUNE_C(0x01130F), RUNE_C(0x011310), WBRK_LE},
+ {RUNE_C(0x011313), RUNE_C(0x011328), WBRK_LE},
+ {RUNE_C(0x01132A), RUNE_C(0x011330), WBRK_LE},
+ {RUNE_C(0x011332), RUNE_C(0x011333), WBRK_LE},
+ {RUNE_C(0x011335), RUNE_C(0x011339), WBRK_LE},
+ {RUNE_C(0x01133B), RUNE_C(0x01133C), WBRK_EXTEND},
+ {RUNE_C(0x01133D), RUNE_C(0x01133D), WBRK_LE},
+ {RUNE_C(0x01133E), RUNE_C(0x011344), WBRK_EXTEND},
+ {RUNE_C(0x011347), RUNE_C(0x011348), WBRK_EXTEND},
+ {RUNE_C(0x01134B), RUNE_C(0x01134D), WBRK_EXTEND},
+ {RUNE_C(0x011350), RUNE_C(0x011350), WBRK_LE},
+ {RUNE_C(0x011357), RUNE_C(0x011357), WBRK_EXTEND},
+ {RUNE_C(0x01135D), RUNE_C(0x011361), WBRK_LE},
+ {RUNE_C(0x011362), RUNE_C(0x011363), WBRK_EXTEND},
+ {RUNE_C(0x011366), RUNE_C(0x01136C), WBRK_EXTEND},
+ {RUNE_C(0x011370), RUNE_C(0x011374), WBRK_EXTEND},
+ {RUNE_C(0x011400), RUNE_C(0x011434), WBRK_LE},
+ {RUNE_C(0x011435), RUNE_C(0x011446), WBRK_EXTEND},
+ {RUNE_C(0x011447), RUNE_C(0x01144A), WBRK_LE},
+ {RUNE_C(0x011450), RUNE_C(0x011459), WBRK_NU},
+ {RUNE_C(0x01145E), RUNE_C(0x01145E), WBRK_EXTEND},
+ {RUNE_C(0x01145F), RUNE_C(0x011461), WBRK_LE},
+ {RUNE_C(0x011480), RUNE_C(0x0114AF), WBRK_LE},
+ {RUNE_C(0x0114B0), RUNE_C(0x0114C3), WBRK_EXTEND},
+ {RUNE_C(0x0114C4), RUNE_C(0x0114C5), WBRK_LE},
+ {RUNE_C(0x0114C7), RUNE_C(0x0114C7), WBRK_LE},
+ {RUNE_C(0x0114D0), RUNE_C(0x0114D9), WBRK_NU},
+ {RUNE_C(0x011580), RUNE_C(0x0115AE), WBRK_LE},
+ {RUNE_C(0x0115AF), RUNE_C(0x0115B5), WBRK_EXTEND},
+ {RUNE_C(0x0115B8), RUNE_C(0x0115C0), WBRK_EXTEND},
+ {RUNE_C(0x0115D8), RUNE_C(0x0115DB), WBRK_LE},
+ {RUNE_C(0x0115DC), RUNE_C(0x0115DD), WBRK_EXTEND},
+ {RUNE_C(0x011600), RUNE_C(0x01162F), WBRK_LE},
+ {RUNE_C(0x011630), RUNE_C(0x011640), WBRK_EXTEND},
+ {RUNE_C(0x011644), RUNE_C(0x011644), WBRK_LE},
+ {RUNE_C(0x011650), RUNE_C(0x011659), WBRK_NU},
+ {RUNE_C(0x011680), RUNE_C(0x0116AA), WBRK_LE},
+ {RUNE_C(0x0116AB), RUNE_C(0x0116B7), WBRK_EXTEND},
+ {RUNE_C(0x0116B8), RUNE_C(0x0116B8), WBRK_LE},
+ {RUNE_C(0x0116C0), RUNE_C(0x0116C9), WBRK_NU},
+ {RUNE_C(0x01171D), RUNE_C(0x01172B), WBRK_EXTEND},
+ {RUNE_C(0x011730), RUNE_C(0x011739), WBRK_NU},
+ {RUNE_C(0x011800), RUNE_C(0x01182B), WBRK_LE},
+ {RUNE_C(0x01182C), RUNE_C(0x01183A), WBRK_EXTEND},
+ {RUNE_C(0x0118A0), RUNE_C(0x0118DF), WBRK_LE},
+ {RUNE_C(0x0118E0), RUNE_C(0x0118E9), WBRK_NU},
+ {RUNE_C(0x0118FF), RUNE_C(0x011906), WBRK_LE},
+ {RUNE_C(0x011909), RUNE_C(0x011909), WBRK_LE},
+ {RUNE_C(0x01190C), RUNE_C(0x011913), WBRK_LE},
+ {RUNE_C(0x011915), RUNE_C(0x011916), WBRK_LE},
+ {RUNE_C(0x011918), RUNE_C(0x01192F), WBRK_LE},
+ {RUNE_C(0x011930), RUNE_C(0x011935), WBRK_EXTEND},
+ {RUNE_C(0x011937), RUNE_C(0x011938), WBRK_EXTEND},
+ {RUNE_C(0x01193B), RUNE_C(0x01193E), WBRK_EXTEND},
+ {RUNE_C(0x01193F), RUNE_C(0x01193F), WBRK_LE},
+ {RUNE_C(0x011940), RUNE_C(0x011940), WBRK_EXTEND},
+ {RUNE_C(0x011941), RUNE_C(0x011941), WBRK_LE},
+ {RUNE_C(0x011942), RUNE_C(0x011943), WBRK_EXTEND},
+ {RUNE_C(0x011950), RUNE_C(0x011959), WBRK_NU},
+ {RUNE_C(0x0119A0), RUNE_C(0x0119A7), WBRK_LE},
+ {RUNE_C(0x0119AA), RUNE_C(0x0119D0), WBRK_LE},
+ {RUNE_C(0x0119D1), RUNE_C(0x0119D7), WBRK_EXTEND},
+ {RUNE_C(0x0119DA), RUNE_C(0x0119E0), WBRK_EXTEND},
+ {RUNE_C(0x0119E1), RUNE_C(0x0119E1), WBRK_LE},
+ {RUNE_C(0x0119E3), RUNE_C(0x0119E3), WBRK_LE},
+ {RUNE_C(0x0119E4), RUNE_C(0x0119E4), WBRK_EXTEND},
+ {RUNE_C(0x011A00), RUNE_C(0x011A00), WBRK_LE},
+ {RUNE_C(0x011A01), RUNE_C(0x011A0A), WBRK_EXTEND},
+ {RUNE_C(0x011A0B), RUNE_C(0x011A32), WBRK_LE},
+ {RUNE_C(0x011A33), RUNE_C(0x011A39), WBRK_EXTEND},
+ {RUNE_C(0x011A3A), RUNE_C(0x011A3A), WBRK_LE},
+ {RUNE_C(0x011A3B), RUNE_C(0x011A3E), WBRK_EXTEND},
+ {RUNE_C(0x011A47), RUNE_C(0x011A47), WBRK_EXTEND},
+ {RUNE_C(0x011A50), RUNE_C(0x011A50), WBRK_LE},
+ {RUNE_C(0x011A51), RUNE_C(0x011A5B), WBRK_EXTEND},
+ {RUNE_C(0x011A5C), RUNE_C(0x011A89), WBRK_LE},
+ {RUNE_C(0x011A8A), RUNE_C(0x011A99), WBRK_EXTEND},
+ {RUNE_C(0x011A9D), RUNE_C(0x011A9D), WBRK_LE},
+ {RUNE_C(0x011AB0), RUNE_C(0x011AF8), WBRK_LE},
+ {RUNE_C(0x011C00), RUNE_C(0x011C08), WBRK_LE},
+ {RUNE_C(0x011C0A), RUNE_C(0x011C2E), WBRK_LE},
+ {RUNE_C(0x011C2F), RUNE_C(0x011C36), WBRK_EXTEND},
+ {RUNE_C(0x011C38), RUNE_C(0x011C3F), WBRK_EXTEND},
+ {RUNE_C(0x011C40), RUNE_C(0x011C40), WBRK_LE},
+ {RUNE_C(0x011C50), RUNE_C(0x011C59), WBRK_NU},
+ {RUNE_C(0x011C72), RUNE_C(0x011C8F), WBRK_LE},
+ {RUNE_C(0x011C92), RUNE_C(0x011CA7), WBRK_EXTEND},
+ {RUNE_C(0x011CA9), RUNE_C(0x011CB6), WBRK_EXTEND},
+ {RUNE_C(0x011D00), RUNE_C(0x011D06), WBRK_LE},
+ {RUNE_C(0x011D08), RUNE_C(0x011D09), WBRK_LE},
+ {RUNE_C(0x011D0B), RUNE_C(0x011D30), WBRK_LE},
+ {RUNE_C(0x011D31), RUNE_C(0x011D36), WBRK_EXTEND},
+ {RUNE_C(0x011D3A), RUNE_C(0x011D3A), WBRK_EXTEND},
+ {RUNE_C(0x011D3C), RUNE_C(0x011D3D), WBRK_EXTEND},
+ {RUNE_C(0x011D3F), RUNE_C(0x011D45), WBRK_EXTEND},
+ {RUNE_C(0x011D46), RUNE_C(0x011D46), WBRK_LE},
+ {RUNE_C(0x011D47), RUNE_C(0x011D47), WBRK_EXTEND},
+ {RUNE_C(0x011D50), RUNE_C(0x011D59), WBRK_NU},
+ {RUNE_C(0x011D60), RUNE_C(0x011D65), WBRK_LE},
+ {RUNE_C(0x011D67), RUNE_C(0x011D68), WBRK_LE},
+ {RUNE_C(0x011D6A), RUNE_C(0x011D89), WBRK_LE},
+ {RUNE_C(0x011D8A), RUNE_C(0x011D8E), WBRK_EXTEND},
+ {RUNE_C(0x011D90), RUNE_C(0x011D91), WBRK_EXTEND},
+ {RUNE_C(0x011D93), RUNE_C(0x011D97), WBRK_EXTEND},
+ {RUNE_C(0x011D98), RUNE_C(0x011D98), WBRK_LE},
+ {RUNE_C(0x011DA0), RUNE_C(0x011DA9), WBRK_NU},
+ {RUNE_C(0x011EE0), RUNE_C(0x011EF2), WBRK_LE},
+ {RUNE_C(0x011EF3), RUNE_C(0x011EF6), WBRK_EXTEND},
+ {RUNE_C(0x011F00), RUNE_C(0x011F01), WBRK_EXTEND},
+ {RUNE_C(0x011F02), RUNE_C(0x011F02), WBRK_LE},
+ {RUNE_C(0x011F03), RUNE_C(0x011F03), WBRK_EXTEND},
+ {RUNE_C(0x011F04), RUNE_C(0x011F10), WBRK_LE},
+ {RUNE_C(0x011F12), RUNE_C(0x011F33), WBRK_LE},
+ {RUNE_C(0x011F34), RUNE_C(0x011F3A), WBRK_EXTEND},
+ {RUNE_C(0x011F3E), RUNE_C(0x011F42), WBRK_EXTEND},
+ {RUNE_C(0x011F50), RUNE_C(0x011F59), WBRK_NU},
+ {RUNE_C(0x011FB0), RUNE_C(0x011FB0), WBRK_LE},
+ {RUNE_C(0x012000), RUNE_C(0x012399), WBRK_LE},
+ {RUNE_C(0x012400), RUNE_C(0x01246E), WBRK_LE},
+ {RUNE_C(0x012480), RUNE_C(0x012543), WBRK_LE},
+ {RUNE_C(0x012F90), RUNE_C(0x012FF0), WBRK_LE},
+ {RUNE_C(0x013000), RUNE_C(0x01342F), WBRK_LE},
+ {RUNE_C(0x013430), RUNE_C(0x01343F), WBRK_FO},
+ {RUNE_C(0x013440), RUNE_C(0x013440), WBRK_EXTEND},
+ {RUNE_C(0x013441), RUNE_C(0x013446), WBRK_LE},
+ {RUNE_C(0x013447), RUNE_C(0x013455), WBRK_EXTEND},
+ {RUNE_C(0x014400), RUNE_C(0x014646), WBRK_LE},
+ {RUNE_C(0x016800), RUNE_C(0x016A38), WBRK_LE},
+ {RUNE_C(0x016A40), RUNE_C(0x016A5E), WBRK_LE},
+ {RUNE_C(0x016A60), RUNE_C(0x016A69), WBRK_NU},
+ {RUNE_C(0x016A70), RUNE_C(0x016ABE), WBRK_LE},
+ {RUNE_C(0x016AC0), RUNE_C(0x016AC9), WBRK_NU},
+ {RUNE_C(0x016AD0), RUNE_C(0x016AED), WBRK_LE},
+ {RUNE_C(0x016AF0), RUNE_C(0x016AF4), WBRK_EXTEND},
+ {RUNE_C(0x016B00), RUNE_C(0x016B2F), WBRK_LE},
+ {RUNE_C(0x016B30), RUNE_C(0x016B36), WBRK_EXTEND},
+ {RUNE_C(0x016B40), RUNE_C(0x016B43), WBRK_LE},
+ {RUNE_C(0x016B50), RUNE_C(0x016B59), WBRK_NU},
+ {RUNE_C(0x016B63), RUNE_C(0x016B77), WBRK_LE},
+ {RUNE_C(0x016B7D), RUNE_C(0x016B8F), WBRK_LE},
+ {RUNE_C(0x016E40), RUNE_C(0x016E7F), WBRK_LE},
+ {RUNE_C(0x016F00), RUNE_C(0x016F4A), WBRK_LE},
+ {RUNE_C(0x016F4F), RUNE_C(0x016F4F), WBRK_EXTEND},
+ {RUNE_C(0x016F50), RUNE_C(0x016F50), WBRK_LE},
+ {RUNE_C(0x016F51), RUNE_C(0x016F87), WBRK_EXTEND},
+ {RUNE_C(0x016F8F), RUNE_C(0x016F92), WBRK_EXTEND},
+ {RUNE_C(0x016F93), RUNE_C(0x016F9F), WBRK_LE},
+ {RUNE_C(0x016FE0), RUNE_C(0x016FE1), WBRK_LE},
+ {RUNE_C(0x016FE3), RUNE_C(0x016FE3), WBRK_LE},
+ {RUNE_C(0x016FE4), RUNE_C(0x016FE4), WBRK_EXTEND},
+ {RUNE_C(0x016FF0), RUNE_C(0x016FF1), WBRK_EXTEND},
+ {RUNE_C(0x01AFF0), RUNE_C(0x01AFF3), WBRK_KA},
+ {RUNE_C(0x01AFF5), RUNE_C(0x01AFFB), WBRK_KA},
+ {RUNE_C(0x01AFFD), RUNE_C(0x01AFFE), WBRK_KA},
+ {RUNE_C(0x01B000), RUNE_C(0x01B000), WBRK_KA},
+ {RUNE_C(0x01B120), RUNE_C(0x01B122), WBRK_KA},
+ {RUNE_C(0x01B155), RUNE_C(0x01B155), WBRK_KA},
+ {RUNE_C(0x01B164), RUNE_C(0x01B167), WBRK_KA},
+ {RUNE_C(0x01BC00), RUNE_C(0x01BC6A), WBRK_LE},
+ {RUNE_C(0x01BC70), RUNE_C(0x01BC7C), WBRK_LE},
+ {RUNE_C(0x01BC80), RUNE_C(0x01BC88), WBRK_LE},
+ {RUNE_C(0x01BC90), RUNE_C(0x01BC99), WBRK_LE},
+ {RUNE_C(0x01BC9D), RUNE_C(0x01BC9E), WBRK_EXTEND},
+ {RUNE_C(0x01BCA0), RUNE_C(0x01BCA3), WBRK_FO},
+ {RUNE_C(0x01CF00), RUNE_C(0x01CF2D), WBRK_EXTEND},
+ {RUNE_C(0x01CF30), RUNE_C(0x01CF46), WBRK_EXTEND},
+ {RUNE_C(0x01D165), RUNE_C(0x01D169), WBRK_EXTEND},
+ {RUNE_C(0x01D16D), RUNE_C(0x01D172), WBRK_EXTEND},
+ {RUNE_C(0x01D173), RUNE_C(0x01D17A), WBRK_FO},
+ {RUNE_C(0x01D17B), RUNE_C(0x01D182), WBRK_EXTEND},
+ {RUNE_C(0x01D185), RUNE_C(0x01D18B), WBRK_EXTEND},
+ {RUNE_C(0x01D1AA), RUNE_C(0x01D1AD), WBRK_EXTEND},
+ {RUNE_C(0x01D242), RUNE_C(0x01D244), WBRK_EXTEND},
+ {RUNE_C(0x01D400), RUNE_C(0x01D454), WBRK_LE},
+ {RUNE_C(0x01D456), RUNE_C(0x01D49C), WBRK_LE},
+ {RUNE_C(0x01D49E), RUNE_C(0x01D49F), WBRK_LE},
+ {RUNE_C(0x01D4A2), RUNE_C(0x01D4A2), WBRK_LE},
+ {RUNE_C(0x01D4A5), RUNE_C(0x01D4A6), WBRK_LE},
+ {RUNE_C(0x01D4A9), RUNE_C(0x01D4AC), WBRK_LE},
+ {RUNE_C(0x01D4AE), RUNE_C(0x01D4B9), WBRK_LE},
+ {RUNE_C(0x01D4BB), RUNE_C(0x01D4BB), WBRK_LE},
+ {RUNE_C(0x01D4BD), RUNE_C(0x01D4C3), WBRK_LE},
+ {RUNE_C(0x01D4C5), RUNE_C(0x01D505), WBRK_LE},
+ {RUNE_C(0x01D507), RUNE_C(0x01D50A), WBRK_LE},
+ {RUNE_C(0x01D50D), RUNE_C(0x01D514), WBRK_LE},
+ {RUNE_C(0x01D516), RUNE_C(0x01D51C), WBRK_LE},
+ {RUNE_C(0x01D51E), RUNE_C(0x01D539), WBRK_LE},
+ {RUNE_C(0x01D53B), RUNE_C(0x01D53E), WBRK_LE},
+ {RUNE_C(0x01D540), RUNE_C(0x01D544), WBRK_LE},
+ {RUNE_C(0x01D546), RUNE_C(0x01D546), WBRK_LE},
+ {RUNE_C(0x01D54A), RUNE_C(0x01D550), WBRK_LE},
+ {RUNE_C(0x01D552), RUNE_C(0x01D6A5), WBRK_LE},
+ {RUNE_C(0x01D6A8), RUNE_C(0x01D6C0), WBRK_LE},
+ {RUNE_C(0x01D6C2), RUNE_C(0x01D6DA), WBRK_LE},
+ {RUNE_C(0x01D6DC), RUNE_C(0x01D6FA), WBRK_LE},
+ {RUNE_C(0x01D6FC), RUNE_C(0x01D714), WBRK_LE},
+ {RUNE_C(0x01D716), RUNE_C(0x01D734), WBRK_LE},
+ {RUNE_C(0x01D736), RUNE_C(0x01D74E), WBRK_LE},
+ {RUNE_C(0x01D750), RUNE_C(0x01D76E), WBRK_LE},
+ {RUNE_C(0x01D770), RUNE_C(0x01D788), WBRK_LE},
+ {RUNE_C(0x01D78A), RUNE_C(0x01D7A8), WBRK_LE},
+ {RUNE_C(0x01D7AA), RUNE_C(0x01D7C2), WBRK_LE},
+ {RUNE_C(0x01D7C4), RUNE_C(0x01D7CB), WBRK_LE},
+ {RUNE_C(0x01D7CE), RUNE_C(0x01D7FF), WBRK_NU},
+ {RUNE_C(0x01DA00), RUNE_C(0x01DA36), WBRK_EXTEND},
+ {RUNE_C(0x01DA3B), RUNE_C(0x01DA6C), WBRK_EXTEND},
+ {RUNE_C(0x01DA75), RUNE_C(0x01DA75), WBRK_EXTEND},
+ {RUNE_C(0x01DA84), RUNE_C(0x01DA84), WBRK_EXTEND},
+ {RUNE_C(0x01DA9B), RUNE_C(0x01DA9F), WBRK_EXTEND},
+ {RUNE_C(0x01DAA1), RUNE_C(0x01DAAF), WBRK_EXTEND},
+ {RUNE_C(0x01DF00), RUNE_C(0x01DF1E), WBRK_LE},
+ {RUNE_C(0x01DF25), RUNE_C(0x01DF2A), WBRK_LE},
+ {RUNE_C(0x01E000), RUNE_C(0x01E006), WBRK_EXTEND},
+ {RUNE_C(0x01E008), RUNE_C(0x01E018), WBRK_EXTEND},
+ {RUNE_C(0x01E01B), RUNE_C(0x01E021), WBRK_EXTEND},
+ {RUNE_C(0x01E023), RUNE_C(0x01E024), WBRK_EXTEND},
+ {RUNE_C(0x01E026), RUNE_C(0x01E02A), WBRK_EXTEND},
+ {RUNE_C(0x01E030), RUNE_C(0x01E06D), WBRK_LE},
+ {RUNE_C(0x01E08F), RUNE_C(0x01E08F), WBRK_EXTEND},
+ {RUNE_C(0x01E100), RUNE_C(0x01E12C), WBRK_LE},
+ {RUNE_C(0x01E130), RUNE_C(0x01E136), WBRK_EXTEND},
+ {RUNE_C(0x01E137), RUNE_C(0x01E13D), WBRK_LE},
+ {RUNE_C(0x01E140), RUNE_C(0x01E149), WBRK_NU},
+ {RUNE_C(0x01E14E), RUNE_C(0x01E14E), WBRK_LE},
+ {RUNE_C(0x01E290), RUNE_C(0x01E2AD), WBRK_LE},
+ {RUNE_C(0x01E2AE), RUNE_C(0x01E2AE), WBRK_EXTEND},
+ {RUNE_C(0x01E2C0), RUNE_C(0x01E2EB), WBRK_LE},
+ {RUNE_C(0x01E2EC), RUNE_C(0x01E2EF), WBRK_EXTEND},
+ {RUNE_C(0x01E2F0), RUNE_C(0x01E2F9), WBRK_NU},
+ {RUNE_C(0x01E4D0), RUNE_C(0x01E4EB), WBRK_LE},
+ {RUNE_C(0x01E4EC), RUNE_C(0x01E4EF), WBRK_EXTEND},
+ {RUNE_C(0x01E4F0), RUNE_C(0x01E4F9), WBRK_NU},
+ {RUNE_C(0x01E7E0), RUNE_C(0x01E7E6), WBRK_LE},
+ {RUNE_C(0x01E7E8), RUNE_C(0x01E7EB), WBRK_LE},
+ {RUNE_C(0x01E7ED), RUNE_C(0x01E7EE), WBRK_LE},
+ {RUNE_C(0x01E7F0), RUNE_C(0x01E7FE), WBRK_LE},
+ {RUNE_C(0x01E800), RUNE_C(0x01E8C4), WBRK_LE},
+ {RUNE_C(0x01E8D0), RUNE_C(0x01E8D6), WBRK_EXTEND},
+ {RUNE_C(0x01E900), RUNE_C(0x01E943), WBRK_LE},
+ {RUNE_C(0x01E944), RUNE_C(0x01E94A), WBRK_EXTEND},
+ {RUNE_C(0x01E94B), RUNE_C(0x01E94B), WBRK_LE},
+ {RUNE_C(0x01E950), RUNE_C(0x01E959), WBRK_NU},
+ {RUNE_C(0x01EE00), RUNE_C(0x01EE03), WBRK_LE},
+ {RUNE_C(0x01EE05), RUNE_C(0x01EE1F), WBRK_LE},
+ {RUNE_C(0x01EE21), RUNE_C(0x01EE22), WBRK_LE},
+ {RUNE_C(0x01EE24), RUNE_C(0x01EE24), WBRK_LE},
+ {RUNE_C(0x01EE27), RUNE_C(0x01EE27), WBRK_LE},
+ {RUNE_C(0x01EE29), RUNE_C(0x01EE32), WBRK_LE},
+ {RUNE_C(0x01EE34), RUNE_C(0x01EE37), WBRK_LE},
+ {RUNE_C(0x01EE39), RUNE_C(0x01EE39), WBRK_LE},
+ {RUNE_C(0x01EE3B), RUNE_C(0x01EE3B), WBRK_LE},
+ {RUNE_C(0x01EE42), RUNE_C(0x01EE42), WBRK_LE},
+ {RUNE_C(0x01EE47), RUNE_C(0x01EE47), WBRK_LE},
+ {RUNE_C(0x01EE49), RUNE_C(0x01EE49), WBRK_LE},
+ {RUNE_C(0x01EE4B), RUNE_C(0x01EE4B), WBRK_LE},
+ {RUNE_C(0x01EE4D), RUNE_C(0x01EE4F), WBRK_LE},
+ {RUNE_C(0x01EE51), RUNE_C(0x01EE52), WBRK_LE},
+ {RUNE_C(0x01EE54), RUNE_C(0x01EE54), WBRK_LE},
+ {RUNE_C(0x01EE57), RUNE_C(0x01EE57), WBRK_LE},
+ {RUNE_C(0x01EE59), RUNE_C(0x01EE59), WBRK_LE},
+ {RUNE_C(0x01EE5B), RUNE_C(0x01EE5B), WBRK_LE},
+ {RUNE_C(0x01EE5D), RUNE_C(0x01EE5D), WBRK_LE},
+ {RUNE_C(0x01EE5F), RUNE_C(0x01EE5F), WBRK_LE},
+ {RUNE_C(0x01EE61), RUNE_C(0x01EE62), WBRK_LE},
+ {RUNE_C(0x01EE64), RUNE_C(0x01EE64), WBRK_LE},
+ {RUNE_C(0x01EE67), RUNE_C(0x01EE6A), WBRK_LE},
+ {RUNE_C(0x01EE6C), RUNE_C(0x01EE72), WBRK_LE},
+ {RUNE_C(0x01EE74), RUNE_C(0x01EE77), WBRK_LE},
+ {RUNE_C(0x01EE79), RUNE_C(0x01EE7C), WBRK_LE},
+ {RUNE_C(0x01EE7E), RUNE_C(0x01EE7E), WBRK_LE},
+ {RUNE_C(0x01EE80), RUNE_C(0x01EE89), WBRK_LE},
+ {RUNE_C(0x01EE8B), RUNE_C(0x01EE9B), WBRK_LE},
+ {RUNE_C(0x01EEA1), RUNE_C(0x01EEA3), WBRK_LE},
+ {RUNE_C(0x01EEA5), RUNE_C(0x01EEA9), WBRK_LE},
+ {RUNE_C(0x01EEAB), RUNE_C(0x01EEBB), WBRK_LE},
+ {RUNE_C(0x01F000), RUNE_C(0x01F0FF), WBRK_EXTPICT},
+ {RUNE_C(0x01F10D), RUNE_C(0x01F10F), WBRK_EXTPICT},
+ {RUNE_C(0x01F12F), RUNE_C(0x01F12F), WBRK_EXTPICT},
+ {RUNE_C(0x01F130), RUNE_C(0x01F149), WBRK_LE},
+ {RUNE_C(0x01F150), RUNE_C(0x01F169), WBRK_LE},
+ {RUNE_C(0x01F16C), RUNE_C(0x01F16F), WBRK_EXTPICT},
+ {RUNE_C(0x01F170), RUNE_C(0x01F171), WBRK_EXTPICT_LE},
+ {RUNE_C(0x01F172), RUNE_C(0x01F17D), WBRK_LE},
+ {RUNE_C(0x01F17E), RUNE_C(0x01F17F), WBRK_EXTPICT_LE},
+ {RUNE_C(0x01F180), RUNE_C(0x01F189), WBRK_LE},
+ {RUNE_C(0x01F18E), RUNE_C(0x01F18E), WBRK_EXTPICT},
+ {RUNE_C(0x01F191), RUNE_C(0x01F19A), WBRK_EXTPICT},
+ {RUNE_C(0x01F1AD), RUNE_C(0x01F1E5), WBRK_EXTPICT},
+ {RUNE_C(0x01F1E6), RUNE_C(0x01F1FF), WBRK_RI},
+ {RUNE_C(0x01F201), RUNE_C(0x01F20F), WBRK_EXTPICT},
+ {RUNE_C(0x01F21A), RUNE_C(0x01F21A), WBRK_EXTPICT},
+ {RUNE_C(0x01F22F), RUNE_C(0x01F22F), WBRK_EXTPICT},
+ {RUNE_C(0x01F232), RUNE_C(0x01F23A), WBRK_EXTPICT},
+ {RUNE_C(0x01F23C), RUNE_C(0x01F23F), WBRK_EXTPICT},
+ {RUNE_C(0x01F249), RUNE_C(0x01F3FA), WBRK_EXTPICT},
+ {RUNE_C(0x01F3FB), RUNE_C(0x01F3FF), WBRK_EXTEND},
+ {RUNE_C(0x01F400), RUNE_C(0x01F53D), WBRK_EXTPICT},
+ {RUNE_C(0x01F546), RUNE_C(0x01F64F), WBRK_EXTPICT},
+ {RUNE_C(0x01F680), RUNE_C(0x01F6FF), WBRK_EXTPICT},
+ {RUNE_C(0x01F774), RUNE_C(0x01F77F), WBRK_EXTPICT},
+ {RUNE_C(0x01F7D5), RUNE_C(0x01F7FF), WBRK_EXTPICT},
+ {RUNE_C(0x01F80C), RUNE_C(0x01F80F), WBRK_EXTPICT},
+ {RUNE_C(0x01F848), RUNE_C(0x01F84F), WBRK_EXTPICT},
+ {RUNE_C(0x01F85A), RUNE_C(0x01F85F), WBRK_EXTPICT},
+ {RUNE_C(0x01F888), RUNE_C(0x01F88F), WBRK_EXTPICT},
+ {RUNE_C(0x01F8AE), RUNE_C(0x01F8FF), WBRK_EXTPICT},
+ {RUNE_C(0x01F90C), RUNE_C(0x01F93A), WBRK_EXTPICT},
+ {RUNE_C(0x01F93C), RUNE_C(0x01F945), WBRK_EXTPICT},
+ {RUNE_C(0x01F947), RUNE_C(0x01FAFF), WBRK_EXTPICT},
+ {RUNE_C(0x01FBF0), RUNE_C(0x01FBF9), WBRK_NU},
+ {RUNE_C(0x01FC00), RUNE_C(0x01FFFD), WBRK_EXTPICT},
+ {RUNE_C(0x0E0001), RUNE_C(0x0E0001), WBRK_FO},
+ {RUNE_C(0x0E0020), RUNE_C(0x0E007F), WBRK_EXTEND},
+ {RUNE_C(0x0E0100), RUNE_C(0x0E01EF), WBRK_EXTEND},
+};
+
+#endif /* !MLIB_UNICODE__WBRK_H */
diff --git a/lib/unicode/string/u8wnext.c b/lib/unicode/string/u8wnext.c
index 4236cff..5e893c6 100644
--- a/lib/unicode/string/u8wnext.c
+++ b/lib/unicode/string/u8wnext.c
@@ -1,161 +1,249 @@
+/* The approach of this implementation is heavily inspired by libgrapheme
+ (written by Laslo Hunhold <dev@frign.de>), and my email correspondence with
+ Laslo. */
+
+#include "_bsearch.h"
#include "macros.h"
#include "mbstring.h"
-#include "unicode/prop.h"
+#include "unicode/_wbrk.h"
#include "unicode/string.h"
-#define IS_AHLETTER(cp) ((cp) == WB_LE || (cp) == WB_HL)
-#define IS_MIDNUMLETQ(cp) ((cp) == WB_MB || (cp) == WB_SQ)
+_MLIB_DEFINE_BSEARCH(enum wbrk_prop, wbrk_lookup, WBRK_XX)
-#define RET(x) \
- do { \
- ws->prev_ap = ap; \
- return (x); \
- } while (false)
+#define IS_MIDNUMLETQ(xp) ((xp) == WBRK_MB || (xp) == WBRK_SQ)
+#define IS_AHLETTER(xp) \
+ ((xp) == WBRK_LE || (xp) == WBRK_EXTPICT_LE || (xp) == WBRK_HL)
+#define IS_IGNORE(xp) \
+ ((xp) == WBRK_EXTEND || (xp) == WBRK_FO || (xp) == WBRK_ZWJ)
struct wbrk_state {
- int ri_parity;
- enum uprop_wb prev_ap;
+ struct {
+ enum wbrk_prop prev[2], next[2]; /* [0] is adjacent to the candidate break, [1] is one further out */
+ } raw, skip; /* raw: props of every rune; skip: same, minus IS_IGNORE props */
+ struct u8view raw_v, skip_v, mid_v; /* raw_v/skip_v feed next[1]; mid_v begins at the candidate break */
+ int ri_parity : 1; /* non-zero after an odd run of RI; NOTE(review): signedness of a plain int bit-field is implementation-defined, so only test against 0 */
};
-static bool u8iswbrk(const char8_t **, size_t *, struct wbrk_state *);
+static bool advance(struct wbrk_state *); /* step one rune; false once raw.next[0] is EOT */
+static size_t findwbrk(struct u8view); /* byte offset of the first break in the view */
+static struct wbrk_state mkwbrkstate(struct u8view); /* state positioned at the start of the view */
size_t
u8wnext(struct u8view *w, const char8_t **s, size_t *n)
{
- ASSUME(s != nullptr);
ASSUME(n != nullptr);
+ ASSUME(s != nullptr);
+ ASSUME(*s != nullptr);
if (*n == 0)
return 0;
- const char8_t *p = *s;
+ size_t off = findwbrk((struct u8view){*s, *n}); /* bytes up to the first word break */
if (w != nullptr)
- w->p = p;
-
- size_t m = *n;
- struct wbrk_state ws = {};
- while (!u8iswbrk(&p, &m, &ws))
- ;
-
- ptrdiff_t d = p - *s;
- *n -= d;
- *s = p;
- if (w)
- w->len = d;
- return d;
+ *w = (struct u8view){*s, off}; /* report the word before *s is moved */
+
+ ASSUME(*n >= off);
+ *s += off; /* step the caller's pointer past the word... */
+ *n -= off; /* ...and shrink the remaining length to match */
+ return off; /* the same byte count stored in w->len */
}
-bool
-u8iswbrk(const char8_t **s, size_t *n, struct wbrk_state *ws)
+size_t
+findwbrk(struct u8view sv)
{
- ASSUME(s != nullptr);
- ASSUME(n != nullptr);
- ASSUME(ws != nullptr);
-
- rune a, b, c;
- enum uprop_wb ap, bp, cp;
- a = b = c = ap = bp = cp = 0;
-
- u8next(&a, s, n);
-
- {
- const char8_t *s_cpy = *s;
- size_t n_cpy = *n;
- u8next(&b, &s_cpy, &n_cpy);
- u8next(&c, &s_cpy, &n_cpy);
+ ASSUME(sv.p != nullptr);
+
+ struct wbrk_state ws = mkwbrkstate(sv);
+
+ while (advance(&ws)) { /* one candidate break per step: `continue' rejects it, `break' accepts it */
+#define prev ws.raw.prev /* WB3 to WB4 inspect the unfiltered props */
+#define next ws.raw.next
+ /* WB3: no break between CR and LF */
+ if (prev[0] == WBRK_CR && next[0] == WBRK_LF)
+ continue;
+
+ /* WB3a: always break after NL, CR or LF */
+ if (prev[0] == WBRK_NL || prev[0] == WBRK_CR || prev[0] == WBRK_LF)
+ break;
+
+ /* WB3b: always break before NL, CR or LF */
+ if (next[0] == WBRK_NL || next[0] == WBRK_CR || next[0] == WBRK_LF)
+ break;
+
+ /* WB3c: no break between ZWJ and a following EXTPICT or EXTPICT_LE */
+ if (prev[0] == WBRK_ZWJ
+ && (next[0] == WBRK_EXTPICT || next[0] == WBRK_EXTPICT_LE))
+ {
+ continue;
+ }
+
+ /* WB3d: no break inside a run of WSEGSPACE */
+ if (prev[0] == WBRK_WSEGSPACE && next[0] == WBRK_WSEGSPACE)
+ continue;
+
+ /* WB4: no break before EXTEND, FO or ZWJ */
+ if (next[0] == WBRK_EXTEND || next[0] == WBRK_FO || next[0] == WBRK_ZWJ)
+ continue;
+
+#undef prev
+#undef next
+#define prev ws.skip.prev /* WB5 onwards inspect the windows with IS_IGNORE props left out */
+#define next ws.skip.next
+
+ /* WB5: no break between two letters */
+ if (IS_AHLETTER(prev[0]) && IS_AHLETTER(next[0]))
+ continue;
+
+ /* WB6: no break after a letter when ML, MB or SQ and then a letter follow */
+ if (IS_AHLETTER(prev[0])
+ && (next[0] == WBRK_ML || IS_MIDNUMLETQ(next[0]))
+ && IS_AHLETTER(next[1]))
+ {
+ continue;
+ }
+
+ /* WB7: no break before a letter preceded by a letter and then ML, MB or SQ */
+ if (IS_AHLETTER(prev[1])
+ && (prev[0] == WBRK_ML || IS_MIDNUMLETQ(prev[0]))
+ && IS_AHLETTER(next[0]))
+ {
+ continue;
+ }
+
+ /* WB7a: no break between HL and SQ */
+ if (prev[0] == WBRK_HL && next[0] == WBRK_SQ)
+ continue;
+
+ /* WB7b: no break between HL and DQ when another HL follows */
+ if (prev[0] == WBRK_HL && next[0] == WBRK_DQ && next[1] == WBRK_HL)
+ continue;
+
+ /* WB7c: no break between DQ and HL when HL precedes the DQ */
+ if (prev[1] == WBRK_HL && prev[0] == WBRK_DQ && next[0] == WBRK_HL)
+ continue;
+
+ /* WB8: no break between two NU */
+ if (prev[0] == WBRK_NU && next[0] == WBRK_NU)
+ continue;
+
+ /* WB9: no break between a letter and NU */
+ if (IS_AHLETTER(prev[0]) && next[0] == WBRK_NU)
+ continue;
+
+ /* WB10: no break between NU and a letter */
+ if (prev[0] == WBRK_NU && IS_AHLETTER(next[0]))
+ continue;
+
+ /* WB11: no break before NU preceded by NU and then MN, MB or SQ */
+ if (prev[1] == WBRK_NU && (prev[0] == WBRK_MN || IS_MIDNUMLETQ(prev[0]))
+ && next[0] == WBRK_NU)
+ {
+ continue;
+ }
+
+ /* WB12: no break after NU when MN, MB or SQ and then NU follow */
+ if (prev[0] == WBRK_NU && (next[0] == WBRK_MN || IS_MIDNUMLETQ(next[0]))
+ && next[1] == WBRK_NU)
+ {
+ continue;
+ }
+
+ /* WB13: no break between two KA */
+ if (prev[0] == WBRK_KA && next[0] == WBRK_KA)
+ continue;
+
+ /* WB13a: no break between a letter, NU, KA or EX and a following EX */
+ if ((IS_AHLETTER(prev[0]) || prev[0] == WBRK_NU || prev[0] == WBRK_KA
+ || prev[0] == WBRK_EX)
+ && next[0] == WBRK_EX)
+ {
+ continue;
+ }
+
+ /* WB13b: no break between EX and a following letter, NU or KA */
+ if (prev[0] == WBRK_EX
+ && (IS_AHLETTER(next[0]) || next[0] == WBRK_NU
+ || next[0] == WBRK_KA))
+ {
+ continue;
+ }
+
+ /* WB15 & WB16: no break before RI while ri_parity is set (odd run of RI so far) */
+ if (next[0] == WBRK_RI && ws.ri_parity)
+ continue;
+
+ /* WB999: no rule matched, so break here */
+ break;
+#undef prev
+#undef next
}
- ws->ri_parity = ws->ri_parity == 0 && uprop_is_ri(a);
-
- /* WB1 & WB2 */
- if (!a || !b)
- RET(true);
-
- /* WB3 */
- if (a == '\r' && b == '\n')
- RET(false);
-
- /* WB3a */
- if (a == '\r' || a == '\n' || (ap = uprop_get_wb(a)) == WB_NL)
- RET(true);
-
- /* WB3b */
- if (b == '\r' || b == '\n' || (bp = uprop_get_wb(b)) == WB_NL)
- RET(true);
-
- /* WB3c */
- if (ap == WB_ZWJ && uprop_is_extpict(b))
- RET(false);
-
- /* WB3d */
- if (ap == WB_WSEGSPACE && bp == WB_WSEGSPACE)
- RET(false);
-
- /* WB4 */
- if (bp == WB_FO || bp == WB_EXTEND || bp == WB_ZWJ)
- RET(false);
-
- /* WB5 */
- if (IS_AHLETTER(ap) && IS_AHLETTER(bp))
- RET(false);
+ return ws.mid_v.p - sv.p; /* bytes from the start of sv to the chosen break (or to the end of input) */
+}
- /* WB6 */
- cp = uprop_get_wb(c);
- if (IS_AHLETTER(ap) && (bp == WB_ML || IS_MIDNUMLETQ(bp))
- && IS_AHLETTER(cp))
+struct wbrk_state
+mkwbrkstate(struct u8view sv)
+{
+ struct wbrk_state ws = { /* every window slot starts as EOT; all three views start at sv */
+ .raw = {{WBRK_EOT, WBRK_EOT}, {WBRK_EOT, WBRK_EOT}},
+ .skip = {{WBRK_EOT, WBRK_EOT}, {WBRK_EOT, WBRK_EOT}},
+ .mid_v = sv,
+ .raw_v = sv,
+ .skip_v = sv,
+ };
+
+ static_assert(sizeof(ws.skip.next) == sizeof(ws.raw.next)); /* both loops below bound i by lengthof(ws.raw.next) */
+
+ rune ch;
+ for (size_t i = 0;
+ i < lengthof(ws.raw.next) && u8next(&ch, U8_ARGSP(ws.raw_v)) != 0; i++)
{
- RET(false);
+ ws.raw.next[i] = mlib_lookup(ch); /* prime the raw lookahead with the leading runes */
}
- /* WB7 */
- if (IS_AHLETTER(ws->prev_ap) && (ap == WB_ML || IS_MIDNUMLETQ(ap))
- && IS_AHLETTER(bp))
+ for (size_t i = 0;
+ i < lengthof(ws.raw.next) && u8next(&ch, U8_ARGSP(ws.skip_v)) != 0;)
{
- RET(false);
+ ws.skip.next[i] = mlib_lookup(ch); /* NOTE(review): if input ends on an IS_IGNORE rune, that prop stays here instead of EOT */
+ if (!IS_IGNORE(ws.skip.next[i]))
+ i++; /* keep the slot only for non-IS_IGNORE props; otherwise the next rune overwrites it */
}
- /* WB7a & WB7b */
- if (ap == WB_HL && (bp == WB_SQ || (bp == WB_DQ && cp == WB_HL)))
- RET(false);
-
- /* WB7c */
- if (ws->prev_ap == WB_HL && ap == WB_DQ && bp == WB_HL)
- RET(false);
-
- /* WB8, WB9, & WB10 */
- if ((ap == WB_NU || IS_AHLETTER(ap)) && (bp == WB_NU || IS_AHLETTER(bp)))
- RET(false);
-
- /* WB11 */
- if (ws->prev_ap == WB_NU && (ap == WB_MN || IS_MIDNUMLETQ(ap))
- && bp == WB_NU)
- {
- RET(false);
- }
-
- /* WB12 */
- if (ap == WB_NU && (bp == WB_MN || IS_MIDNUMLETQ(bp)) && cp == WB_NU)
- RET(false);
-
- /* WB13 */
- if (ap == WB_KA && bp == WB_KA)
- RET(false);
+ return ws; /* prev slots are still EOT: no rune has been stepped over yet */
+}
- /* WB13a */
- if ((IS_AHLETTER(ap) || ap == WB_NU || ap == WB_KA || ap == WB_EX)
- && bp == WB_EX)
- {
- RET(false);
+bool
+advance(struct wbrk_state *ws)
+{
+ if (ws->raw.next[0] == WBRK_EOT)
+ return false; /* no rune left to step over */
+
+ /* Shift the raw prop window over by 1 rune, refilling next[1] (EOT at end of input) */
+ rune ch;
+ ws->raw.prev[1] = ws->raw.prev[0];
+ ws->raw.prev[0] = ws->raw.next[0];
+ ws->raw.next[0] = ws->raw.next[1];
+ ws->raw.next[1] =
+ u8next(&ch, U8_ARGSP(ws->raw_v)) != 0 ? mlib_lookup(ch) : WBRK_EOT;
+
+ /* Move the midpoint (the candidate break position) past one rune */
+ u8next(nullptr, U8_ARGSP(ws->mid_v));
+
+ /* The skip window only shifts when the rune just passed is not an IS_IGNORE prop */
+ if (!IS_IGNORE(ws->raw.prev[0])) {
+ ws->skip.prev[1] = ws->skip.prev[0];
+ ws->skip.prev[0] = ws->skip.next[0];
+ ws->skip.next[0] = ws->skip.next[1];
+ ws->ri_parity = ws->ri_parity == 0 && ws->skip.prev[0] == WBRK_RI; /* toggles on each successive RI, cleared by anything else */
+
+ do { /* refill next[1] with the next non-IS_IGNORE prop, or EOT when input runs out */
+ if (u8next(&ch, U8_ARGSP(ws->skip_v)) == 0) {
+ ws->skip.next[1] = WBRK_EOT;
+ break;
+ }
+ ws->skip.next[1] = mlib_lookup(ch);
+ } while (IS_IGNORE(ws->skip.next[1]));
}
- /* WB13b */
- if (ap == WB_EX && (IS_AHLETTER(bp) || bp == WB_NU || bp == WB_KA))
- RET(false);
-
- /* WB15 & WB16 */
- if (ap == WB_RI && bp == WB_RI && ws->ri_parity == 1)
- RET(false);
-
- /* WB999 */
- RET(true);
+ return true; /* a new candidate break is ready for the caller to test */
}