diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-09 18:51:28 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-09 18:51:28 +0200 |
commit | 13a5681b7b4528fed6f402bfca8eca505e175a77 (patch) | |
tree | 137429c97fed0cbf166dab60cad306f54d8a89c6 /gen/string | |
parent | 5ab5635d06763ed86edc24e98fb366e993515ec4 (diff) |
Move script to gen/string/
Diffstat (limited to 'gen/string')
-rwxr-xr-x | gen/string/gbrk | 97 |
1 files changed, 97 insertions, 0 deletions
#!/bin/sh

# gen/string/gbrk -- generate include/unicode/__gbrk.h.
#
# Reads the three data files listed at the end of the gawk invocation and
# emits a C header containing the gbrk_prop bit-flag enum plus a table of
# {lo, hi, flags} code point ranges.  Requires gawk (strtonum and hex
# literals are used in the awk program).

set -e

# Run from the repository root.  dirname is used instead of "${0%/*}"
# because the latter leaves $0 unchanged when it contains no slash (e.g.
# "sh gbrk"), which made the cd fail.
cd "$(dirname -- "$0")/../.."

# Generate into a temporary file next to the target and rename it into place
# only once everything succeeded, so a failed run never leaves a truncated
# header behind.
out=include/unicode/__gbrk.h
tmp=$out.tmp.$$
trap 'rm -f "$tmp"' EXIT
trap 'exit 1' HUP INT TERM
exec >"$tmp"

cat <<C
/* This file is autogenerated by gen/string/gbrk; DO NOT EDIT. */

#ifndef MLIB_UNICODE___GBRK_H
#define MLIB_UNICODE___GBRK_H

/* clang-format off */

#include "__rune.h"

typedef enum {
	GBP_OTHER = 0,

	GBP_CTRL = 1 << 0, /* Control */
	GBP_EXT = 1 << 1, /* Extend */
	GBP_PIC = 1 << 2, /* Extended_Pictographic */
	GBP_PREP = 1 << 3, /* Prepend */
	GBP_RI = 1 << 4, /* Regional_Indicator */
	GBP_SM = 1 << 5, /* SpacingMark */
	GBP_ZWJ = 1 << 6, /* ZWJ */

	GBP_HNGL_L = 1 << 7, /* Hangul L */
	GBP_HNGL_LV = 1 << 8, /* Hangul LV */
	GBP_HNGL_LVT = 1 << 9, /* Hangul LVT */
	GBP_HNGL_T = 1 << 10, /* Hangul T */
	GBP_HNGL_V = 1 << 11, /* Hangul V */

	GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */
	GBP_INDC_EXT = 1 << 13, /* Indic Extend */
	GBP_INDC_LNK = 1 << 14, /* Indic Linker */
} gbrk_prop;

static const struct {
	rune lo, hi;
	gbrk_prop val;
} gbrk_prop_tbl[] = {
C

# The awk program already emits ranges in ascending order with zero-padded
# hex bounds, so no sort stage is needed.  Keeping gawk out of a pipeline
# also lets "set -e" see its exit status.
gawk '
BEGIN {
	# A field ends at a trailing "#" comment or at a ";" that has at least
	# one space on both sides.  "InCB; Consonant" has no space before its
	# ";" and therefore stays together as a single field.
	FS = "( *#.*| +; +)"
	map["Control"] = "CTRL"
	map["Extend"] = "EXT"
	map["Extended_Pictographic"] = "PIC"
	map["Prepend"] = "PREP"
	map["Regional_Indicator"] = "RI"
	map["SpacingMark"] = "SM"
	map["ZWJ"] = "ZWJ"

	map["L"] = "HNGL_L"
	map["LV"] = "HNGL_LV"
	map["LVT"] = "HNGL_LVT"
	map["T"] = "HNGL_T"
	map["V"] = "HNGL_V"

	map["InCB; Consonant"] = "INDC_CNSNT"
	map["InCB; Extend"] = "INDC_EXT"
	map["InCB; Linker"] = "INDC_LNK"
}

# Only records whose second field names a mapped property are processed.
map[$2] {
	# $1 is either a single code point or a "lo..hi" range; for a single
	# code point n is 1, so lo and hi are the same value.
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	# Accumulate flags per code point as a C expression joined by " | ".
	for (i = lo; i <= hi; i++) {
		s = "GBP_" map[$2]
		props[i] = props[i] ? props[i] " | " s : s
	}
}

END {
	# Merge runs of adjacent code points that carry an identical flag
	# expression into one {lo, hi, flags} table row.
	for (i = 0; i <= 0x10FFFF; i++) {
		if (!props[i])
			continue
		lo = i
		while (props[lo] == props[i + 1])
			i++
		printf "\t{0x%06X, 0x%06X, %s},\n", lo, i, props[lo]
	}
}
' data/GraphemeBreakProperty.txt \
	data/DerivedCoreProperties.txt \
	data/emoji-data.txt

cat <<C
};

#endif /* !MLIB_UNICODE___GBRK_H */
C

# Everything succeeded: publish the header.  The EXIT trap then finds no
# temporary file left to remove.
mv -f "$tmp" "$out"