diff options
Diffstat (limited to 'vendor/librune/gen/gbrk')
-rwxr-xr-x | vendor/librune/gen/gbrk | 114 |
1 files changed, 114 insertions, 0 deletions
#!/bin/sh

# gen/gbrk: generate include/internal/gbrk_lookup.h.
#
# Downloads three Unicode Character Database files (cached under
# /tmp/librune/gbrk), extracts the properties listed in the gawk map
# below, and writes a C header containing a table of code point ranges
# together with their GBP_* property flags.
#
# Requires: wget, gawk.

set -e

readonly CACHE_DIR=/tmp/librune/gbrk
readonly OUT=include/internal/gbrk_lookup.h

readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt'
readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt'
readonly URL3='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt'

# cache_path URL
# Print the location in the cache directory where URL is stored.
cache_path()
{
	printf '%s/%s\n' "$CACHE_DIR" "$(basename "$1")"
}

# cache URL
# Download URL into the cache directory unless a non-empty copy is
# already there.  Returns non-zero when the download fails.
cache()
{
	name="$(cache_path "$1")"

	# -s rather than -f: an empty file is what a failed "wget -O" leaves
	# behind, so it must not count as a valid cache entry.
	if test -s "$name"
	then
		return 0
	fi

	mkdir -p "$CACHE_DIR"

	# Download to a scratch file and rename on success so that an
	# interrupted or failed transfer never poisons the cache.
	part="$name.part.$$"
	if wget -q "$1" -O "$part"
	then
		mv "$part" "$name"
	else
		rm -f "$part"
		return 1
	fi
}

# dirname copes with a $0 that has no slash (e.g. "sh gbrk").
cd "$(dirname "$0")/.."

# Fetch the three files in parallel.  A bare "wait" always reports
# success, so wait on each job individually to see its exit status.
cache "$URL1" & pid1=$!
cache "$URL2" & pid2=$!
cache "$URL3" & pid3=$!

rc=0
wait "$pid1" || rc=1
wait "$pid2" || rc=1
wait "$pid3" || rc=1
if test "$rc" -ne 0
then
	echo 'gbrk: failed to download Unicode data files' >&2
	exit 1
fi

# Build the header in a scratch file and move it into place only once
# everything succeeded, so a failure never leaves a truncated header.
tmp_out="$OUT.tmp.$$"
trap 'rm -f "$tmp_out"' EXIT
trap 'exit 1' HUP INT TERM
exec >"$tmp_out"

# Quoted delimiter: the C text is emitted verbatim, with no shell expansion.
cat <<'C'
/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */

#ifndef RUNE_INTERNAL_GBRK_LOOKUP_H
#define RUNE_INTERNAL_GBRK_LOOKUP_H

/* IWYU pragma: private */
/* clang-format off */

#include "types.h"

typedef enum {
	GBP_OTHER = 0,

	GBP_CTRL = 1 << 0, /* Control */
	GBP_EXT  = 1 << 1, /* Extend */
	GBP_PIC  = 1 << 2, /* Extended_Pictographic */
	GBP_PREP = 1 << 3, /* Prepend */
	GBP_RI   = 1 << 4, /* Regional_Indicator */
	GBP_SM   = 1 << 5, /* SpacingMark */
	GBP_ZWJ  = 1 << 6, /* ZWJ */

	GBP_HNGL_L   = 1 << 7,  /* Hangul L */
	GBP_HNGL_LV  = 1 << 8,  /* Hangul LV */
	GBP_HNGL_LVT = 1 << 9,  /* Hangul LVT */
	GBP_HNGL_T   = 1 << 10, /* Hangul T */
	GBP_HNGL_V   = 1 << 11, /* Hangul V */

	GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */
	GBP_INDC_EXT   = 1 << 13, /* Indic Extend */
	GBP_INDC_LNK   = 1 << 14, /* Indic Linker */
} gbrk_prop;

static const struct {
	rune lo, hi;
	gbrk_prop val;
} gbrk_prop_tbl[] = {
C

# The input files are named explicitly (not globbed) so stray files in
# the cache directory are never parsed.  The END loop already emits the
# ranges in ascending order, so no sort is needed, and without a
# pipeline a gawk failure is caught by "set -e".
gawk '
BEGIN {
	# Fields are separated by " ; " (at least one space on each side);
	# a trailing "# comment" is stripped.  "InCB; Consonant" has no
	# space before its semicolon and so stays a single field.
	FS = "( *#.*| +; +)"

	map["Control"]               = "CTRL"
	map["Extend"]                = "EXT"
	map["Extended_Pictographic"] = "PIC"
	map["Prepend"]               = "PREP"
	map["Regional_Indicator"]    = "RI"
	map["SpacingMark"]           = "SM"
	map["ZWJ"]                   = "ZWJ"

	map["L"]   = "HNGL_L"
	map["LV"]  = "HNGL_LV"
	map["LVT"] = "HNGL_LVT"
	map["T"]   = "HNGL_T"
	map["V"]   = "HNGL_V"

	map["InCB; Consonant"] = "INDC_CNSNT"
	map["InCB; Extend"]    = "INDC_EXT"
	map["InCB; Linker"]    = "INDC_LNK"
}

# Only lines whose property name is in the map are of interest.
($2 in map) {
	# $1 is either a single code point or a "LO..HI" range.
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	# Accumulate the flags of each code point as an OR-expression.
	s = "GBP_" map[$2]
	for (i = lo; i <= hi; i++)
		props[i] = props[i] ? props[i] " | " s : s
}

END {
	# Merge runs of consecutive code points with identical flags into
	# a single {lo, hi, flags} table entry.
	for (i = 0; i <= 0x10FFFF; i++) {
		if (!props[i])
			continue
		lo = i
		while (props[lo] == props[i + 1])
			i++
		printf "\t{0x%06X, 0x%06X, %s},\n", lo, i, props[lo]
	}
}
' "$(cache_path "$URL3")" "$(cache_path "$URL1")" "$(cache_path "$URL2")"

cat <<'C'
};

#endif /* !RUNE_INTERNAL_GBRK_LOOKUP_H */
C

mv "$tmp_out" "$OUT"