about | summary | refs | log | tree | commit | diff
path: root/vendor/librune/gen/gbrk
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/librune/gen/gbrk')
-rwxr-xr-x vendor/librune/gen/gbrk 115
1 files changed, 115 insertions, 0 deletions
diff --git a/vendor/librune/gen/gbrk b/vendor/librune/gen/gbrk
new file mode 100755
index 0000000..5cbd87f
--- /dev/null
+++ b/vendor/librune/gen/gbrk
@@ -0,0 +1,115 @@
+#!/bin/sh
+
+# cache URL
+#
+# Download URL into /tmp/librune/<basename of URL> unless a non-empty copy
+# is already there.  Returns non-zero when the download fails.
+cache()
+{
+	name="/tmp/librune/$(basename "$1")"
+	# Hidden staging file: a /tmp/librune/* glob does not match it, so a
+	# partial download is never picked up as input data.
+	part="/tmp/librune/.$(basename "$1").part"
+
+	# -s rather than -f: an empty file is the leftover of a failed fetch,
+	# not a usable cache entry.
+	if test -s "$name"
+	then
+		return 0
+	fi
+
+	mkdir -p /tmp/librune
+	# wget -O creates its output file even when the transfer fails, so the
+	# result is only moved into place after wget reports success.
+	if wget -q "$1" -O "$part"
+	then
+		mv "$part" "$name"
+	else
+		rm -f "$part"
+		return 1
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >lib/internal/gbrk_lookup.h
+
+readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt'
+readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt'
+readonly URL3='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt'
+
+cache "$URL1" &
+cache "$URL2" &
+cache "$URL3" &
+wait
+
+cat <<C
+/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */
+
+/* TODO: Change tables to constexpr from const when Clangd gets better */
+
+#ifndef RUNE_INTERNAL_GBRK_LOOKUP_H
+#define RUNE_INTERNAL_GBRK_LOOKUP_H
+
+/* clang-format off */
+
+#include "types.h"
+
+typedef enum {
+ GBP_OTHER = 0,
+
+ GBP_CTRL = 1 << 0, /* Control */
+ GBP_EXT = 1 << 1, /* Extend */
+ GBP_PIC = 1 << 2, /* Extended_Pictographic */
+ GBP_PREP = 1 << 3, /* Prepend */
+ GBP_RI = 1 << 4, /* Regional_Indicator */
+ GBP_SM = 1 << 5, /* SpacingMark */
+ GBP_ZWJ = 1 << 6, /* ZWJ */
+
+ GBP_HNGL_L = 1 << 7, /* Hangul L */
+ GBP_HNGL_LV = 1 << 8, /* Hangul LV */
+ GBP_HNGL_LVT = 1 << 9, /* Hangul LVT */
+ GBP_HNGL_T = 1 << 10, /* Hangul T */
+ GBP_HNGL_V = 1 << 11, /* Hangul V */
+
+ GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */
+ GBP_INDC_EXT = 1 << 13, /* Indic Extend */
+ GBP_INDC_LNK = 1 << 14, /* Indic Linker */
+} gbrk_prop;
+
+static const struct {
+ rune lo, hi;
+ gbrk_prop prop;
+} gbrk_prop_tbl[] = {
+C
+
+# Emit one "{lo, hi, flags}" row per maximal run of code points that share
+# the same set of properties.
+#
+# The inputs are named explicitly rather than globbed from /tmp/librune, so
+# unrelated files in that directory are never parsed.  Their order fixes the
+# order of the OR-ed flags; it matches what a C-locale glob produced.
+#
+# The output is not piped through sort: the END loop already walks code
+# points in ascending order, a lexical sort would misplace 6-digit values,
+# and a pipeline would hide a gawk failure from `set -e`.
+gawk '
+BEGIN {
+	# A field ends at " ; " or at a trailing "# comment".
+	FS = "( *#.*| +; +)"
+
+	# Property name as spelled in the data files -> GBP_ enumerator suffix.
+	map["Control"] = "CTRL"
+	map["Extend"] = "EXT"
+	map["Extended_Pictographic"] = "PIC"
+	map["Prepend"] = "PREP"
+	map["Regional_Indicator"] = "RI"
+	map["SpacingMark"] = "SM"
+	map["ZWJ"] = "ZWJ"
+
+	map["L"] = "HNGL_L"
+	map["LV"] = "HNGL_LV"
+	map["LVT"] = "HNGL_LVT"
+	map["T"] = "HNGL_T"
+	map["V"] = "HNGL_V"
+
+	map["InCB; Consonant"] = "INDC_CNSNT"
+	map["InCB; Extend"] = "INDC_EXT"
+	map["InCB; Linker"] = "INDC_LNK"
+}
+
+# Membership test instead of map[$2], which would add an entry to map for
+# every unknown property name.
+$2 in map {
+	# $1 is a single code point or a "LO..HI" range, in hex.
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+	s = "GBP_" map[$2]
+
+	for (i = lo; i <= hi; i++)
+		props[i] = (i in props) ? props[i] " | " s : s
+}
+
+END {
+	for (i = 0; i <= 0x10FFFF; i++) {
+		# "in" keeps the scan from creating an element per code point.
+		if (!(i in props))
+			continue
+		lo = i
+		while (((i + 1) in props) && props[i + 1] == props[lo])
+			i++
+		printf "\t{0x%05X, 0x%05X, %s},\n", lo, i, props[lo]
+	}
+}
+' "/tmp/librune/${URL3##*/}" "/tmp/librune/${URL1##*/}" "/tmp/librune/${URL2##*/}"
+
+# Close the table initializer and the include guard of the generated header.
+printf '%s\n' \
+	'};' \
+	'' \
+	'#endif /* !RUNE_INTERNAL_GBRK_LOOKUP_H */'