diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-01-21 03:03:58 +0100 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-01-21 03:03:58 +0100 |
commit | 4f93f935dc7a981ca073a322425c3f5929ffb644 (patch) | |
tree | 4460586408ec7fdfcecf3ba4584f0435067125a6 /vendor/librune/gen/gbrk | |
parent | 72ea25a4d73e3e026366d4165f5bc4ec9e7418cb (diff) |
Support line- & column-based match locations
Diffstat (limited to 'vendor/librune/gen/gbrk')
-rwxr-xr-x | vendor/librune/gen/gbrk | 115 |
1 files changed, 115 insertions, 0 deletions
#!/bin/sh

# gen/gbrk: regenerate lib/internal/gbrk_lookup.h, the grapheme-break
# property lookup table, from the Unicode Character Database (UCD).
#
# The three UCD files named by URL1..URL3 are downloaded once and cached in
# /tmp/librune; gawk then folds them into a single table of code point
# ranges, each tagged with the OR of every property that applies to it.
#
# Requires wget and gawk (the awk program relies on strtonum() and on
# hexadecimal constants in program text, both gawk extensions).

set -e

# Work from the directory above the one holding this script.  dirname is
# used instead of "${0%/*}" so that a slashless invocation ("sh gbrk") still
# resolves to "./..".
cd "$(dirname "$0")/.."

readonly CACHE_DIR='/tmp/librune'
readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt'
readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt'
readonly URL3='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt'

# cache URL
#
# Download URL to $CACHE_DIR/<basename of URL> unless a non-empty copy is
# already present.  Returns non-zero if the download fails.
#
# The transfer goes to a temporary name that is renamed only after wget
# succeeds: "wget -O" creates its output file even when the transfer fails,
# and such an empty or partial file must never be mistaken for a valid cache
# entry on later runs.  For the same reason the freshness check is "-s"
# (exists and is non-empty) rather than "-f".
cache()
{
	name="$CACHE_DIR/$(basename "$1")"
	if test -s "$name"
	then
		return 0
	fi

	mkdir -p "$CACHE_DIR"
	tmp="$name.$$"
	if wget -q "$1" -O "$tmp"
	then
		mv "$tmp" "$name"
	else
		rm -f "$tmp"
		return 1
	fi
}

# Fetch the three files in parallel.  Each job is waited on by PID because a
# bare "wait" always reports success, which would hide a failed download
# from "set -e".
cache "$URL1" &
pid1=$!
cache "$URL2" &
pid2=$!
cache "$URL3" &
pid3=$!
wait "$pid1"
wait "$pid2"
wait "$pid3"

# Redirect stdout to the header only now, so that a failed download does not
# truncate a previously generated header.
exec >lib/internal/gbrk_lookup.h

cat <<C
/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */

/* TODO: Change tables to constexpr from const when Clangd gets better */

#ifndef RUNE_INTERNAL_GBRK_LOOKUP_H
#define RUNE_INTERNAL_GBRK_LOOKUP_H

/* clang-format off */

#include "types.h"

typedef enum {
	GBP_OTHER = 0,

	GBP_CTRL = 1 << 0, /* Control */
	GBP_EXT  = 1 << 1, /* Extend */
	GBP_PIC  = 1 << 2, /* Extended_Pictographic */
	GBP_PREP = 1 << 3, /* Prepend */
	GBP_RI   = 1 << 4, /* Regional_Indicator */
	GBP_SM   = 1 << 5, /* SpacingMark */
	GBP_ZWJ  = 1 << 6, /* ZWJ */

	GBP_HNGL_L   = 1 <<  7, /* Hangul L */
	GBP_HNGL_LV  = 1 <<  8, /* Hangul LV */
	GBP_HNGL_LVT = 1 <<  9, /* Hangul LVT */
	GBP_HNGL_T   = 1 << 10, /* Hangul T */
	GBP_HNGL_V   = 1 << 11, /* Hangul V */

	GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */
	GBP_INDC_EXT   = 1 << 13, /* Indic Extend */
	GBP_INDC_LNK   = 1 << 14, /* Indic Linker */
} gbrk_prop;

static const struct {
	rune lo, hi;
	gbrk_prop prop;
} gbrk_prop_tbl[] = {
C

# Input files are named explicitly instead of globbing the cache directory:
# a glob would also pick up unrelated or stale files, and its expansion order
# depends on the locale.  The order matters because it decides the order of
# the flags joined with " | " for a code point; the order used here is the
# one the glob produces in the C locale.
#
# The END rule walks code points in ascending order, so the rows come out
# already sorted.  The output is deliberately not piped through sort(1): a
# pipeline would hide a gawk failure from "set -e", and a textual sort would
# misplace any code point that needs six hexadecimal digits.
gawk '
BEGIN {
	# A field ends at a trailing comment or at a semicolon surrounded by
	# blanks, so "InCB; Consonant" (no blank before the semicolon) stays
	# one field.
	FS = "( *#.*| +; +)"

	map["Control"]               = "CTRL"
	map["Extend"]                = "EXT"
	map["Extended_Pictographic"] = "PIC"
	map["Prepend"]               = "PREP"
	map["Regional_Indicator"]    = "RI"
	map["SpacingMark"]           = "SM"
	map["ZWJ"]                   = "ZWJ"

	map["L"]   = "HNGL_L"
	map["LV"]  = "HNGL_LV"
	map["LVT"] = "HNGL_LVT"
	map["T"]   = "HNGL_T"
	map["V"]   = "HNGL_V"

	map["InCB; Consonant"] = "INDC_CNSNT"
	map["InCB; Extend"]    = "INDC_EXT"
	map["InCB; Linker"]    = "INDC_LNK"
}

# Only records whose property (field 2) is listed in map are considered.
map[$2] {
	# Field 1 is either one code point or a "lo..hi" range; for a single
	# code point n is 1, so lo and hi coincide.
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	for (i = lo; i <= hi; i++) {
		s = "GBP_" map[$2]
		props[i] = props[i] ? props[i] " | " s : s
	}
}

END {
	for (i = 0; i <= 0x10FFFF; i++) {
		if (!props[i])
			continue
		# Extend the run while the next code point carries the exact
		# same flag expression, then emit one row for the whole run.
		lo = i
		while (props[lo] == props[i + 1])
			i++
		printf "\t{0x%05X, 0x%05X, %s},\n", lo, i, props[lo]
	}
}
' \
	"$CACHE_DIR/${URL3##*/}" \
	"$CACHE_DIR/${URL1##*/}" \
	"$CACHE_DIR/${URL2##*/}"

cat <<C
};

#endif /* !RUNE_INTERNAL_GBRK_LOOKUP_H */
C