#!/bin/sh

# Generate lib/internal/gbrk_lookup.h: a C header holding a table that maps
# code point ranges to the grapheme-break related properties listed in the
# gbrk_prop enum below.  The table is built by gawk from three Unicode
# Character Database files, which are downloaded with wget and cached in
# $CACHE_DIR between runs.
#
# Usage: gen/gbrk   (takes no arguments; requires wget and gawk)

readonly CACHE_DIR='/tmp/librune'

# cache URL
#
# Download URL to $CACHE_DIR/<basename of URL> unless that file already
# exists.  Returns non-zero if the download fails.
cache()
{
	name="$CACHE_DIR/$(basename "$1")"
	if test ! -f "$name"
	then
		mkdir -p "$CACHE_DIR"
		# wget -O creates its output file even when the transfer fails, so
		# download under a temporary name and rename only on success.  A
		# failed run therefore never leaves a truncated file behind that a
		# later run would mistake for a valid cache entry.
		if wget -q "$1" -O "$name.$$"
		then
			mv -- "$name.$$" "$name"
		else
			rm -f -- "$name.$$"
			printf 'gbrk: failed to download %s\n' "$1" >&2
			return 1
		fi
	fi
}

set -e

# dirname copes with $0 containing no slash, unlike a bare ${0%/*}.
cd "$(dirname "$0")/.."

readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt'
readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt'
readonly URL3='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt'

# Fetch the three files in parallel.  A bare 'wait' always reports success,
# so remember each PID and wait on it individually to see its exit status.
cache "$URL1" &
pid1=$!
cache "$URL2" &
pid2=$!
cache "$URL3" &
pid3=$!

failed=0
for pid in "$pid1" "$pid2" "$pid3"
do
	wait "$pid" || failed=1
done
if test "$failed" -ne 0
then
	exit 1
fi

# Redirect stdout only once the inputs are known to be available, so that a
# failed download does not truncate an existing header.
exec >lib/internal/gbrk_lookup.h

cat <<'C'
/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */

/* TODO: Change tables to constexpr from const when Clangd gets better */

#ifndef RUNE_INTERNAL_GBRK_LOOKUP_H
#define RUNE_INTERNAL_GBRK_LOOKUP_H

/* clang-format off */

#include "types.h"

typedef enum {
	GBP_OTHER = 0,

	GBP_CTRL = 1 << 0, /* Control */
	GBP_EXT  = 1 << 1, /* Extend */
	GBP_PIC  = 1 << 2, /* Extended_Pictographic */
	GBP_PREP = 1 << 3, /* Prepend */
	GBP_RI   = 1 << 4, /* Regional_Indicator */
	GBP_SM   = 1 << 5, /* SpacingMark */
	GBP_ZWJ  = 1 << 6, /* ZWJ */

	GBP_HNGL_L   = 1 <<  7, /* Hangul L */
	GBP_HNGL_LV  = 1 <<  8, /* Hangul LV */
	GBP_HNGL_LVT = 1 <<  9, /* Hangul LVT */
	GBP_HNGL_T   = 1 << 10, /* Hangul T */
	GBP_HNGL_V   = 1 << 11, /* Hangul V */

	GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */
	GBP_INDC_EXT   = 1 << 13, /* Indic Extend */
	GBP_INDC_LNK   = 1 << 14, /* Indic Linker */
} gbrk_prop;

static const struct {
	rune lo, hi;
	gbrk_prop prop;
} gbrk_prop_tbl[] = {
C

# Pass exactly the three cached files, in a fixed order, instead of globbing
# the cache directory: unrelated files that happen to live there must not be
# parsed, and glob order depends on the locale.
#
# gawk is the last command of a simple command rather than the head of a
# pipeline, so 'set -e' aborts the script if it is missing or fails.  No
# sort is needed: the END rule already emits ranges in ascending order.
gawk '
BEGIN {
	# Fields are separated by " ; " and a trailing "# comment" is dropped.
	FS = "( *#.*| +; +)"

	# UCD property value -> suffix of the matching GBP_* enumerator
	map["Control"]               = "CTRL"
	map["Extend"]                = "EXT"
	map["Extended_Pictographic"] = "PIC"
	map["Prepend"]               = "PREP"
	map["Regional_Indicator"]    = "RI"
	map["SpacingMark"]           = "SM"
	map["ZWJ"]                   = "ZWJ"

	map["L"]   = "HNGL_L"
	map["LV"]  = "HNGL_LV"
	map["LVT"] = "HNGL_LVT"
	map["T"]   = "HNGL_T"
	map["V"]   = "HNGL_V"

	map["InCB; Consonant"] = "INDC_CNSNT"
	map["InCB; Extend"]    = "INDC_EXT"
	map["InCB; Linker"]    = "INDC_LNK"
}

# Only lines whose second field is a property we know about.
map[$2] {
	# $1 is either a single code point or a "LO..HI" range; for a single
	# code point n is 1, so lo and hi are the same value.
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	# Accumulate the flags of every code point as a C OR-expression.
	s = "GBP_" map[$2]
	for (i = lo; i <= hi; i++)
		props[i] = (i in props) ? props[i] " | " s : s
}

END {
	# Merge runs of consecutive code points that carry an identical flag
	# expression into one {lo, hi, flags} row.  Membership tests with "in"
	# avoid creating an empty array element for every unassigned code point.
	for (i = 0; i <= 0x10FFFF; i++) {
		if (!(i in props))
			continue
		lo = i
		while (((i + 1) in props) && (props[i + 1] == props[lo]))
			i++
		printf "\t{0x%05X, 0x%05X, %s},\n", lo, i, props[lo]
	}
}
' "$CACHE_DIR/${URL3##*/}" "$CACHE_DIR/${URL1##*/}" "$CACHE_DIR/${URL2##*/}"

cat <<'C'
};

#endif /* !RUNE_INTERNAL_GBRK_LOOKUP_H */
C