diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-01-21 03:03:58 +0100 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-01-21 03:03:58 +0100 | 
| commit | 4f93f935dc7a981ca073a322425c3f5929ffb644 (patch) | |
| tree | 4460586408ec7fdfcecf3ba4584f0435067125a6 /vendor/librune/gen/gbrk | |
| parent | 72ea25a4d73e3e026366d4165f5bc4ec9e7418cb (diff) | |
Support line- & column-based match locations
Diffstat (limited to 'vendor/librune/gen/gbrk')
| -rwxr-xr-x | vendor/librune/gen/gbrk | 115 | 
1 files changed, 115 insertions, 0 deletions
#!/bin/sh

# gen/gbrk -- generate lib/internal/gbrk_lookup.h
#
# Downloads (and caches) three Unicode Character Database files, then
# emits a C header containing the gbrk_prop flag enum and a table of
# codepoint ranges (gbrk_prop_tbl) tagged with the flags found for them.
#
# Requires: wget, gawk (for strtonum), sort.

# Directory in which downloaded UCD files are cached between runs.
readonly CACHE_DIR='/tmp/librune'

# cache URL
#
# Download URL into $CACHE_DIR under its basename unless a file of that
# name is already there.  Returns non-zero if the download fails.
cache()
{
	name="$CACHE_DIR/$(basename "$1")"
	if test ! -f "$name"
	then
		mkdir -p "$CACHE_DIR"
		# wget -O creates its output file even when the transfer fails.
		# Download to a temporary name and only move it into place on
		# success, so that a failed download is never mistaken for a
		# valid cache entry on the next run.
		if ! wget -q "$1" -O "$name.$$"
		then
			rm -f "$name.$$"
			return 1
		fi
		mv "$name.$$" "$name"
	fi
}

set -e
cd "${0%/*}/.."

readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt'
readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt'
readonly URL3='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt'

# Fetch the three files in parallel.  A bare "wait" always succeeds, so
# wait on each job by PID to let "set -e" abort when a download failed.
cache "$URL1" & pid1=$!
cache "$URL2" & pid2=$!
cache "$URL3" & pid3=$!
wait "$pid1"
wait "$pid2"
wait "$pid3"

# Redirect stdout to the header only once all inputs are available, so a
# failed download does not truncate a previously generated header.
exec >lib/internal/gbrk_lookup.h

cat <<C
/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */

/* TODO: Change tables to constexpr from const when Clangd gets better */

#ifndef RUNE_INTERNAL_GBRK_LOOKUP_H
#define RUNE_INTERNAL_GBRK_LOOKUP_H

/* clang-format off */

#include "types.h"

typedef enum {
	GBP_OTHER = 0,

	GBP_CTRL = 1 << 0, /* Control */
	GBP_EXT  = 1 << 1, /* Extend */
	GBP_PIC  = 1 << 2, /* Extended_Pictographic */
	GBP_PREP = 1 << 3, /* Prepend */
	GBP_RI   = 1 << 4, /* Regional_Indicator */
	GBP_SM   = 1 << 5, /* SpacingMark */
	GBP_ZWJ  = 1 << 6, /* ZWJ */

	GBP_HNGL_L   = 1 <<  7, /* Hangul L */
	GBP_HNGL_LV  = 1 <<  8, /* Hangul LV */
	GBP_HNGL_LVT = 1 <<  9, /* Hangul LVT */
	GBP_HNGL_T   = 1 << 10, /* Hangul T */
	GBP_HNGL_V   = 1 << 11, /* Hangul V */

	GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */
	GBP_INDC_EXT   = 1 << 13, /* Indic Extend */
	GBP_INDC_LNK   = 1 << 14, /* Indic Linker */
} gbrk_prop;

static const struct {
	rune lo, hi;
	gbrk_prop prop;
} gbrk_prop_tbl[] = {
C

# Build the range table.  Only the three files fetched above are read
# (rather than everything in the cache directory), and in a fixed order:
# flags for a codepoint are joined in the order they are encountered, so
# the input order determines the text of each table entry.
gawk '
BEGIN {
	# Fields are separated by " ; "; a trailing "# comment" is also
	# treated as a separator so that it never ends up inside a field.
	FS = "( *#.*| +; +)"

	# UCD property value (field 2) -> suffix of the GBP_* enumerator.
	map["Control"]               = "CTRL"
	map["Extend"]                = "EXT"
	map["Extended_Pictographic"] = "PIC"
	map["Prepend"]               = "PREP"
	map["Regional_Indicator"]    = "RI"
	map["SpacingMark"]           = "SM"
	map["ZWJ"]                   = "ZWJ"

	map["L"]   = "HNGL_L"
	map["LV"]  = "HNGL_LV"
	map["LVT"] = "HNGL_LVT"
	map["T"]   = "HNGL_T"
	map["V"]   = "HNGL_V"

	map["InCB; Consonant"] = "INDC_CNSNT"
	map["InCB; Extend"]    = "INDC_EXT"
	map["InCB; Linker"]    = "INDC_LNK"
}

# Only lines whose property value is in the map are of interest.
map[$2] {
	# Field 1 is either a single codepoint or a "lo..hi" range; for a
	# single codepoint n is 1, so a[1] and a[n] are the same element.
	n = split($1, a, /\.\./)
	lo = strtonum("0X" a[1])
	hi = strtonum("0X" a[n])

	# Accumulate the flags of every codepoint as a C "A | B" expression.
	for (i = lo; i <= hi; i++) {
		s = "GBP_" map[$2]
		props[i] = props[i] ? props[i] " | " s : s
	}
}

END {
	# Merge runs of adjacent codepoints with identical flag strings
	# into a single {lo, hi, flags} table entry.
	for (i = 0; i <= 0x10FFFF; i++) {
		if (!props[i])
			continue
		lo = i
		while (props[lo] == props[i + 1])
			i++
		printf "\t{0x%05X, 0x%05X, %s},\n", lo, i, props[lo]
	}
}
' "$CACHE_DIR/${URL3##*/}" "$CACHE_DIR/${URL1##*/}" "$CACHE_DIR/${URL2##*/}" | sort

cat <<C
};

#endif /* !RUNE_INTERNAL_GBRK_LOOKUP_H */
C