diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-02-13 13:02:28 +0100 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-02-13 13:11:47 +0100 |
commit | 79e6af86ca526d5fb56af6f6ca3da713e3a5e9f9 (patch) | |
tree | 752f1c26d1f122dcf58374ac78db109c9578be45 /vendor/librune/gen |
Genesis commit
Diffstat (limited to 'vendor/librune/gen')
-rwxr-xr-x | vendor/librune/gen/gbrk | 114 | ||||
-rwxr-xr-x | vendor/librune/gen/mkfile | 37 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-dt | 98 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-equideo | 64 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-gc | 107 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-jg | 69 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-jt | 97 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-nt | 97 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-nv | 68 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-prop | 155 | ||||
-rw-r--r-- | vendor/librune/gen/rtype-prop.awk | 72 | ||||
-rwxr-xr-x | vendor/librune/gen/rtype-vo | 100 |
12 files changed, 1078 insertions, 0 deletions
# Generator scripts for vendor/librune (POSIX sh + gawk).  Every script
# changes into vendor/librune, downloads the Unicode data files it needs into
# /tmp/librune/ via cache() (skipped when the file is already there) and then
# writes its output relative to that directory:
#
#   gbrk            -> include/internal/gbrk_lookup.h
#   mkfile          -> Makefile
#   rtype-dt, rtype-equideo, rtype-gc, rtype-jg, rtype-jt, rtype-nt,
#   rtype-nv, rtype-vo
#                   -> include/internal/rtype/<name>.h
#   rtype-prop      -> lib/rtype/rprop_is_<short>.c (one per property) and the
#                      prototype list between the "PROP PREDICATES START/END"
#                      markers of include/rtype.h
#   rtype-prop.awk  -> gawk program run by rtype-prop for each property
#
# Differences from the recorded commit (all in-line, so hunk sizes and the
# diffstat are unchanged; the "index" blob ids are those of the recorded
# files):
#   * cache(): wget -O creates the output file even when the download fails,
#     which made a failed download look like a valid cache entry on the next
#     run.  The file is now removed and cache() returns non-zero.
#   * rtype-prop: this script has no "set -e", so a failed cd went unnoticed;
#     it now exits.
#   * rtype-prop.awk: the generated C tested "mask & (1 << ch)"; the int 1 is
#     now widened to the type of the 256-bit mask before shifting.
diff --git a/vendor/librune/gen/gbrk b/vendor/librune/gen/gbrk
new file mode 100755
index 0000000..577c2c9
--- /dev/null
+++ b/vendor/librune/gen/gbrk
@@ -0,0 +1,114 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/gbrk/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/gbrk
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >include/internal/gbrk_lookup.h
+
+readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt'
+readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt'
+readonly URL3='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt'
+
+cache "$URL1" &
+cache "$URL2" &
+cache "$URL3" &
+wait
+
+cat <<C
+/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */
+
+#ifndef RUNE_INTERNAL_GBRK_LOOKUP_H
+#define RUNE_INTERNAL_GBRK_LOOKUP_H
+
+/* IWYU pragma: private */
+/* clang-format off */
+
+#include "types.h"
+
+typedef enum {
+	GBP_OTHER = 0,
+
+	GBP_CTRL = 1 << 0, /* Control */
+	GBP_EXT = 1 << 1, /* Extend */
+	GBP_PIC = 1 << 2, /* Extended_Pictographic */
+	GBP_PREP = 1 << 3, /* Prepend */
+	GBP_RI = 1 << 4, /* Regional_Indicator */
+	GBP_SM = 1 << 5, /* SpacingMark */
+	GBP_ZWJ = 1 << 6, /* ZWJ */
+
+	GBP_HNGL_L = 1 << 7, /* Hangul L */
+	GBP_HNGL_LV = 1 << 8, /* Hangul LV */
+	GBP_HNGL_LVT = 1 << 9, /* Hangul LVT */
+	GBP_HNGL_T = 1 << 10, /* Hangul T */
+	GBP_HNGL_V = 1 << 11, /* Hangul V */
+
+	GBP_INDC_CNSNT = 1 << 12, /* Indic Consonant */
+	GBP_INDC_EXT = 1 << 13, /* Indic Extend */
+	GBP_INDC_LNK = 1 << 14, /* Indic Linker */
+} gbrk_prop;
+
+static const struct {
+	rune lo, hi;
+	gbrk_prop val;
+} gbrk_prop_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+	map["Control"] = "CTRL"
+	map["Extend"] = "EXT"
+	map["Extended_Pictographic"] = "PIC"
+	map["Prepend"] = "PREP"
+	map["Regional_Indicator"] = "RI"
+	map["SpacingMark"] = "SM"
+	map["ZWJ"] = "ZWJ"
+
+	map["L"] = "HNGL_L"
+	map["LV"] = "HNGL_LV"
+	map["LVT"] = "HNGL_LVT"
+	map["T"] = "HNGL_T"
+	map["V"] = "HNGL_V"
+
+	map["InCB; Consonant"] = "INDC_CNSNT"
+	map["InCB; Extend"] = "INDC_EXT"
+	map["InCB; Linker"] = "INDC_LNK"
+}
+
+map[$2] {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++) {
+		s = "GBP_" map[$2]
+		props[i] = props[i] ? props[i] " | " s : s
+	}
+}
+
+END {
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])
+			i++
+		printf "\t{0x%06X, 0x%06X, %s},\n", lo, i, props[lo]
+	}
+}
+' /tmp/librune/gbrk/* | sort
+
+cat <<C
+};
+
+#endif /* !RUNE_INTERNAL_GBRK_LOOKUP_H */
+C
diff --git a/vendor/librune/gen/mkfile b/vendor/librune/gen/mkfile
new file mode 100755
index 0000000..a55b08a
--- /dev/null
+++ b/vendor/librune/gen/mkfile
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+set -e
+cd "${0%/*}/.."
+exec >Makefile
+
+cat <<make
+.POSIX:
+
+CC = cc
+CFLAGS = -Wall -Wextra -Wpedantic -g -ggdb3 -fanalyzer -Iinclude
+
+make
+
+printf 'objs = \\\n'
+find lib -name '*.c' | sort | sed '
+	s/c$/o/
+	s/.*/\t& \\/
+	$s/ \\$//
+'
+
+cat <<make
+
+all: \$(objs)
+
+make
+
+find lib -name '*.c' | sort | sed -E 's/(.*)c$/\1o: \1c/'
+
+cat <<make
+
+clean:
+	find lib -name '*.o' -delete
+
+ls:
+	@echo "alias ls='ls --color=auto -I \"*.o\" -I \"compile_commands.json\"'" >&2
+make
diff --git a/vendor/librune/gen/rtype-dt b/vendor/librune/gen/rtype-dt
new file mode 100755
index 0000000..927b54f
--- /dev/null
+++ b/vendor/librune/gen/rtype-dt
@@ -0,0 +1,98 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/rtype/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/rtype
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >include/internal/rtype/dt.h
+
+readonly URL='https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedDecompositionType.txt'
+cache "$URL"
+
+cat <<C
+/* This file is autogenerated by gen/rtype-dt; DO NOT EDIT. */
+
+#ifndef RUNE_INTERNAL_RTYPE_DT_H
+#define RUNE_INTERNAL_RTYPE_DT_H
+
+/* IWYU pragma: private */
+/* clang-format off */
+
+#include "../types.h"
+#include "../../rtype.h"
+#include "../../rune.h"
+
+static const rprop_dt_bf rtype_dt_lat1_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = $2
+}
+
+END {
+	for (i = 0; i <= 0xFF; i++)
+		printf "%12s,\n", "DT_" (props[i] ? toupper(props[i]) : "NONE")
+}
+' /tmp/librune/rtype/DerivedDecompositionType.txt \
+| paste -d' ' - - - - \
+| sed 's/^/\t/'
+
+cat <<C
+};
+
+static const struct {
+	rune lo, hi;
+	rprop_dt_bf val;
+} rtype_dt_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = $2
+}
+
+END {
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])
+			i++
+		printf("\t{RUNE_C(0x%06X), RUNE_C(0x%06X), DT_%s},\n",
+		       lo, i, toupper(props[lo]))
+	}
+}
+' /tmp/librune/rtype/DerivedDecompositionType.txt | sort
+
+cat <<C
+};
+
+#endif /* !RUNE_INTERNAL_RTYPE_DT_H */
+C
diff --git a/vendor/librune/gen/rtype-equideo b/vendor/librune/gen/rtype-equideo
new file mode 100755
index 0000000..794e239
--- /dev/null
+++ b/vendor/librune/gen/rtype-equideo
@@ -0,0 +1,64 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/rtype/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/rtype
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >include/internal/rtype/equideo.h
+
+readonly URL='https://www.unicode.org/Public/UCD/latest/ucd/EquivalentUnifiedIdeograph.txt'
+cache "$URL"
+
+cat <<C
+/* This file is autogenerated by gen/rtype-equideo; DO NOT EDIT. */
+
+#ifndef RUNE_INTERNAL_RTYPE_EQUIDEO_H
+#define RUNE_INTERNAL_RTYPE_EQUIDEO_H
+
+/* IWYU pragma: private */
+/* clang-format off */
+
+#include "../types.h"
+#include "../../rune.h"
+
+static const struct {
+	rune key, val;
+} rtype_equideo_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = strtonum("0X" $2)
+}
+
+END {
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X)},\n", i, props[i]
+	}
+}
+' /tmp/librune/rtype/EquivalentUnifiedIdeograph.txt | sort
+
+cat <<C
+};
+
+#endif /* !RUNE_INTERNAL_RTYPE_EQUIDEO_H */
+C
diff --git a/vendor/librune/gen/rtype-gc b/vendor/librune/gen/rtype-gc
new file mode 100755
index 0000000..33d1662
--- /dev/null
+++ b/vendor/librune/gen/rtype-gc
@@ -0,0 +1,107 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/rtype/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/rtype
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >include/internal/rtype/gc.h
+
+readonly URL='https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt'
+cache "$URL"
+
+cat <<C
+/* This file is autogenerated by gen/rtype-gc; DO NOT EDIT. */
+
+#ifndef RUNE_INTERNAL_RTYPE_GC_H
+#define RUNE_INTERNAL_RTYPE_GC_H
+
+/* IWYU pragma: private */
+/* clang-format off */
+
+#include "../types.h"
+#include "../../rtype.h"
+#include "../../rune.h"
+
+static const rprop_gc_bf rtype_gc_lat1_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = ";"
+}
+
+{
+	s = "GC_" toupper($3)
+	lo = strtonum("0X" $1)
+
+	if ($2 ~ /First/) {
+		getline
+		hi = strtonum("0X" $1)
+	} else
+		hi = lo
+
+	for (i = lo; i <= hi; i++)
+		props[i] = s
+}
+
+END {
+	for (i = 0; i <= 0xFF; i++)
+		print props[i] ","
+}
+' /tmp/librune/rtype/UnicodeData.txt \
+| paste -d' ' - - - - - - - - \
+| sed 's/^/\t/'
+
+cat <<C
+};
+
+static const struct {
+	rune lo, hi;
+	rprop_gc_bf val;
+} rtype_gc_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = ";"
+}
+
+{
+	s = "GC_" toupper($3)
+	lo = strtonum("0X" $1)
+
+	if ($2 ~ /First/) {
+		getline
+		hi = strtonum("0X" $1)
+	} else
+		hi = lo
+
+	for (i = lo; i <= hi; i++)
+		props[i] = s
+}
+
+END {
+	for (i = 0x100; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[lo]
+	}
+}
+' /tmp/librune/rtype/UnicodeData.txt | sort
+
+cat <<C
+};
+
+#endif /* !RUNE_INTERNAL_RTYPE_GC_H */
+C
diff --git a/vendor/librune/gen/rtype-jg b/vendor/librune/gen/rtype-jg
new file mode 100755
index 0000000..6576437
--- /dev/null
+++ b/vendor/librune/gen/rtype-jg
@@ -0,0 +1,69 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/rtype/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/rtype
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >include/internal/rtype/jg.h
+
+readonly URL='https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedJoiningGroup.txt'
+cache "$URL"
+
+cat <<C
+/* This file is autogenerated by gen/rtype-jg; DO NOT EDIT. */
+
+#ifndef RUNE_INTERNAL_RTYPE_JG_H
+#define RUNE_INTERNAL_RTYPE_JG_H
+
+/* IWYU pragma: private */
+/* clang-format off */
+
+#include "../types.h"
+#include "../../rtype.h"
+#include "../../rune.h"
+
+static const struct {
+	rune lo, hi;
+	rprop_jg_bf val;
+} rtype_jg_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = toupper($2)
+}
+
+END {
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), JG_%s},\n", lo, i, props[lo]
+	}
+}
+' /tmp/librune/rtype/DerivedJoiningGroup.txt | sort
+
+cat <<C
+};
+
+#endif /* !RUNE_INTERNAL_RTYPE_JG_H */
+C
diff --git a/vendor/librune/gen/rtype-jt b/vendor/librune/gen/rtype-jt
new file mode 100755
index 0000000..81185ec
--- /dev/null
+++ b/vendor/librune/gen/rtype-jt
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/rtype/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/rtype
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >include/internal/rtype/jt.h
+
+readonly URL='https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedJoiningType.txt'
+cache "$URL"
+
+cat <<C
+/* This file is autogenerated by gen/rtype-jt; DO NOT EDIT. */
+
+#ifndef RUNE_INTERNAL_RTYPE_JT_H
+#define RUNE_INTERNAL_RTYPE_JT_H
+
+/* IWYU pragma: private */
+/* clang-format off */
+
+#include "../types.h"
+#include "../../rtype.h"
+#include "../../rune.h"
+
+static const rprop_jt_bf rtype_jt_lat1_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+$2 ~ /[UCDRLT]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = $2
+}
+
+END {
+	for (i = 0; i <= 0xFF; i++)
+		printf "JT_%s,\n", props[i] ? props[i] : "U"
+}
+' /tmp/librune/rtype/DerivedJoiningType.txt \
+| paste -d' ' - - - - - - - - \
+| sed 's/^/\t/'
+
+cat <<C
+};
+
+static const struct {
+	rune lo, hi;
+	rprop_jt_bf val;
+} rtype_jt_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+$2 ~ /[UCDRLT]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = $2
+}
+
+END {
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), JT_%s},\n", lo, i, props[lo]
+	}
+}
+' /tmp/librune/rtype/DerivedJoiningType.txt | sort
+
+cat <<C
+};
+
+#endif /* !RUNE_INTERNAL_RTYPE_JT_H */
+C
diff --git a/vendor/librune/gen/rtype-nt b/vendor/librune/gen/rtype-nt
new file mode 100755
index 0000000..7dc149f
--- /dev/null
+++ b/vendor/librune/gen/rtype-nt
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/rtype/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/rtype
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >include/internal/rtype/nt.h
+
+readonly URL='https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedNumericType.txt'
+cache "$URL"
+
+cat <<C
+/* This file is autogenerated by gen/rtype-nt; DO NOT EDIT. */
+
+#ifndef RUNE_INTERNAL_RTYPE_NT_H
+#define RUNE_INTERNAL_RTYPE_NT_H
+
+/* IWYU pragma: private */
+/* clang-format off */
+
+#include "../types.h"
+#include "../../rtype.h"
+#include "../../rune.h"
+
+static const rprop_nt_bf rtype_nt_lat1_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = toupper($2)
+}
+
+END {
+	for (i = 0; i <= 0xFF; i++)
+		printf "%10s,\n", "NT_" (props[i] ? props[i] : "NONE")
+}
+' /tmp/librune/rtype/DerivedNumericType.txt \
+| paste -d' ' - - - - - - - - \
+| sed 's/^/\t/'
+
+cat <<C
+};
+
+static const struct {
+	rune lo, hi;
+	rprop_nt_bf val;
+} rtype_nt_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = toupper($2)
+}
+
+END {
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), NT_%s},\n", lo, i, props[lo]
+	}
+}
+' /tmp/librune/rtype/DerivedNumericType.txt | sort
+
+cat <<C
+};
+
+#endif /* !RUNE_INTERNAL_RTYPE_NT_H */
+C
diff --git a/vendor/librune/gen/rtype-nv b/vendor/librune/gen/rtype-nv
new file mode 100755
index 0000000..f5dd9d4
--- /dev/null
+++ b/vendor/librune/gen/rtype-nv
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/rtype/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/rtype
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >include/internal/rtype/nv.h
+
+readonly URL='https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedNumericValues.txt'
+cache "$URL"
+
+cat <<C
+/* This file is autogenerated by gen/rtype-nv; DO NOT EDIT. */
+
+#ifndef RUNE_INTERNAL_RTYPE_NV_H
+#define RUNE_INTERNAL_RTYPE_NV_H
+
+/* IWYU pragma: private */
+/* clang-format off */
+
+#include "../types.h"
+#include "../../rtype.h"
+#include "../../rune.h"
+
+static const struct {
+	rune key;
+	double val;
+} rtype_nv_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++) {
+		gsub(/^; /, "", $3)
+		props[i] = "(double)" $3
+	}
+}
+
+END {
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		printf "\t{RUNE_C(0x%06X), %s},\n", i, props[i]
+	}
+}
+' /tmp/librune/rtype/DerivedNumericValues.txt | sort
+
+cat <<C
+};
+
+#endif /* !RUNE_INTERNAL_RTYPE_NV_H */
+C
diff --git a/vendor/librune/gen/rtype-prop b/vendor/librune/gen/rtype-prop
new file mode 100755
index 0000000..759a07d
--- /dev/null
+++ b/vendor/librune/gen/rtype-prop
@@ -0,0 +1,155 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/rtype/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/rtype
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+cd "${0%/*}/.." || exit 1 # no "set -e" here: do not write files relative to the wrong directory
+
+readonly URL1='https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt'
+readonly URL2='https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt'
+readonly URL3='https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt'
+readonly URL4='https://www.unicode.org/Public/UCD/latest/ucd/DerivedNormalizationProps.txt'
+readonly URL5='https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedBinaryProperties.txt'
+
+cache "$URL1" &
+cache "$URL2" &
+cache "$URL3" &
+cache "$URL4" &
+cache "$URL5" &
+wait
+
+props1='
+bidi_c=Bidi_Control
+dash=Dash
+dep=Deprecated
+dia=Diacritic
+ext=Extender
+hex=Hex_Digit
+idbo=IDS_Binary_Operator
+id_compat_math_continue=ID_Compat_Math_Continue
+id_compat_math_start=ID_Compat_Math_Start
+ideo=Ideographic
+loe=Logical_Order_Exception
+pat_syn=Pattern_Syntax
+pcm=Prepended_Concatenation_Mark
+qmark=Quotation_Mark
+radical=Radical
+sd=Soft_Dotted
+sterm=Sentence_Terminal
+term=Terminal_Punctuation
+uideo=Unified_Ideograph
+vs=Variation_Selector
+wspace=White_Space
+'
+
+props2='
+alpha=Alphabetic
+cased=Cased
+ci=Case_Ignorable
+cwcf=Changes_When_Casefolded
+cwcm=Changes_When_Casemapped
+cwl=Changes_When_Lowercased
+cwt=Changes_When_Titlecased
+cwu=Changes_When_Uppercased
+di=Default_Ignorable_Code_Point
+gr_base=Grapheme_Base
+gr_ext=Grapheme_Extend
+idc=ID_Continue
+ids=ID_Start
+incb=Indic_Conjunct_Break
+lower=Lowercase
+math=Math
+upper=Uppercase
+xidc=XID_Continue
+xids=XID_Start
+'
+
+props3='
+ebase=Emoji_Modifier_Base
+ecomp=Emoji_Component
+emod=Emoji_Modifier
+emoji=Emoji
+epres=Emoji_Presentation
+extpic=Extended_Pictographic
+'
+
+props4='
+cwkcf=Changes_When_NFKC_Casefolded
+'
+
+props5='
+bidi_m=Bidi_Mirrored
+'
+
+manual='
+ahex=ASCII_Hex_Digit
+idst=IDS_Trinary_Operator
+idsu=IDS_Unary_Operator
+join_c=Join_Control
+nchar=Noncharacter_Code_Point
+pat_ws=Pattern_White_Space
+ri=Regional_Indicator
+'
+
+gen()
+{
+	local p=${1%%=*}
+	gawk -M -v prop=${1#*=} -v word=$2 -v short=$p \
+		-f gen/rtype-prop.awk /tmp/librune/rtype/$3 \
+		>lib/rtype/rprop_${2}_${p}.c
+	printf 'DONE rprop_%s_%s()\n' $2 $p >&2
+}
+
+for prop in $props1
+do
+	gen $prop is PropList.txt &
+done
+
+for prop in $props2
+do
+	gen $prop is DerivedCoreProperties.txt &
+done
+
+for prop in $props3
+do
+	gen $prop is emoji-data.txt &
+done
+
+for prop in $props4
+do
+	gen $prop is DerivedNormalizationProps.txt &
+done
+
+for prop in $props5
+do
+	gen $prop is DerivedBinaryProperties.txt &
+done
+
+printf '[[unsequenced]] bool rprop_is_%s(rune);\n' \
+	$(printf '%s\n' $props1 $props2 $props3 $props4 $props5 | cut -d= -f1) \
+| gawk '
+	/PROP PREDICATES END/ { no = 0 }
+	FILENAME != "-" && !no { print }
+	FILENAME == "-" { funcs[++i] = $0 }
+
+	/PROP PREDICATES START/ {
+		no = 1
+		asort(funcs)
+		for (i = 1; i <= length(funcs); i++)
+			print funcs[i]
+	}
+' - include/rtype.h | sponge include/rtype.h
+
+wait
+for prop in $manual
+do
+	shrt=${prop%%=*}
+	printf 'Function rprop_is_%s() implemented manually\n' $shrt >&2
+done
diff --git a/vendor/librune/gen/rtype-prop.awk b/vendor/librune/gen/rtype-prop.awk
new file mode 100644
index 0000000..138b3ab
--- /dev/null
+++ b/vendor/librune/gen/rtype-prop.awk
@@ -0,0 +1,72 @@
+BEGIN {
+	FS = "( *#.*| +; +)"
+
+	print "/* This file is autogenerated by gen/rtype-prop; DO NOT EDIT. */"
+	print ""
+	print "#include \"rtype.h\""
+	print "#include \"rune.h\""
+	print ""
+	print "#include \"internal/common.h\""
+	print ""
+	print "/* clang-format off */"
+	print ""
+}
+
+$2 == prop || (prop == "Indic_Conjunct_Break" && $2 ~ /InCB;/) {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0x" a[1])
+	hi = strtonum("0x" a[n])
+
+	for (i = lo; i <= hi; i++)
+		xs[i] = 1
+}
+
+END {
+	if (word == "is") {
+		for (i = 0; i <= 0xFF; i++) {
+			if (xs[i])
+				mask = or(mask, lshift(1, i))
+		}
+	}
+	if (mask > 0) {
+		print "#if BIT_LOOKUP"
+		print "static const unsigned _BitInt(LATIN1_MAX + 1) mask ="
+		printf "\t0x%064Xuwb;\n", mask
+		print "#endif"
+		print ""
+	}
+
+	print "static const struct {"
+	print "\trune lo, hi;"
+	print "} lookup_tbl[] = {"
+
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!xs[i])
+			continue
+		lo = i
+		while (xs[i + 1])
+			i++
+		printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X)},\n", lo, i
+	}
+
+	print "};"
+	print ""
+	print "#define TYPE bool"
+	print "#define TABLE lookup_tbl"
+	print "#define DEFAULT false"
+	print "#define HAS_VALUE 0"
+	print "#include \"internal/rtype/lookup-func.h\""
+	print ""
+	print "bool"
+	printf "rprop_%s_%s(rune ch)\n", word, short
+	print "{"
+	if (mask > 0) {
+		print "\treturn"
+		print "#if BIT_LOOKUP"
+		print "\t\tch <= LATIN1_MAX ? (mask & ((unsigned _BitInt(LATIN1_MAX + 1))1 << ch)) :" # the 1 must be as wide as mask: shifting an int by ch >= its width is undefined
+		print "#endif"
+		print "\t\tlookup(ch);"
+	} else
+		print "\treturn lookup(ch);"
+	print "}"
+}
diff --git a/vendor/librune/gen/rtype-vo b/vendor/librune/gen/rtype-vo
new file mode 100755
index 0000000..7c2f65f
--- /dev/null
+++ b/vendor/librune/gen/rtype-vo
@@ -0,0 +1,100 @@
+#!/bin/sh
+
+cache()
+{
+	name="/tmp/librune/rtype/$(basename "$1")"
+	if test ! -f "$name"
+	then
+		mkdir -p /tmp/librune/rtype
+		wget -q "$1" -O "$name" || { rm -f "$name"; return 1; } # never cache a failed download
+	fi
+}
+
+set -e
+cd "${0%/*}/.."
+exec >include/internal/rtype/vo.h
+
+readonly URL='https://www.unicode.org/Public/UCD/latest/ucd/VerticalOrientation.txt'
+cache "$URL"
+
+cat <<C
+/* This file is autogenerated by gen/rtype-vo; DO NOT EDIT. */
+
+#ifndef RUNE_INTERNAL_RTYPE_VO_H
+#define RUNE_INTERNAL_RTYPE_VO_H
+
+/* IWYU pragma: private */
+/* clang-format off */
+
+#include "../types.h"
+#include "../../rtype.h"
+#include "../../rune.h"
+
+static const rprop_vo_bf rtype_vo_lat1_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = $2
+}
+
+END {
+	for (i = 0; i <= 0xFF; i++)
+		printf "%5s,\n", "VO_" (props[i] ? toupper(props[i]) : "R")
+}
+' /tmp/librune/rtype/VerticalOrientation.txt \
+| paste -d' ' - - - - - - - - \
+| sed 's/^/\t/'
+
+cat <<C
+};
+
+/* This lookup table omits VO_R entries. As VO_R is the default value, we can
+   remove them from the lookup table for a dramatic decrease in size. */
+static const struct {
+	rune lo, hi;
+	rprop_vo_bf val;
+} rtype_vo_tbl[] = {
+C
+
+gawk '
+BEGIN {
+	FS = "( *#.*| +; +)"
+}
+
+/^[^#]/ {
+	n = split($1, a, /\.\./)
+	lo = strtonum("0X" a[1])
+	hi = strtonum("0X" a[n])
+
+	for (i = lo; i <= hi; i++)
+		props[i] = $2
+}
+
+END {
+	for (i = 0; i <= 0x10FFFF; i++) {
+		if (!props[i])
+			continue
+		lo = i
+		while (props[lo] == props[i + 1])
+			i++
+		if ((p = toupper(props[lo])) != "R")
+			printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), VO_%s},\n", lo, i, p
+	}
+}
+' /tmp/librune/rtype/VerticalOrientation.txt | sort
+
+cat <<C
+};
+
+#endif /* !RUNE_INTERNAL_RTYPE_VO_H */
+C