diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-01-29 00:42:05 +0100 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-01-29 00:42:05 +0100 |
commit | b8f4479a17e1add06e1532a00ae913cc0f4e9567 (patch) | |
tree | c724d7e1cb053b35cefd87d02109abac846fe0a5 /vendor/librune/include | |
parent | 3d9a150f522278f8ab37e299f9aa187931ea2d56 (diff) |
Bump librune
Diffstat (limited to 'vendor/librune/include')
-rw-r--r-- | vendor/librune/include/internal/common.h | 6 | ||||
-rw-r--r-- | vendor/librune/include/internal/rtype/cat.h (renamed from vendor/librune/include/internal/rtype_lookup.h) | 78 | ||||
-rw-r--r-- | vendor/librune/include/internal/rtype/lookup-func.h | 47 | ||||
-rw-r--r-- | vendor/librune/include/rtype.h | 138 |
4 files changed, 156 insertions, 113 deletions
diff --git a/vendor/librune/include/internal/common.h b/vendor/librune/include/internal/common.h index ace9e63..a05f33c 100644 --- a/vendor/librune/include/internal/common.h +++ b/vendor/librune/include/internal/common.h @@ -3,6 +3,8 @@ /* IWYU pragma: private */ +#include <limits.h> + #define lengthof(a) (sizeof(a) / sizeof(*(a))) #define U1(x) (((x)&0x80) == 0x00) @@ -20,4 +22,8 @@ #define LATIN1_MAX 0xFF +#if BITINT_MAXWIDTH >= LATIN1_MAX + 1 +# define BIT_LOOKUP 1 +#endif + #endif /* !RUNE_INTERNAL_COMMON_H */ diff --git a/vendor/librune/include/internal/rtype_lookup.h b/vendor/librune/include/internal/rtype/cat.h index b4b41fd..d84082a 100644 --- a/vendor/librune/include/internal/rtype_lookup.h +++ b/vendor/librune/include/internal/rtype/cat.h @@ -1,17 +1,15 @@ -/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */ +/* This file is autogenerated by gen/rtype-cat; DO NOT EDIT. */ -/* TODO: Change tables to constexpr from const when Clangd gets better */ - -#ifndef RUNE_INTERNAL_RTYPE_LOOKUP_H -#define RUNE_INTERNAL_RTYPE_LOOKUP_H +#ifndef RUNE_INTERNAL_RTYPE_CAT_H +#define RUNE_INTERNAL_RTYPE_CAT_H /* IWYU pragma: private */ /* clang-format off */ -#include "types.h" -#include "../rtype.h" +#include "../types.h" +#include "../../rtype.h" -static const unicat rtype_lat1_tbl[] = { +static const enum unicat rtype_cat_lat1_tbl[] = { UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, @@ -48,68 +46,8 @@ static const unicat rtype_lat1_tbl[] = { static const struct { rune lo, hi; - unicat cat; + enum unicat val; } rtype_cat_tbl[] = { - {0x000000, 0x00001F, UC_CC}, - {0x000020, 0x000020, UC_ZS}, - {0x000021, 0x000023, UC_PO}, - {0x000024, 0x000024, UC_SC}, - {0x000025, 0x000027, UC_PO}, - {0x000028, 0x000028, UC_PS}, - {0x000029, 0x000029, UC_PE}, - {0x00002A, 0x00002A, UC_PO}, - {0x00002B, 0x00002B, UC_SM}, - {0x00002C, 0x00002C, UC_PO}, - {0x00002D, 0x00002D, UC_PD}, - {0x00002E, 0x00002F, UC_PO}, - {0x000030, 0x000039, UC_ND}, - {0x00003A, 0x00003B, UC_PO}, - {0x00003C, 0x00003E, UC_SM}, - {0x00003F, 0x000040, UC_PO}, - {0x000041, 0x00005A, UC_LU}, - {0x00005B, 0x00005B, UC_PS}, - {0x00005C, 0x00005C, UC_PO}, - {0x00005D, 0x00005D, UC_PE}, - {0x00005E, 0x00005E, UC_SK}, - {0x00005F, 0x00005F, UC_PC}, - {0x000060, 0x000060, UC_SK}, - {0x000061, 0x00007A, UC_LL}, - {0x00007B, 0x00007B, UC_PS}, - {0x00007C, 0x00007C, UC_SM}, - {0x00007D, 0x00007D, UC_PE}, - {0x00007E, 0x00007E, UC_SM}, - {0x00007F, 0x00009F, UC_CC}, - {0x0000A0, 0x0000A0, UC_ZS}, - {0x0000A1, 0x0000A1, UC_PO}, - {0x0000A2, 0x0000A5, UC_SC}, - {0x0000A6, 0x0000A6, UC_SO}, - {0x0000A7, 0x0000A7, UC_PO}, - {0x0000A8, 0x0000A8, UC_SK}, - {0x0000A9, 0x0000A9, UC_SO}, - {0x0000AA, 0x0000AA, UC_LO}, - {0x0000AB, 0x0000AB, UC_PI}, - {0x0000AC, 0x0000AC, UC_SM}, - {0x0000AD, 0x0000AD, UC_CF}, - {0x0000AE, 0x0000AE, UC_SO}, - {0x0000AF, 0x0000AF, UC_SK}, - {0x0000B0, 0x0000B0, UC_SO}, - {0x0000B1, 0x0000B1, UC_SM}, - {0x0000B2, 0x0000B3, UC_NO}, - {0x0000B4, 0x0000B4, UC_SK}, - {0x0000B5, 0x0000B5, UC_LL}, - {0x0000B6, 0x0000B7, UC_PO}, - {0x0000B8, 0x0000B8, UC_SK}, - {0x0000B9, 0x0000B9, UC_NO}, - {0x0000BA, 0x0000BA, UC_LO}, - {0x0000BB, 0x0000BB, UC_PF}, - {0x0000BC, 0x0000BE, UC_NO}, - {0x0000BF, 0x0000BF, UC_PO}, - {0x0000C0, 0x0000D6, UC_LU}, - {0x0000D7, 0x0000D7, UC_SM}, - {0x0000D8, 0x0000DE, UC_LU}, - {0x0000DF, 0x0000F6, UC_LL}, - {0x0000F7, 0x0000F7, UC_SM}, - {0x0000F8, 0x0000FF, UC_LL}, {0x000100, 0x000100, UC_LU}, {0x000101, 0x000101, UC_LL}, {0x000102, 0x000102, UC_LU}, @@ -3354,4 +3292,4 @@ static const struct { {0x100000, 0x10FFFD, UC_CO}, }; -#endif /* !RUNE_INTERNAL_RTYPE_LOOKUP_H */ +#endif /* !RUNE_INTERNAL_RTYPE_CAT_H */ diff --git a/vendor/librune/include/internal/rtype/lookup-func.h b/vendor/librune/include/internal/rtype/lookup-func.h new file mode 100644 index 0000000..91bda8b --- /dev/null +++ b/vendor/librune/include/internal/rtype/lookup-func.h @@ -0,0 +1,47 @@ +#include <stddef.h> + +#include "internal/common.h" + +#ifndef TYPE +# error "TYPE if not defined" +#endif +#ifndef DEFAULT +# error "DEFAULT is not defined" +#endif +#ifndef TABLE +# error "TABLE is not defined" +#endif +#ifndef HAS_VALUE +# error "HAS_VALUE is not defined" +#endif + +[[gnu::always_inline]] static TYPE +lookup(rune ch) +{ + ptrdiff_t lo, hi; + +#ifdef LATIN1_TABLE + if (ch <= LATIN1_MAX) + return LATIN1_TABLE[ch]; +#endif + + lo = 0; + hi = lengthof(TABLE) - 1; + + while (lo <= hi) { + ptrdiff_t i = (lo + hi) / 2; + + if (ch < TABLE[i].lo) + hi = i - 1; + else if (ch > TABLE[i].hi) + lo = i + 1; + else +#if HAS_VALUE + return TABLE[i].val; +#else + return true; +#endif + } + + return DEFAULT; +} diff --git a/vendor/librune/include/rtype.h b/vendor/librune/include/rtype.h index 6c3e8e3..72f054c 100644 --- a/vendor/librune/include/rtype.h +++ b/vendor/librune/include/rtype.h @@ -5,50 +5,52 @@ #include "internal/types.h" -typedef enum [[clang::flag_enum]] : uint_fast32_t { - UC_CN = 0, /* Not Assigned */ - UC_CC = 1 << 0, /* Control */ - UC_CF = 1 << 1, /* Format */ - UC_CO = 1 << 2, /* Private Use */ - UC_CS = 1 << 3, /* Surrogate */ - UC_LL = 1 << 4, /* Lowercase Letter */ - UC_LM = 1 << 5, /* Modifier Letter */ - UC_LO = 1 << 6, /* Other Letter */ - UC_LT = 1 << 7, /* Titlecase Letter */ - UC_LU = 1 << 8, /* Uppercase Letter */ - UC_MC = 1 << 9, /* Spacing Mark */ - UC_ME = 1 << 10, /* Enclosing Mark */ - UC_MN = 1 << 11, /* Nonspacing Mark */ - UC_ND = 1 << 12, /* Decimal Number */ - UC_NL = 1 << 13, /* Letter Number */ - UC_NO = 1 << 14, /* Other Number */ - UC_PC = 1 << 15, /* Connector Punctuation */ - UC_PD = 1 << 16, /* Dash Punctuation */ - UC_PE = 1 << 17, /* Close Punctuation */ - UC_PF = 1 << 18, /* Final Punctuation */ - UC_PI = 1 << 19, /* Initial Punctuation */ - UC_PO = 1 << 20, /* Other Punctuation */ - UC_PS = 1 << 21, /* Open Punctuation */ - UC_SC = 1 << 22, /* Currency Symbol */ - UC_SK = 1 << 23, /* Modifier Symbol */ - UC_SM = 1 << 24, /* Math Symbol */ - UC_SO = 1 << 25, /* Other Symbol */ - UC_ZL = 1 << 26, /* Line Separator */ - UC_ZP = 1 << 27, /* Paragraph Separator */ - UC_ZS = 1 << 28, /* Space Separator */ +/* clang-format off */ +enum [[clang::flag_enum]] unicat : uint_fast32_t { + UC_CN = 0, /* Not Assigned */ + UC_CC = UINT32_C(1) << 0, /* Control */ + UC_CF = UINT32_C(1) << 1, /* Format */ + UC_CO = UINT32_C(1) << 2, /* Private Use */ + UC_CS = UINT32_C(1) << 3, /* Surrogate */ + UC_LL = UINT32_C(1) << 4, /* Lowercase Letter */ + UC_LM = UINT32_C(1) << 5, /* Modifier Letter */ + UC_LO = UINT32_C(1) << 6, /* Other Letter */ + UC_LT = UINT32_C(1) << 7, /* Titlecase Letter */ + UC_LU = UINT32_C(1) << 8, /* Uppercase Letter */ + UC_MC = UINT32_C(1) << 9, /* Spacing Mark */ + UC_ME = UINT32_C(1) << 10, /* Enclosing Mark */ + UC_MN = UINT32_C(1) << 11, /* Nonspacing Mark */ + UC_ND = UINT32_C(1) << 12, /* Decimal Number */ + UC_NL = UINT32_C(1) << 13, /* Letter Number */ + UC_NO = UINT32_C(1) << 14, /* Other Number */ + UC_PC = UINT32_C(1) << 15, /* Connector Punctuation */ + UC_PD = UINT32_C(1) << 16, /* Dash Punctuation */ + UC_PE = UINT32_C(1) << 17, /* Close Punctuation */ + UC_PF = UINT32_C(1) << 18, /* Final Punctuation */ + UC_PI = UINT32_C(1) << 19, /* Initial Punctuation */ + UC_PO = UINT32_C(1) << 20, /* Other Punctuation */ + UC_PS = UINT32_C(1) << 21, /* Open Punctuation */ + UC_SC = UINT32_C(1) << 22, /* Currency Symbol */ + UC_SK = UINT32_C(1) << 23, /* Modifier Symbol */ + UC_SM = UINT32_C(1) << 24, /* Math Symbol */ + UC_SO = UINT32_C(1) << 25, /* Other Symbol */ + UC_ZL = UINT32_C(1) << 26, /* Line Separator */ + UC_ZP = UINT32_C(1) << 27, /* Paragraph Separator */ + UC_ZS = UINT32_C(1) << 28, /* Space Separator */ - UC_C = UC_CC | UC_CF | UC_CN | UC_CO | UC_CS, /* Other */ - UC_L = UC_LL | UC_LM | UC_LO | UC_LT | UC_LU, /* Letter */ - UC_M = UC_MC | UC_ME | UC_MN, /* Mark */ - UC_N = UC_ND | UC_NL | UC_NO, /* Number */ - UC_S = UC_SC | UC_SK | UC_SM | UC_SO, /* Symbol */ - UC_Z = UC_ZL | UC_ZP | UC_ZS, /* Separator */ - UC_P = UC_PC | UC_PD | UC_PE | UC_PF | UC_PI /* Punctuation */ - | UC_PO | UC_PS, -} unicat; - -[[unsequenced]] bool runeis(rune, unicat); + UC_C = UC_CC | UC_CF | UC_CN | UC_CO | UC_CS, /* Other */ + UC_L = UC_LL | UC_LM | UC_LO | UC_LT | UC_LU, /* Letter */ + UC_LC = UC_LU | UC_LL | UC_LT, /* Cased Letter */ + UC_M = UC_MC | UC_ME | UC_MN, /* Mark */ + UC_N = UC_ND | UC_NL | UC_NO, /* Number */ + UC_P = UC_PC | UC_PD | UC_PE | UC_PF | UC_PI /* Punctuation */ + | UC_PO | UC_PS, + UC_S = UC_SC | UC_SK | UC_SM | UC_SO, /* Symbol */ + UC_Z = UC_ZL | UC_ZP | UC_ZS, /* Separator */ +}; +/* clang-format on */ +[[unsequenced]] bool runeis(rune, enum unicat); [[unsequenced]] bool riscntrl(rune); [[unsequenced]] bool risdigit(rune); [[unsequenced]] bool risgraph(rune); @@ -61,4 +63,54 @@ typedef enum [[clang::flag_enum]] : uint_fast32_t { [[unsequenced]] bool ristitle(rune); [[unsequenced]] bool risupper(rune); -#endif /* !RUNE_RTYPE_H */ +/* PROP PREDICATES START */ +[[unsequenced]] bool rune_has_prop_alphabetic(rune); +[[unsequenced]] bool rune_has_prop_ascii_hex_digit(rune); +[[unsequenced]] bool rune_has_prop_bidi_control(rune); +[[unsequenced]] bool rune_has_prop_case_ignorable(rune); +[[unsequenced]] bool rune_has_prop_cased(rune); +[[unsequenced]] bool rune_has_prop_changes_when_casefolded(rune); +[[unsequenced]] bool rune_has_prop_changes_when_casemapped(rune); +[[unsequenced]] bool rune_has_prop_changes_when_lowercased(rune); +[[unsequenced]] bool rune_has_prop_changes_when_titlecased(rune); +[[unsequenced]] bool rune_has_prop_changes_when_uppercased(rune); +[[unsequenced]] bool rune_has_prop_dash(rune); +[[unsequenced]] bool rune_has_prop_default_ignorable_code_point(rune); +[[unsequenced]] bool rune_has_prop_deprecated(rune); +[[unsequenced]] bool rune_has_prop_diacritic(rune); +[[unsequenced]] bool rune_has_prop_extender(rune); +[[unsequenced]] bool rune_has_prop_grapheme_base(rune); +[[unsequenced]] bool rune_has_prop_grapheme_extend(rune); +[[unsequenced]] bool rune_has_prop_hex_digit(rune); +[[unsequenced]] bool rune_has_prop_id_compat_math_continue(rune); +[[unsequenced]] bool rune_has_prop_id_compat_math_start(rune); +[[unsequenced]] bool rune_has_prop_id_continue(rune); +[[unsequenced]] bool rune_has_prop_id_start(rune); +[[unsequenced]] bool rune_has_prop_ideographic(rune); +[[unsequenced]] bool rune_has_prop_ids_binary_operator(rune); +[[unsequenced]] bool rune_has_prop_ids_trinary_operator(rune); +[[unsequenced]] bool rune_has_prop_ids_unary_operator(rune); +[[unsequenced]] bool rune_has_prop_indic_conjunct_break(rune); +[[unsequenced]] bool rune_has_prop_join_control(rune); +[[unsequenced]] bool rune_has_prop_logical_order_exception(rune); +[[unsequenced]] bool rune_has_prop_lowercase(rune); +[[unsequenced]] bool rune_has_prop_math(rune); +[[unsequenced]] bool rune_has_prop_noncharacter_code_point(rune); +[[unsequenced]] bool rune_has_prop_pattern_syntax(rune); +[[unsequenced]] bool rune_has_prop_pattern_white_space(rune); +[[unsequenced]] bool rune_has_prop_prepended_concatenation_mark(rune); +[[unsequenced]] bool rune_has_prop_quotation_mark(rune); +[[unsequenced]] bool rune_has_prop_radical(rune); +[[unsequenced]] bool rune_has_prop_regional_indicator(rune); +[[unsequenced]] bool rune_has_prop_sentence_terminal(rune); +[[unsequenced]] bool rune_has_prop_soft_dotted(rune); +[[unsequenced]] bool rune_has_prop_terminal_punctuation(rune); +[[unsequenced]] bool rune_has_prop_unified_ideograph(rune); +[[unsequenced]] bool rune_has_prop_uppercase(rune); +[[unsequenced]] bool rune_has_prop_variation_selector(rune); +[[unsequenced]] bool rune_has_prop_white_space(rune); +[[unsequenced]] bool rune_has_prop_xid_continue(rune); +[[unsequenced]] bool rune_has_prop_xid_start(rune); +/* PROP PREDICATES END */ + +#endif |