about summary refs log tree commit diff
path: root/vendor/librune/include
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/librune/include')
-rw-r--r-- vendor/librune/include/internal/common.h 6
-rw-r--r-- vendor/librune/include/internal/rtype/cat.h (renamed from vendor/librune/include/internal/rtype_lookup.h) 78
-rw-r--r-- vendor/librune/include/internal/rtype/lookup-func.h 47
-rw-r--r-- vendor/librune/include/rtype.h 138
4 files changed, 156 insertions, 113 deletions
diff --git a/vendor/librune/include/internal/common.h b/vendor/librune/include/internal/common.h
index ace9e63..a05f33c 100644
--- a/vendor/librune/include/internal/common.h
+++ b/vendor/librune/include/internal/common.h
@@ -3,6 +3,8 @@
/* IWYU pragma: private */
+#include <limits.h>
+
#define lengthof(a) (sizeof(a) / sizeof(*(a)))
#define U1(x) (((x)&0x80) == 0x00)
@@ -20,4 +22,8 @@
#define LATIN1_MAX 0xFF
+#if BITINT_MAXWIDTH >= LATIN1_MAX + 1 /* true when _BitInt can be LATIN1_MAX + 1 bits wide, i.e. one bit per Latin-1 code point; an undefined BITINT_MAXWIDTH evaluates to 0 here */
+# define BIT_LOOKUP 1 /* NOTE(review): presumably enables a bitmask-based Latin-1 lookup; the consumers are not visible in this diff */
+#endif
+
#endif /* !RUNE_INTERNAL_COMMON_H */
diff --git a/vendor/librune/include/internal/rtype_lookup.h b/vendor/librune/include/internal/rtype/cat.h
index b4b41fd..d84082a 100644
--- a/vendor/librune/include/internal/rtype_lookup.h
+++ b/vendor/librune/include/internal/rtype/cat.h
@@ -1,17 +1,15 @@
-/* This file is autogenerated by gen/gbrk; DO NOT EDIT. */
+/* This file is autogenerated by gen/rtype-cat; DO NOT EDIT. */
-/* TODO: Change tables to constexpr from const when Clangd gets better */
-
-#ifndef RUNE_INTERNAL_RTYPE_LOOKUP_H
-#define RUNE_INTERNAL_RTYPE_LOOKUP_H
+#ifndef RUNE_INTERNAL_RTYPE_CAT_H
+#define RUNE_INTERNAL_RTYPE_CAT_H
/* IWYU pragma: private */
/* clang-format off */
-#include "types.h"
-#include "../rtype.h"
+#include "../types.h"
+#include "../../rtype.h"
-static const unicat rtype_lat1_tbl[] = {
+static const enum unicat rtype_cat_lat1_tbl[] = {
UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC,
UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC,
UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC, UC_CC,
@@ -48,68 +46,8 @@ static const unicat rtype_lat1_tbl[] = {
static const struct {
rune lo, hi;
- unicat cat;
+ enum unicat val;
} rtype_cat_tbl[] = {
- {0x000000, 0x00001F, UC_CC},
- {0x000020, 0x000020, UC_ZS},
- {0x000021, 0x000023, UC_PO},
- {0x000024, 0x000024, UC_SC},
- {0x000025, 0x000027, UC_PO},
- {0x000028, 0x000028, UC_PS},
- {0x000029, 0x000029, UC_PE},
- {0x00002A, 0x00002A, UC_PO},
- {0x00002B, 0x00002B, UC_SM},
- {0x00002C, 0x00002C, UC_PO},
- {0x00002D, 0x00002D, UC_PD},
- {0x00002E, 0x00002F, UC_PO},
- {0x000030, 0x000039, UC_ND},
- {0x00003A, 0x00003B, UC_PO},
- {0x00003C, 0x00003E, UC_SM},
- {0x00003F, 0x000040, UC_PO},
- {0x000041, 0x00005A, UC_LU},
- {0x00005B, 0x00005B, UC_PS},
- {0x00005C, 0x00005C, UC_PO},
- {0x00005D, 0x00005D, UC_PE},
- {0x00005E, 0x00005E, UC_SK},
- {0x00005F, 0x00005F, UC_PC},
- {0x000060, 0x000060, UC_SK},
- {0x000061, 0x00007A, UC_LL},
- {0x00007B, 0x00007B, UC_PS},
- {0x00007C, 0x00007C, UC_SM},
- {0x00007D, 0x00007D, UC_PE},
- {0x00007E, 0x00007E, UC_SM},
- {0x00007F, 0x00009F, UC_CC},
- {0x0000A0, 0x0000A0, UC_ZS},
- {0x0000A1, 0x0000A1, UC_PO},
- {0x0000A2, 0x0000A5, UC_SC},
- {0x0000A6, 0x0000A6, UC_SO},
- {0x0000A7, 0x0000A7, UC_PO},
- {0x0000A8, 0x0000A8, UC_SK},
- {0x0000A9, 0x0000A9, UC_SO},
- {0x0000AA, 0x0000AA, UC_LO},
- {0x0000AB, 0x0000AB, UC_PI},
- {0x0000AC, 0x0000AC, UC_SM},
- {0x0000AD, 0x0000AD, UC_CF},
- {0x0000AE, 0x0000AE, UC_SO},
- {0x0000AF, 0x0000AF, UC_SK},
- {0x0000B0, 0x0000B0, UC_SO},
- {0x0000B1, 0x0000B1, UC_SM},
- {0x0000B2, 0x0000B3, UC_NO},
- {0x0000B4, 0x0000B4, UC_SK},
- {0x0000B5, 0x0000B5, UC_LL},
- {0x0000B6, 0x0000B7, UC_PO},
- {0x0000B8, 0x0000B8, UC_SK},
- {0x0000B9, 0x0000B9, UC_NO},
- {0x0000BA, 0x0000BA, UC_LO},
- {0x0000BB, 0x0000BB, UC_PF},
- {0x0000BC, 0x0000BE, UC_NO},
- {0x0000BF, 0x0000BF, UC_PO},
- {0x0000C0, 0x0000D6, UC_LU},
- {0x0000D7, 0x0000D7, UC_SM},
- {0x0000D8, 0x0000DE, UC_LU},
- {0x0000DF, 0x0000F6, UC_LL},
- {0x0000F7, 0x0000F7, UC_SM},
- {0x0000F8, 0x0000FF, UC_LL},
{0x000100, 0x000100, UC_LU},
{0x000101, 0x000101, UC_LL},
{0x000102, 0x000102, UC_LU},
@@ -3354,4 +3292,4 @@ static const struct {
{0x100000, 0x10FFFD, UC_CO},
};
-#endif /* !RUNE_INTERNAL_RTYPE_LOOKUP_H */
+#endif /* !RUNE_INTERNAL_RTYPE_CAT_H */
diff --git a/vendor/librune/include/internal/rtype/lookup-func.h b/vendor/librune/include/internal/rtype/lookup-func.h
new file mode 100644
index 0000000..91bda8b
--- /dev/null
+++ b/vendor/librune/include/internal/rtype/lookup-func.h
@@ -0,0 +1,47 @@
+#include <stddef.h>
+
+#include "internal/common.h"
+
+#ifndef TYPE /* return type of lookup() */
+# error "TYPE is not defined"
+#endif
+#ifndef DEFAULT /* value lookup() returns when no TABLE range contains the rune */
+# error "DEFAULT is not defined"
+#endif
+#ifndef TABLE /* array of range entries with .lo and .hi members that lookup() binary-searches */
+# error "TABLE is not defined"
+#endif
+#ifndef HAS_VALUE /* non-zero: lookup() returns TABLE[i].val on a hit; zero: it returns true */
+# error "HAS_VALUE is not defined"
+#endif
+
+[[gnu::always_inline]] static TYPE /* maps ch to TABLE[i].val (or to true when HAS_VALUE is 0) for the range containing it, or to DEFAULT when no range does */
+lookup(rune ch)
+{
+ ptrdiff_t lo, hi; /* inclusive index bounds of the part of TABLE still being searched; signed so hi = i - 1 can reach -1 and end the loop */
+
+#ifdef LATIN1_TABLE /* optional fast path: the includer may supply a table indexed directly by code point */
+ if (ch <= LATIN1_MAX)
+ return LATIN1_TABLE[ch];
+#endif
+
+ lo = 0;
+ hi = lengthof(TABLE) - 1; /* TABLE must be a real array, since lengthof() is sizeof-based */
+
+ while (lo <= hi) { /* binary search; assumes TABLE is sorted by range with no overlaps -- TODO confirm for every includer */
+ ptrdiff_t i = (lo + hi) / 2;
+
+ if (ch < TABLE[i].lo) /* ch lies below entry i: continue in the lower half */
+ hi = i - 1;
+ else if (ch > TABLE[i].hi) /* ch lies above entry i: continue in the upper half */
+ lo = i + 1;
+ else /* TABLE[i].lo <= ch <= TABLE[i].hi: entry i covers ch */
+#if HAS_VALUE
+ return TABLE[i].val;
+#else
+ return true;
+#endif
+ }
+
+ return DEFAULT; /* no range in TABLE contains ch */
+}
diff --git a/vendor/librune/include/rtype.h b/vendor/librune/include/rtype.h
index 6c3e8e3..72f054c 100644
--- a/vendor/librune/include/rtype.h
+++ b/vendor/librune/include/rtype.h
@@ -5,50 +5,52 @@
#include "internal/types.h"
-typedef enum [[clang::flag_enum]] : uint_fast32_t {
- UC_CN = 0, /* Not Assigned */
- UC_CC = 1 << 0, /* Control */
- UC_CF = 1 << 1, /* Format */
- UC_CO = 1 << 2, /* Private Use */
- UC_CS = 1 << 3, /* Surrogate */
- UC_LL = 1 << 4, /* Lowercase Letter */
- UC_LM = 1 << 5, /* Modifier Letter */
- UC_LO = 1 << 6, /* Other Letter */
- UC_LT = 1 << 7, /* Titlecase Letter */
- UC_LU = 1 << 8, /* Uppercase Letter */
- UC_MC = 1 << 9, /* Spacing Mark */
- UC_ME = 1 << 10, /* Enclosing Mark */
- UC_MN = 1 << 11, /* Nonspacing Mark */
- UC_ND = 1 << 12, /* Decimal Number */
- UC_NL = 1 << 13, /* Letter Number */
- UC_NO = 1 << 14, /* Other Number */
- UC_PC = 1 << 15, /* Connector Punctuation */
- UC_PD = 1 << 16, /* Dash Punctuation */
- UC_PE = 1 << 17, /* Close Punctuation */
- UC_PF = 1 << 18, /* Final Punctuation */
- UC_PI = 1 << 19, /* Initial Punctuation */
- UC_PO = 1 << 20, /* Other Punctuation */
- UC_PS = 1 << 21, /* Open Punctuation */
- UC_SC = 1 << 22, /* Currency Symbol */
- UC_SK = 1 << 23, /* Modifier Symbol */
- UC_SM = 1 << 24, /* Math Symbol */
- UC_SO = 1 << 25, /* Other Symbol */
- UC_ZL = 1 << 26, /* Line Separator */
- UC_ZP = 1 << 27, /* Paragraph Separator */
- UC_ZS = 1 << 28, /* Space Separator */
+/* clang-format off */
+enum [[clang::flag_enum]] unicat : uint_fast32_t { /* character categories, one bit each, so several can be OR-ed into a single mask */
+ UC_CN = 0, /* Not Assigned */
+ UC_CC = UINT32_C(1) << 0, /* Control */
+ UC_CF = UINT32_C(1) << 1, /* Format */
+ UC_CO = UINT32_C(1) << 2, /* Private Use */
+ UC_CS = UINT32_C(1) << 3, /* Surrogate */
+ UC_LL = UINT32_C(1) << 4, /* Lowercase Letter */
+ UC_LM = UINT32_C(1) << 5, /* Modifier Letter */
+ UC_LO = UINT32_C(1) << 6, /* Other Letter */
+ UC_LT = UINT32_C(1) << 7, /* Titlecase Letter */
+ UC_LU = UINT32_C(1) << 8, /* Uppercase Letter */
+ UC_MC = UINT32_C(1) << 9, /* Spacing Mark */
+ UC_ME = UINT32_C(1) << 10, /* Enclosing Mark */
+ UC_MN = UINT32_C(1) << 11, /* Nonspacing Mark */
+ UC_ND = UINT32_C(1) << 12, /* Decimal Number */
+ UC_NL = UINT32_C(1) << 13, /* Letter Number */
+ UC_NO = UINT32_C(1) << 14, /* Other Number */
+ UC_PC = UINT32_C(1) << 15, /* Connector Punctuation */
+ UC_PD = UINT32_C(1) << 16, /* Dash Punctuation */
+ UC_PE = UINT32_C(1) << 17, /* Close Punctuation */
+ UC_PF = UINT32_C(1) << 18, /* Final Punctuation */
+ UC_PI = UINT32_C(1) << 19, /* Initial Punctuation */
+ UC_PO = UINT32_C(1) << 20, /* Other Punctuation */
+ UC_PS = UINT32_C(1) << 21, /* Open Punctuation */
+ UC_SC = UINT32_C(1) << 22, /* Currency Symbol */
+ UC_SK = UINT32_C(1) << 23, /* Modifier Symbol */
+ UC_SM = UINT32_C(1) << 24, /* Math Symbol */
+ UC_SO = UINT32_C(1) << 25, /* Other Symbol */
+ UC_ZL = UINT32_C(1) << 26, /* Line Separator */
+ UC_ZP = UINT32_C(1) << 27, /* Paragraph Separator */
+ UC_ZS = UINT32_C(1) << 28, /* Space Separator */
+ UC_C = UC_CC | UC_CF | UC_CN | UC_CO | UC_CS, /* Other; UC_CN is 0 and so contributes no bit to this mask */
+ UC_L = UC_LL | UC_LM | UC_LO | UC_LT | UC_LU, /* Letter */
+ UC_LC = UC_LU | UC_LL | UC_LT, /* Cased Letter */
+ UC_M = UC_MC | UC_ME | UC_MN, /* Mark */
+ UC_N = UC_ND | UC_NL | UC_NO, /* Number */
+ UC_P = UC_PC | UC_PD | UC_PE | UC_PF | UC_PI /* Punctuation */
+ | UC_PO | UC_PS,
+ UC_S = UC_SC | UC_SK | UC_SM | UC_SO, /* Symbol */
+ UC_Z = UC_ZL | UC_ZP | UC_ZS, /* Separator */
+};
+/* clang-format on */
+[[unsequenced]] bool runeis(rune, enum unicat);
[[unsequenced]] bool riscntrl(rune);
[[unsequenced]] bool risdigit(rune);
[[unsequenced]] bool risgraph(rune);
@@ -61,4 +63,54 @@ typedef enum [[clang::flag_enum]] : uint_fast32_t {
[[unsequenced]] bool ristitle(rune);
[[unsequenced]] bool risupper(rune);
-#endif /* !RUNE_RTYPE_H */
+/* PROP PREDICATES START */
+[[unsequenced]] bool rune_has_prop_alphabetic(rune);
+[[unsequenced]] bool rune_has_prop_ascii_hex_digit(rune);
+[[unsequenced]] bool rune_has_prop_bidi_control(rune);
+[[unsequenced]] bool rune_has_prop_case_ignorable(rune);
+[[unsequenced]] bool rune_has_prop_cased(rune);
+[[unsequenced]] bool rune_has_prop_changes_when_casefolded(rune);
+[[unsequenced]] bool rune_has_prop_changes_when_casemapped(rune);
+[[unsequenced]] bool rune_has_prop_changes_when_lowercased(rune);
+[[unsequenced]] bool rune_has_prop_changes_when_titlecased(rune);
+[[unsequenced]] bool rune_has_prop_changes_when_uppercased(rune);
+[[unsequenced]] bool rune_has_prop_dash(rune);
+[[unsequenced]] bool rune_has_prop_default_ignorable_code_point(rune);
+[[unsequenced]] bool rune_has_prop_deprecated(rune);
+[[unsequenced]] bool rune_has_prop_diacritic(rune);
+[[unsequenced]] bool rune_has_prop_extender(rune);
+[[unsequenced]] bool rune_has_prop_grapheme_base(rune);
+[[unsequenced]] bool rune_has_prop_grapheme_extend(rune);
+[[unsequenced]] bool rune_has_prop_hex_digit(rune);
+[[unsequenced]] bool rune_has_prop_id_compat_math_continue(rune);
+[[unsequenced]] bool rune_has_prop_id_compat_math_start(rune);
+[[unsequenced]] bool rune_has_prop_id_continue(rune);
+[[unsequenced]] bool rune_has_prop_id_start(rune);
+[[unsequenced]] bool rune_has_prop_ideographic(rune);
+[[unsequenced]] bool rune_has_prop_ids_binary_operator(rune);
+[[unsequenced]] bool rune_has_prop_ids_trinary_operator(rune);
+[[unsequenced]] bool rune_has_prop_ids_unary_operator(rune);
+[[unsequenced]] bool rune_has_prop_indic_conjunct_break(rune);
+[[unsequenced]] bool rune_has_prop_join_control(rune);
+[[unsequenced]] bool rune_has_prop_logical_order_exception(rune);
+[[unsequenced]] bool rune_has_prop_lowercase(rune);
+[[unsequenced]] bool rune_has_prop_math(rune);
+[[unsequenced]] bool rune_has_prop_noncharacter_code_point(rune);
+[[unsequenced]] bool rune_has_prop_pattern_syntax(rune);
+[[unsequenced]] bool rune_has_prop_pattern_white_space(rune);
+[[unsequenced]] bool rune_has_prop_prepended_concatenation_mark(rune);
+[[unsequenced]] bool rune_has_prop_quotation_mark(rune);
+[[unsequenced]] bool rune_has_prop_radical(rune);
+[[unsequenced]] bool rune_has_prop_regional_indicator(rune);
+[[unsequenced]] bool rune_has_prop_sentence_terminal(rune);
+[[unsequenced]] bool rune_has_prop_soft_dotted(rune);
+[[unsequenced]] bool rune_has_prop_terminal_punctuation(rune);
+[[unsequenced]] bool rune_has_prop_unified_ideograph(rune);
+[[unsequenced]] bool rune_has_prop_uppercase(rune);
+[[unsequenced]] bool rune_has_prop_variation_selector(rune);
+[[unsequenced]] bool rune_has_prop_white_space(rune);
+[[unsequenced]] bool rune_has_prop_xid_continue(rune);
+[[unsequenced]] bool rune_has_prop_xid_start(rune);
+/* PROP PREDICATES END */
+
+#endif /* !RUNE_RTYPE_H */