From 03f69018340f0fae95556321a3d89dcb49910ebe Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Fri, 5 Apr 2024 15:35:58 +0200
Subject: Add more predicates to unicode/prop.h

---
 gen/prop/bool-props                |  4 ++--
 include/unicode/prop.h             |  8 ++++++++
 lib/unicode/prop/uprop_is_idst.c   |  7 +++++++
 lib/unicode/prop/uprop_is_idsu.c   |  7 +++++++
 lib/unicode/prop/uprop_is_join_c.c |  7 +++++++
 lib/unicode/prop/uprop_is_nchar.c  | 10 ++++++++++
 lib/unicode/prop/uprop_is_pat_ws.c | 30 ++++++++++++++++++++++++++++++
 lib/unicode/prop/uprop_is_ri.c     |  8 ++++++++
 8 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 lib/unicode/prop/uprop_is_idst.c
 create mode 100644 lib/unicode/prop/uprop_is_idsu.c
 create mode 100644 lib/unicode/prop/uprop_is_join_c.c
 create mode 100644 lib/unicode/prop/uprop_is_nchar.c
 create mode 100644 lib/unicode/prop/uprop_is_pat_ws.c
 create mode 100644 lib/unicode/prop/uprop_is_ri.c

diff --git a/gen/prop/bool-props b/gen/prop/bool-props
index fe06693..5e93a8e 100755
--- a/gen/prop/bool-props
+++ b/gen/prop/bool-props
@@ -11,12 +11,13 @@ dep=Deprecated
 dia=Diacritic
 ext=Extender
 hex=Hex_Digit
-idsb=IDS_Binary_Operator
 id_compat_math_continue=ID_Compat_Math_Continue
 id_compat_math_start=ID_Compat_Math_Start
 ideo=Ideographic
+idsb=IDS_Binary_Operator
 loe=Logical_Order_Exception
 pat_syn=Pattern_Syntax
+pat_ws=Pattern_White_Space
 pcm=Prepended_Concatenation_Mark
 qmark=Quotation_Mark
 radical=Radical
@@ -72,7 +73,6 @@ idst=IDS_Trinary_Operator
 idsu=IDS_Unary_Operator
 join_c=Join_Control
 nchar=Noncharacter_Code_Point
-pat_ws=Pattern_White_Space
 ri=Regional_Indicator
 '
 
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index 933e31a..74ffdf9 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -88,6 +88,7 @@ enum [[clang::flag_enum]] uprop_gc : uint_fast32_t {
 [[__nodiscard__, __unsequenced__]] bool uprop_is_lower(rune);
 [[__nodiscard__, __unsequenced__]] bool uprop_is_math(rune);
 [[__nodiscard__, __unsequenced__]] bool uprop_is_pat_syn(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_pat_ws(rune);
 [[__nodiscard__, __unsequenced__]] bool uprop_is_pcm(rune);
 [[__nodiscard__, __unsequenced__]] bool uprop_is_qmark(rune);
 [[__nodiscard__, __unsequenced__]] bool uprop_is_radical(rune);
@@ -102,4 +103,11 @@ enum [[clang::flag_enum]] uprop_gc : uint_fast32_t {
 [[__nodiscard__, __unsequenced__]] bool uprop_is_xids(rune);
 /* PROP PREDICATES END */
 
+/* Manually implemented predicates */
+[[__nodiscard__, __unsequenced__]] bool uprop_is_idst(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_idsu(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_join_c(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_nchar(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_ri(rune);
+
 #endif /* !MLIB_UNICODE_PROP_H */
diff --git a/lib/unicode/prop/uprop_is_idst.c b/lib/unicode/prop/uprop_is_idst.c
new file mode 100644
index 0000000..b303838
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_idst.c
@@ -0,0 +1,7 @@
+#include "unicode/prop.h"
+
+bool
+uprop_is_idst(rune ch)
+{
+	return ch == 0x2FF2 || ch == 0x2FF3;
+}
diff --git a/lib/unicode/prop/uprop_is_idsu.c b/lib/unicode/prop/uprop_is_idsu.c
new file mode 100644
index 0000000..66bc3c6
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_idsu.c
@@ -0,0 +1,7 @@
+#include "unicode/prop.h"
+
+bool
+uprop_is_idsu(rune ch)
+{
+	return ch == 0x2FFE || ch == 0x2FFF;
+}
diff --git a/lib/unicode/prop/uprop_is_join_c.c b/lib/unicode/prop/uprop_is_join_c.c
new file mode 100644
index 0000000..91dd816
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_join_c.c
@@ -0,0 +1,7 @@
+#include "unicode/prop.h"
+
+bool
+uprop_is_join_c(rune ch)
+{
+	return ch == 0x200C || ch == 0x200D;
+}
diff --git a/lib/unicode/prop/uprop_is_nchar.c b/lib/unicode/prop/uprop_is_nchar.c
new file mode 100644
index 0000000..7022d2e
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_nchar.c
@@ -0,0 +1,10 @@
+#include "rune.h"
+#include "unicode/prop.h"
+
+/* U+FDD0..U+FDEF, plus U+nFFFE and U+nFFFF of all 17 planes (n <= 0x10) */
+bool
+uprop_is_nchar(rune ch)
+{
+	return (ch >= 0xFDD0 && ch <= 0xFDEF)
+	    || ((ch & 0xFFFE) == 0xFFFE && ch <= RUNE_C(0x10FFFF));
+}
diff --git a/lib/unicode/prop/uprop_is_pat_ws.c b/lib/unicode/prop/uprop_is_pat_ws.c
new file mode 100644
index 0000000..fe2f462
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_pat_ws.c
@@ -0,0 +1,30 @@
+/* This file is autogenerated by gen/prop/bool-props; DO NOT EDIT. */
+
+#include "__bsearch.h"
+#include "bitset.h"
+#include "rune.h"
+#include "unicode/prop.h"
+
+/* clang-format off */
+
+static constexpr bitset(bs, LATIN1_MAX) = {
+	0x00, 0x3E, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const struct {
+	rune lo, hi;
+} lookup[] = {
+	{RUNE_C(0x00200E), RUNE_C(0x00200F)},
+	{RUNE_C(0x002028), RUNE_C(0x002029)},
+};
+
+__MLIB_DEFINE_BSEARCH_CONTAINS(lookup)
+
+bool
+uprop_is_pat_ws(rune ch)
+{
+	return ch <= LATIN1_MAX ? TESTBIT(bs, ch) : mlib_lookup_contains(ch);
+}
diff --git a/lib/unicode/prop/uprop_is_ri.c b/lib/unicode/prop/uprop_is_ri.c
new file mode 100644
index 0000000..4ce6a75
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_ri.c
@@ -0,0 +1,8 @@
+#include "rune.h"
+#include "unicode/prop.h"
+
+bool
+uprop_is_ri(rune ch)
+{
+	return ch >= RUNE_C(0x1F1E6) && ch <= RUNE_C(0x1F1FF);
+}
-- 
cgit v1.2.3