aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-04-05 15:35:58 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-04-05 15:35:58 +0200
commit 03f69018340f0fae95556321a3d89dcb49910ebe (patch)
tree ba8193041c9b85e1ff0dd95166a429c640f6e175
parent 4bba70f54e9a4ebd576f6a34075321223308e7b8 (diff)
Add more predicates to unicode/prop.h
-rwxr-xr-x gen/prop/bool-props 4
-rw-r--r-- include/unicode/prop.h 8
-rw-r--r-- lib/unicode/prop/uprop_is_idst.c 7
-rw-r--r-- lib/unicode/prop/uprop_is_idsu.c 7
-rw-r--r-- lib/unicode/prop/uprop_is_join_c.c 7
-rw-r--r-- lib/unicode/prop/uprop_is_nchar.c 10
-rw-r--r-- lib/unicode/prop/uprop_is_pat_ws.c 30
-rw-r--r-- lib/unicode/prop/uprop_is_ri.c 8
8 files changed, 79 insertions, 2 deletions
diff --git a/gen/prop/bool-props b/gen/prop/bool-props
index fe06693..5e93a8e 100755
--- a/gen/prop/bool-props
+++ b/gen/prop/bool-props
@@ -11,12 +11,13 @@ dep=Deprecated
dia=Diacritic
ext=Extender
hex=Hex_Digit
-idsb=IDS_Binary_Operator
id_compat_math_continue=ID_Compat_Math_Continue
id_compat_math_start=ID_Compat_Math_Start
ideo=Ideographic
+idsb=IDS_Binary_Operator
loe=Logical_Order_Exception
pat_syn=Pattern_Syntax
+pat_ws=Pattern_White_Space
pcm=Prepended_Concatenation_Mark
qmark=Quotation_Mark
radical=Radical
@@ -72,7 +73,6 @@ idst=IDS_Trinary_Operator
idsu=IDS_Unary_Operator
join_c=Join_Control
nchar=Noncharacter_Code_Point
-pat_ws=Pattern_White_Space
ri=Regional_Indicator
'
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index 933e31a..74ffdf9 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -88,6 +88,7 @@ enum [[clang::flag_enum]] uprop_gc : uint_fast32_t {
[[__nodiscard__, __unsequenced__]] bool uprop_is_lower(rune);
[[__nodiscard__, __unsequenced__]] bool uprop_is_math(rune);
[[__nodiscard__, __unsequenced__]] bool uprop_is_pat_syn(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_pat_ws(rune);
[[__nodiscard__, __unsequenced__]] bool uprop_is_pcm(rune);
[[__nodiscard__, __unsequenced__]] bool uprop_is_qmark(rune);
[[__nodiscard__, __unsequenced__]] bool uprop_is_radical(rune);
@@ -102,4 +103,11 @@ enum [[clang::flag_enum]] uprop_gc : uint_fast32_t {
[[__nodiscard__, __unsequenced__]] bool uprop_is_xids(rune);
/* PROP PREDICATES END */
+/* Manually implemented predicates */
+[[__nodiscard__, __unsequenced__]] bool uprop_is_idst(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_idsu(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_join_c(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_nchar(rune);
+[[__nodiscard__, __unsequenced__]] bool uprop_is_ri(rune);
+
#endif /* !MLIB_UNICODE_PROP_H */
diff --git a/lib/unicode/prop/uprop_is_idst.c b/lib/unicode/prop/uprop_is_idst.c
new file mode 100644
index 0000000..b303838
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_idst.c
@@ -0,0 +1,7 @@
+#include "unicode/prop.h"
+
+bool
+uprop_is_idst(rune ch) /* IDS_Trinary_Operator predicate */
+{
+	return 0x2FF2 <= ch && ch <= 0x2FF3; /* true only for U+2FF2 and U+2FF3 */
+}
diff --git a/lib/unicode/prop/uprop_is_idsu.c b/lib/unicode/prop/uprop_is_idsu.c
new file mode 100644
index 0000000..66bc3c6
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_idsu.c
@@ -0,0 +1,7 @@
+#include "unicode/prop.h"
+
+bool
+uprop_is_idsu(rune ch) /* IDS_Unary_Operator predicate */
+{
+	return 0x2FFE <= ch && ch <= 0x2FFF; /* true only for U+2FFE and U+2FFF */
+}
diff --git a/lib/unicode/prop/uprop_is_join_c.c b/lib/unicode/prop/uprop_is_join_c.c
new file mode 100644
index 0000000..91dd816
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_join_c.c
@@ -0,0 +1,7 @@
+#include "unicode/prop.h"
+
+bool
+uprop_is_join_c(rune ch) /* Join_Control predicate */
+{
+	return 0x200C <= ch && ch <= 0x200D; /* true only for U+200C and U+200D */
+}
diff --git a/lib/unicode/prop/uprop_is_nchar.c b/lib/unicode/prop/uprop_is_nchar.c
new file mode 100644
index 0000000..7022d2e
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_nchar.c
@@ -0,0 +1,10 @@
+#include "rune.h"
+#include "unicode/prop.h"
+
+bool
+uprop_is_nchar(rune ch) /* Noncharacter_Code_Point predicate; name matches the prototype in unicode/prop.h */
+{
+	return (ch >= 0xFDD0 && ch <= 0xFDEF)   /* the contiguous range U+FDD0..U+FDEF */
+	    || ((ch & 0xFFFE) == 0xFFFE         /* low 16 bits are FFFE or FFFF: last two code points of a plane */
+	        && ch <= RUNE_C(0x10FFFF));     /* every plane 0..16, so U+10FFFE and U+10FFFF are included */
+}
diff --git a/lib/unicode/prop/uprop_is_pat_ws.c b/lib/unicode/prop/uprop_is_pat_ws.c
new file mode 100644
index 0000000..fe2f462
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_pat_ws.c
@@ -0,0 +1,30 @@
+/* This file is autogenerated by gen/prop/bool-props; DO NOT EDIT. */
+
+#include "__bsearch.h"
+#include "bitset.h"
+#include "rune.h"
+#include "unicode/prop.h"
+
+/* clang-format off */
+
+static constexpr bitset(bs, LATIN1_MAX) = { /* membership bits consulted by uprop_is_pat_ws() for ch <= LATIN1_MAX */
+ 0x00, 0x3E, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+}; /* NOTE(review): if TESTBIT reads bit (ch % 8) of byte (ch / 8), the set bits are U+0009..U+000D, U+0020 and U+0085 — confirm in bitset.h */
+
+static const struct { /* code point ranges, one lo/hi pair per entry */
+ rune lo, hi;
+} lookup[] = {
+ {RUNE_C(0x00200E), RUNE_C(0x00200F)},
+ {RUNE_C(0x002028), RUNE_C(0x002029)},
+}; /* NOTE(review): presumably inclusive bounds kept in ascending order for the binary search — confirm in __bsearch.h */
+
+__MLIB_DEFINE_BSEARCH_CONTAINS(lookup) /* presumably defines mlib_lookup_contains() over lookup[] — confirm in __bsearch.h */
+
+bool
+uprop_is_pat_ws(rune ch) /* Pattern_White_Space predicate */
+{
+ return ch <= LATIN1_MAX ? TESTBIT(bs, ch) : mlib_lookup_contains(ch); /* bitset test up to LATIN1_MAX, range-table lookup above it */
+}
diff --git a/lib/unicode/prop/uprop_is_ri.c b/lib/unicode/prop/uprop_is_ri.c
new file mode 100644
index 0000000..4ce6a75
--- /dev/null
+++ b/lib/unicode/prop/uprop_is_ri.c
@@ -0,0 +1,8 @@
+#include "rune.h"
+#include "unicode/prop.h"
+
+bool
+uprop_is_ri(rune ch) /* Regional_Indicator predicate */
+{
+	return RUNE_C(0x1F1E6) <= ch && ch <= RUNE_C(0x1F1FF); /* inclusive range U+1F1E6..U+1F1FF */
+}