aboutsummaryrefslogtreecommitdiff
path: root/include/unicode/prop.h
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-04-24 00:09:05 +0200
committerThomas Voss <mail@thomasvoss.com> 2024-04-24 00:09:05 +0200
commit3b797a5f3ce1d77fa7d0ed991b52553c1b3e8757 (patch)
treee608ecc5b689afaebe1ac3ce112cb2a04e597448 /include/unicode/prop.h
parent9cc2a0368fb0a3aa8b878d1795ed76734beadc02 (diff)
Properly upper- and titlecase ‘i’ and ‘j’ in Lithuanian
Diffstat (limited to 'include/unicode/prop.h')
-rw-r--r--include/unicode/prop.h44
1 files changed, 33 insertions, 11 deletions
diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index d2d6cec..8cb50c5 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -13,31 +13,53 @@ struct rview {
size_t len;
};
-/* clang-format off */
+/* The structures lcctx, tcctx, and ucctx are used to provide context to the
+ casing property functions whose return values are context-dependent. Each
+ group of flags in a context structure is separated by a newline.
+
+ The first group of flags are named using language codes. If one of these
+ flags is set, then language-specific tailorings for the given language are
+ enabled. For example, if the ‘az_or_tr’ flag is enabled in ucctx, then the
+ letter ‘i’ is uppercased to ‘İ’ as opposed to ‘I’.
+
+ The second group of flags relate to context specified by the Unicode standard
+ and typically have to do with which characters surround the one being cased.
+ The description for these flags can be found in Table 3-17 of chapter 3 of
+ the Unicode standard[1].
+
+ The third group of flags are extensions provided by MLib, and are documented
+ above or beside the relevant option.
+
+ [1]: https://www.unicode.org/versions/Unicode15.1.0/ch03.pdf#G54277 */
struct lcctx {
- bool az_or_tr : 1; /* Azeri or Turkish */
- bool lt : 1; /* Lithuanian */
+ bool az_or_tr : 1;
+ bool lt : 1;
- bool after_I : 1; /* After ‘I’ */
+ bool after_I : 1; /* After ‘I’ */
bool before_acc : 1; /* Before accent on ‘i’ or ‘j’ in Lithuanian */
bool before_dot : 1; /* Before U+0307 */
bool eow : 1; /* End of word */
};
struct tcctx {
- bool az_or_tr : 1; /* Azeri or Turkish */
- bool lt : 1; /* Lithuanian */
+ bool az_or_tr : 1;
+ bool lt : 1;
- bool after_i : 1; /* After ‘i’ */
+ bool after_soft_dotted : 1;
};
struct ucctx {
- bool az_or_tr : 1; /* Azeri or Turkish */
- bool lt : 1; /* Lithuanian */
+ bool az_or_tr : 1;
+ bool lt : 1;
+
+ bool after_soft_dotted : 1;
- bool ẞ : 1; /* Uppercase ‘ß’ into ‘ẞ’ (instead of ‘SS’) */
- bool after_i : 1; /* After ‘i’ */
+ /* Uppercase the German lowercase-eszett ‘ß’ into the uppercase-eszett ‘ẞ’
+ instead of the typical ‘SS’. The uppercase-eszett was added to the
+ German orthography in 2017 but has not yet seen widespread adoption as of
+ writing (2024). */
+ bool ẞ : 1;
};
/* clang-format on */