diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-04-24 00:09:05 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-04-24 00:09:05 +0200 |
commit | 3b797a5f3ce1d77fa7d0ed991b52553c1b3e8757 (patch) | |
tree | e608ecc5b689afaebe1ac3ce112cb2a04e597448 /include/unicode | |
parent | 9cc2a0368fb0a3aa8b878d1795ed76734beadc02 (diff) |
Properly upper- and titlecase ‘i’ and ‘j’ in Lithuanian
Diffstat (limited to 'include/unicode')
-rw-r--r-- | include/unicode/prop.h | 44 |
1 files changed, 33 insertions, 11 deletions
diff --git a/include/unicode/prop.h b/include/unicode/prop.h index d2d6cec..8cb50c5 100644 --- a/include/unicode/prop.h +++ b/include/unicode/prop.h @@ -13,31 +13,53 @@ struct rview { size_t len; }; -/* clang-format off */ +/* The structures lcctx, tcctx, and ucctx are used to provide context to the + casing property functions whose return values are context-dependent. Each + group of flags in a context structure is separated by a newline. + + The first group of flags are named using language codes. If one of these + flags is set, then language-specific tailorings for the given language are + enabled. For example, if the ‘az_or_tr’ flag is enabled in ucctx, then the + letter ‘i’ is uppercased to ‘İ’ as opposed to ‘I’. + + The second group of flags relate to context specified by the Unicode standard + and typically have to do with which characters surround the one being cased. + The description for these flags can be found in Table 3-17 of chapter 3 of + the Unicode standard[1]. + + The third group of flags are extensions provided by MLib, and are documented + above or beside the relevant option. 
+ + [1]: https://www.unicode.org/versions/Unicode15.1.0/ch03.pdf#G54277 */ struct lcctx { - bool az_or_tr : 1; /* Azeri or Turkish */ - bool lt : 1; /* Lithuanian */ + bool az_or_tr : 1; + bool lt : 1; - bool after_I : 1; /* After ‘I’ */ + bool after_I : 1; /* After ‘I’ */ bool before_acc : 1; /* Before accent on ‘i’ or ‘j’ in Lithuanian */ bool before_dot : 1; /* Before U+0307 */ bool eow : 1; /* End of word */ }; struct tcctx { - bool az_or_tr : 1; /* Azeri or Turkish */ - bool lt : 1; /* Lithuanian */ + bool az_or_tr : 1; + bool lt : 1; - bool after_i : 1; /* After ‘i’ */ + bool after_soft_dotted : 1; }; struct ucctx { - bool az_or_tr : 1; /* Azeri or Turkish */ - bool lt : 1; /* Lithuanian */ + bool az_or_tr : 1; + bool lt : 1; + + bool after_soft_dotted : 1; - bool ẞ : 1; /* Uppercase ‘ß’ into ‘ẞ’ (instead of ‘SS’) */ - bool after_i : 1; /* After ‘i’ */ + /* Uppercase the German lowercase-eszett ‘ß’ into the uppercase-eszett ‘ẞ’ + instead of the typical ‘SS’. The uppercase-eszett was added to the + German orthography in 2017 but has not yet seen widespread adoption as of + writing (2024). */ + bool ẞ : 1; }; /* clang-format on */ |