From 3b797a5f3ce1d77fa7d0ed991b52553c1b3e8757 Mon Sep 17 00:00:00 2001
From: Thomas Voss <mail@thomasvoss.com>
Date: Wed, 24 Apr 2024 00:09:05 +0200
Subject: Properly upper- and titlecase ‘i’ and ‘j’ in Lithuanian
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/unicode/prop.h | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

(limited to 'include/unicode')

diff --git a/include/unicode/prop.h b/include/unicode/prop.h
index d2d6cec..8cb50c5 100644
--- a/include/unicode/prop.h
+++ b/include/unicode/prop.h
@@ -13,31 +13,53 @@ struct rview {
 	size_t len;
 };
 
-/* clang-format off */
+/* The structures lcctx, tcctx, and ucctx are used to provide context to the
+   casing property functions whose return values are context-dependent.  Each
+   group of flags in a context structure is separated by a newline.
+
+   The first group of flags are named using language codes.  If one of these
+   flags is set, then language-specific tailorings for the given language are
+   enabled.  For example, if the ‘az_or_tr’ flag is enabled in ucctx, then the
+   letter ‘i’ is uppercased to ‘İ’ as opposed to ‘I’.
+
+   The second group of flags relate to context specified by the Unicode standard
+   and typically have to do with which characters surround the one being cased.
+   The description for these flags can be found in Table 3-17 of chapter 3 of
+   the Unicode standard[1].
+
+   The third group of flags are extensions provided by MLib, and are documented
+   above or beside the relevant option.
+
+   [1]: https://www.unicode.org/versions/Unicode15.1.0/ch03.pdf#G54277 */
 
 struct lcctx {
-	bool az_or_tr : 1; /* Azeri or Turkish */
-	bool lt       : 1; /* Lithuanian */
+	bool az_or_tr : 1;
+	bool lt       : 1;
 
-	bool after_I    : 1; /* After ‘I’ */
+	bool after_I : 1;    /* After ‘I’ */
 	bool before_acc : 1; /* Before accent on ‘i’ or ‘j’ in Lithuanian */
 	bool before_dot : 1; /* Before U+0307 */
 	bool eow        : 1; /* End of word */
 };
 
 struct tcctx {
-	bool az_or_tr : 1; /* Azeri or Turkish */
-	bool lt       : 1; /* Lithuanian */
+	bool az_or_tr : 1;
+	bool lt       : 1;
 
-	bool after_i : 1; /* After ‘i’ */
+	bool after_soft_dotted : 1;
 };
 
 struct ucctx {
-	bool az_or_tr : 1; /* Azeri or Turkish */
-	bool lt       : 1; /* Lithuanian */
+	bool az_or_tr : 1;
+	bool lt       : 1;
+
+	bool after_soft_dotted : 1;
 
-	bool ẞ       : 1; /* Uppercase ‘ß’ into ‘ẞ’ (instead of ‘SS’) */
-	bool after_i : 1; /* After ‘i’ */
+	/* Uppercase the German lowercase-eszett ‘ß’ into the uppercase-eszett ‘ẞ’
+	   instead of the typical ‘SS’.  The uppercase-eszett was added to the
+	   German orthography in 2017 but has not yet seen widespread adoption as of
+	   writing (2024). */
+	bool ẞ : 1;
 };
 
 /* clang-format on */
-- 
cgit v1.2.3