aboutsummaryrefslogtreecommitdiff
path: root/test/data
diff options
context:
space:
mode:
Diffstat (limited to 'test/data')
-rw-r--r-- test/data/LowercaseTest 16
-rw-r--r-- test/data/TitlecaseTest 8
-rw-r--r-- test/data/UppercaseTest 3
3 files changed, 17 insertions, 10 deletions
diff --git a/test/data/LowercaseTest b/test/data/LowercaseTest
index c80ccee..c7417fa 100644
--- a/test/data/LowercaseTest
+++ b/test/data/LowercaseTest
@@ -11,10 +11,10 @@ LOREM IPSUM DOLOR SIT AMET, CONSECTETUR ADIPISCING ELIT.;lorem ipsum dolor sit a
# Cyrillic alphabet
СЛАВА УКРАЇНІ ПРОТИ РОСІЙСЬКОЇ АГРЕСІЇ!;слава україні проти російської агресії!;
-# In lithuanian we need to retain the dot above ‘i’ and ‘j’ when there’s an
-# accent above the uppercased variant. Also test with both single-codepoint
-# variants (i.e. U+00CC LATIN CAPITAL I WITH GRAVE) and variants that use
-# combining-characters.
+# In Lithuanian we need to retain the dot above ‘i’ and ‘j’ when there’s
+# an accent above the uppercased variant. Also test with both
+# single-codepoint variants (e.g. U+00CC LATIN CAPITAL I WITH GRAVE) and
+# variants that use combining characters.
Į̃;į̃;
Į̃;į̇̃;LT
J́;j́;
@@ -30,8 +30,12 @@ RÀSTI, MÈSTI, KÌLO;ràsti, mèsti, ki̇̀lo;LT
LJUDEVIT GAJ;ljudevit gaj;
Ljudevit Gaj;ljudevit gaj;
-# Add U+0307 COMBINING DOT ABOVE after ‘i’ when lowercasing ‘İ’ in non-Azeri and
-# -Turkish locales
+# Azeri/Turkish ‘ı’ and ‘i’ are different letters
+I;i;
+I;ı;AZ
+
+# Add U+0307 COMBINING DOT ABOVE after ‘i’ when lowercasing ‘İ’ in
+# non-Azeri and non-Turkish locales
İSTANBUL’LUYUM;i̇stanbul’luyum;
İSTANBUL’LUYUM;istanbul’luyum;AZ
diff --git a/test/data/TitlecaseTest b/test/data/TitlecaseTest
index 24256a5..58e6519 100644
--- a/test/data/TitlecaseTest
+++ b/test/data/TitlecaseTest
@@ -23,10 +23,10 @@ complex-language and -script;Complex-Language And -Script;
СЛАВА УКРАЇНІ ПРОТИ РОСІЙСЬКОЇ АГРЕСІЇ!;Слава Україні Проти Російської Агресії!;
слава україні проти російської агресії!;Слава Україні Проти Російської Агресії!;
-# In lithuanian we need to retain the dot above ‘i’ and ‘j’ when there’s an
-# accent above the uppercased variant. Also test with both single-codepoint
-# variants (i.e. U+00CC LATIN CAPITAL I WITH GRAVE) and variants that use
-# combining-characters.
+# In Lithuanian we need to retain the dot above ‘i’ and ‘j’ when there’s
+# an accent above the uppercased variant. Also test with both
+# single-codepoint variants (e.g. U+00CC LATIN CAPITAL I WITH GRAVE) and
+# variants that use combining characters.
i̇̀;İ̀;
i̇̀;Ì;LT
RÀSTI, MÈSTI, KÌLO;Ràsti, Mèsti, Kìlo;
diff --git a/test/data/UppercaseTest b/test/data/UppercaseTest
index e4164d6..facd71c 100644
--- a/test/data/UppercaseTest
+++ b/test/data/UppercaseTest
@@ -1,6 +1,9 @@
# Empty input
;;
+# Short input
+ꮢ;Ꮢ;
+
# Latin alphabet
Lorem ipsum dolor sit amet, consectetur adipiscing elit.;LOREM IPSUM DOLOR SIT AMET, CONSECTETUR ADIPISCING ELIT.;