From de5f416d60f7331b6c86b97ffe5c18176791780f Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sat, 4 May 2024 12:31:18 +0200 Subject: Minor cleanup --- test/data/LowercaseTest | 16 ++++++++++------ test/data/TitlecaseTest | 8 ++++---- test/data/UppercaseTest | 3 +++ 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'test/data') diff --git a/test/data/LowercaseTest b/test/data/LowercaseTest index c80ccee..c7417fa 100644 --- a/test/data/LowercaseTest +++ b/test/data/LowercaseTest @@ -11,10 +11,10 @@ LOREM IPSUM DOLOR SIT AMET, CONSECTETUR ADIPISCING ELIT.;lorem ipsum dolor sit a # Cyrillic alphabet СЛАВА УКРАЇНІ ПРОТИ РОСІЙСЬКОЇ АГРЕСІЇ!;слава україні проти російської агресії!; -# In lithuanian we need to retain the dot above ‘i’ and ‘j’ when there’s an -# accent above the uppercased variant. Also test with both single-codepoint -# variants (i.e. U+00CC LATIN CAPITAL I WITH GRAVE) and variants that use -# combining-characters. +# In lithuanian we need to retain the dot above ‘i’ and ‘j’ when there’s +# an accent above the uppercased variant. Also test with both +# single-codepoint variants (i.e. U+00CC LATIN CAPITAL I WITH GRAVE) and +# variants that use combining-characters. Į̃;į̃; Į̃;į̇̃;LT J́;j́; @@ -30,8 +30,12 @@ RÀSTI, MÈSTI, KÌLO;ràsti, mèsti, ki̇̀lo;LT LJUDEVIT GAJ;ljudevit gaj; Ljudevit Gaj;ljudevit gaj; -# Add U+0307 COMBINING DOT ABOVE after ‘i’ when lowercasing ‘İ’ in non-Azeri and -# -Turkish locales +# Azeri/Turkish ‘ı’ and ‘i’ are different letters +I;i; +I;ı;AZ + +# Add U+0307 COMBINING DOT ABOVE after ‘i’ when lowercasing ‘İ’ in +# non-Azeri and -Turkish locales İSTANBUL’LUYUM;i̇stanbul’luyum; İSTANBUL’LUYUM;istanbul’luyum;AZ diff --git a/test/data/TitlecaseTest b/test/data/TitlecaseTest index 24256a5..58e6519 100644 --- a/test/data/TitlecaseTest +++ b/test/data/TitlecaseTest @@ -23,10 +23,10 @@ complex-language and -script;Complex-Language And -Script; СЛАВА УКРАЇНІ ПРОТИ РОСІЙСЬКОЇ АГРЕСІЇ!;Слава Україні Проти Російської Агресії!; слава україні проти російської агресії!;Слава Україні Проти Російської Агресії!; -# In lithuanian we need to retain the dot above ‘i’ and ‘j’ when there’s an -# accent above the uppercased variant. Also test with both single-codepoint -# variants (i.e. U+00CC LATIN CAPITAL I WITH GRAVE) and variants that use -# combining-characters. +# In lithuanian we need to retain the dot above ‘i’ and ‘j’ when there’s +# an accent above the uppercased variant. Also test with both +# single-codepoint variants (i.e. U+00CC LATIN CAPITAL I WITH GRAVE) and +# variants that use combining-characters. i̇̀;İ̀; i̇̀;Ì;LT RÀSTI, MÈSTI, KÌLO;Ràsti, Mèsti, Kìlo; diff --git a/test/data/UppercaseTest b/test/data/UppercaseTest index e4164d6..facd71c 100644 --- a/test/data/UppercaseTest +++ b/test/data/UppercaseTest @@ -1,6 +1,9 @@ # Empty input ;; +# Short input +ꮢ;Ꮢ; + # Latin alphabet Lorem ipsum dolor sit amet, consectetur adipiscing elit.;LOREM IPSUM DOLOR SIT AMET, CONSECTETUR ADIPISCING ELIT.; -- cgit v1.2.3