From c6853cefe4b0de30d3fb04d7be8a0a78a23d51d3 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Thu, 3 Oct 2024 01:24:50 +0200 Subject: Update for Unicode 16 --- data/BidiBrackets | 8 +- data/BidiMirroring | 15 +- data/Blocks | 19 +- data/CaseFolding | 35 +- data/CompositionExclusions | 9 +- data/DerivedAge | 64 +- data/DerivedBidiClass | 133 +- data/DerivedBinaryProperties | 12 +- data/DerivedCoreProperties | 834 +++++-- data/DerivedDecompositionType | 24 +- data/DerivedEastAsianWidth | 122 +- data/DerivedJoiningGroup | 26 +- data/DerivedJoiningType | 33 +- data/DerivedLineBreak | 198 +- data/DerivedNormalizationProps | 248 +- data/DerivedNumericType | 17 +- data/DerivedNumericValues | 110 +- data/EquivalentUnifiedIdeograph | 10 +- data/GraphemeBreakProperty | 96 +- data/HangulSyllableType | 8 +- data/IndicPositionalCategory | 64 +- data/IndicSyllabicCategory | 99 +- data/PropList | 147 +- data/ScriptExtensions | 820 ++---- data/Scripts | 153 +- data/SentenceBreakProperty | 104 +- data/SpecialCasing | 22 +- data/UnicodeData | 5203 ++++++++++++++++++++++++++++++++++++++- data/VerticalOrientation | 118 +- data/WordBreakProperty | 80 +- data/emoji-data | 42 +- 31 files changed, 7657 insertions(+), 1216 deletions(-) (limited to 'data') diff --git a/data/BidiBrackets b/data/BidiBrackets index 8cebea4..db4e41b 100644 --- a/data/BidiBrackets +++ b/data/BidiBrackets @@ -1,8 +1,8 @@ -# BidiBrackets-15.1.0.txt -# Date: 2023-01-18 -# © 2023 Unicode®, Inc. +# BidiBrackets-16.0.0.txt +# Date: 2024-02-02 +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ diff --git a/data/BidiMirroring b/data/BidiMirroring index 7e58cc4..d8f60cb 100644 --- a/data/BidiMirroring +++ b/data/BidiMirroring @@ -1,7 +1,8 @@ -# BidiMirroring-15.1.0.txt -# Date: 2023-01-05 -# © 2023 Unicode®, Inc. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# BidiMirroring-16.0.0.txt +# Date: 2024-01-30 +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -15,7 +16,7 @@ # value, for which there is another Unicode character that typically has a glyph # that is the mirror image of the original character's glyph. # -# The repertoire covered by the file is Unicode 15.1.0. +# The repertoire covered by the file is Unicode 16.0.0. # # The file contains a list of lines with mappings from one code point # to another one for character-based mirroring. @@ -44,7 +45,8 @@ # # This file was originally created by Markus Scherer. # Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler, -# and for subsequent versions by Ken Whistler, Laurentiu Iancu, and Roozbeh Pournader. +# and for subsequent versions by Ken Whistler, Laurentiu Iancu, Roozbeh Pournader, +# and Robin Leroy. # # Historical and Compatibility Information: # @@ -542,6 +544,7 @@ FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET # 225F; QUESTIONED EQUAL TO # 2260; NOT EQUAL TO # 2262; NOT IDENTICAL TO +# 226D; NOT EQUIVALENT TO # 228C; MULTISET # 22A7; MODELS # 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE diff --git a/data/Blocks b/data/Blocks index 8fa3eaa..1517dde 100644 --- a/data/Blocks +++ b/data/Blocks @@ -1,7 +1,8 @@ -# Blocks-15.1.0.txt -# Date: 2023-07-28, 15:47:20 GMT -# © 2023 Unicode®, Inc. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# Blocks-16.0.0.txt +# Date: 2024-02-02 +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -217,6 +218,7 @@ FFF0..FFFF; Specials 10500..1052F; Elbasan 10530..1056F; Caucasian Albanian 10570..105BF; Vithkuqi +105C0..105FF; Todhri 10600..1077F; Linear A 10780..107BF; Latin Extended-F 10800..1083F; Cypriot Syllabary @@ -239,6 +241,7 @@ FFF0..FFFF; Specials 10C00..10C4F; Old Turkic 10C80..10CFF; Old Hungarian 10D00..10D3F; Hanifi Rohingya +10D40..10D8F; Garay 10E60..10E7F; Rumi Numeral Symbols 10E80..10EBF; Yezidi 10EC0..10EFF; Arabic Extended-C @@ -258,12 +261,14 @@ FFF0..FFFF; Specials 11280..112AF; Multani 112B0..112FF; Khudawadi 11300..1137F; Grantha +11380..113FF; Tulu-Tigalari 11400..1147F; Newa 11480..114DF; Tirhuta 11580..115FF; Siddham 11600..1165F; Modi 11660..1167F; Mongolian Supplement 11680..116CF; Takri +116D0..116FF; Myanmar Extended-C 11700..1174F; Ahom 11800..1184F; Dogra 118A0..118FF; Warang Citi @@ -274,6 +279,7 @@ FFF0..FFFF; Specials 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 11AC0..11AFF; Pau Cin Hau 11B00..11B5F; Devanagari Extended-A +11BC0..11BFF; Sunuwar 11C00..11C6F; Bhaiksuki 11C70..11CBF; Marchen 11D00..11D5F; Masaram Gondi @@ -288,12 +294,15 @@ FFF0..FFFF; Specials 12F90..12FFF; Cypro-Minoan 13000..1342F; Egyptian Hieroglyphs 13430..1345F; Egyptian Hieroglyph Format Controls +13460..143FF; Egyptian Hieroglyphs Extended-A 14400..1467F; Anatolian Hieroglyphs +16100..1613F; Gurung Khema 16800..16A3F; Bamum Supplement 16A40..16A6F; Mro 16A70..16ACF; Tangsa 16AD0..16AFF; Bassa Vah 16B00..16B8F; Pahawh Hmong +16D40..16D7F; Kirat Rai 16E40..16E9F; Medefaidrin 16F00..16F9F; Miao 16FE0..16FFF; Ideographic Symbols and Punctuation @@ -308,6 +317,7 @@ FFF0..FFFF; Specials 1B170..1B2FF; Nushu 1BC00..1BC9F; Duployan 1BCA0..1BCAF; Shorthand Format Controls +1CC00..1CEBF; Symbols for Legacy Computing Supplement 1CF00..1CFCF; Znamenny Musical Notation 1D000..1D0FF; Byzantine Musical Symbols 1D100..1D1FF; Musical Symbols @@ -325,6 +335,7 @@ FFF0..FFFF; Specials 1E290..1E2BF; Toto 1E2C0..1E2FF; Wancho 1E4D0..1E4FF; Nag Mundari +1E5D0..1E5FF; Ol Onal 1E7E0..1E7FF; Ethiopic Extended-B 1E800..1E8DF; Mende Kikakui 1E900..1E95F; Adlam diff --git a/data/CaseFolding b/data/CaseFolding index 69c5c64..1b7a9c1 100644 --- a/data/CaseFolding +++ b/data/CaseFolding @@ -1,8 +1,8 @@ -# CaseFolding-15.1.0.txt -# Date: 2023-05-12, 21:53:10 GMT -# © 2023 Unicode®, Inc. +# CaseFolding-16.0.0.txt +# Date: 2024-04-30, 21:48:11 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -603,6 +603,7 @@ 1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN 1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT 1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK +1C89; C; 1C8A; # CYRILLIC CAPITAL LETTER TJE 1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN 1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN 1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN @@ -1240,9 +1241,13 @@ A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN +A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S +A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA +A7DC; C; 019B; # LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H AB70; C; 13A0; # CHEROKEE SMALL LETTER A AB71; C; 13A1; # CHEROKEE SMALL LETTER E @@ -1525,6 +1530,28 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS 10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN 10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US +10D50; C; 10D70; # GARAY CAPITAL LETTER A +10D51; C; 10D71; # GARAY CAPITAL LETTER CA +10D52; C; 10D72; # GARAY CAPITAL LETTER MA +10D53; C; 10D73; # GARAY CAPITAL LETTER KA +10D54; C; 10D74; # GARAY CAPITAL LETTER BA +10D55; C; 10D75; # GARAY CAPITAL LETTER JA +10D56; C; 10D76; # GARAY CAPITAL LETTER SA +10D57; C; 10D77; # GARAY CAPITAL LETTER WA +10D58; C; 10D78; # GARAY CAPITAL LETTER LA +10D59; C; 10D79; # GARAY CAPITAL LETTER GA +10D5A; C; 10D7A; # GARAY CAPITAL LETTER DA +10D5B; C; 10D7B; # GARAY CAPITAL LETTER XA +10D5C; C; 10D7C; # GARAY CAPITAL LETTER YA +10D5D; C; 10D7D; # GARAY CAPITAL LETTER TA +10D5E; C; 10D7E; # GARAY CAPITAL LETTER RA +10D5F; C; 10D7F; # GARAY CAPITAL LETTER NYA +10D60; C; 10D80; # GARAY CAPITAL LETTER FA +10D61; C; 10D81; # GARAY CAPITAL LETTER NA +10D62; C; 10D82; # GARAY CAPITAL LETTER PA +10D63; C; 10D83; # GARAY CAPITAL LETTER HA +10D64; C; 10D84; # GARAY CAPITAL LETTER OLD KA +10D65; C; 10D85; # GARAY CAPITAL LETTER OLD NA 118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA 118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A 118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI diff --git a/data/CompositionExclusions b/data/CompositionExclusions index db708a7..2f5a804 100644 --- a/data/CompositionExclusions +++ b/data/CompositionExclusions @@ -1,7 +1,8 @@ -# CompositionExclusions-15.1.0.txt -# Date: 2023-01-05 -# © 2023 Unicode®, Inc. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# CompositionExclusions-16.0.0.txt +# Date: 2024-02-02 +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ diff --git a/data/DerivedAge b/data/DerivedAge index 4cfdd54..b4dcd2e 100644 --- a/data/DerivedAge +++ b/data/DerivedAge @@ -1,8 +1,8 @@ -# DerivedAge-15.1.0.txt -# Date: 2023-07-28, 23:33:51 GMT -# © 2023 Unicode®, Inc. +# DerivedAge-16.0.0.txt +# Date: 2024-04-30, 21:48:12 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -2003,4 +2003,60 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT # Total code points: 627 +# ================================================ + +# Age=V16_0 + +# Newly assigned in Unicode 16.0.0 (September, 2024) + +0897 ; 16.0 # ARABIC PEPET +1B4E..1B4F ; 16.0 # [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN +1B7F ; 16.0 # BALINESE PANTI BAWAK +1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE +2427..2429 ; 16.0 # [3] SYMBOL FOR DELETE SQUARE CHECKER BOARD FORM..SYMBOL FOR DELETE MEDIUM SHADE FORM +31E4..31E5 ; 16.0 # [2] CJK STROKE HXG..CJK STROKE SZP +A7CB..A7CD ; 16.0 # [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE +105C0..105F3 ; 16.0 # [52] TODHRI LETTER A..TODHRI LETTER OO +10D40..10D65 ; 16.0 # [38] GARAY DIGIT ZERO..GARAY CAPITAL LETTER OLD NA +10D69..10D85 ; 16.0 # [29] GARAY VOWEL SIGN E..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; 16.0 # [2] GARAY PLUS SIGN..GARAY MINUS SIGN +10EC2..10EC4 ; 16.0 # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC ; 16.0 # ARABIC COMBINING ALEF OVERLAY +11380..11389 ; 16.0 # [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; 16.0 # TULU-TIGALARI LETTER EE +1138E ; 16.0 # TULU-TIGALARI LETTER AI +11390..113B5 ; 16.0 # [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7..113C0 ; 16.0 # [10] TULU-TIGALARI SIGN AVAGRAHA..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; 16.0 # TULU-TIGALARI VOWEL SIGN EE +113C5 ; 16.0 # TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; 16.0 # [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113D5 ; 16.0 # [10] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; 16.0 # [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +113E1..113E2 ; 16.0 # [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +116D0..116E3 ; 16.0 # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE +11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO +11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +11F5A ; 16.0 # KAWI SIGN NUKTA +13460..143FA ; 16.0 # [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA +16100..16139 ; 16.0 # [58] GURUNG KHEMA LETTER A..GURUNG KHEMA DIGIT NINE +16D40..16D79 ; 16.0 # [58] KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE +18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF +1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE +1CD00..1CEB3 ; 16.0 # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE +1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN +1F8B2..1F8BB ; 16.0 # [10] RIGHTWARDS ARROW WITH LOWER HOOK..SOUTH WEST ARROW FROM BAR +1F8C0..1F8C1 ; 16.0 # [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1FA89 ; 16.0 # HARP +1FA8F ; 16.0 # SHOVEL +1FABE ; 16.0 # LEAFLESS TREE +1FAC6 ; 16.0 # FINGERPRINT +1FADC ; 16.0 # ROOT VEGETABLE +1FADF ; 16.0 # SPLATTER +1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES +1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE + +# Total code points: 5185 + # EOF diff --git a/data/DerivedBidiClass b/data/DerivedBidiClass index e81a644..2aceac0 100644 --- a/data/DerivedBidiClass +++ b/data/DerivedBidiClass @@ -1,8 +1,8 @@ -# DerivedBidiClass-15.1.0.txt -# Date: 2023-07-28, 23:33:54 GMT -# © 2023 Unicode®, Inc. +# DerivedBidiClass-16.0.0.txt +# Date: 2024-04-30, 21:48:13 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -84,6 +84,7 @@ # 10D00..10D3F Hanifi_Rohingya # @missing: 10D00..10D3F; Arabic_Letter +# 10D40..10D8F Garay # 10E60..10E7F Rumi_Numeral_Symbols # 10E80..10EBF Yezidi # @missing: 10D40..10EBF; Right_To_Left @@ -489,11 +490,12 @@ 1B3D..1B41 ; L # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG 1B43..1B44 ; L # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG 1B45..1B4C ; L # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B4E..1B4F ; L # Po [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN 1B50..1B59 ; L # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE 1B5A..1B60 ; L # Po [7] BALINESE PANTI..BALINESE PAMENENG 1B61..1B6A ; L # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE 1B74..1B7C ; L # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING -1B7D..1B7E ; L # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B7D..1B7F ; L # Po [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK 1B82 ; L # Mc SUNDANESE SIGN PANGWISAD 1B83..1BA0 ; L # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BA1 ; L # Mc SUNDANESE CONSONANT SIGN PAMINGKAL @@ -517,7 +519,7 @@ 1C5A..1C77 ; L # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD -1C80..1C88 ; L # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; L # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; L # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; L # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CC0..1CC7 ; L # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA @@ -660,10 +662,10 @@ A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CA ; L # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A790..A7CD ; L # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; L # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; L # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; L # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; L # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -821,6 +823,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 105A3..105B1 ; L # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; L # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; L # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; L # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; L # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; L # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; L # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -900,6 +903,21 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11357 ; L # Mc GRANTHA AU LENGTH MARK 1135D..11361 ; L # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL 11362..11363 ; L # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11380..11389 ; L # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; L # Lo TULU-TIGALARI LETTER EE +1138E ; L # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; L # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; L # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; L # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113C2 ; L # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; L # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; L # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; L # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CF ; L # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D1 ; L # Lo TULU-TIGALARI REPHA +113D3 ; L # Lo TULU-TIGALARI SIGN PLUTA +113D4..113D5 ; L # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; L # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11400..11434 ; L # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; L # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11440..11441 ; L # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -939,7 +957,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 116B8 ; L # Lo TAKRI LETTER ARCHAIC KHA 116B9 ; L # Po TAKRI ABBREVIATION SIGN 116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; L # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; L # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171E ; L # Mc AHOM CONSONANT SIGN MEDIAL RA 11720..11721 ; L # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11726 ; L # Mc AHOM VOWEL SIGN E 11730..11739 ; L # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE @@ -991,6 +1011,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; L # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BC0..11BE0 ; L # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; L # Po SUNUWAR SIGN PVO +11BF0..11BF9 ; L # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; L # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; L # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; L # Mc BHAIKSUKI VOWEL SIGN AA @@ -1042,7 +1065,11 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 13000..1342F ; L # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13430..1343F ; L # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 13441..13446 ; L # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; L # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 14400..14646 ; L # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; L # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1612A..1612C ; L # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +16130..16139 ; L # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; L # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; L # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -1061,6 +1088,11 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16B5B..16B61 ; L # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS 16B63..16B77 ; L # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; L # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; L # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; L # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; L # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D6D..16D6F ; L # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA +16D70..16D79 ; L # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE 16E40..16E7F ; L # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; L # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E97..16E9A ; L # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH @@ -1073,7 +1105,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; L # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; L # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1089,6 +1121,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1BC90..1BC99 ; L # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1BC9C ; L # So DUPLOYAN SIGN O WITH CROSS 1BC9F ; L # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1CCD6..1CCEF ; L # So [26] OUTLINED LATIN CAPITAL LETTER A..OUTLINED LATIN CAPITAL LETTER Z 1CF50..1CFC3 ; L # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; L # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; L # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -1122,19 +1155,14 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1D54A..1D550 ; L # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y 1D552..1D6A5 ; L # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J 1D6A8..1D6C0 ; L # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA -1D6C1 ; L # Sm MATHEMATICAL BOLD NABLA 1D6C2..1D6DA ; L # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA 1D6DC..1D6FA ; L # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA -1D6FB ; L # Sm MATHEMATICAL ITALIC NABLA 1D6FC..1D714 ; L # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA 1D716..1D734 ; L # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA -1D735 ; L # Sm MATHEMATICAL BOLD ITALIC NABLA 1D736..1D74E ; L # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA 1D750..1D76E ; L # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA -1D76F ; L # Sm MATHEMATICAL SANS-SERIF BOLD NABLA 1D770..1D788 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA 1D78A..1D7A8 ; L # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA -1D7A9 ; L # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA 1D7AA..1D7C2 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; L # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D800..1D9FF ; L # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD @@ -1159,6 +1187,10 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1E4D0..1E4EA ; L # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL 1E4EB ; L # Lm NAG MUNDARI SIGN OJOD 1E4F0..1E4F9 ; L # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5D0..1E5ED ; L # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; L # Lo OL ONAL SIGN HODDOND +1E5F1..1E5FA ; L # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE +1E5FF ; L # Po OL ONAL ABBREVIATION SIGN 1E7E0..1E7E6 ; L # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; L # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; L # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1182,8 +1214,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820462 code points not listed here. -# Total code points: 1096267 +# The above property value applies to 815351 code points not listed here. +# Total code points: 1095513 # ================================================ @@ -1270,6 +1302,13 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 10C80..10CB2 ; R # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; R # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10CFA..10CFF ; R # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D4A..10D4D ; R # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; R # Lm GARAY VOWEL LENGTH MARK +10D4F ; R # Lo GARAY SUKUN +10D50..10D65 ; R # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; R # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; R # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; R # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10E80..10EA9 ; R # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE @@ -1288,8 +1327,8 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# The above property value applies to 2156 code points not listed here. -# Total code points: 3647 +# The above property value applies to 2087 code points not listed here. +# Total code points: 3631 # ================================================ @@ -1305,11 +1344,12 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 2488..249B ; EN # No [20] DIGIT ONE FULL STOP..NUMBER TWENTY FULL STOP FF10..FF19 ; EN # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 102E1..102FB ; EN # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +1CCF0..1CCF9 ; EN # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; EN # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1F100..1F10A ; EN # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1FBF0..1FBF9 ; EN # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 168 +# Total code points: 178 # ================================================ @@ -1378,9 +1418,10 @@ FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN 0890..0891 ; AN # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 08E2 ; AN # Cf ARABIC DISPUTED END OF AYAH 10D30..10D39 ; AN # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; AN # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS -# Total code points: 63 +# Total code points: 73 # ================================================ @@ -1608,7 +1649,7 @@ FF1A ; CS # Po FULLWIDTH COLON 239B..23B3 ; ON # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM 23B4..23DB ; ON # So [40] TOP SQUARE BRACKET..FUSE 23DC..23E1 ; ON # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET -23E2..2426 ; ON # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO +23E2..2429 ; ON # So [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM 2440..244A ; ON # So [11] OCR HOOK..OCR DOUBLE BACKSLASH 2460..2487 ; ON # No [40] CIRCLED DIGIT ONE..PARENTHESIZED NUMBER TWENTY 24EA..24FF ; ON # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO @@ -1781,7 +1822,7 @@ FF1A ; CS # Po FULLWIDTH COLON 309B..309C ; ON # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 30A0 ; ON # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN 30FB ; ON # Po KATAKANA MIDDLE DOT -31C0..31E3 ; ON # So [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; ON # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; ON # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 321D..321E ; ON # So [2] PARENTHESIZED KOREAN CHARACTER OJEON..PARENTHESIZED KOREAN CHARACTER O HU 3250 ; ON # So PARTNERSHIP SIGN @@ -1894,19 +1935,27 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 101A0 ; ON # So GREEK SYMBOL TAU RHO 1091F ; ON # Po PHOENICIAN WORD SEPARATOR 10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10D6E ; ON # Pd GARAY HYPHEN 11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND 11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI 11FE1..11FF1 ; ON # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA 16FE2 ; ON # Po OLD CHINESE HOOK MARK +1CC00..1CCD5 ; ON # So [214] UP-POINTING GO-KART..LOWER RIGHT QUADRANT STANDING KNIGHT +1CD00..1CEB3 ; ON # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON 1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; ON # So GREEK MUSICAL LEIMMA 1D300..1D356 ; ON # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D6C1 ; ON # Sm MATHEMATICAL BOLD NABLA 1D6DB ; ON # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6FB ; ON # Sm MATHEMATICAL ITALIC NABLA 1D715 ; ON # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D735 ; ON # Sm MATHEMATICAL BOLD ITALIC NABLA 1D74F ; ON # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D76F ; ON # Sm MATHEMATICAL SANS-SERIF BOLD NABLA 1D789 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D7A9 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA 1D7C3 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1EEF0..1EEF1 ; ON # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; ON # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK @@ -1935,20 +1984,20 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F850..1F859 ; ON # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; ON # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; ON # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1 ; ON # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B0..1F8BB ; ON # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR +1F8C0..1F8C1 ; ON # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; ON # So [9] YO-YO..FLUTE -1FA90..1FABD ; ON # So [46] RINGED PLANET..WING -1FABF..1FAC5 ; ON # So [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB ; ON # So [14] MOOSE..PEA POD -1FAE0..1FAE8 ; ON # So [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; ON # So [10] YO-YO..HARP +1FA8F..1FAC6 ; ON # So [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; ON # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; ON # So [11] SPLATTER..FACE WITH BAGS UNDER EYES 1FAF0..1FAF8 ; ON # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK -1FB94..1FBCA ; ON # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON +1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 6034 +# Total code points: 6751 # ================================================ @@ -2023,7 +2072,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; NSM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; NSM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; NSM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; NSM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE @@ -2223,8 +2272,9 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 10A3F ; NSM # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; NSM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; NSM # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; NSM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; NSM # Mn BRAHMI SIGN ANUSVARA @@ -2255,6 +2305,11 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11340 ; NSM # Mn GRANTHA VOWEL SIGN II 11366..1136C ; NSM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; NSM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; NSM # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; NSM # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; NSM # Mn TULU-TIGALARI CONJOINER +113D2 ; NSM # Mn TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; NSM # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; NSM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; NSM # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; NSM # Mn NEWA SIGN NUKTA @@ -2274,7 +2329,8 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 116AD ; NSM # Mn TAKRI VOWEL SIGN AA 116B0..116B5 ; NSM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; NSM # Mn TAKRI SIGN NUKTA -1171D..1171F ; NSM # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; NSM # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; NSM # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; NSM # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; NSM # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; NSM # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA @@ -2313,8 +2369,11 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 11F36..11F3A ; NSM # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; NSM # Mn KAWI VOWEL SIGN EU 11F42 ; NSM # Mn KAWI CONJOINER +11F5A ; NSM # Mn KAWI SIGN NUKTA 13440 ; NSM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; NSM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; NSM # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; NSM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; NSM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; NSM # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -2344,11 +2403,12 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E2AE ; NSM # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; NSM # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; NSM # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; NSM # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR 1E8D0..1E8D6 ; NSM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1993 +# Total code points: 2028 # ================================================ @@ -2395,6 +2455,7 @@ FDFC ; AL # Sc RIAL SIGN FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -2440,8 +2501,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 298 code points not listed here. -# Total code points: 1769 +# The above property value applies to 293 code points not listed here. +# Total code points: 1767 # ================================================ diff --git a/data/DerivedBinaryProperties b/data/DerivedBinaryProperties index 2b57127..9d54a59 100644 --- a/data/DerivedBinaryProperties +++ b/data/DerivedBinaryProperties @@ -1,8 +1,8 @@ -# DerivedBinaryProperties-15.1.0.txt -# Date: 2023-01-05, 20:34:33 GMT -# © 2023 Unicode®, Inc. +# DerivedBinaryProperties-16.0.0.txt +# Date: 2024-04-30, 21:48:15 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -51,7 +51,7 @@ 225F..2260 ; Bidi_Mirrored # Sm [2] QUESTIONED EQUAL TO..NOT EQUAL TO 2262 ; Bidi_Mirrored # Sm NOT IDENTICAL TO 2264..226B ; Bidi_Mirrored # Sm [8] LESS-THAN OR EQUAL TO..MUCH GREATER-THAN -226E..228C ; Bidi_Mirrored # Sm [31] NOT LESS-THAN..MULTISET +226D..228C ; Bidi_Mirrored # Sm [32] NOT EQUIVALENT TO..MULTISET 228F..2292 ; Bidi_Mirrored # Sm [4] SQUARE IMAGE OF..SQUARE ORIGINAL OF OR EQUAL TO 2298 ; Bidi_Mirrored # Sm CIRCLED DIVISION SLASH 22A2..22A3 ; Bidi_Mirrored # Sm [2] RIGHT TACK..LEFT TACK @@ -236,6 +236,6 @@ FF63 ; Bidi_Mirrored # Pe HALFWIDTH RIGHT CORNER BRACKET 1D789 ; Bidi_Mirrored # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL 1D7C3 ; Bidi_Mirrored # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL -# Total code points: 553 +# Total code points: 554 # EOF diff --git a/data/DerivedCoreProperties b/data/DerivedCoreProperties index 220c556..1075638 100644 --- a/data/DerivedCoreProperties +++ b/data/DerivedCoreProperties @@ -1,8 +1,8 @@ -# DerivedCoreProperties-15.1.0.txt -# Date: 2023-08-07, 15:21:24 GMT -# © 2023 Unicode®, Inc. +# DerivedCoreProperties-16.0.0.txt +# Date: 2024-05-31, 18:09:32 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -177,6 +177,7 @@ FF5C ; Math # Sm FULLWIDTH VERTICAL LINE FF5E ; Math # Sm FULLWIDTH TILDE FFE2 ; Math # Sm FULLWIDTH NOT SIGN FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +10D8E..10D8F ; Math # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 1D400..1D454 ; Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -253,7 +254,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1EEAB..1EEBB ; Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Math # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 2310 +# Total code points: 2312 # ================================================ @@ -280,6 +281,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 02EC ; Alphabetic # Lm MODIFIER LETTER VOICING 02EE ; Alphabetic # Lm MODIFIER LETTER DOUBLE APOSTROPHE 0345 ; Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +0363..036F ; Alphabetic # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X 0370..0373 ; Alphabetic # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI 0374 ; Alphabetic # Lm GREEK NUMERAL SIGN 0376..0377 ; Alphabetic # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA @@ -343,6 +345,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0860..086A ; Alphabetic # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; Alphabetic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0889..088E ; Alphabetic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0897 ; Alphabetic # Mn ARABIC PEPET 08A0..08C8 ; Alphabetic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; Alphabetic # Lm ARABIC SMALL FARSI YEH 08D4..08DF ; Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA @@ -710,7 +713,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1C4D..1C4F ; Alphabetic # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; Alphabetic # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; Alphabetic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; Alphabetic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; Alphabetic # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; Alphabetic # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Alphabetic # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CE9..1CEC ; Alphabetic # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL @@ -723,7 +726,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1D78 ; Alphabetic # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Alphabetic # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Alphabetic # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA -1DE7..1DF4 ; Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +1DD3..1DF4 ; Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS 1E00..1F15 ; Alphabetic # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F18..1F1D ; Alphabetic # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F20..1F45 ; Alphabetic # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA @@ -830,10 +833,10 @@ A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; Alphabetic # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CA ; Alphabetic # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A790..A7CD ; Alphabetic # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; Alphabetic # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Alphabetic # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; Alphabetic # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; Alphabetic # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Alphabetic # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Alphabetic # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; Alphabetic # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -998,6 +1001,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 105A3..105B1 ; Alphabetic # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; Alphabetic # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; Alphabetic # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; Alphabetic # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; Alphabetic # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; Alphabetic # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; Alphabetic # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1038,9 +1042,18 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10CC0..10CF2 ; Alphabetic # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D00..10D23 ; Alphabetic # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4A..10D4D ; Alphabetic # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; Alphabetic # Lm GARAY VOWEL LENGTH MARK +10D4F ; Alphabetic # Lo GARAY SUKUN +10D50..10D65 ; Alphabetic # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69 ; Alphabetic # Mn GARAY VOWEL SIGN E +10D6F ; Alphabetic # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; Alphabetic # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; Alphabetic # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Alphabetic # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1121,6 +1134,19 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11357 ; Alphabetic # Mc GRANTHA AU LENGTH MARK 1135D..11361 ; Alphabetic # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL 11362..11363 ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11380..11389 ; Alphabetic # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Alphabetic # Lo TULU-TIGALARI LETTER EE +1138E ; Alphabetic # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; Alphabetic # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; Alphabetic # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; Alphabetic # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Alphabetic # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Alphabetic # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Alphabetic # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Alphabetic # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Alphabetic # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113D1 ; Alphabetic # Lo TULU-TIGALARI REPHA +113D3 ; Alphabetic # Lo TULU-TIGALARI SIGN PLUTA 11400..11434 ; Alphabetic # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI @@ -1163,7 +1189,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 116B0..116B5 ; Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B8 ; Alphabetic # Lo TAKRI LETTER ARCHAIC KHA 11700..1171A ; Alphabetic # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Alphabetic # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Alphabetic # Mc AHOM VOWEL SIGN E @@ -1211,6 +1239,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11A97 ; Alphabetic # Mc SOYOMBO SIGN VISARGA 11A9D ; Alphabetic # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; Alphabetic # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; Alphabetic # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; Alphabetic # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; Alphabetic # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA @@ -1264,7 +1293,12 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 12F90..12FF0 ; Alphabetic # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 13000..1342F ; Alphabetic # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; Alphabetic # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; Alphabetic # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 14400..14646 ; Alphabetic # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; Alphabetic # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; Alphabetic # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Alphabetic # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612E ; Alphabetic # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA 16800..16A38 ; Alphabetic # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; Alphabetic # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; Alphabetic # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -1273,6 +1307,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16B40..16B43 ; Alphabetic # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16B63..16B77 ; Alphabetic # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; Alphabetic # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; Alphabetic # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; Alphabetic # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; Alphabetic # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16E40..16E7F ; Alphabetic # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; Alphabetic # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -1285,7 +1322,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Alphabetic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; Alphabetic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1348,6 +1385,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1E2C0..1E2EB ; Alphabetic # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH 1E4D0..1E4EA ; Alphabetic # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL 1E4EB ; Alphabetic # Lm NAG MUNDARI SIGN OJOD +1E5D0..1E5ED ; Alphabetic # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; Alphabetic # Lo OL ONAL SIGN HODDOND 1E7E0..1E7E6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; Alphabetic # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1402,7 +1441,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138387 +# Total code points: 142759 # ================================================ @@ -1691,6 +1730,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10FD..10FF ; Lowercase # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13F8..13FD ; Lowercase # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Lowercase # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Lowercase # L& CYRILLIC SMALL LETTER TJE 1D00..1D2B ; Lowercase # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D2C..1D6A ; Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D6B..1D77 ; Lowercase # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G @@ -2032,11 +2072,13 @@ A7C1 ; Lowercase # L& LATIN SMALL LETTER OLD POLISH O A7C3 ; Lowercase # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Lowercase # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Lowercase # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7CD ; Lowercase # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D1 ; Lowercase # L& LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Lowercase # L& LATIN SMALL LETTER DOUBLE THORN A7D5 ; Lowercase # L& LATIN SMALL LETTER DOUBLE WYNN A7D7 ; Lowercase # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Lowercase # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Lowercase # L& LATIN SMALL LETTER LAMBDA A7F2..A7F4 ; Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F6 ; Lowercase # L& LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -2060,6 +2102,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 10787..107B0 ; Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10CC0..10CF2 ; Lowercase # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Lowercase # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lowercase # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Lowercase # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1D41A..1D433 ; Lowercase # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z @@ -2096,7 +2139,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2544 +# Total code points: 2569 # ================================================ @@ -2379,6 +2422,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 10C7 ; Uppercase # L& GEORGIAN CAPITAL LETTER YN 10CD ; Uppercase # L& GEORGIAN CAPITAL LETTER AEN 13A0..13F5 ; Uppercase # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Uppercase # L& CYRILLIC CAPITAL LETTER TJE 1C90..1CBA ; Uppercase # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Uppercase # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1E00 ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING BELOW @@ -2705,9 +2749,12 @@ A7C0 ; Uppercase # L& LATIN CAPITAL LETTER OLD POLISH O A7C2 ; Uppercase # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Uppercase # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Uppercase # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB..A7CC ; Uppercase # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7D0 ; Uppercase # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Uppercase # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Uppercase # L& LATIN CAPITAL LETTER REVERSED HALF H FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Uppercase # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW @@ -2717,6 +2764,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1058C..10592 ; Uppercase # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 10594..10595 ; Uppercase # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Uppercase # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Uppercase # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Uppercase # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Uppercase # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1D400..1D419 ; Uppercase # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -2755,7 +2803,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1951 +# Total code points: 1978 # ================================================ @@ -2800,7 +2848,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 10FD..10FF ; Cased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13A0..13F5 ; Cased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV 13F8..13FD ; Cased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV -1C80..1C88 ; Cased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; Cased # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; Cased # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Cased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1D00..1D2B ; Cased # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL @@ -2863,10 +2911,10 @@ A722..A76F ; Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN A770 ; Cased # Lm MODIFIER LETTER US A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A7CA ; Cased # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A790..A7CD ; Cased # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; Cased # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; Cased # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; Cased # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; Cased # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; Cased # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; Cased # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE @@ -2897,6 +2945,8 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 107B2..107BA ; Cased # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10C80..10CB2 ; Cased # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; Cased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D50..10D65 ; Cased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D70..10D85 ; Cased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118A0..118DF ; Cased # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E40..16E7F ; Cased # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1D400..1D454 ; Cased # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G @@ -2938,7 +2988,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4526 +# Total code points: 4578 # ================================================ @@ -3015,7 +3065,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0859..085B ; Case_Ignorable # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 0888 ; Case_Ignorable # Sk ARABIC RAISED ROUND DOT 0890..0891 ; Case_Ignorable # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE -0898..089F ; Case_Ignorable # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; Case_Ignorable # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08C9 ; Case_Ignorable # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; Case_Ignorable # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E2 ; Case_Ignorable # Cf ARABIC DISPUTED END OF AYAH @@ -3296,8 +3346,11 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10A3F ; Case_Ignorable # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; Case_Ignorable # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Case_Ignorable # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4E ; Case_Ignorable # Lm GARAY VOWEL LENGTH MARK +10D69..10D6D ; Case_Ignorable # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; Case_Ignorable # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA @@ -3330,6 +3383,11 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11340 ; Case_Ignorable # Mn GRANTHA VOWEL SIGN II 11366..1136C ; Case_Ignorable # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Case_Ignorable # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; Case_Ignorable # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; Case_Ignorable # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; Case_Ignorable # Mn TULU-TIGALARI CONJOINER +113D2 ; Case_Ignorable # Mn TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; Case_Ignorable # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; Case_Ignorable # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; Case_Ignorable # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; Case_Ignorable # Mn NEWA SIGN NUKTA @@ -3349,7 +3407,8 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 116AD ; Case_Ignorable # Mn TAKRI VOWEL SIGN AA 116B0..116B5 ; Case_Ignorable # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 116B7 ; Case_Ignorable # Mn TAKRI SIGN NUKTA -1171D..1171F ; Case_Ignorable # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Case_Ignorable # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; Case_Ignorable # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; Case_Ignorable # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; Case_Ignorable # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; Case_Ignorable # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA @@ -3388,12 +3447,17 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 11F36..11F3A ; Case_Ignorable # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Case_Ignorable # Mn KAWI VOWEL SIGN EU 11F42 ; Case_Ignorable # Mn KAWI CONJOINER +11F5A ; Case_Ignorable # Mn KAWI SIGN NUKTA 13430..1343F ; Case_Ignorable # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 13440 ; Case_Ignorable # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Case_Ignorable # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Case_Ignorable # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Case_Ignorable # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Case_Ignorable # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Case_Ignorable # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16B40..16B43 ; Case_Ignorable # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16D40..16D42 ; Case_Ignorable # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D6B..16D6C ; Case_Ignorable # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16F4F ; Case_Ignorable # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; Case_Ignorable # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; Case_Ignorable # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 @@ -3432,6 +3496,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 1E2EC..1E2EF ; Case_Ignorable # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EB ; Case_Ignorable # Lm NAG MUNDARI SIGN OJOD 1E4EC..1E4EF ; Case_Ignorable # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; Case_Ignorable # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR 1E8D0..1E8D6 ; Case_Ignorable # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Case_Ignorable # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1E94B ; Case_Ignorable # Lm ADLAM NASALIZATION MARK @@ -3440,7 +3505,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2707 +# Total code points: 2749 # ================================================ @@ -3724,6 +3789,7 @@ E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELEC 10C7 ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER YN 10CD ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER AEN 13A0..13F5 ; Changes_When_Lowercased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TJE 1C90..1CBA ; Changes_When_Lowercased # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Changes_When_Lowercased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1E00 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING BELOW @@ -4043,9 +4109,12 @@ A7C0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OLD POLI A7C2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB..A7CC ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7D0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER REVERSED HALF H FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Changes_When_Lowercased # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW @@ -4055,11 +4124,12 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 1058C..10592 ; Changes_When_Lowercased # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 10594..10595 ; Changes_When_Lowercased # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Changes_When_Lowercased # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Changes_When_Lowercased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Lowercased # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1433 +# Total code points: 1460 # ================================================ @@ -4140,7 +4210,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 018C ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH TOPBAR 0192 ; Changes_When_Uppercased # L& LATIN SMALL LETTER F WITH HOOK 0195 ; Changes_When_Uppercased # L& LATIN SMALL LETTER HV -0199..019A ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER L WITH BAR +0199..019B ; Changes_When_Uppercased # L& [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE 019E ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH LONG RIGHT LEG 01A1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN 01A3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OI @@ -4216,8 +4286,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 0259 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SCHWA 025B..025C ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E 0260..0261 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G -0263 ; Changes_When_Uppercased # L& LATIN SMALL LETTER GAMMA -0265..0266 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK +0263..0266 ; Changes_When_Uppercased # L& [4] LATIN SMALL LETTER GAMMA..LATIN SMALL LETTER H WITH HOOK 0268..026C ; Changes_When_Uppercased # L& [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT 026F ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED M 0271..0272 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK @@ -4357,6 +4426,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10FD..10FF ; Changes_When_Uppercased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13F8..13FD ; Changes_When_Uppercased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Changes_When_Uppercased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TJE 1D79 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR G 1D7D ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH STROKE 1D8E ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK @@ -4676,9 +4746,11 @@ A7C1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OLD POLISH A7C3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7CD ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CLOSED INSULAR G A7D7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Changes_When_Uppercased # L& LATIN SMALL LETTER LAMBDA A7F6 ; Changes_When_Uppercased # L& LATIN SMALL LETTER REVERSED HALF H AB53 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Uppercased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -4692,11 +4764,12 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 105B3..105B9 ; Changes_When_Uppercased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; Changes_When_Uppercased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE 10CC0..10CF2 ; Changes_When_Uppercased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Changes_When_Uppercased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Uppercased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1525 +# Total code points: 1552 # ================================================ @@ -4777,7 +4850,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 018C ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH TOPBAR 0192 ; Changes_When_Titlecased # L& LATIN SMALL LETTER F WITH HOOK 0195 ; Changes_When_Titlecased # L& LATIN SMALL LETTER HV -0199..019A ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER L WITH BAR +0199..019B ; Changes_When_Titlecased # L& [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE 019E ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH LONG RIGHT LEG 01A1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN 01A3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OI @@ -4854,8 +4927,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 0259 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SCHWA 025B..025C ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E 0260..0261 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G -0263 ; Changes_When_Titlecased # L& LATIN SMALL LETTER GAMMA -0265..0266 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK +0263..0266 ; Changes_When_Titlecased # L& [4] LATIN SMALL LETTER GAMMA..LATIN SMALL LETTER H WITH HOOK 0268..026C ; Changes_When_Titlecased # L& [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT 026F ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED M 0271..0272 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK @@ -4993,6 +5065,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 0561..0587 ; Changes_When_Titlecased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 13F8..13FD ; Changes_When_Titlecased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Changes_When_Titlecased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TJE 1D79 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR G 1D7D ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH STROKE 1D8E ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK @@ -5312,9 +5385,11 @@ A7C1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OLD POLISH A7C3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER ANGLICANA W A7C8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY A7CA ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7CD ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CLOSED INSULAR G A7D7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE SCOTS S A7D9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Changes_When_Titlecased # L& LATIN SMALL LETTER LAMBDA A7F6 ; Changes_When_Titlecased # L& LATIN SMALL LETTER REVERSED HALF H AB53 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Titlecased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -5328,11 +5403,12 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 105B3..105B9 ; Changes_When_Titlecased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; Changes_When_Titlecased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE 10CC0..10CF2 ; Changes_When_Titlecased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Changes_When_Titlecased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Titlecased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1452 +# Total code points: 1479 # ================================================ @@ -5623,7 +5699,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 10C7 ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER YN 10CD ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER AEN 13F8..13FD ; Changes_When_Casefolded # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV -1C80..1C88 ; Changes_When_Casefolded # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C89 ; Changes_When_Casefolded # L& [10] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC CAPITAL LETTER TJE 1C90..1CBA ; Changes_When_Casefolded # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Changes_When_Casefolded # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1E00 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW @@ -5945,9 +6021,12 @@ A7C0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OLD POLI A7C2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ANGLICANA W A7C4..A7C7 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB..A7CC ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7D0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H AB70..ABBF ; Changes_When_Casefolded # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA FB00..FB06 ; Changes_When_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST @@ -5960,11 +6039,12 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 1058C..10592 ; Changes_When_Casefolded # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 10594..10595 ; Changes_When_Casefolded # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Changes_When_Casefolded # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Changes_When_Casefolded # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Casefolded # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1506 +# Total code points: 1533 # ================================================ @@ -5980,8 +6060,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 00D8..00F6 ; Changes_When_Casemapped # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..0137 ; Changes_When_Casemapped # L& [64] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER K WITH CEDILLA 0139..018C ; Changes_When_Casemapped # L& [84] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER D WITH TOPBAR -018E..019A ; Changes_When_Casemapped # L& [13] LATIN CAPITAL LETTER REVERSED E..LATIN SMALL LETTER L WITH BAR -019C..01A9 ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER ESH +018E..01A9 ; Changes_When_Casemapped # L& [28] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER ESH 01AC..01B9 ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER T WITH HOOK..LATIN SMALL LETTER EZH REVERSED 01BC..01BD ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER TONE FIVE..LATIN SMALL LETTER TONE FIVE 01BF ; Changes_When_Casemapped # L& LATIN LETTER WYNN @@ -5992,8 +6071,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 0259 ; Changes_When_Casemapped # L& LATIN SMALL LETTER SCHWA 025B..025C ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E 0260..0261 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G -0263 ; Changes_When_Casemapped # L& LATIN SMALL LETTER GAMMA -0265..0266 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK +0263..0266 ; Changes_When_Casemapped # L& [4] LATIN SMALL LETTER GAMMA..LATIN SMALL LETTER H WITH HOOK 0268..026C ; Changes_When_Casemapped # L& [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT 026F ; Changes_When_Casemapped # L& LATIN SMALL LETTER TURNED M 0271..0272 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK @@ -6027,7 +6105,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10FD..10FF ; Changes_When_Casemapped # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13A0..13F5 ; Changes_When_Casemapped # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV 13F8..13FD ; Changes_When_Casemapped # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV -1C80..1C88 ; Changes_When_Casemapped # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; Changes_When_Casemapped # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; Changes_When_Casemapped # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Changes_When_Casemapped # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1D79 ; Changes_When_Casemapped # L& LATIN SMALL LETTER INSULAR G @@ -6078,9 +6156,9 @@ A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H A790..A794 ; Changes_When_Casemapped # L& [5] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH PALATAL HOOK A796..A7AE ; Changes_When_Casemapped # L& [25] LATIN CAPITAL LETTER B WITH FLOURISH..LATIN CAPITAL LETTER SMALL CAPITAL I -A7B0..A7CA ; Changes_When_Casemapped # L& [27] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7B0..A7CD ; Changes_When_Casemapped # L& [30] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G -A7D6..A7D9 ; Changes_When_Casemapped # L& [4] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN SMALL LETTER SIGMOID S +A7D6..A7DC ; Changes_When_Casemapped # L& [7] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F5..A7F6 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H AB53 ; Changes_When_Casemapped # L& LATIN SMALL LETTER CHI AB70..ABBF ; Changes_When_Casemapped # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA @@ -6101,11 +6179,13 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 105BB..105BC ; Changes_When_Casemapped # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE 10C80..10CB2 ; Changes_When_Casemapped # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; Changes_When_Casemapped # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D50..10D65 ; Changes_When_Casemapped # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D70..10D85 ; Changes_When_Casemapped # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118A0..118DF ; Changes_When_Casemapped # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2927 +# Total code points: 2981 # ================================================ @@ -6364,7 +6444,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 1C4D..1C4F ; ID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; ID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; ID_Start # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; ID_Start # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; ID_Start # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; ID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CE9..1CEC ; ID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL @@ -6481,10 +6561,10 @@ A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER I A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CA ; ID_Start # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A790..A7CD ; ID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; ID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; ID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; ID_Start # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; ID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -6603,6 +6683,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 105A3..105B1 ; ID_Start # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; ID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; ID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; ID_Start # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; ID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; ID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; ID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -6639,8 +6720,15 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10C80..10CB2 ; ID_Start # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; ID_Start # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D00..10D23 ; ID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; ID_Start # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; ID_Start # Lm GARAY VOWEL LENGTH MARK +10D4F ; ID_Start # Lo GARAY SUKUN +10D50..10D65 ; ID_Start # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; ID_Start # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; ID_Start # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6679,6 +6767,13 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1133D ; ID_Start # Lo GRANTHA SIGN AVAGRAHA 11350 ; ID_Start # Lo GRANTHA OM 1135D..11361 ; ID_Start # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; ID_Start # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; ID_Start # Lo TULU-TIGALARI LETTER EE +1138E ; ID_Start # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; ID_Start # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; ID_Start # Lo TULU-TIGALARI SIGN AVAGRAHA +113D1 ; ID_Start # Lo TULU-TIGALARI REPHA +113D3 ; ID_Start # Lo TULU-TIGALARI SIGN PLUTA 11400..11434 ; ID_Start # Lo [53] NEWA LETTER A..NEWA LETTER HA 11447..1144A ; ID_Start # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 1145F..11461 ; ID_Start # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA @@ -6713,6 +6808,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11A5C..11A89 ; ID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; ID_Start # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; ID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; ID_Start # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; ID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; ID_Start # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; ID_Start # Lo BHAIKSUKI SIGN AVAGRAHA @@ -6736,7 +6832,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 12F90..12FF0 ; ID_Start # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 13000..1342F ; ID_Start # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; ID_Start # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; ID_Start # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 14400..14646 ; ID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; ID_Start # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA 16800..16A38 ; ID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; ID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; ID_Start # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -6745,6 +6843,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16B40..16B43 ; ID_Start # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16B63..16B77 ; ID_Start # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; ID_Start # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; ID_Start # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; ID_Start # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; ID_Start # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16E40..16E7F ; ID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; ID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; ID_Start # Lo MIAO LETTER NASALIZATION @@ -6753,7 +6854,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK 17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; ID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -6809,6 +6910,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1E2C0..1E2EB ; ID_Start # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH 1E4D0..1E4EA ; ID_Start # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL 1E4EB ; ID_Start # Lm NAG MUNDARI SIGN OJOD +1E5D0..1E5ED ; ID_Start # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; ID_Start # Lo OL ONAL SIGN HODDOND 1E7E0..1E7E6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; ID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; ID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -6859,7 +6962,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136967 +# Total code points: 141269 # ================================================ @@ -6966,7 +7069,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0860..086A ; ID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0889..088E ; ID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL -0898..089F ; ID_Continue # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; ID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; ID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ID_Continue # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; ID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA @@ -7399,7 +7502,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1C50..1C59 ; ID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE 1C5A..1C77 ; ID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; ID_Continue # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; ID_Continue # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; ID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; ID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CD0..1CD2 ; ID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -7543,10 +7646,10 @@ A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTE A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; ID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CA ; ID_Continue # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A790..A7CD ; ID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; ID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; ID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; ID_Continue # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; ID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; ID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; ID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; ID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -7735,6 +7838,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 105A3..105B1 ; ID_Continue # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; ID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; ID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; ID_Continue # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; ID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; ID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; ID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -7779,10 +7883,19 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10D00..10D23 ; ID_Continue # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; ID_Continue # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D30..10D39 ; ID_Continue # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; ID_Continue # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; ID_Continue # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; ID_Continue # Lm GARAY VOWEL LENGTH MARK +10D4F ; ID_Continue # Lo GARAY SUKUN +10D50..10D65 ; ID_Continue # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; ID_Continue # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F ; ID_Continue # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; ID_Continue # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; ID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF ; ID_Continue # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -7878,6 +7991,24 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11362..11363 ; ID_Continue # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; ID_Continue # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; ID_Continue # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; ID_Continue # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; ID_Continue # Lo TULU-TIGALARI LETTER EE +1138E ; ID_Continue # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; ID_Continue # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; ID_Continue # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; ID_Continue # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; ID_Continue # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; ID_Continue # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; ID_Continue # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; ID_Continue # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; ID_Continue # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; ID_Continue # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; ID_Continue # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; ID_Continue # Mn TULU-TIGALARI CONJOINER +113D1 ; ID_Continue # Lo TULU-TIGALARI REPHA +113D2 ; ID_Continue # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; ID_Continue # Lo TULU-TIGALARI SIGN PLUTA +113E1..113E2 ; ID_Continue # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11400..11434 ; ID_Continue # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; ID_Continue # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; ID_Continue # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI @@ -7929,8 +8060,11 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 116B7 ; ID_Continue # Mn TAKRI SIGN NUKTA 116B8 ; ID_Continue # Lo TAKRI LETTER ARCHAIC KHA 116C0..116C9 ; ID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; ID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; ID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; ID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; ID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; ID_Continue # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; ID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; ID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; ID_Continue # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; ID_Continue # Mc AHOM VOWEL SIGN E @@ -7988,6 +8122,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11A98..11A99 ; ID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 11A9D ; ID_Continue # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; ID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; ID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BF0..11BF9 ; ID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; ID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; ID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; ID_Continue # Mc BHAIKSUKI VOWEL SIGN AA @@ -8041,6 +8177,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11F41 ; ID_Continue # Mc KAWI SIGN KILLER 11F42 ; ID_Continue # Mn KAWI CONJOINER 11F50..11F59 ; ID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; ID_Continue # Mn KAWI SIGN NUKTA 11FB0 ; ID_Continue # Lo LISU LETTER YHA 12000..12399 ; ID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; ID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM @@ -8050,7 +8187,13 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 13440 ; ID_Continue # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13441..13446 ; ID_Continue # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 13447..13455 ; ID_Continue # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +13460..143FA ; ID_Continue # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 14400..14646 ; ID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; ID_Continue # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; ID_Continue # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; ID_Continue # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; ID_Continue # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; ID_Continue # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; ID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; ID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; ID_Continue # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -8064,6 +8207,10 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16B50..16B59 ; ID_Continue # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16B63..16B77 ; ID_Continue # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; ID_Continue # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; ID_Continue # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; ID_Continue # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; ID_Continue # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D70..16D79 ; ID_Continue # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE 16E40..16E7F ; ID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; ID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; ID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -8077,7 +8224,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; ID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; ID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8092,6 +8239,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1BC80..1BC88 ; ID_Continue # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; ID_Continue # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1BC9D..1BC9E ; ID_Continue # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CCF0..1CCF9 ; ID_Continue # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1CF00..1CF2D ; ID_Continue # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; ID_Continue # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1D165..1D166 ; ID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM @@ -8163,6 +8311,10 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1E4EB ; ID_Continue # Lm NAG MUNDARI SIGN OJOD 1E4EC..1E4EF ; ID_Continue # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E4F0..1E4F9 ; ID_Continue # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5D0..1E5ED ; ID_Continue # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5EE..1E5EF ; ID_Continue # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E5F0 ; ID_Continue # Lo OL ONAL SIGN HODDOND +1E5F1..1E5FA ; ID_Continue # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E7E0..1E7E6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; ID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -8218,7 +8370,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140108 +# Total code points: 144541 # ================================================ @@ -8474,7 +8626,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 1C4D..1C4F ; XID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; XID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; XID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; XID_Start # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; XID_Start # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; XID_Start # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; XID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CE9..1CEC ; XID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL @@ -8590,10 +8742,10 @@ A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CA ; XID_Start # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A790..A7CD ; XID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; XID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; XID_Start # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; XID_Start # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; XID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; XID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -8717,6 +8869,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 105A3..105B1 ; XID_Start # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; XID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; XID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; XID_Start # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; XID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; XID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; XID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -8753,8 +8906,15 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10C80..10CB2 ; XID_Start # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; XID_Start # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D00..10D23 ; XID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; XID_Start # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; XID_Start # Lm GARAY VOWEL LENGTH MARK +10D4F ; XID_Start # Lo GARAY SUKUN +10D50..10D65 ; XID_Start # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; XID_Start # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; XID_Start # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -8793,6 +8953,13 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1133D ; XID_Start # Lo GRANTHA SIGN AVAGRAHA 11350 ; XID_Start # Lo GRANTHA OM 1135D..11361 ; XID_Start # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; XID_Start # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; XID_Start # Lo TULU-TIGALARI LETTER EE +1138E ; XID_Start # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; XID_Start # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; XID_Start # Lo TULU-TIGALARI SIGN AVAGRAHA +113D1 ; XID_Start # Lo TULU-TIGALARI REPHA +113D3 ; XID_Start # Lo TULU-TIGALARI SIGN PLUTA 11400..11434 ; XID_Start # Lo [53] NEWA LETTER A..NEWA LETTER HA 11447..1144A ; XID_Start # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI 1145F..11461 ; XID_Start # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA @@ -8827,6 +8994,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 11A5C..11A89 ; XID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; XID_Start # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; XID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; XID_Start # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; XID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; XID_Start # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; XID_Start # Lo BHAIKSUKI SIGN AVAGRAHA @@ -8850,7 +9018,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 12F90..12FF0 ; XID_Start # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 13000..1342F ; XID_Start # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D 13441..13446 ; XID_Start # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; XID_Start # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 14400..14646 ; XID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; XID_Start # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA 16800..16A38 ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; XID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A70..16ABE ; XID_Start # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA @@ -8859,6 +9029,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16B40..16B43 ; XID_Start # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16B63..16B77 ; XID_Start # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; XID_Start # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; XID_Start # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; XID_Start # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; XID_Start # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16E40..16E7F ; XID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; XID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION @@ -8867,7 +9040,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK 17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; XID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; XID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -8923,6 +9096,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1E2C0..1E2EB ; XID_Start # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH 1E4D0..1E4EA ; XID_Start # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL 1E4EB ; XID_Start # Lm NAG MUNDARI SIGN OJOD +1E5D0..1E5ED ; XID_Start # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; XID_Start # Lo OL ONAL SIGN HODDOND 1E7E0..1E7E6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; XID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; XID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -8973,7 +9148,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136944 +# Total code points: 141246 # ================================================ @@ -9076,7 +9251,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0860..086A ; XID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; XID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0889..088E ; XID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL -0898..089F ; XID_Continue # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; XID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; XID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; XID_Continue # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; XID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA @@ -9509,7 +9684,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1C50..1C59 ; XID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE 1C5A..1C77 ; XID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; XID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; XID_Continue # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; XID_Continue # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; XID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; XID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CD0..1CD2 ; XID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -9652,10 +9827,10 @@ A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETT A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CA ; XID_Continue # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A790..A7CD ; XID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; XID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; XID_Continue # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; XID_Continue # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; XID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; XID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; XID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; XID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -9850,6 +10025,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 105A3..105B1 ; XID_Continue # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; XID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; XID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; XID_Continue # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; XID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; XID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; XID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -9894,10 +10070,19 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10D00..10D23 ; XID_Continue # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; XID_Continue # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D30..10D39 ; XID_Continue # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; XID_Continue # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; XID_Continue # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; XID_Continue # Lm GARAY VOWEL LENGTH MARK +10D4F ; XID_Continue # Lo GARAY SUKUN +10D50..10D65 ; XID_Continue # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; XID_Continue # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F ; XID_Continue # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; XID_Continue # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; XID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF ; XID_Continue # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9993,6 +10178,24 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11362..11363 ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; XID_Continue # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; XID_Continue # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; XID_Continue # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; XID_Continue # Lo TULU-TIGALARI LETTER EE +1138E ; XID_Continue # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; XID_Continue # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; XID_Continue # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; XID_Continue # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; XID_Continue # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; XID_Continue # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; XID_Continue # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; XID_Continue # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; XID_Continue # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; XID_Continue # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; XID_Continue # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; XID_Continue # Mn TULU-TIGALARI CONJOINER +113D1 ; XID_Continue # Lo TULU-TIGALARI REPHA +113D2 ; XID_Continue # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; XID_Continue # Lo TULU-TIGALARI SIGN PLUTA +113E1..113E2 ; XID_Continue # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11400..11434 ; XID_Continue # Lo [53] NEWA LETTER A..NEWA LETTER HA 11435..11437 ; XID_Continue # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; XID_Continue # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI @@ -10044,8 +10247,11 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 116B7 ; XID_Continue # Mn TAKRI SIGN NUKTA 116B8 ; XID_Continue # Lo TAKRI LETTER ARCHAIC KHA 116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; XID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; XID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; XID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; XID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; XID_Continue # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; XID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; XID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; XID_Continue # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; XID_Continue # Mc AHOM VOWEL SIGN E @@ -10103,6 +10309,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11A98..11A99 ; XID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 11A9D ; XID_Continue # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; XID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; XID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BF0..11BF9 ; XID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; XID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; XID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; XID_Continue # Mc BHAIKSUKI VOWEL SIGN AA @@ -10156,6 +10364,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11F41 ; XID_Continue # Mc KAWI SIGN KILLER 11F42 ; XID_Continue # Mn KAWI CONJOINER 11F50..11F59 ; XID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; XID_Continue # Mn KAWI SIGN NUKTA 11FB0 ; XID_Continue # Lo LISU LETTER YHA 12000..12399 ; XID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U 12400..1246E ; XID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM @@ -10165,7 +10374,13 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 13440 ; XID_Continue # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13441..13446 ; XID_Continue # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN 13447..13455 ; XID_Continue # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +13460..143FA ; XID_Continue # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 14400..14646 ; XID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; XID_Continue # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; XID_Continue # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; XID_Continue # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; XID_Continue # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; XID_Continue # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16800..16A38 ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ 16A40..16A5E ; XID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK 16A60..16A69 ; XID_Continue # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE @@ -10179,6 +10394,10 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16B50..16B59 ; XID_Continue # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16B63..16B77 ; XID_Continue # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; XID_Continue # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; XID_Continue # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; XID_Continue # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; XID_Continue # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D70..16D79 ; XID_Continue # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE 16E40..16E7F ; XID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16F00..16F4A ; XID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F4F ; XID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -10192,7 +10411,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; XID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; XID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -10207,6 +10426,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1BC80..1BC88 ; XID_Continue # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; XID_Continue # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1BC9D..1BC9E ; XID_Continue # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CCF0..1CCF9 ; XID_Continue # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1CF00..1CF2D ; XID_Continue # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; XID_Continue # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1D165..1D166 ; XID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM @@ -10278,6 +10498,10 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1E4EB ; XID_Continue # Lm NAG MUNDARI SIGN OJOD 1E4EC..1E4EF ; XID_Continue # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH 1E4F0..1E4F9 ; XID_Continue # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5D0..1E5ED ; XID_Continue # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5EE..1E5EF ; XID_Continue # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E5F0 ; XID_Continue # Lo OL ONAL SIGN HODDOND +1E5F1..1E5FA ; XID_Continue # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E7E0..1E7E6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; XID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -10333,7 +10557,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140089 +# Total code points: 144522 # ================================================ @@ -10418,7 +10642,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .......... F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762725 code points not listed here. -# Total code points: 900193 +# The above property value applies to 757653 code points not listed here. +# Total code points: 895121 # ================================================ @@ -215,10 +216,6 @@ FF62 ; OP # Ps HALFWIDTH LEFT CORNER BRACKET 2E25 ; CL # Pe BOTTOM RIGHT HALF BRACKET 2E27 ; CL # Pe RIGHT SIDEWAYS U BRACKET 2E29 ; CL # Pe RIGHT DOUBLE PARENTHESIS -2E56 ; CL # Pe RIGHT SQUARE BRACKET WITH STROKE -2E58 ; CL # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE -2E5A ; CL # Pe TOP HALF RIGHT PARENTHESIS -2E5C ; CL # Pe BOTTOM HALF RIGHT PARENTHESIS 3001..3002 ; CL # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP 3009 ; CL # Pe RIGHT ANGLE BRACKET 300B ; CL # Pe RIGHT DOUBLE ANGLE BRACKET @@ -231,7 +228,7 @@ FF62 ; OP # Ps HALFWIDTH LEFT CORNER BRACKET 301B ; CL # Pe RIGHT WHITE SQUARE BRACKET 301E..301F ; CL # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK FD3E ; CL # Pe ORNATE LEFT PARENTHESIS -FE11..FE12 ; CL # Po [2] PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA..PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP +FE10..FE12 ; CL # Po [3] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP FE18 ; CL # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET FE36 ; CL # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS FE38 ; CL # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET @@ -266,7 +263,7 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 1343F ; CL # Cf EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE 145CF ; CL # Lo ANATOLIAN HIEROGLYPH A410A END LOGOGRAM MARK -# Total code points: 97 +# Total code points: 94 # ================================================ @@ -320,12 +317,19 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 2007 ; GL # Zs FIGURE SPACE 2011 ; GL # Pd NON-BREAKING HYPHEN 202F ; GL # Zs NARROW NO-BREAK SPACE +FE20 ; GL # Mn COMBINING LIGATURE LEFT HALF +FE22 ; GL # Mn COMBINING DOUBLE TILDE LEFT HALF +FE24 ; GL # Mn COMBINING MACRON LEFT HALF +FE26..FE27 ; GL # Mn [2] COMBINING CONJOINING MACRON..COMBINING LIGATURE LEFT HALF BELOW +FE29 ; GL # Mn COMBINING TILDE LEFT HALF BELOW +FE2B ; GL # Mn COMBINING MACRON LEFT HALF BELOW +FE2D..FE2E ; GL # Mn [2] COMBINING CONJOINING MACRON BELOW..COMBINING CYRILLIC TITLO LEFT HALF 1107F ; GL # Mn BRAHMI NUMBER JOINER 13430..13436 ; GL # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE 13439..1343B ; GL # Cf [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM 16FE4 ; GL # Mn KHITAN SMALL SCRIPT FILLER -# Total code points: 32 +# Total code points: 41 # ================================================ @@ -344,6 +348,7 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 30FB ; NS # Po KATAKANA MIDDLE DOT 30FD..30FE ; NS # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK A015 ; NS # Lm YI SYLLABLE WU +FE13..FE14 ; NS # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION FORM FOR VERTICAL SEMICOLON FE54..FE55 ; NS # Po [2] SMALL SEMICOLON..SMALL COLON FF1A..FF1B ; NS # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON FF65 ; NS # Po HALFWIDTH KATAKANA MIDDLE DOT @@ -353,7 +358,7 @@ FF9E..FF9F ; NS # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KA 16FE3 ; NS # Lm OLD CHINESE ITERATION MARK 1F679..1F67B ; NS # So [3] HEAVY INTERROBANG ORNAMENT..HEAVY SANS-SERIF INTERROBANG ORNAMENT -# Total code points: 33 +# Total code points: 35 # ================================================ @@ -407,10 +412,8 @@ FF1F ; EX # Po FULLWIDTH QUESTION MARK 060C..060D ; IS # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR 07F8 ; IS # Po NKO COMMA 2044 ; IS # Sm FRACTION SLASH -FE10 ; IS # Po PRESENTATION FORM FOR VERTICAL COMMA -FE13..FE14 ; IS # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION FORM FOR VERTICAL SEMICOLON -# Total code points: 13 +# Total code points: 10 # ================================================ @@ -509,6 +512,7 @@ FFE0 ; PO # Sc FULLWIDTH CENT SIGN 1810..1819 ; NU # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE 1946..194F ; NU # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE 19D0..19D9 ; NU # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; NU # No NEW TAI LUE THAM DIGIT ONE 1A80..1A89 ; NU # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE 1A90..1A99 ; NU # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE 1BB0..1BB9 ; NU # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE @@ -521,6 +525,7 @@ A9F0..A9F9 ; NU # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DI ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 104A0..104A9 ; NU # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10D30..10D39 ; NU # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; NU # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 110BD ; NU # Cf KAITHI NUMBER SIGN 110CD ; NU # Cf KAITHI NUMBER SIGN ABOVE 110F0..110F9 ; NU # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE @@ -531,22 +536,27 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 114D0..114D9 ; NU # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; NU # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; NU # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; NU # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11730..11739 ; NU # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; NU # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +11BF0..11BF9 ; NU # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C50..11C59 ; NU # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; NU # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; NU # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE 16A60..16A69 ; NU # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; NU # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; NU # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16D70..16D79 ; NU # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +1CCF0..1CCF9 ; NU # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; NU # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; NU # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE 1E2F0..1E2F9 ; NU # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE 1E4F0..1E4F9 ; NU # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5F1..1E5FA ; NU # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E950..1E959 ; NU # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; NU # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 624 +# Total code points: 695 # ================================================ @@ -851,7 +861,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 1C4D..1C4F ; AL # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; AL # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; AL # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; AL # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; AL # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; AL # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; AL # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CC0..1CC7 ; AL # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA @@ -947,9 +957,6 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 214C..214D ; AL # So [2] PER SIGN..AKTIESELSKAB 214E ; AL # L& TURNED SMALL F 214F ; AL # So SYMBOL FOR SAMARITAN SOURCE -2150..2153 ; AL # No [4] VULGAR FRACTION ONE SEVENTH..VULGAR FRACTION ONE THIRD -2156..215A ; AL # No [5] VULGAR FRACTION TWO FIFTHS..VULGAR FRACTION FIVE SIXTHS -215C..215D ; AL # No [2] VULGAR FRACTION THREE EIGHTHS..VULGAR FRACTION FIVE EIGHTHS 215F ; AL # No FRACTION NUMERATOR ONE 216C..216F ; AL # Nl [4] ROMAN NUMERAL FIFTY..ROMAN NUMERAL ONE THOUSAND 217A..2182 ; AL # Nl [9] SMALL ROMAN NUMERAL ELEVEN..ROMAN NUMERAL TEN THOUSAND @@ -1013,7 +1020,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 23B4..23DB ; AL # So [40] TOP SQUARE BRACKET..FUSE 23DC..23E1 ; AL # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET 23E2..23EF ; AL # So [14] WHITE TRAPEZIUM..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR -23F4..2426 ; AL # So [51] BLACK MEDIUM LEFT-POINTING TRIANGLE..SYMBOL FOR SUBSTITUTE FORM TWO +23F4..2429 ; AL # So [54] BLACK MEDIUM LEFT-POINTING TRIANGLE..SYMBOL FOR DELETE MEDIUM SHADE FORM 2440..244A ; AL # So [11] OCR HOOK..OCR DOUBLE BACKSLASH 24FF ; AL # No NEGATIVE CIRCLED DIGIT ZERO 254C..254F ; AL # So [4] BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL..BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL @@ -1133,10 +1140,10 @@ A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A78F ; AL # Lo LATIN LETTER SINOLOGICAL DOT -A790..A7CA ; AL # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A790..A7CD ; AL # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE A7D0..A7D1 ; AL # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G A7D3 ; AL # L& LATIN SMALL LETTER DOUBLE THORN -A7D5..A7D9 ; AL # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7D5..A7DC ; AL # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2..A7F4 ; AL # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q A7F5..A7F6 ; AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H A7F7 ; AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -1238,6 +1245,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 105A3..105B1 ; AL # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; AL # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; AL # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; AL # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; AL # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; AL # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; AL # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1296,9 +1304,17 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10CC0..10CF2 ; AL # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10CFA..10CFF ; AL # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; AL # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; AL # Lm GARAY VOWEL LENGTH MARK +10D4F ; AL # Lo GARAY SUKUN +10D50..10D65 ; AL # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; AL # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; AL # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; AL # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10E60..10E7E ; AL # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1373,6 +1389,8 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 11A5C..11A89 ; AL # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; AL # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; AL # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; AL # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; AL # Po SUNUWAR SIGN PVO 11C00..11C08 ; AL # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; AL # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; AL # Lo BHAIKSUKI SIGN AVAGRAHA @@ -1401,6 +1419,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1328A..13378 ; AL # Lo [239] EGYPTIAN HIEROGLYPH O037..EGYPTIAN HIEROGLYPH V011 1337C..1342E ; AL # Lo [179] EGYPTIAN HIEROGLYPH V012..EGYPTIAN HIEROGLYPH AA032 13441..13446 ; AL # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; AL # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 14400..145CD ; AL # Lo [462] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A409 145D0..14646 ; AL # Lo [119] ANATOLIAN HIEROGLYPH A411..ANATOLIAN HIEROGLYPH A530 16800..16A38 ; AL # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ @@ -1415,6 +1434,10 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 16B5B..16B61 ; AL # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS 16B63..16B77 ; AL # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; AL # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; AL # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; AL # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; AL # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D6D ; AL # Po KIRAT RAI SIGN YUPI 16E40..16E7F ; AL # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16E80..16E96 ; AL # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM 16E99..16E9A ; AL # Po [2] MEDEFAIDRIN SYMBOL AIVA..MEDEFAIDRIN EXCLAMATION OH @@ -1422,6 +1445,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 16F50 ; AL # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; AL # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; AL # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 @@ -1430,6 +1454,8 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1BC80..1BC88 ; AL # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; AL # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1BC9C ; AL # So DUPLOYAN SIGN O WITH CROSS +1CC00..1CCEF ; AL # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z +1CD00..1CEB3 ; AL # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; AL # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; AL # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -1503,6 +1529,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1E2C0..1E2EB ; AL # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH 1E4D0..1E4EA ; AL # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL 1E4EB ; AL # Lm NAG MUNDARI SIGN OJOD +1E5D0..1E5ED ; AL # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; AL # Lo OL ONAL SIGN HODDOND +1E5FF ; AL # Po OL ONAL ABBREVIATION SIGN 1E7E0..1E7E6 ; AL # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO 1E7E8..1E7EB ; AL # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE 1E7ED..1E7EE ; AL # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1551,8 +1580,10 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; AL # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F10D..1F10F ; AL # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH 1F12E..1F12F ; AL # So [2] CIRCLED WZ..COPYLEFT SYMBOL -1F16A..1F16C ; AL # So [3] RAISED MC SIGN..RAISED MR SIGN +1F16A..1F16F ; AL # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE +1F1AD ; AL # So MASK WORK SYMBOL 1F39C..1F39D ; AL # So [2] BEAMED ASCENDING MUSICAL NOTES..BEAMED DESCENDING MUSICAL NOTES 1F3B5..1F3B6 ; AL # So [2] MUSICAL NOTE..MULTIPLE MUSICAL NOTES 1F3BC ; AL # So MUSICAL SCORE @@ -1575,18 +1606,19 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F850..1F859 ; AL # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8BB ; AL # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR +1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1FA00..1FA53 ; AL # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK -1FB94..1FBCA ; AL # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON +1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 21729 +# Total code points: 26679 # ================================================ # Line_Break=Ideographic -1B50..1B59 ; ID # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE 1B5C ; ID # Po BALINESE WINDU 1B61..1B6A ; ID # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE 1B74..1B7C ; ID # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING @@ -1660,7 +1692,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 3192..3195 ; ID # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; ID # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; ID # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH -31C0..31E3 ; ID # So [36] CJK STROKE T..CJK STROKE Q +31C0..31E5 ; ID # So [38] CJK STROKE T..CJK STROKE SZP 31EF ; ID # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 3200..321E ; ID # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3220..3229 ; ID # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN @@ -1678,9 +1710,7 @@ A016..A48C ; ID # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR A490..A4C6 ; ID # So [55] YI RADICAL QOT..YI RADICAL KE A9C1..A9C6 ; ID # Po [6] JAVANESE LEFT RERENGGAN..JAVANESE PADA WINDU A9CA..A9CD ; ID # Po [4] JAVANESE PADA ADEG..JAVANESE TURNED PADA PISELEH -A9D0..A9D9 ; ID # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE A9DE..A9DF ; ID # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN -AA50..AA59 ; ID # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE AA5C ; ID # Po CHAM PUNCTUATION SPIRAL F900..FA6D ; ID # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; ID # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 @@ -1727,7 +1757,10 @@ FFE3 ; ID # Sk FULLWIDTH MACRON FFE4 ; ID # So FULLWIDTH BROKEN BAR 11049..1104D ; ID # Po [5] BRAHMI PUNCTUATION DOT..BRAHMI PUNCTUATION LOTUS 11052..11065 ; ID # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND -11950..11959 ; ID # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +113B7 ; ID # Lo TULU-TIGALARI SIGN AVAGRAHA +113D3 ; ID # Lo TULU-TIGALARI SIGN PLUTA +113D4..113D5 ; ID # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; ID # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL 17000..187F7 ; ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 @@ -1740,9 +1773,6 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 1F0B1..1F0BF ; ID # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER 1F0C1..1F0CF ; ID # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER 1F0D1..1F0F5 ; ID # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 -1F10D..1F10F ; ID # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH -1F16D..1F16F ; ID # So [3] CIRCLED CC..CIRCLED HUMAN FIGURE -1F1AD ; ID # So MASK WORK SYMBOL 1F200..1F202 ; ID # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA 1F210..1F23B ; ID # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D 1F240..1F248 ; ID # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 @@ -1793,7 +1823,6 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 1F7D5..1F7D9 ; ID # So [5] CIRCLED TRIANGLE..NINE POINTED WHITE STAR 1F7E0..1F7EB ; ID # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; ID # So HEAVY EQUALS SIGN -1F8B0..1F8B1 ; ID # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST 1F90D..1F90E ; ID # So [2] WHITE HEART..BROWN HEART 1F910..1F917 ; ID # So [8] ZIPPER-MOUTH FACE..HUGGING FACE 1F920..1F925 ; ID # So [6] FACE WITH COWBOY HAT..LYING FACE @@ -1808,11 +1837,11 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 1F9DE..1F9FF ; ID # So [34] GENIE..NAZAR AMULET 1FA60..1FA6D ; ID # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; ID # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88 ; ID # So [9] YO-YO..FLUTE -1FA90..1FABD ; ID # So [46] RINGED PLANET..WING -1FABF..1FAC2 ; ID # So [4] GOOSE..PEOPLE HUGGING -1FACE..1FADB ; ID # So [14] MOOSE..PEA POD -1FAE0..1FAE8 ; ID # So [9] MELTING FACE..SHAKING FACE +1FA80..1FA89 ; ID # So [10] YO-YO..HARP +1FA8F..1FAC2 ; ID # So [52] SHOVEL..PEOPLE HUGGING +1FAC6 ; ID # So FINGERPRINT +1FACE..1FADC ; ID # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; ID # So [11] SPLATTER..FACE WITH BAGS UNDER EYES 20000..2A6DF ; ID # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; ID # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 2B740..2B81D ; ID # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D @@ -1823,8 +1852,8 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# The above property value applies to 61978 code points not listed here. -# Total code points: 172568 +# The above property value applies to 61865 code points not listed here. +# Total code points: 172421 # ================================================ @@ -1881,7 +1910,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 0825..0827 ; CM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; CM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; CM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; CM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; CM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; CM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; CM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 0903 ; CM # Mc DEVANAGARI SIGN VISARGA @@ -2133,7 +2162,13 @@ ABEC ; CM # Mc MEETEI MAYEK LUM IYEK ABED ; CM # Mn MEETEI MAYEK APUN IYEK FB1E ; CM # Mn HEBREW POINT JUDEO-SPANISH VARIKA FE00..FE0F ; CM # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 -FE20..FE2F ; CM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE21 ; CM # Mn COMBINING LIGATURE RIGHT HALF +FE23 ; CM # Mn COMBINING DOUBLE TILDE RIGHT HALF +FE25 ; CM # Mn COMBINING MACRON RIGHT HALF +FE28 ; CM # Mn COMBINING LIGATURE RIGHT HALF BELOW +FE2A ; CM # Mn COMBINING TILDE RIGHT HALF BELOW +FE2C ; CM # Mn COMBINING MACRON RIGHT HALF BELOW +FE2F ; CM # Mn COMBINING CYRILLIC TITLO RIGHT HALF FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR 101FD ; CM # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE 102E0 ; CM # Mn COPTIC EPACT THOUSANDS MARK @@ -2145,8 +2180,9 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 10A3F ; CM # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; CM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; CM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; CM # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; CM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; CM # Mc BRAHMI SIGN CANDRABINDU @@ -2199,6 +2235,16 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 11362..11363 ; CM # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; CM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; CM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113B8..113BA ; CM # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; CM # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; CM # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; CM # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; CM # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; CM # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; CM # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; CM # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D2 ; CM # Mn TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; CM # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11435..11437 ; CM # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; CM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11440..11441 ; CM # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -2294,8 +2340,12 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 11F3E..11F3F ; CM # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F40 ; CM # Mn KAWI VOWEL SIGN EU 11F41 ; CM # Mc KAWI SIGN KILLER +11F5A ; CM # Mn KAWI SIGN NUKTA 13440 ; CM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; CM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; CM # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; CM # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; CM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; CM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; CM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; CM # Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -2330,13 +2380,14 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 1E2AE ; CM # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; CM # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; CM # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; CM # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR 1E8D0..1E8D6 ; CM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; CM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2429 +# Total code points: 2470 # ================================================ @@ -2398,9 +2449,10 @@ A8FC ; BB # Po DEVANAGARI SIGN SIDDHAM 17D8 ; BA # Po KHMER SIGN BEYYAL 17DA ; BA # Po KHMER SIGN KOOMUUT 1804..1805 ; BA # Po [2] MONGOLIAN COLON..MONGOLIAN FOUR DOTS +1B4E..1B4F ; BA # Po [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN 1B5A..1B5B ; BA # Po [2] BALINESE PANTI..BALINESE PAMADA 1B5D..1B60 ; BA # Po [4] BALINESE CARIK PAMUNGKAH..BALINESE PAMENENG -1B7D..1B7E ; BA # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B7D..1B7F ; BA # Po [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK 1C3B..1C3F ; BA # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK 1C7E..1C7F ; BA # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD 2000..2006 ; BA # Zs [7] EN QUAD..SIX-PER-EM SPACE @@ -2450,6 +2502,7 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 10A50..10A57 ; BA # Po [8] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION DOUBLE DANDA 10AF0..10AF5 ; BA # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS 10B39..10B3F ; BA # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10D6E ; BA # Pd GARAY HYPHEN 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 11047..11048 ; BA # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; BA # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA @@ -2482,11 +2535,12 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 16AF5 ; BA # Po BASSA VAH FULL STOP 16B37..16B39 ; BA # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM 16B44 ; BA # Po PAHAWH HMONG SIGN XAUS +16D6E..16D6F ; BA # Po [2] KIRAT RAI DANDA..KIRAT RAI DOUBLE DANDA 16E97..16E98 ; BA # Po [2] MEDEFAIDRIN COMMA..MEDEFAIDRIN FULL STOP 1BC9F ; BA # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; BA # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 266 +# Total code points: 272 # ================================================ @@ -2602,7 +2656,6 @@ FFFC ; CB # So OBJECT REPLACEMENT CHARACTER 1970..1974 ; SA # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 1980..19AB ; SA # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA 19B0..19C9 ; SA # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 -19DA ; SA # No NEW TAI LUE THAM DIGIT ONE 19DE..19DF ; SA # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV 1A20..1A54 ; SA # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA 1A55 ; SA # Mc TAI THAM CONSONANT SIGN MEDIAL RA @@ -2647,7 +2700,9 @@ AADB..AADC ; SA # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; SA # Lm TAI VIET SYMBOL SAM AADE..AADF ; SA # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI 11700..1171A ; SA # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F ; SA # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; SA # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; SA # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; SA # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; SA # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; SA # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; SA # Mc AHOM VOWEL SIGN E @@ -2656,7 +2711,7 @@ AADE..AADF ; SA # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI 1173F ; SA # So AHOM SYMBOL VI 11740..11746 ; SA # Lo [7] AHOM LETTER CA..AHOM LETTER LLA -# Total code points: 758 +# Total code points: 757 # ================================================ @@ -2690,9 +2745,7 @@ AADE..AADF ; SA # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI 2113 ; AI # L& SCRIPT SMALL L 2121..2122 ; AI # So [2] TELEPHONE SIGN..TRADE MARK SIGN 212B ; AI # L& ANGSTROM SIGN -2154..2155 ; AI # No [2] VULGAR FRACTION TWO THIRDS..VULGAR FRACTION ONE FIFTH -215B ; AI # No VULGAR FRACTION ONE EIGHTH -215E ; AI # No VULGAR FRACTION SEVEN EIGHTHS +2150..215E ; AI # No [15] VULGAR FRACTION ONE SEVENTH..VULGAR FRACTION SEVEN EIGHTHS 2160..216B ; AI # Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE 2170..2179 ; AI # Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN 2189 ; AI # No VULGAR FRACTION ZERO THIRDS @@ -2782,7 +2835,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1F130..1F169 ; AI # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F1AC ; AI # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD -# Total code points: 707 +# Total code points: 718 # ================================================ @@ -3671,8 +3724,12 @@ D789..D7A3 ; H3 # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH 0029 ; CP # Pe RIGHT PARENTHESIS 005D ; CP # Pe RIGHT SQUARE BRACKET +2E56 ; CP # Pe RIGHT SQUARE BRACKET WITH STROKE +2E58 ; CP # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E5A ; CP # Pe TOP HALF RIGHT PARENTHESIS +2E5C ; CP # Pe BOTTOM HALF RIGHT PARENTHESIS -# Total code points: 2 +# Total code points: 6 # ================================================ @@ -3816,6 +3873,7 @@ A984..A9B2 ; AK # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA 11332..11333 ; AK # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA 11335..11339 ; AK # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA 11360..11361 ; AK # Lo [2] GRANTHA LETTER VOCALIC RR..GRANTHA LETTER VOCALIC LL +11392..113B5 ; AK # Lo [36] TULU-TIGALARI LETTER KA..TULU-TIGALARI LETTER LLLA 11900..11906 ; AK # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E 11909 ; AK # Lo DIVES AKURU LETTER O 1190C..11913 ; AK # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA @@ -3824,32 +3882,43 @@ A984..A9B2 ; AK # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA 11F04..11F10 ; AK # Lo [13] KAWI LETTER A..KAWI LETTER O 11F12..11F33 ; AK # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA -# Total code points: 293 +# Total code points: 329 # ================================================ # Line_Break=Aksara_Prebase 11003..11004 ; AP # Lo [2] BRAHMI SIGN JIHVAMULIYA..BRAHMI SIGN UPADHMANIYA +113D1 ; AP # Lo TULU-TIGALARI REPHA 1193F ; AP # Lo DIVES AKURU PREFIXED NASAL SIGN 11941 ; AP # Lo DIVES AKURU INITIAL RA 11F02 ; AP # Lo KAWI SIGN REPHA -# Total code points: 5 +# Total code points: 6 # ================================================ # Line_Break=Aksara_Start +1B50..1B59 ; AS # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE 1BC0..1BE5 ; AS # Lo [38] BATAK LETTER A..BATAK LETTER U +A9D0..A9D9 ; AS # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE AA00..AA28 ; AS # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA50..AA59 ; AS # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE 11066..1106F ; AS # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE 11350 ; AS # Lo GRANTHA OM 1135E..1135F ; AS # Lo [2] GRANTHA LETTER VEDIC ANUSVARA..GRANTHA LETTER VEDIC DOUBLE ANUSVARA +11380..11389 ; AS # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; AS # Lo TULU-TIGALARI LETTER EE +1138E ; AS # Lo TULU-TIGALARI LETTER AI +11390..11391 ; AS # Lo [2] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER AU +11950..11959 ; AS # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE 11EE0..11EF1 ; AS # Lo [18] MAKASAR LETTER KA..MAKASAR LETTER A 11F50..11F59 ; AS # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16100..1611D ; AS # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +16130..16139 ; AS # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE -# Total code points: 120 +# Total code points: 214 # ================================================ @@ -3859,10 +3928,11 @@ AA00..AA28 ; AS # Lo [41] CHAM LETTER A..CHAM LETTER HA A9C0 ; VI # Mc JAVANESE PANGKON 11046 ; VI # Mn BRAHMI VIRAMA 1134D ; VI # Mc GRANTHA SIGN VIRAMA +113D0 ; VI # Mn TULU-TIGALARI CONJOINER 1193E ; VI # Mn DIVES AKURU VIRAMA 11F42 ; VI # Mn KAWI CONJOINER -# Total code points: 6 +# Total code points: 7 # ================================================ diff --git a/data/DerivedNormalizationProps b/data/DerivedNormalizationProps index 7f8edd6..ce636ab 100644 --- a/data/DerivedNormalizationProps +++ b/data/DerivedNormalizationProps @@ -1,8 +1,8 @@ -# DerivedNormalizationProps-15.1.0.txt -# Date: 2023-05-02, 13:20:58 GMT -# © 2023 Unicode®, Inc. +# DerivedNormalizationProps-16.0.0.txt +# Date: 2024-04-30, 21:48:18 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -136,6 +136,32 @@ A7F2 ; FC_NFKC; 0063 # Lm MODIFIER LETTER CAPITAL C A7F3 ; FC_NFKC; 0066 # Lm MODIFIER LETTER CAPITAL F A7F4 ; FC_NFKC; 0071 # Lm MODIFIER LETTER CAPITAL Q A7F8 ; FC_NFKC; 0127 # Lm MODIFIER LETTER CAPITAL H WITH STROKE +1CCD6 ; FC_NFKC; 0061 # So OUTLINED LATIN CAPITAL LETTER A +1CCD7 ; FC_NFKC; 0062 # So OUTLINED LATIN CAPITAL LETTER B +1CCD8 ; FC_NFKC; 0063 # So OUTLINED LATIN CAPITAL LETTER C +1CCD9 ; FC_NFKC; 0064 # So OUTLINED LATIN CAPITAL LETTER D +1CCDA ; FC_NFKC; 0065 # So OUTLINED LATIN CAPITAL LETTER E +1CCDB ; FC_NFKC; 0066 # So OUTLINED LATIN CAPITAL LETTER F +1CCDC ; FC_NFKC; 0067 # So OUTLINED LATIN CAPITAL LETTER G +1CCDD ; FC_NFKC; 0068 # So OUTLINED LATIN CAPITAL LETTER H +1CCDE ; FC_NFKC; 0069 # So OUTLINED LATIN CAPITAL LETTER I +1CCDF ; FC_NFKC; 006A # So OUTLINED LATIN CAPITAL LETTER J +1CCE0 ; FC_NFKC; 006B # So OUTLINED LATIN CAPITAL LETTER K +1CCE1 ; FC_NFKC; 006C # So OUTLINED LATIN CAPITAL LETTER L +1CCE2 ; FC_NFKC; 006D # So OUTLINED LATIN CAPITAL LETTER M +1CCE3 ; FC_NFKC; 006E # So OUTLINED LATIN CAPITAL LETTER N +1CCE4 ; FC_NFKC; 006F # So OUTLINED LATIN CAPITAL LETTER O +1CCE5 ; FC_NFKC; 0070 # So OUTLINED LATIN CAPITAL LETTER P +1CCE6 ; FC_NFKC; 0071 # So OUTLINED LATIN CAPITAL LETTER Q +1CCE7 ; FC_NFKC; 0072 # So OUTLINED LATIN CAPITAL LETTER R +1CCE8 ; FC_NFKC; 0073 # So OUTLINED LATIN CAPITAL LETTER S +1CCE9 ; FC_NFKC; 0074 # So OUTLINED LATIN CAPITAL LETTER T +1CCEA ; FC_NFKC; 0075 # So OUTLINED LATIN CAPITAL LETTER U +1CCEB ; FC_NFKC; 0076 # So OUTLINED LATIN CAPITAL LETTER V +1CCEC ; FC_NFKC; 0077 # So OUTLINED LATIN CAPITAL LETTER W +1CCED ; FC_NFKC; 0078 # So OUTLINED LATIN CAPITAL LETTER X +1CCEE ; FC_NFKC; 0079 # So OUTLINED LATIN CAPITAL LETTER Y +1CCEF ; FC_NFKC; 007A # So OUTLINED LATIN CAPITAL LETTER Z 1D400 ; FC_NFKC; 0061 # L& MATHEMATICAL BOLD CAPITAL A 1D401 ; FC_NFKC; 0062 # L& MATHEMATICAL BOLD CAPITAL B 1D402 ; FC_NFKC; 0063 # L& MATHEMATICAL BOLD CAPITAL C @@ -653,7 +679,7 @@ A7F8 ; FC_NFKC; 0127 # Lm MODIFIER LETTER CAPITAL H WITH STROKE 1F16C ; FC_NFKC; 006D 0072 # So RAISED MR SIGN 1F190 ; FC_NFKC; 0064 006A # So SQUARE DJ -# Total code points: 637 +# Total code points: 663 # ================================================ @@ -981,20 +1007,30 @@ FB3E ; NFD_QC; N # Lo HEBREW LETTER MEM WITH DAGESH FB40..FB41 ; NFD_QC; N # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH FB43..FB44 ; NFD_QC; N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FB4E ; NFD_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LETTER PE WITH RAFE +105C9 ; NFD_QC; N # Lo TODHRI LETTER EI +105E4 ; NFD_QC; N # Lo TODHRI LETTER U 1109A ; NFD_QC; N # Lo KAITHI LETTER DDDHA 1109C ; NFD_QC; N # Lo KAITHI LETTER RHA 110AB ; NFD_QC; N # Lo KAITHI LETTER VA 1112E..1112F ; NFD_QC; N # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1134B..1134C ; NFD_QC; N # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11383 ; NFD_QC; N # Lo TULU-TIGALARI LETTER II +11385 ; NFD_QC; N # Lo TULU-TIGALARI LETTER UU +1138E ; NFD_QC; N # Lo TULU-TIGALARI LETTER AI +11391 ; NFD_QC; N # Lo TULU-TIGALARI LETTER AU +113C5 ; NFD_QC; N # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C8 ; NFD_QC; N # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BB..114BC ; NFD_QC; N # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O 114BE ; NFD_QC; N # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; NFD_QC; N # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU 11938 ; NFD_QC; N # Mc DIVES AKURU VOWEL SIGN O +16121..16128 ; NFD_QC; N # Mn [8] GURUNG KHEMA VOWEL SIGN U..GURUNG KHEMA VOWEL SIGN AU +16D68..16D6A ; NFD_QC; N # Lo [3] KIRAT RAI VOWEL SIGN AI..KIRAT RAI VOWEL SIGN AU 1D15E..1D164 ; NFD_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; NFD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; NFD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 13233 +# Total code points: 13253 # ================================================ @@ -1128,13 +1164,20 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 11127 ; NFC_QC; M # Mn CHAKMA VOWEL SIGN A 1133E ; NFC_QC; M # Mc GRANTHA VOWEL SIGN AA 11357 ; NFC_QC; M # Mc GRANTHA AU LENGTH MARK +113B8 ; NFC_QC; M # Mc TULU-TIGALARI VOWEL SIGN AA +113BB ; NFC_QC; M # Mn TULU-TIGALARI VOWEL SIGN U +113C2 ; NFC_QC; M # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; NFC_QC; M # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C9 ; NFC_QC; M # Mc [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK 114B0 ; NFC_QC; M # Mc TIRHUTA VOWEL SIGN AA 114BA ; NFC_QC; M # Mn TIRHUTA VOWEL SIGN SHORT E 114BD ; NFC_QC; M # Mc TIRHUTA VOWEL SIGN SHORT O 115AF ; NFC_QC; M # Mc SIDDHAM VOWEL SIGN AA 11930 ; NFC_QC; M # Mc DIVES AKURU VOWEL SIGN AA +1611E..16129 ; NFC_QC; M # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +16D67..16D68 ; NFC_QC; M # Lo [2] KIRAT RAI VOWEL SIGN E..KIRAT RAI VOWEL SIGN AI -# Total code points: 111 +# Total code points: 132 # ================================================ @@ -1597,6 +1640,8 @@ FFE5..FFE6 ; NFKD_QC; N # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN FFE8 ; NFKD_QC; N # So HALFWIDTH FORMS LIGHT VERTICAL FFE9..FFEC ; NFKD_QC; N # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +105C9 ; NFKD_QC; N # Lo TODHRI LETTER EI +105E4 ; NFKD_QC; N # Lo TODHRI LETTER U 10781..10785 ; NFKD_QC; N # Lm [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; NFKD_QC; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; NFKD_QC; N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL @@ -1605,10 +1650,20 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 110AB ; NFKD_QC; N # Lo KAITHI LETTER VA 1112E..1112F ; NFKD_QC; N # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1134B..1134C ; NFKD_QC; N # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11383 ; NFKD_QC; N # Lo TULU-TIGALARI LETTER II +11385 ; NFKD_QC; N # Lo TULU-TIGALARI LETTER UU +1138E ; NFKD_QC; N # Lo TULU-TIGALARI LETTER AI +11391 ; NFKD_QC; N # Lo TULU-TIGALARI LETTER AU +113C5 ; NFKD_QC; N # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C8 ; NFKD_QC; N # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BB..114BC ; NFKD_QC; N # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O 114BE ; NFKD_QC; N # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; NFKD_QC; N # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU 11938 ; NFKD_QC; N # Mc DIVES AKURU VOWEL SIGN O +16121..16128 ; NFKD_QC; N # Mn [8] GURUNG KHEMA VOWEL SIGN U..GURUNG KHEMA VOWEL SIGN AU +16D68..16D6A ; NFKD_QC; N # Lo [3] KIRAT RAI VOWEL SIGN AI..KIRAT RAI VOWEL SIGN AU +1CCD6..1CCEF ; NFKD_QC; N # So [26] OUTLINED LATIN CAPITAL LETTER A..OUTLINED LATIN CAPITAL LETTER Z +1CCF0..1CCF9 ; NFKD_QC; N # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D15E..1D164 ; NFKD_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; NFKD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 1D400..1D454 ; NFKD_QC; N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G @@ -1698,7 +1753,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKD_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 17029 +# Total code points: 17085 # ================================================ @@ -2018,6 +2073,8 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 10781..10785 ; NFKC_QC; N # Lm [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; NFKC_QC; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; NFKC_QC; N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1CCD6..1CCEF ; NFKC_QC; N # So [26] OUTLINED LATIN CAPITAL LETTER A..OUTLINED LATIN CAPITAL LETTER Z +1CCF0..1CCF9 ; NFKC_QC; N # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D15E..1D164 ; NFKC_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; NFKC_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 1D400..1D454 ; NFKC_QC; N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G @@ -2107,7 +2164,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKC_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 4928 +# Total code points: 4964 # ================================================ @@ -2151,13 +2208,20 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 11127 ; NFKC_QC; M # Mn CHAKMA VOWEL SIGN A 1133E ; NFKC_QC; M # Mc GRANTHA VOWEL SIGN AA 11357 ; NFKC_QC; M # Mc GRANTHA AU LENGTH MARK +113B8 ; NFKC_QC; M # Mc TULU-TIGALARI VOWEL SIGN AA +113BB ; NFKC_QC; M # Mn TULU-TIGALARI VOWEL SIGN U +113C2 ; NFKC_QC; M # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; NFKC_QC; M # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C9 ; NFKC_QC; M # Mc [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK 114B0 ; NFKC_QC; M # Mc TIRHUTA VOWEL SIGN AA 114BA ; NFKC_QC; M # Mn TIRHUTA VOWEL SIGN SHORT E 114BD ; NFKC_QC; M # Mc TIRHUTA VOWEL SIGN SHORT O 115AF ; NFKC_QC; M # Mc SIDDHAM VOWEL SIGN AA 11930 ; NFKC_QC; M # Mc DIVES AKURU VOWEL SIGN AA +1611E..16129 ; NFKC_QC; M # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +16D67..16D68 ; NFKC_QC; M # Lo [2] KIRAT RAI VOWEL SIGN E..KIRAT RAI VOWEL SIGN AI -# Total code points: 111 +# Total code points: 132 # ================================================ @@ -2379,19 +2443,29 @@ FB3E ; Expands_On_NFD # Lo HEBREW LETTER MEM WITH DAGESH FB40..FB41 ; Expands_On_NFD # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH FB43..FB44 ; Expands_On_NFD # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FB4E ; Expands_On_NFD # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LETTER PE WITH RAFE +105C9 ; Expands_On_NFD # Lo TODHRI LETTER EI +105E4 ; Expands_On_NFD # Lo TODHRI LETTER U 1109A ; Expands_On_NFD # Lo KAITHI LETTER DDDHA 1109C ; Expands_On_NFD # Lo KAITHI LETTER RHA 110AB ; Expands_On_NFD # Lo KAITHI LETTER VA 1112E..1112F ; Expands_On_NFD # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1134B..1134C ; Expands_On_NFD # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11383 ; Expands_On_NFD # Lo TULU-TIGALARI LETTER II +11385 ; Expands_On_NFD # Lo TULU-TIGALARI LETTER UU +1138E ; Expands_On_NFD # Lo TULU-TIGALARI LETTER AI +11391 ; Expands_On_NFD # Lo TULU-TIGALARI LETTER AU +113C5 ; Expands_On_NFD # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C8 ; Expands_On_NFD # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BB..114BC ; Expands_On_NFD # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O 114BE ; Expands_On_NFD # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; Expands_On_NFD # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU 11938 ; Expands_On_NFD # Mc DIVES AKURU VOWEL SIGN O +16121..16128 ; Expands_On_NFD # Mn [8] GURUNG KHEMA VOWEL SIGN U..GURUNG KHEMA VOWEL SIGN AU +16D68..16D6A ; Expands_On_NFD # Lo [3] KIRAT RAI VOWEL SIGN AI..KIRAT RAI VOWEL SIGN AU 1D15E..1D164 ; Expands_On_NFD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Expands_On_NFD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK -# Total code points: 12216 +# Total code points: 12236 # ================================================ @@ -2733,15 +2807,25 @@ FE76..FE7F ; Expands_On_NFKD # Lo [10] ARABIC FATHA ISOLATED FORM..ARABIC SU FE81..FE8C ; Expands_On_NFKD # Lo [12] ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM..ARABIC LETTER YEH WITH HAMZA ABOVE MEDIAL FORM FEF5..FEFC ; Expands_On_NFKD # Lo [8] ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON +105C9 ; Expands_On_NFKD # Lo TODHRI LETTER EI +105E4 ; Expands_On_NFKD # Lo TODHRI LETTER U 1109A ; Expands_On_NFKD # Lo KAITHI LETTER DDDHA 1109C ; Expands_On_NFKD # Lo KAITHI LETTER RHA 110AB ; Expands_On_NFKD # Lo KAITHI LETTER VA 1112E..1112F ; Expands_On_NFKD # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1134B..1134C ; Expands_On_NFKD # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11383 ; Expands_On_NFKD # Lo TULU-TIGALARI LETTER II +11385 ; Expands_On_NFKD # Lo TULU-TIGALARI LETTER UU +1138E ; Expands_On_NFKD # Lo TULU-TIGALARI LETTER AI +11391 ; Expands_On_NFKD # Lo TULU-TIGALARI LETTER AU +113C5 ; Expands_On_NFKD # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C8 ; Expands_On_NFKD # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BB..114BC ; Expands_On_NFKD # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O 114BE ; Expands_On_NFKD # Mc TIRHUTA VOWEL SIGN AU 115BA..115BB ; Expands_On_NFKD # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU 11938 ; Expands_On_NFKD # Mc DIVES AKURU VOWEL SIGN O +16121..16128 ; Expands_On_NFKD # Mn [8] GURUNG KHEMA VOWEL SIGN U..GURUNG KHEMA VOWEL SIGN AU +16D68..16D6A ; Expands_On_NFKD # Lo [3] KIRAT RAI VOWEL SIGN AI..KIRAT RAI VOWEL SIGN AU 1D15E..1D164 ; Expands_On_NFKD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Expands_On_NFKD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 1F100..1F10A ; Expands_On_NFKD # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA @@ -2754,7 +2838,7 @@ FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON 1F213 ; Expands_On_NFKD # So SQUARED KATAKANA DE 1F240..1F248 ; Expands_On_NFKD # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 -# Total code points: 13390 +# Total code points: 13410 # ================================================ @@ -3545,6 +3629,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 1C86 ; NFKC_CF; 044A # L& CYRILLIC SMALL LETTER TALL HARD SIGN 1C87 ; NFKC_CF; 0463 # L& CYRILLIC SMALL LETTER TALL YAT 1C88 ; NFKC_CF; A64B # L& CYRILLIC SMALL LETTER UNBLENDED UK +1C89 ; NFKC_CF; 1C8A # L& CYRILLIC CAPITAL LETTER TJE 1C90 ; NFKC_CF; 10D0 # L& GEORGIAN MTAVRULI CAPITAL LETTER AN 1C91 ; NFKC_CF; 10D1 # L& GEORGIAN MTAVRULI CAPITAL LETTER BAN 1C92 ; NFKC_CF; 10D2 # L& GEORGIAN MTAVRULI CAPITAL LETTER GAN @@ -5361,9 +5446,13 @@ A7C5 ; NFKC_CF; 0282 # L& LATIN CAPITAL LETTER S WITH H A7C6 ; NFKC_CF; 1D8E # L& LATIN CAPITAL LETTER Z WITH PALATAL HOOK A7C7 ; NFKC_CF; A7C8 # L& LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY A7C9 ; NFKC_CF; A7CA # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB ; NFKC_CF; 0264 # L& LATIN CAPITAL LETTER RAMS HORN +A7CC ; NFKC_CF; A7CD # L& LATIN CAPITAL LETTER S WITH DIAGONAL STROKE A7D0 ; NFKC_CF; A7D1 # L& LATIN CAPITAL LETTER CLOSED INSULAR G A7D6 ; NFKC_CF; A7D7 # L& LATIN CAPITAL LETTER MIDDLE SCOTS S A7D8 ; NFKC_CF; A7D9 # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; NFKC_CF; A7DB # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; NFKC_CF; 019B # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE A7F2 ; NFKC_CF; 0063 # Lm MODIFIER LETTER CAPITAL C A7F3 ; NFKC_CF; 0066 # Lm MODIFIER LETTER CAPITAL F A7F4 ; NFKC_CF; 0071 # Lm MODIFIER LETTER CAPITAL Q @@ -7009,6 +7098,28 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] ........ -# Total code points: 10491 +# Total code points: 10554 # ================================================ @@ -9652,6 +9799,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] .......... -# Total code points: 10453 +# Total code points: 10516 # ================================================ @@ -15411,7 +15621,7 @@ E01F0..E0FFF ; NFKC_SCF; # Cn [3600] ........ -# Total code points: 10491 +# Total code points: 10554 # EOF diff --git a/data/DerivedNumericType b/data/DerivedNumericType index 062f4fb..8e48d0b 100644 --- a/data/DerivedNumericType +++ b/data/DerivedNumericType @@ -1,8 +1,8 @@ -# DerivedNumericType-15.1.0.txt -# Date: 2023-01-05, 20:34:41 GMT -# © 2023 Unicode®, Inc. +# DerivedNumericType-16.0.0.txt +# Date: 2024-04-30, 21:48:20 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -260,6 +260,7 @@ ABF0..ABF9 ; Decimal # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Decimal # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10D30..10D39 ; Decimal # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; Decimal # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 11066..1106F ; Decimal # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE 110F0..110F9 ; Decimal # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE 11136..1113F ; Decimal # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE @@ -269,23 +270,29 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 114D0..114D9 ; Decimal # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Decimal # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Decimal # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; Decimal # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11730..11739 ; Decimal # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; Decimal # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE 11950..11959 ; Decimal # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +11BF0..11BF9 ; Decimal # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C50..11C59 ; Decimal # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE 11F50..11F59 ; Decimal # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16130..16139 ; Decimal # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE 16A60..16A69 ; Decimal # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE 16AC0..16AC9 ; Decimal # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16B50..16B59 ; Decimal # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16D70..16D79 ; Decimal # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +1CCF0..1CCF9 ; Decimal # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE 1D7CE..1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE 1E140..1E149 ; Decimal # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE 1E2F0..1E2F9 ; Decimal # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE 1E4F0..1E4F9 ; Decimal # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5F1..1E5FA ; Decimal # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE 1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 680 +# Total code points: 760 # EOF diff --git a/data/DerivedNumericValues b/data/DerivedNumericValues index e671646..ae1f99c 100644 --- a/data/DerivedNumericValues +++ b/data/DerivedNumericValues @@ -1,8 +1,8 @@ -# DerivedNumericValues-15.1.0.txt -# Date: 2023-01-05, 20:34:41 GMT -# © 2023 Unicode®, Inc. +# DerivedNumericValues-16.0.0.txt +# Date: 2024-04-30, 21:48:20 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -14,7 +14,7 @@ # The values are based on field 8 of UnicodeData.txt, plus the fields # kAccountingNumeric, kOtherNumeric, kPrimaryNumeric in the Unicode Han Database (Unihan). # The derivations for these values are as follows. -# Numeric_Value = the value of kAccountingNumeric, kOtherNumeric, or kPrimaryNumeric, if they exist; otherwise +# Numeric_Value = the first-listed value of kAccountingNumeric, kOtherNumeric, or kPrimaryNumeric, if they exist; otherwise # Numeric_Value = the value of field 8, if it exists; otherwise # Numeric_Value = NaN # Field 2: @@ -91,6 +91,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1018A ; 0.0 ; ; 0 # No GREEK ZERO SIGN 104A0 ; 0.0 ; ; 0 # Nd OSMANYA DIGIT ZERO 10D30 ; 0.0 ; ; 0 # Nd HANIFI ROHINGYA DIGIT ZERO +10D40 ; 0.0 ; ; 0 # Nd GARAY DIGIT ZERO 11066 ; 0.0 ; ; 0 # Nd BRAHMI DIGIT ZERO 110F0 ; 0.0 ; ; 0 # Nd SORA SOMPENG DIGIT ZERO 11136 ; 0.0 ; ; 0 # Nd CHAKMA DIGIT ZERO @@ -100,17 +101,23 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 114D0 ; 0.0 ; ; 0 # Nd TIRHUTA DIGIT ZERO 11650 ; 0.0 ; ; 0 # Nd MODI DIGIT ZERO 116C0 ; 0.0 ; ; 0 # Nd TAKRI DIGIT ZERO +116D0 ; 0.0 ; ; 0 # Nd MYANMAR PAO DIGIT ZERO +116DA ; 0.0 ; ; 0 # Nd MYANMAR EASTERN PWO KAREN DIGIT ZERO 11730 ; 0.0 ; ; 0 # Nd AHOM DIGIT ZERO 118E0 ; 0.0 ; ; 0 # Nd WARANG CITI DIGIT ZERO 11950 ; 0.0 ; ; 0 # Nd DIVES AKURU DIGIT ZERO +11BF0 ; 0.0 ; ; 0 # Nd SUNUWAR DIGIT ZERO 11C50 ; 0.0 ; ; 0 # Nd BHAIKSUKI DIGIT ZERO 11D50 ; 0.0 ; ; 0 # Nd MASARAM GONDI DIGIT ZERO 11DA0 ; 0.0 ; ; 0 # Nd GUNJALA GONDI DIGIT ZERO 11F50 ; 0.0 ; ; 0 # Nd KAWI DIGIT ZERO +16130 ; 0.0 ; ; 0 # Nd GURUNG KHEMA DIGIT ZERO 16A60 ; 0.0 ; ; 0 # Nd MRO DIGIT ZERO 16AC0 ; 0.0 ; ; 0 # Nd TANGSA DIGIT ZERO 16B50 ; 0.0 ; ; 0 # Nd PAHAWH HMONG DIGIT ZERO +16D70 ; 0.0 ; ; 0 # Nd KIRAT RAI DIGIT ZERO 16E80 ; 0.0 ; ; 0 # No MEDEFAIDRIN DIGIT ZERO +1CCF0 ; 0.0 ; ; 0 # Nd OUTLINED DIGIT ZERO 1D2C0 ; 0.0 ; ; 0 # No KAKTOVIK NUMERAL ZERO 1D2E0 ; 0.0 ; ; 0 # No MAYAN NUMERAL ZERO 1D7CE ; 0.0 ; ; 0 # Nd MATHEMATICAL BOLD DIGIT ZERO @@ -121,12 +128,13 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1E140 ; 0.0 ; ; 0 # Nd NYIAKENG PUACHUE HMONG DIGIT ZERO 1E2F0 ; 0.0 ; ; 0 # Nd WANCHO DIGIT ZERO 1E4F0 ; 0.0 ; ; 0 # Nd NAG MUNDARI DIGIT ZERO +1E5F1 ; 0.0 ; ; 0 # Nd OL ONAL DIGIT ZERO 1E950 ; 0.0 ; ; 0 # Nd ADLAM DIGIT ZERO 1F100..1F101 ; 0.0 ; ; 0 # No [2] DIGIT ZERO FULL STOP..DIGIT ZERO COMMA 1F10B..1F10C ; 0.0 ; ; 0 # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO 1FBF0 ; 0.0 ; ; 0 # Nd SEGMENTED DIGIT ZERO -# Total code points: 88 +# Total code points: 96 # ================================================ @@ -496,6 +504,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 10BA9 ; 1.0 ; ; 1 # No PSALTER PAHLAVI NUMBER ONE 10CFA ; 1.0 ; ; 1 # No OLD HUNGARIAN NUMBER ONE 10D31 ; 1.0 ; ; 1 # Nd HANIFI ROHINGYA DIGIT ONE +10D41 ; 1.0 ; ; 1 # Nd GARAY DIGIT ONE 10E60 ; 1.0 ; ; 1 # No RUMI DIGIT ONE 10F1D ; 1.0 ; ; 1 # No OLD SOGDIAN NUMBER ONE 10F51 ; 1.0 ; ; 1 # No SOGDIAN NUMBER ONE @@ -511,9 +520,12 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 114D1 ; 1.0 ; ; 1 # Nd TIRHUTA DIGIT ONE 11651 ; 1.0 ; ; 1 # Nd MODI DIGIT ONE 116C1 ; 1.0 ; ; 1 # Nd TAKRI DIGIT ONE +116D1 ; 1.0 ; ; 1 # Nd MYANMAR PAO DIGIT ONE +116DB ; 1.0 ; ; 1 # Nd MYANMAR EASTERN PWO KAREN DIGIT ONE 11731 ; 1.0 ; ; 1 # Nd AHOM DIGIT ONE 118E1 ; 1.0 ; ; 1 # Nd WARANG CITI DIGIT ONE 11951 ; 1.0 ; ; 1 # Nd DIVES AKURU DIGIT ONE +11BF1 ; 1.0 ; ; 1 # Nd SUNUWAR DIGIT ONE 11C51 ; 1.0 ; ; 1 # Nd BHAIKSUKI DIGIT ONE 11C5A ; 1.0 ; ; 1 # No BHAIKSUKI NUMBER ONE 11D51 ; 1.0 ; ; 1 # Nd MASARAM GONDI DIGIT ONE @@ -525,11 +537,14 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 12434 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE BURU 1244F ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE BAN2 12458 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE ESHE3 +16131 ; 1.0 ; ; 1 # Nd GURUNG KHEMA DIGIT ONE 16A61 ; 1.0 ; ; 1 # Nd MRO DIGIT ONE 16AC1 ; 1.0 ; ; 1 # Nd TANGSA DIGIT ONE 16B51 ; 1.0 ; ; 1 # Nd PAHAWH HMONG DIGIT ONE +16D71 ; 1.0 ; ; 1 # Nd KIRAT RAI DIGIT ONE 16E81 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE 16E94 ; 1.0 ; ; 1 # No MEDEFAIDRIN DIGIT ONE ALTERNATE FORM +1CCF1 ; 1.0 ; ; 1 # Nd OUTLINED DIGIT ONE 1D2C1 ; 1.0 ; ; 1 # No KAKTOVIK NUMERAL ONE 1D2E1 ; 1.0 ; ; 1 # No MAYAN NUMERAL ONE 1D360 ; 1.0 ; ; 1 # No COUNTING ROD UNIT DIGIT ONE @@ -543,6 +558,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1E141 ; 1.0 ; ; 1 # Nd NYIAKENG PUACHUE HMONG DIGIT ONE 1E2F1 ; 1.0 ; ; 1 # Nd WANCHO DIGIT ONE 1E4F1 ; 1.0 ; ; 1 # Nd NAG MUNDARI DIGIT ONE +1E5F2 ; 1.0 ; ; 1 # Nd OL ONAL DIGIT ONE 1E8C7 ; 1.0 ; ; 1 # No MENDE KIKAKUI DIGIT ONE 1E951 ; 1.0 ; ; 1 # Nd ADLAM DIGIT ONE 1EC71 ; 1.0 ; ; 1 # No INDIC SIYAQ NUMBER ONE @@ -553,7 +569,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1FBF1 ; 1.0 ; ; 1 # Nd SEGMENTED DIGIT ONE 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 144 +# Total code points: 152 # ================================================ @@ -647,6 +663,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 10B79 ; 2.0 ; ; 2 # No INSCRIPTIONAL PAHLAVI NUMBER TWO 10BAA ; 2.0 ; ; 2 # No PSALTER PAHLAVI NUMBER TWO 10D32 ; 2.0 ; ; 2 # Nd HANIFI ROHINGYA DIGIT TWO +10D42 ; 2.0 ; ; 2 # Nd GARAY DIGIT TWO 10E61 ; 2.0 ; ; 2 # No RUMI DIGIT TWO 10F1E ; 2.0 ; ; 2 # No OLD SOGDIAN NUMBER TWO 10FC6 ; 2.0 ; ; 2 # No CHORASMIAN NUMBER TWO @@ -661,9 +678,12 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 114D2 ; 2.0 ; ; 2 # Nd TIRHUTA DIGIT TWO 11652 ; 2.0 ; ; 2 # Nd MODI DIGIT TWO 116C2 ; 2.0 ; ; 2 # Nd TAKRI DIGIT TWO +116D2 ; 2.0 ; ; 2 # Nd MYANMAR PAO DIGIT TWO +116DC ; 2.0 ; ; 2 # Nd MYANMAR EASTERN PWO KAREN DIGIT TWO 11732 ; 2.0 ; ; 2 # Nd AHOM DIGIT TWO 118E2 ; 2.0 ; ; 2 # Nd WARANG CITI DIGIT TWO 11952 ; 2.0 ; ; 2 # Nd DIVES AKURU DIGIT TWO +11BF2 ; 2.0 ; ; 2 # Nd SUNUWAR DIGIT TWO 11C52 ; 2.0 ; ; 2 # Nd BHAIKSUKI DIGIT TWO 11C5B ; 2.0 ; ; 2 # No BHAIKSUKI NUMBER TWO 11D52 ; 2.0 ; ; 2 # Nd MASARAM GONDI DIGIT TWO @@ -679,11 +699,14 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 12450 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO BAN2 12456 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN NIGIDAMIN 12459 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO ESHE3 +16132 ; 2.0 ; ; 2 # Nd GURUNG KHEMA DIGIT TWO 16A62 ; 2.0 ; ; 2 # Nd MRO DIGIT TWO 16AC2 ; 2.0 ; ; 2 # Nd TANGSA DIGIT TWO 16B52 ; 2.0 ; ; 2 # Nd PAHAWH HMONG DIGIT TWO +16D72 ; 2.0 ; ; 2 # Nd KIRAT RAI DIGIT TWO 16E82 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO 16E95 ; 2.0 ; ; 2 # No MEDEFAIDRIN DIGIT TWO ALTERNATE FORM +1CCF2 ; 2.0 ; ; 2 # Nd OUTLINED DIGIT TWO 1D2C2 ; 2.0 ; ; 2 # No KAKTOVIK NUMERAL TWO 1D2E2 ; 2.0 ; ; 2 # No MAYAN NUMERAL TWO 1D361 ; 2.0 ; ; 2 # No COUNTING ROD UNIT DIGIT TWO @@ -696,6 +719,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1E142 ; 2.0 ; ; 2 # Nd NYIAKENG PUACHUE HMONG DIGIT TWO 1E2F2 ; 2.0 ; ; 2 # Nd WANCHO DIGIT TWO 1E4F2 ; 2.0 ; ; 2 # Nd NAG MUNDARI DIGIT TWO +1E5F3 ; 2.0 ; ; 2 # Nd OL ONAL DIGIT TWO 1E8C8 ; 2.0 ; ; 2 # No MENDE KIKAKUI DIGIT TWO 1E952 ; 2.0 ; ; 2 # Nd ADLAM DIGIT TWO 1EC72 ; 2.0 ; ; 2 # No INDIC SIYAQ NUMBER TWO @@ -707,7 +731,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1FBF2 ; 2.0 ; ; 2 # Nd SEGMENTED DIGIT TWO 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 146 +# Total code points: 154 # ================================================ @@ -792,6 +816,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 10B7A ; 3.0 ; ; 3 # No INSCRIPTIONAL PAHLAVI NUMBER THREE 10BAB ; 3.0 ; ; 3 # No PSALTER PAHLAVI NUMBER THREE 10D33 ; 3.0 ; ; 3 # Nd HANIFI ROHINGYA DIGIT THREE +10D43 ; 3.0 ; ; 3 # Nd GARAY DIGIT THREE 10E62 ; 3.0 ; ; 3 # No RUMI DIGIT THREE 10F1F ; 3.0 ; ; 3 # No OLD SOGDIAN NUMBER THREE 10FC7 ; 3.0 ; ; 3 # No CHORASMIAN NUMBER THREE @@ -806,9 +831,12 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 114D3 ; 3.0 ; ; 3 # Nd TIRHUTA DIGIT THREE 11653 ; 3.0 ; ; 3 # Nd MODI DIGIT THREE 116C3 ; 3.0 ; ; 3 # Nd TAKRI DIGIT THREE +116D3 ; 3.0 ; ; 3 # Nd MYANMAR PAO DIGIT THREE +116DD ; 3.0 ; ; 3 # Nd MYANMAR EASTERN PWO KAREN DIGIT THREE 11733 ; 3.0 ; ; 3 # Nd AHOM DIGIT THREE 118E3 ; 3.0 ; ; 3 # Nd WARANG CITI DIGIT THREE 11953 ; 3.0 ; ; 3 # Nd DIVES AKURU DIGIT THREE +11BF3 ; 3.0 ; ; 3 # Nd SUNUWAR DIGIT THREE 11C53 ; 3.0 ; ; 3 # Nd BHAIKSUKI DIGIT THREE 11C5C ; 3.0 ; ; 3 # No BHAIKSUKI NUMBER THREE 11D53 ; 3.0 ; ; 3 # Nd MASARAM GONDI DIGIT THREE @@ -825,11 +853,14 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 1244B ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE ASH TENU 12451 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE BAN2 12457 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN NIGIDAESH +16133 ; 3.0 ; ; 3 # Nd GURUNG KHEMA DIGIT THREE 16A63 ; 3.0 ; ; 3 # Nd MRO DIGIT THREE 16AC3 ; 3.0 ; ; 3 # Nd TANGSA DIGIT THREE 16B53 ; 3.0 ; ; 3 # Nd PAHAWH HMONG DIGIT THREE +16D73 ; 3.0 ; ; 3 # Nd KIRAT RAI DIGIT THREE 16E83 ; 3.0 ; ; 3 # No MEDEFAIDRIN DIGIT THREE 16E96 ; 3.0 ; ; 3 # No MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +1CCF3 ; 3.0 ; ; 3 # Nd OUTLINED DIGIT THREE 1D2C3 ; 3.0 ; ; 3 # No KAKTOVIK NUMERAL THREE 1D2E3 ; 3.0 ; ; 3 # No MAYAN NUMERAL THREE 1D362 ; 3.0 ; ; 3 # No COUNTING ROD UNIT DIGIT THREE @@ -842,6 +873,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 1E143 ; 3.0 ; ; 3 # Nd NYIAKENG PUACHUE HMONG DIGIT THREE 1E2F3 ; 3.0 ; ; 3 # Nd WANCHO DIGIT THREE 1E4F3 ; 3.0 ; ; 3 # Nd NAG MUNDARI DIGIT THREE +1E5F4 ; 3.0 ; ; 3 # Nd OL ONAL DIGIT THREE 1E8C9 ; 3.0 ; ; 3 # No MENDE KIKAKUI DIGIT THREE 1E953 ; 3.0 ; ; 3 # Nd ADLAM DIGIT THREE 1EC73 ; 3.0 ; ; 3 # No INDIC SIYAQ NUMBER THREE @@ -855,7 +887,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998 23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B -# Total code points: 144 +# Total code points: 152 # ================================================ @@ -934,6 +966,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 10B7B ; 4.0 ; ; 4 # No INSCRIPTIONAL PAHLAVI NUMBER FOUR 10BAC ; 4.0 ; ; 4 # No PSALTER PAHLAVI NUMBER FOUR 10D34 ; 4.0 ; ; 4 # Nd HANIFI ROHINGYA DIGIT FOUR +10D44 ; 4.0 ; ; 4 # Nd GARAY DIGIT FOUR 10E63 ; 4.0 ; ; 4 # No RUMI DIGIT FOUR 10F20 ; 4.0 ; ; 4 # No OLD SOGDIAN NUMBER FOUR 10FC8 ; 4.0 ; ; 4 # No CHORASMIAN NUMBER FOUR @@ -948,9 +981,12 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 114D4 ; 4.0 ; ; 4 # Nd TIRHUTA DIGIT FOUR 11654 ; 4.0 ; ; 4 # Nd MODI DIGIT FOUR 116C4 ; 4.0 ; ; 4 # Nd TAKRI DIGIT FOUR +116D4 ; 4.0 ; ; 4 # Nd MYANMAR PAO DIGIT FOUR +116DE ; 4.0 ; ; 4 # Nd MYANMAR EASTERN PWO KAREN DIGIT FOUR 11734 ; 4.0 ; ; 4 # Nd AHOM DIGIT FOUR 118E4 ; 4.0 ; ; 4 # Nd WARANG CITI DIGIT FOUR 11954 ; 4.0 ; ; 4 # Nd DIVES AKURU DIGIT FOUR +11BF4 ; 4.0 ; ; 4 # Nd SUNUWAR DIGIT FOUR 11C54 ; 4.0 ; ; 4 # Nd BHAIKSUKI DIGIT FOUR 11C5D ; 4.0 ; ; 4 # No BHAIKSUKI NUMBER FOUR 11D54 ; 4.0 ; ; 4 # Nd MASARAM GONDI DIGIT FOUR @@ -968,10 +1004,13 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 1244C ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR ASH TENU 12452..12453 ; 4.0 ; ; 4 # Nl [2] CUNEIFORM NUMERIC SIGN FOUR BAN2..CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM 12469 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR U VARIANT FORM +16134 ; 4.0 ; ; 4 # Nd GURUNG KHEMA DIGIT FOUR 16A64 ; 4.0 ; ; 4 # Nd MRO DIGIT FOUR 16AC4 ; 4.0 ; ; 4 # Nd TANGSA DIGIT FOUR 16B54 ; 4.0 ; ; 4 # Nd PAHAWH HMONG DIGIT FOUR +16D74 ; 4.0 ; ; 4 # Nd KIRAT RAI DIGIT FOUR 16E84 ; 4.0 ; ; 4 # No MEDEFAIDRIN DIGIT FOUR +1CCF4 ; 4.0 ; ; 4 # Nd OUTLINED DIGIT FOUR 1D2C4 ; 4.0 ; ; 4 # No KAKTOVIK NUMERAL FOUR 1D2E4 ; 4.0 ; ; 4 # No MAYAN NUMERAL FOUR 1D363 ; 4.0 ; ; 4 # No COUNTING ROD UNIT DIGIT FOUR @@ -984,6 +1023,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 1E144 ; 4.0 ; ; 4 # Nd NYIAKENG PUACHUE HMONG DIGIT FOUR 1E2F4 ; 4.0 ; ; 4 # Nd WANCHO DIGIT FOUR 1E4F4 ; 4.0 ; ; 4 # Nd NAG MUNDARI DIGIT FOUR +1E5F5 ; 4.0 ; ; 4 # Nd OL ONAL DIGIT FOUR 1E8CA ; 4.0 ; ; 4 # No MENDE KIKAKUI DIGIT FOUR 1E954 ; 4.0 ; ; 4 # Nd ADLAM DIGIT FOUR 1EC74 ; 4.0 ; ; 4 # No INDIC SIYAQ NUMBER FOUR @@ -996,7 +1036,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2 2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D -# Total code points: 135 +# Total code points: 143 # ================================================ @@ -1080,6 +1120,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 10AEC ; 5.0 ; ; 5 # No MANICHAEAN NUMBER FIVE 10CFB ; 5.0 ; ; 5 # No OLD HUNGARIAN NUMBER FIVE 10D35 ; 5.0 ; ; 5 # Nd HANIFI ROHINGYA DIGIT FIVE +10D45 ; 5.0 ; ; 5 # Nd GARAY DIGIT FIVE 10E64 ; 5.0 ; ; 5 # No RUMI DIGIT FIVE 10F21 ; 5.0 ; ; 5 # No OLD SOGDIAN NUMBER FIVE 11056 ; 5.0 ; ; 5 # No BRAHMI NUMBER FIVE @@ -1093,9 +1134,12 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 114D5 ; 5.0 ; ; 5 # Nd TIRHUTA DIGIT FIVE 11655 ; 5.0 ; ; 5 # Nd MODI DIGIT FIVE 116C5 ; 5.0 ; ; 5 # Nd TAKRI DIGIT FIVE +116D5 ; 5.0 ; ; 5 # Nd MYANMAR PAO DIGIT FIVE +116DF ; 5.0 ; ; 5 # Nd MYANMAR EASTERN PWO KAREN DIGIT FIVE 11735 ; 5.0 ; ; 5 # Nd AHOM DIGIT FIVE 118E5 ; 5.0 ; ; 5 # Nd WARANG CITI DIGIT FIVE 11955 ; 5.0 ; ; 5 # Nd DIVES AKURU DIGIT FIVE +11BF5 ; 5.0 ; ; 5 # Nd SUNUWAR DIGIT FIVE 11C55 ; 5.0 ; ; 5 # Nd BHAIKSUKI DIGIT FIVE 11C5E ; 5.0 ; ; 5 # No BHAIKSUKI NUMBER FIVE 11D55 ; 5.0 ; ; 5 # Nd MASARAM GONDI DIGIT FIVE @@ -1112,10 +1156,13 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1244D ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE ASH TENU 12454..12455 ; 5.0 ; ; 5 # Nl [2] CUNEIFORM NUMERIC SIGN FIVE BAN2..CUNEIFORM NUMERIC SIGN FIVE BAN2 VARIANT FORM 1246A ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE U VARIANT FORM +16135 ; 5.0 ; ; 5 # Nd GURUNG KHEMA DIGIT FIVE 16A65 ; 5.0 ; ; 5 # Nd MRO DIGIT FIVE 16AC5 ; 5.0 ; ; 5 # Nd TANGSA DIGIT FIVE 16B55 ; 5.0 ; ; 5 # Nd PAHAWH HMONG DIGIT FIVE +16D75 ; 5.0 ; ; 5 # Nd KIRAT RAI DIGIT FIVE 16E85 ; 5.0 ; ; 5 # No MEDEFAIDRIN DIGIT FIVE +1CCF5 ; 5.0 ; ; 5 # Nd OUTLINED DIGIT FIVE 1D2C5 ; 5.0 ; ; 5 # No KAKTOVIK NUMERAL FIVE 1D2E5 ; 5.0 ; ; 5 # No MAYAN NUMERAL FIVE 1D364 ; 5.0 ; ; 5 # No COUNTING ROD UNIT DIGIT FIVE @@ -1129,6 +1176,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1E145 ; 5.0 ; ; 5 # Nd NYIAKENG PUACHUE HMONG DIGIT FIVE 1E2F5 ; 5.0 ; ; 5 # Nd WANCHO DIGIT FIVE 1E4F5 ; 5.0 ; ; 5 # Nd NAG MUNDARI DIGIT FIVE +1E5F6 ; 5.0 ; ; 5 # Nd OL ONAL DIGIT FIVE 1E8CB ; 5.0 ; ; 5 # No MENDE KIKAKUI DIGIT FIVE 1E955 ; 5.0 ; ; 5 # Nd ADLAM DIGIT FIVE 1EC75 ; 5.0 ; ; 5 # No INDIC SIYAQ NUMBER FIVE @@ -1139,7 +1187,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1FBF5 ; 5.0 ; ; 5 # Nd SEGMENTED DIGIT FIVE 20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121 -# Total code points: 133 +# Total code points: 141 # ================================================ @@ -1214,6 +1262,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 104A6 ; 6.0 ; ; 6 # Nd OSMANYA DIGIT SIX 109C5 ; 6.0 ; ; 6 # No MEROITIC CURSIVE NUMBER SIX 10D36 ; 6.0 ; ; 6 # Nd HANIFI ROHINGYA DIGIT SIX +10D46 ; 6.0 ; ; 6 # Nd GARAY DIGIT SIX 10E65 ; 6.0 ; ; 6 # No RUMI DIGIT SIX 11057 ; 6.0 ; ; 6 # No BRAHMI NUMBER SIX 1106C ; 6.0 ; ; 6 # Nd BRAHMI DIGIT SIX @@ -1226,9 +1275,12 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 114D6 ; 6.0 ; ; 6 # Nd TIRHUTA DIGIT SIX 11656 ; 6.0 ; ; 6 # Nd MODI DIGIT SIX 116C6 ; 6.0 ; ; 6 # Nd TAKRI DIGIT SIX +116D6 ; 6.0 ; ; 6 # Nd MYANMAR PAO DIGIT SIX +116E0 ; 6.0 ; ; 6 # Nd MYANMAR EASTERN PWO KAREN DIGIT SIX 11736 ; 6.0 ; ; 6 # Nd AHOM DIGIT SIX 118E6 ; 6.0 ; ; 6 # Nd WARANG CITI DIGIT SIX 11956 ; 6.0 ; ; 6 # Nd DIVES AKURU DIGIT SIX +11BF6 ; 6.0 ; ; 6 # Nd SUNUWAR DIGIT SIX 11C56 ; 6.0 ; ; 6 # Nd BHAIKSUKI DIGIT SIX 11C5F ; 6.0 ; ; 6 # No BHAIKSUKI NUMBER SIX 11D56 ; 6.0 ; ; 6 # Nd MASARAM GONDI DIGIT SIX @@ -1242,10 +1294,13 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 12440 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX VARIANT FORM ASH9 1244E ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX ASH TENU 1246B ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX U VARIANT FORM +16136 ; 6.0 ; ; 6 # Nd GURUNG KHEMA DIGIT SIX 16A66 ; 6.0 ; ; 6 # Nd MRO DIGIT SIX 16AC6 ; 6.0 ; ; 6 # Nd TANGSA DIGIT SIX 16B56 ; 6.0 ; ; 6 # Nd PAHAWH HMONG DIGIT SIX +16D76 ; 6.0 ; ; 6 # Nd KIRAT RAI DIGIT SIX 16E86 ; 6.0 ; ; 6 # No MEDEFAIDRIN DIGIT SIX +1CCF6 ; 6.0 ; ; 6 # Nd OUTLINED DIGIT SIX 1D2C6 ; 6.0 ; ; 6 # No KAKTOVIK NUMERAL SIX 1D2E6 ; 6.0 ; ; 6 # No MAYAN NUMERAL SIX 1D365 ; 6.0 ; ; 6 # No COUNTING ROD UNIT DIGIT SIX @@ -1257,6 +1312,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 1E146 ; 6.0 ; ; 6 # Nd NYIAKENG PUACHUE HMONG DIGIT SIX 1E2F6 ; 6.0 ; ; 6 # Nd WANCHO DIGIT SIX 1E4F6 ; 6.0 ; ; 6 # Nd NAG MUNDARI DIGIT SIX +1E5F7 ; 6.0 ; ; 6 # Nd OL ONAL DIGIT SIX 1E8CC ; 6.0 ; ; 6 # No MENDE KIKAKUI DIGIT SIX 1E956 ; 6.0 ; ; 6 # Nd ADLAM DIGIT SIX 1EC76 ; 6.0 ; ; 6 # No INDIC SIYAQ NUMBER SIX @@ -1267,7 +1323,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 1FBF6 ; 6.0 ; ; 6 # Nd SEGMENTED DIGIT SIX 20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA -# Total code points: 117 +# Total code points: 125 # ================================================ @@ -1341,6 +1397,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 104A7 ; 7.0 ; ; 7 # Nd OSMANYA DIGIT SEVEN 109C6 ; 7.0 ; ; 7 # No MEROITIC CURSIVE NUMBER SEVEN 10D37 ; 7.0 ; ; 7 # Nd HANIFI ROHINGYA DIGIT SEVEN +10D47 ; 7.0 ; ; 7 # Nd GARAY DIGIT SEVEN 10E66 ; 7.0 ; ; 7 # No RUMI DIGIT SEVEN 11058 ; 7.0 ; ; 7 # No BRAHMI NUMBER SEVEN 1106D ; 7.0 ; ; 7 # Nd BRAHMI DIGIT SEVEN @@ -1353,9 +1410,12 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 114D7 ; 7.0 ; ; 7 # Nd TIRHUTA DIGIT SEVEN 11657 ; 7.0 ; ; 7 # Nd MODI DIGIT SEVEN 116C7 ; 7.0 ; ; 7 # Nd TAKRI DIGIT SEVEN +116D7 ; 7.0 ; ; 7 # Nd MYANMAR PAO DIGIT SEVEN +116E1 ; 7.0 ; ; 7 # Nd MYANMAR EASTERN PWO KAREN DIGIT SEVEN 11737 ; 7.0 ; ; 7 # Nd AHOM DIGIT SEVEN 118E7 ; 7.0 ; ; 7 # Nd WARANG CITI DIGIT SEVEN 11957 ; 7.0 ; ; 7 # Nd DIVES AKURU DIGIT SEVEN +11BF7 ; 7.0 ; ; 7 # Nd SUNUWAR DIGIT SEVEN 11C57 ; 7.0 ; ; 7 # Nd BHAIKSUKI DIGIT SEVEN 11C60 ; 7.0 ; ; 7 # No BHAIKSUKI NUMBER SEVEN 11D57 ; 7.0 ; ; 7 # Nd MASARAM GONDI DIGIT SEVEN @@ -1368,10 +1428,13 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 12429 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN SHAR2 12441..12443 ; 7.0 ; ; 7 # Nl [3] CUNEIFORM NUMERIC SIGN SEVEN VARIANT FORM IMIN3..CUNEIFORM NUMERIC SIGN SEVEN VARIANT FORM IMIN B 1246C ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN U VARIANT FORM +16137 ; 7.0 ; ; 7 # Nd GURUNG KHEMA DIGIT SEVEN 16A67 ; 7.0 ; ; 7 # Nd MRO DIGIT SEVEN 16AC7 ; 7.0 ; ; 7 # Nd TANGSA DIGIT SEVEN 16B57 ; 7.0 ; ; 7 # Nd PAHAWH HMONG DIGIT SEVEN +16D77 ; 7.0 ; ; 7 # Nd KIRAT RAI DIGIT SEVEN 16E87 ; 7.0 ; ; 7 # No MEDEFAIDRIN DIGIT SEVEN +1CCF7 ; 7.0 ; ; 7 # Nd OUTLINED DIGIT SEVEN 1D2C7 ; 7.0 ; ; 7 # No KAKTOVIK NUMERAL SEVEN 1D2E7 ; 7.0 ; ; 7 # No MAYAN NUMERAL SEVEN 1D366 ; 7.0 ; ; 7 # No COUNTING ROD UNIT DIGIT SEVEN @@ -1383,6 +1446,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 1E147 ; 7.0 ; ; 7 # Nd NYIAKENG PUACHUE HMONG DIGIT SEVEN 1E2F7 ; 7.0 ; ; 7 # Nd WANCHO DIGIT SEVEN 1E4F7 ; 7.0 ; ; 7 # Nd NAG MUNDARI DIGIT SEVEN +1E5F8 ; 7.0 ; ; 7 # Nd OL ONAL DIGIT SEVEN 1E8CD ; 7.0 ; ; 7 # No MENDE KIKAKUI DIGIT SEVEN 1E957 ; 7.0 ; ; 7 # Nd ADLAM DIGIT SEVEN 1EC77 ; 7.0 ; ; 7 # No INDIC SIYAQ NUMBER SEVEN @@ -1393,7 +1457,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 1FBF7 ; 7.0 ; ; 7 # Nd SEGMENTED DIGIT SEVEN 20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001 -# Total code points: 117 +# Total code points: 125 # ================================================ @@ -1464,6 +1528,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 104A8 ; 8.0 ; ; 8 # Nd OSMANYA DIGIT EIGHT 109C7 ; 8.0 ; ; 8 # No MEROITIC CURSIVE NUMBER EIGHT 10D38 ; 8.0 ; ; 8 # Nd HANIFI ROHINGYA DIGIT EIGHT +10D48 ; 8.0 ; ; 8 # Nd GARAY DIGIT EIGHT 10E67 ; 8.0 ; ; 8 # No RUMI DIGIT EIGHT 11059 ; 8.0 ; ; 8 # No BRAHMI NUMBER EIGHT 1106E ; 8.0 ; ; 8 # Nd BRAHMI DIGIT EIGHT @@ -1476,9 +1541,12 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 114D8 ; 8.0 ; ; 8 # Nd TIRHUTA DIGIT EIGHT 11658 ; 8.0 ; ; 8 # Nd MODI DIGIT EIGHT 116C8 ; 8.0 ; ; 8 # Nd TAKRI DIGIT EIGHT +116D8 ; 8.0 ; ; 8 # Nd MYANMAR PAO DIGIT EIGHT +116E2 ; 8.0 ; ; 8 # Nd MYANMAR EASTERN PWO KAREN DIGIT EIGHT 11738 ; 8.0 ; ; 8 # Nd AHOM DIGIT EIGHT 118E8 ; 8.0 ; ; 8 # Nd WARANG CITI DIGIT EIGHT 11958 ; 8.0 ; ; 8 # Nd DIVES AKURU DIGIT EIGHT +11BF8 ; 8.0 ; ; 8 # Nd SUNUWAR DIGIT EIGHT 11C58 ; 8.0 ; ; 8 # Nd BHAIKSUKI DIGIT EIGHT 11C61 ; 8.0 ; ; 8 # No BHAIKSUKI NUMBER EIGHT 11D58 ; 8.0 ; ; 8 # Nd MASARAM GONDI DIGIT EIGHT @@ -1491,10 +1559,13 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1242A ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT SHAR2 12444..12445 ; 8.0 ; ; 8 # Nl [2] CUNEIFORM NUMERIC SIGN EIGHT VARIANT FORM USSU..CUNEIFORM NUMERIC SIGN EIGHT VARIANT FORM USSU3 1246D ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT U VARIANT FORM +16138 ; 8.0 ; ; 8 # Nd GURUNG KHEMA DIGIT EIGHT 16A68 ; 8.0 ; ; 8 # Nd MRO DIGIT EIGHT 16AC8 ; 8.0 ; ; 8 # Nd TANGSA DIGIT EIGHT 16B58 ; 8.0 ; ; 8 # Nd PAHAWH HMONG DIGIT EIGHT +16D78 ; 8.0 ; ; 8 # Nd KIRAT RAI DIGIT EIGHT 16E88 ; 8.0 ; ; 8 # No MEDEFAIDRIN DIGIT EIGHT +1CCF8 ; 8.0 ; ; 8 # Nd OUTLINED DIGIT EIGHT 1D2C8 ; 8.0 ; ; 8 # No KAKTOVIK NUMERAL EIGHT 1D2E8 ; 8.0 ; ; 8 # No MAYAN NUMERAL EIGHT 1D367 ; 8.0 ; ; 8 # No COUNTING ROD UNIT DIGIT EIGHT @@ -1506,6 +1577,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1E148 ; 8.0 ; ; 8 # Nd NYIAKENG PUACHUE HMONG DIGIT EIGHT 1E2F8 ; 8.0 ; ; 8 # Nd WANCHO DIGIT EIGHT 1E4F8 ; 8.0 ; ; 8 # Nd NAG MUNDARI DIGIT EIGHT +1E5F9 ; 8.0 ; ; 8 # Nd OL ONAL DIGIT EIGHT 1E8CE ; 8.0 ; ; 8 # No MENDE KIKAKUI DIGIT EIGHT 1E958 ; 8.0 ; ; 8 # Nd ADLAM DIGIT EIGHT 1EC78 ; 8.0 ; ; 8 # No INDIC SIYAQ NUMBER EIGHT @@ -1515,7 +1587,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA 1FBF8 ; 8.0 ; ; 8 # Nd SEGMENTED DIGIT EIGHT -# Total code points: 112 +# Total code points: 120 # ================================================ @@ -1589,6 +1661,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 104A9 ; 9.0 ; ; 9 # Nd OSMANYA DIGIT NINE 109C8 ; 9.0 ; ; 9 # No MEROITIC CURSIVE NUMBER NINE 10D39 ; 9.0 ; ; 9 # Nd HANIFI ROHINGYA DIGIT NINE +10D49 ; 9.0 ; ; 9 # Nd GARAY DIGIT NINE 10E68 ; 9.0 ; ; 9 # No RUMI DIGIT NINE 1105A ; 9.0 ; ; 9 # No BRAHMI NUMBER NINE 1106F ; 9.0 ; ; 9 # Nd BRAHMI DIGIT NINE @@ -1601,9 +1674,12 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 114D9 ; 9.0 ; ; 9 # Nd TIRHUTA DIGIT NINE 11659 ; 9.0 ; ; 9 # Nd MODI DIGIT NINE 116C9 ; 9.0 ; ; 9 # Nd TAKRI DIGIT NINE +116D9 ; 9.0 ; ; 9 # Nd MYANMAR PAO DIGIT NINE +116E3 ; 9.0 ; ; 9 # Nd MYANMAR EASTERN PWO KAREN DIGIT NINE 11739 ; 9.0 ; ; 9 # Nd AHOM DIGIT NINE 118E9 ; 9.0 ; ; 9 # Nd WARANG CITI DIGIT NINE 11959 ; 9.0 ; ; 9 # Nd DIVES AKURU DIGIT NINE +11BF9 ; 9.0 ; ; 9 # Nd SUNUWAR DIGIT NINE 11C59 ; 9.0 ; ; 9 # Nd BHAIKSUKI DIGIT NINE 11C62 ; 9.0 ; ; 9 # No BHAIKSUKI NUMBER NINE 11D59 ; 9.0 ; ; 9 # Nd MASARAM GONDI DIGIT NINE @@ -1616,10 +1692,13 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1242B ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE SHAR2 12446..12449 ; 9.0 ; ; 9 # Nl [4] CUNEIFORM NUMERIC SIGN NINE VARIANT FORM ILIMMU..CUNEIFORM NUMERIC SIGN NINE VARIANT FORM ILIMMU A 1246E ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +16139 ; 9.0 ; ; 9 # Nd GURUNG KHEMA DIGIT NINE 16A69 ; 9.0 ; ; 9 # Nd MRO DIGIT NINE 16AC9 ; 9.0 ; ; 9 # Nd TANGSA DIGIT NINE 16B59 ; 9.0 ; ; 9 # Nd PAHAWH HMONG DIGIT NINE +16D79 ; 9.0 ; ; 9 # Nd KIRAT RAI DIGIT NINE 16E89 ; 9.0 ; ; 9 # No MEDEFAIDRIN DIGIT NINE +1CCF9 ; 9.0 ; ; 9 # Nd OUTLINED DIGIT NINE 1D2C9 ; 9.0 ; ; 9 # No KAKTOVIK NUMERAL NINE 1D2E9 ; 9.0 ; ; 9 # No MAYAN NUMERAL NINE 1D368 ; 9.0 ; ; 9 # No COUNTING ROD UNIT DIGIT NINE @@ -1631,6 +1710,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1E149 ; 9.0 ; ; 9 # Nd NYIAKENG PUACHUE HMONG DIGIT NINE 1E2F9 ; 9.0 ; ; 9 # Nd WANCHO DIGIT NINE 1E4F9 ; 9.0 ; ; 9 # Nd NAG MUNDARI DIGIT NINE +1E5FA ; 9.0 ; ; 9 # Nd OL ONAL DIGIT NINE 1E8CF ; 9.0 ; ; 9 # No MENDE KIKAKUI DIGIT NINE 1E959 ; 9.0 ; ; 9 # Nd ADLAM DIGIT NINE 1EC79 ; 9.0 ; ; 9 # No INDIC SIYAQ NUMBER NINE @@ -1641,7 +1721,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1FBF9 ; 9.0 ; ; 9 # Nd SEGMENTED DIGIT NINE 2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 118 +# Total code points: 126 # ================================================ diff --git a/data/EquivalentUnifiedIdeograph b/data/EquivalentUnifiedIdeograph index d0fea92..2fd2bb1 100644 --- a/data/EquivalentUnifiedIdeograph +++ b/data/EquivalentUnifiedIdeograph @@ -1,8 +1,8 @@ -# EquivalentUnifiedIdeograph-15.1.0.txt -# Date: 2023-01-05 -# © 2023 Unicode®, Inc. +# EquivalentUnifiedIdeograph-16.0.0.txt +# Date: 2024-02-02 +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # For documentation, see UAX #44: Unicode Character Database, # at https://www.unicode.org/reports/tr44/ @@ -404,5 +404,7 @@ # 31CE; CJK STROKE HZZZ # 31E2; CJK STROKE PG # 31E3; CJK STROKE Q +# 31E4; CJK STROKE HXG +# 31E5; CJK STROKE SZP # EOF diff --git a/data/GraphemeBreakProperty b/data/GraphemeBreakProperty index 12453cb..a863397 100644 --- a/data/GraphemeBreakProperty +++ b/data/GraphemeBreakProperty @@ -1,8 +1,8 @@ -# GraphemeBreakProperty-15.1.0.txt -# Date: 2023-01-05, 20:34:41 GMT -# © 2023 Unicode®, Inc. +# GraphemeBreakProperty-16.0.0.txt +# Date: 2024-05-31, 18:09:38 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -27,6 +27,7 @@ 110BD ; Prepend # Cf KAITHI NUMBER SIGN 110CD ; Prepend # Cf KAITHI NUMBER SIGN ABOVE 111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA +113D1 ; Prepend # Lo TULU-TIGALARI REPHA 1193F ; Prepend # Lo DIVES AKURU PREFIXED NASAL SIGN 11941 ; Prepend # Lo DIVES AKURU INITIAL RA 11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA @@ -34,7 +35,7 @@ 11D46 ; Prepend # Lo MASARAM GONDI REPHA 11F02 ; Prepend # Lo KAWI SIGN REPHA -# Total code points: 27 +# Total code points: 28 # ================================================ @@ -106,7 +107,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; Extend # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -163,8 +164,11 @@ E01F0..E0FFF ; Control # Cn [3600] .. 0C81 ; Extend # Mn KANNADA SIGN CANDRABINDU 0CBC ; Extend # Mn KANNADA SIGN NUKTA 0CBF ; Extend # Mn KANNADA VOWEL SIGN I +0CC0 ; Extend # Mc KANNADA VOWEL SIGN II 0CC2 ; Extend # Mc KANNADA VOWEL SIGN UU 0CC6 ; Extend # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Extend # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Extend # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK 0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL @@ -210,7 +214,9 @@ E01F0..E0FFF ; Control # Cn [3600] .. 109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI 135D..135F ; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK 1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; Extend # Mc TAGALOG SIGN PAMUDPOD 1732..1733 ; Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; Extend # Mc HANUNOO SIGN PAMUDPOD 1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA @@ -242,17 +248,22 @@ E01F0..E0FFF ; Control # Cn [3600] .. 1B34 ; Extend # Mn BALINESE SIGN REREKAN 1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG 1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Extend # Mc BALINESE VOWEL SIGN RA REPA TEDUNG 1B3C ; Extend # Mn BALINESE VOWEL SIGN LA LENGA +1B3D ; Extend # Mc BALINESE VOWEL SIGN LA LENGA TEDUNG 1B42 ; Extend # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; Extend # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG 1B6B..1B73 ; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG 1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; Extend # Mc SUNDANESE SIGN PAMAAEH 1BAB..1BAD ; Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE6 ; Extend # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Extend # Mn BATAK VOWEL SIGN KARO O 1BEF..1BF1 ; Extend # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; Extend # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN 1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T 1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA 1CD0..1CD2 ; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -289,10 +300,12 @@ A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEV A8FF ; Extend # Mn DEVANAGARI VOWEL SIGN AY A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A953 ; Extend # Mc REJANG VIRAMA A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT A9BC..A9BD ; Extend # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9C0 ; Extend # Mc JAVANESE PANGKON A9E5 ; Extend # Mn MYANMAR SIGN SHAN SAW AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE @@ -324,8 +337,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10A3F ; Extend # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Extend # Mn BRAHMI SIGN ANUSVARA @@ -342,10 +356,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11173 ; Extend # Mn MAHAJANI SIGN NUKTA 11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA 111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C0 ; Extend # Mc SHARADA SIGN VIRAMA 111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK 111CF ; Extend # Mn SHARADA SIGN INVERTED CANDRABINDU 1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI 11234 ; Extend # Mn KHOJKI SIGN ANUSVARA +11235 ; Extend # Mc KHOJKI SIGN VIRAMA 11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA 1123E ; Extend # Mn KHOJKI SIGN SUKUN 11241 ; Extend # Mn KHOJKI VOWEL SIGN VOCALIC R @@ -355,9 +371,20 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1133B..1133C ; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA 1133E ; Extend # Mc GRANTHA VOWEL SIGN AA 11340 ; Extend # Mn GRANTHA VOWEL SIGN II +1134D ; Extend # Mc GRANTHA SIGN VIRAMA 11357 ; Extend # Mc GRANTHA AU LENGTH MARK 11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113B8 ; Extend # Mc TULU-TIGALARI VOWEL SIGN AA +113BB..113C0 ; Extend # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Extend # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Extend # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C9 ; Extend # Mc [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK +113CE ; Extend # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Extend # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Extend # Mn TULU-TIGALARI CONJOINER +113D2 ; Extend # Mn TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; Extend # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 11446 ; Extend # Mn NEWA SIGN NUKTA @@ -379,14 +406,17 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 116AB ; Extend # Mn TAKRI SIGN ANUSVARA 116AD ; Extend # Mn TAKRI VOWEL SIGN AA 116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Extend # Mc TAKRI SIGN VIRAMA 116B7 ; Extend # Mn TAKRI SIGN NUKTA -1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER 1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA 11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA 11930 ; Extend # Mc DIVES AKURU VOWEL SIGN AA 1193B..1193C ; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; Extend # Mc DIVES AKURU SIGN HALANTA 1193E ; Extend # Mn DIVES AKURU VIRAMA 11943 ; Extend # Mn DIVES AKURU SIGN NUKTA 119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR @@ -419,20 +449,25 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11F00..11F01 ; Extend # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA 11F36..11F3A ; Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F40 ; Extend # Mn KAWI VOWEL SIGN EU +11F41 ; Extend # Mc KAWI SIGN KILLER 11F42 ; Extend # Mn KAWI CONJOINER +11F5A ; Extend # Mn KAWI SIGN NUKTA 13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Extend # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM 16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 1CF00..1CF2D ; Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG -1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM +1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 -1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 +1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 1D17B..1D182 ; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO @@ -453,13 +488,14 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1E2AE ; Extend # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI 1E4EC..1E4EF ; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; Extend # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR 1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA 1F3FB..1F3FF ; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2130 +# Total code points: 2198 # ================================================ @@ -496,10 +532,8 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 0C41..0C44 ; SpacingMark # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 0C82..0C83 ; SpacingMark # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA 0CBE ; SpacingMark # Mc KANNADA VOWEL SIGN AA -0CC0..0CC1 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U +0CC1 ; SpacingMark # Mc KANNADA VOWEL SIGN U 0CC3..0CC4 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR -0CC7..0CC8 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI -0CCA..0CCB ; SpacingMark # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CF3 ; SpacingMark # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D02..0D03 ; SpacingMark # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D3F..0D40 ; SpacingMark # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II @@ -517,8 +551,6 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 103B..103C ; SpacingMark # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA 1056..1057 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR 1084 ; SpacingMark # Mc MYANMAR VOWEL SIGN SHAN E -1715 ; SpacingMark # Mc TAGALOG SIGN PAMUDPOD -1734 ; SpacingMark # Mc HANUNOO SIGN PAMUDPOD 17B6 ; SpacingMark # Mc KHMER VOWEL SIGN AA 17BE..17C5 ; SpacingMark # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU 17C7..17C8 ; SpacingMark # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU @@ -531,17 +563,13 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 1A57 ; SpacingMark # Mc TAI THAM CONSONANT SIGN LA TANG LAI 1A6D..1A72 ; SpacingMark # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI 1B04 ; SpacingMark # Mc BALINESE SIGN BISAH -1B3B ; SpacingMark # Mc BALINESE VOWEL SIGN RA REPA TEDUNG -1B3D..1B41 ; SpacingMark # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG -1B43..1B44 ; SpacingMark # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B3E..1B41 ; SpacingMark # Mc [4] BALINESE VOWEL SIGN TALING..BALINESE VOWEL SIGN TALING REPA TEDUNG 1B82 ; SpacingMark # Mc SUNDANESE SIGN PANGWISAD 1BA1 ; SpacingMark # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; SpacingMark # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG -1BAA ; SpacingMark # Mc SUNDANESE SIGN PAMAAEH 1BE7 ; SpacingMark # Mc BATAK VOWEL SIGN E 1BEA..1BEC ; SpacingMark # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O 1BEE ; SpacingMark # Mc BATAK VOWEL SIGN U -1BF2..1BF3 ; SpacingMark # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN 1C24..1C2B ; SpacingMark # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU 1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA @@ -550,11 +578,11 @@ A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI V A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA A8B4..A8C3 ; SpacingMark # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU -A952..A953 ; SpacingMark # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A952 ; SpacingMark # Mc REJANG CONSONANT SIGN H A983 ; SpacingMark # Mc JAVANESE SIGN WIGNYAN A9B4..A9B5 ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG A9BA..A9BB ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE -A9BE..A9C0 ; SpacingMark # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9BE..A9BF ; SpacingMark # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA AA2F..AA30 ; SpacingMark # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI AA33..AA34 ; SpacingMark # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA AA4D ; SpacingMark # Mc CHAM CONSONANT SIGN FINAL H @@ -574,18 +602,20 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11145..11146 ; SpacingMark # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI 11182 ; SpacingMark # Mc SHARADA SIGN VISARGA 111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II -111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111BF ; SpacingMark # Mc SHARADA VOWEL SIGN AU 111CE ; SpacingMark # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E 1122C..1122E ; SpacingMark # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II 11232..11233 ; SpacingMark # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU -11235 ; SpacingMark # Mc KHOJKI SIGN VIRAMA 112E0..112E2 ; SpacingMark # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II 11302..11303 ; SpacingMark # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA 1133F ; SpacingMark # Mc GRANTHA VOWEL SIGN I 11341..11344 ; SpacingMark # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR 11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI -1134B..1134D ; SpacingMark # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +1134B..1134C ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU 11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B9..113BA ; SpacingMark # Mc [2] TULU-TIGALARI VOWEL SIGN I..TULU-TIGALARI VOWEL SIGN II +113CA ; SpacingMark # Mc TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; SpacingMark # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA 11435..11437 ; SpacingMark # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11440..11441 ; SpacingMark # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU 11445 ; SpacingMark # Mc NEWA SIGN VISARGA @@ -602,13 +632,12 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 1163E ; SpacingMark # Mc MODI SIGN VISARGA 116AC ; SpacingMark # Mc TAKRI SIGN VISARGA 116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II -116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA +1171E ; SpacingMark # Mc AHOM CONSONANT SIGN MEDIAL RA 11726 ; SpacingMark # Mc AHOM VOWEL SIGN E 1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II 11838 ; SpacingMark # Mc DOGRA SIGN VISARGA 11931..11935 ; SpacingMark # Mc [5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E 11937..11938 ; SpacingMark # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O -1193D ; SpacingMark # Mc DIVES AKURU SIGN HALANTA 11940 ; SpacingMark # Mc DIVES AKURU MEDIAL YA 11942 ; SpacingMark # Mc DIVES AKURU MEDIAL RA 119D1..119D3 ; SpacingMark # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II @@ -629,13 +658,10 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11F03 ; SpacingMark # Mc KAWI SIGN VISARGA 11F34..11F35 ; SpacingMark # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA 11F3E..11F3F ; SpacingMark # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI -11F41 ; SpacingMark # Mc KAWI SIGN KILLER +1612A..1612C ; SpacingMark # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI -16FF0..16FF1 ; SpacingMark # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM -1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 395 +# Total code points: 378 # ================================================ @@ -648,8 +674,10 @@ A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANG 1160..11A7 ; V # Lo [72] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG O-YAE D7B0..D7C6 ; V # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +16D63 ; V # Lo KIRAT RAI VOWEL SIGN AA +16D67..16D6A ; V # Lo [4] KIRAT RAI VOWEL SIGN E..KIRAT RAI VOWEL SIGN AU -# Total code points: 95 +# Total code points: 100 # ================================================ diff --git a/data/HangulSyllableType b/data/HangulSyllableType index 98dafb1..56b5eea 100644 --- a/data/HangulSyllableType +++ b/data/HangulSyllableType @@ -1,8 +1,8 @@ -# HangulSyllableType-15.1.0.txt -# Date: 2023-01-05, 20:34:42 GMT -# © 2023 Unicode®, Inc. +# HangulSyllableType-16.0.0.txt +# Date: 2024-04-30, 21:48:21 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ diff --git a/data/IndicPositionalCategory b/data/IndicPositionalCategory index a7c5aef..7379b43 100644 --- a/data/IndicPositionalCategory +++ b/data/IndicPositionalCategory @@ -1,11 +1,11 @@ -# IndicPositionalCategory-15.1.0.txt -# Date: 2023-01-05 -# © 2023 Unicode®, Inc. +# IndicPositionalCategory-16.0.0.txt +# Date: 2024-04-30, 21:48:21 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # -# For documentation, see UAX #44: Unicode Character Database, -# at https://www.unicode.org/reports/tr44/ +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ # # This file defines the following property: # @@ -68,13 +68,14 @@ # # Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid, # Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati, -# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi, -# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Lepcha, Limbu, -# Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, Modi, -# Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, Rejang, Saurashtra, -# Sharada, Siddham, Sinhala, Soyombo, Sundanese, Syloti Nagri, -# Tagalog, Tagbanwa, Tai Tham, Tai Viet, Takri, Tamil, Telugu, Thai, -# Tibetan, Tirhuta, and Zanabazar Square. +# Gunjala Gondi, Gurmukhi, Gurung Khema, Hanunoo, Javanese, Kaithi, +# Kannada, Kawi, Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, +# Kirat Rai, Lao, Lepcha, Limbu, Makasar, Malayalam, Marchen, +# Masaram Gondi, Meetei Mayek, Modi, Myanmar, Nandinagari, Newa, +# New Tai Lue, Oriya, Rejang, Saurashtra, Sharada, Siddham, Sinhala, +# Soyombo, Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Tham, +# Tai Viet, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, +# Tulu-Tigalari, and Zanabazar Square. # # All characters for all other scripts not in that list # take the default value for this property. @@ -91,8 +92,6 @@ # but may have different positions in some cases: # * U+0BC1 TAMIL VOWEL SIGN U and U+0BC2 TAMIL VOWEL SIGN UU have # contextually variable placement in Tamil. -# * U+0D41 MALAYALAM VOWEL SIGN U and U+0D42 MALAYALAM VOWEL SIGN UU form -# complex ligatures with consonants in older Malayalam orthography. # * U+11341 GRANTHA VOWEL SIGN U and U+11342 GRANTHA VOWEL SIGN UU have # contextually variable placement in Grantha. # * U+11440 NEWA VOWEL SIGN O and U+11441 NEWA VOWEL SIGN AU have contextually @@ -105,10 +104,17 @@ # # 3. The following characters are all assigned the positional category Bottom, # but may have different positions in some cases: +# * U+0D41 MALAYALAM VOWEL SIGN U and U+0D42 MALAYALAM VOWEL SIGN UU form +# complex ligatures with consonants in older Malayalam orthography, and +# are spacing marks to the right of the base in reformed Malayalam +# orthography. # * U+102F MYANMAR VOWEL SIGN U and U+1030 MYANMAR VOWEL SIGN UU have # contextually variable placement in Myanmar. # * U+1A69 TAI THAM VOWEL SIGN U and U+1A6A TAI THAM VOWEL SIGN UU have # contextually variable placement in Tai Tham. +# * U+113BB TULU-TIGALARI VOWEL SIGN U and +# U+113BC TULU-TIGALARI VOWEL SIGN UU form complex ligatures with +# consonants. # # 4. The following character is assigned the positional category Left, but # may have different positions in different styles: @@ -161,7 +167,6 @@ 0CF3 ; Right # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT 0D02..0D03 ; Right # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA 0D3E..0D40 ; Right # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II -0D41..0D42 ; Right # Mn [2] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN UU 0D57 ; Right # Mc MALAYALAM AU LENGTH MARK 0D82..0D83 ; Right # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA 0DCF..0DD1 ; Right # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA @@ -258,6 +263,10 @@ ABEC ; Right # Mc MEETEI MAYEK LUM IYEK 1134D ; Right # Mc GRANTHA SIGN VIRAMA 11357 ; Right # Mc GRANTHA AU LENGTH MARK 11362..11363 ; Right # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8 ; Right # Mc TULU-TIGALARI VOWEL SIGN AA +113C9..113CA ; Right # Mc [2] TULU-TIGALARI AU LENGTH MARK..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Right # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CF ; Right # Mc TULU-TIGALARI SIGN LOOPED VIRAMA 11435 ; Right # Mc NEWA VOWEL SIGN AA 11437 ; Right # Mc NEWA VOWEL SIGN II 11440..11441 ; Right # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -300,6 +309,10 @@ ABEC ; Right # Mc MEETEI MAYEK LUM IYEK 11F03 ; Right # Mc KAWI SIGN VISARGA 11F34..11F35 ; Right # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA 11F41 ; Right # Mc KAWI SIGN KILLER +1612C ; Right # Mc GURUNG KHEMA CONSONANT SIGN MEDIAL HA +16D40..16D42 ; Right # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D63..16D6A ; Right # Lo [8] KIRAT RAI VOWEL SIGN AA..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; Right # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT # Indic_Positional_Category=Left @@ -336,6 +349,8 @@ AAEE ; Left # Mc MEETEI MAYEK VOWEL SIGN AU 111CE ; Left # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E 112E1 ; Left # Mc KHUDAWADI VOWEL SIGN I 11347..11348 ; Left # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +113C2 ; Left # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Left # Mc TULU-TIGALARI VOWEL SIGN AI 11436 ; Left # Mc NEWA VOWEL SIGN I 114B1 ; Left # Mc TIRHUTA VOWEL SIGN I 114B9 ; Left # Mc TIRHUTA VOWEL SIGN E @@ -351,6 +366,7 @@ AAEE ; Left # Mc MEETEI MAYEK VOWEL SIGN AU 11CB1 ; Left # Mc MARCHEN VOWEL SIGN I 11EF5 ; Left # Mc MAKASAR VOWEL SIGN E 11F3E..11F3F ; Left # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +1612A..1612B ; Left # Mc [2] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL VA # Indic_Positional_Category=Visual_Order_Left @@ -382,6 +398,7 @@ AABB..AABC ; Visual_Order_Left # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL 17C4..17C5 ; Left_And_Right # Mc [2] KHMER VOWEL SIGN OO..KHMER VOWEL SIGN AU 1B40..1B41 ; Left_And_Right # Mc [2] BALINESE VOWEL SIGN TALING TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG 1134B..1134C ; Left_And_Right # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +113C7..113C8 ; Left_And_Right # Mc [2] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI VOWEL SIGN AU 114BC ; Left_And_Right # Mc TIRHUTA VOWEL SIGN O 114BE ; Left_And_Right # Mc TIRHUTA VOWEL SIGN AU 115BA ; Left_And_Right # Mc SIDDHAM VOWEL SIGN O @@ -544,6 +561,9 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11340 ; Top # Mn GRANTHA VOWEL SIGN II 11366..1136C ; Top # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Top # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113CE ; Top # Mn TULU-TIGALARI SIGN VIRAMA +113D1 ; Top # Lo TULU-TIGALARI REPHA +113E1 ; Top # Mn TULU-TIGALARI VEDIC TONE SVARITA 1143E..1143F ; Top # Mn [2] NEWA VOWEL SIGN E..NEWA VOWEL SIGN AI 11443..11444 ; Top # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA 1145E ; Top # Mn NEWA SANDHI MARK @@ -590,6 +610,9 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 11F02 ; Top # Lo KAWI SIGN REPHA 11F36..11F37 ; Top # Mn [2] KAWI VOWEL SIGN I..KAWI VOWEL SIGN II 11F40 ; Top # Mn KAWI VOWEL SIGN EU +11F5A ; Top # Mn KAWI SIGN NUKTA +1611E..16129 ; Top # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D ; Top # Mn GURUNG KHEMA SIGN ANUSVARA # Indic_Positional_Category=Bottom @@ -621,7 +644,7 @@ ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 0C62..0C63 ; Bottom # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0CBC ; Bottom # Mn KANNADA SIGN NUKTA 0CE2..0CE3 ; Bottom # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D43..0D44 ; Bottom # Mn [2] MALAYALAM VOWEL SIGN VOCALIC R..MALAYALAM VOWEL SIGN VOCALIC RR +0D41..0D44 ; Bottom # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR 0D62..0D63 ; Bottom # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL 0DD4 ; Bottom # Mn SINHALA VOWEL SIGN KETTI PAA-PILLA 0DD6 ; Bottom # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA @@ -702,6 +725,9 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 112E3..112E4 ; Bottom # Mn [2] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN UU 112E9..112EA ; Bottom # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA 1133B..1133C ; Bottom # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +113BB..113C0 ; Bottom # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113D2 ; Bottom # Mn TULU-TIGALARI GEMINATION MARK +113E2 ; Bottom # Mn TULU-TIGALARI VEDIC TONE ANUDATTA 11438..1143D ; Bottom # Mn [6] NEWA VOWEL SIGN U..NEWA VOWEL SIGN VOCALIC LL 11442 ; Bottom # Mn NEWA SIGN VIRAMA 11446 ; Bottom # Mn NEWA SIGN NUKTA @@ -740,6 +766,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 11D47 ; Bottom # Mn MASARAM GONDI RA-KARA 11EF4 ; Bottom # Mn MAKASAR VOWEL SIGN U 11F38..11F3A ; Bottom # Mn [3] KAWI VOWEL SIGN U..KAWI VOWEL SIGN VOCALIC R +1612E..1612F ; Bottom # Mn [2] GURUNG KHEMA CONSONANT SIGN MEDIAL RA..GURUNG KHEMA SIGN THOLHOMA # Indic_Positional_Category=Top_And_Bottom @@ -761,6 +788,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 1B43 ; Top_And_Right # Mc BALINESE VOWEL SIGN PEPET TEDUNG 111BF ; Top_And_Right # Mc SHARADA VOWEL SIGN AU 11232..11233 ; Top_And_Right # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +113B9..113BA ; Top_And_Right # Mc [2] TULU-TIGALARI VOWEL SIGN I..TULU-TIGALARI VOWEL SIGN II # Indic_Positional_Category=Top_And_Left @@ -796,7 +824,7 @@ A9BF ; Bottom_And_Left # Mc JAVANESE CONSONANT SIGN CAKRA # Indic_Positional_Category=Top_And_Bottom_And_Left 103C ; Top_And_Bottom_And_Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA -1171E ; Top_And_Bottom_And_Left # Mn AHOM CONSONANT SIGN MEDIAL RA +1171E ; Top_And_Bottom_And_Left # Mc AHOM CONSONANT SIGN MEDIAL RA # Indic_Positional_Category=Overstruck diff --git a/data/IndicSyllabicCategory b/data/IndicSyllabicCategory index f2623b4..dc07604 100644 --- a/data/IndicSyllabicCategory +++ b/data/IndicSyllabicCategory @@ -1,11 +1,11 @@ -# IndicSyllabicCategory-15.1.0.txt -# Date: 2023-01-05 -# © 2023 Unicode®, Inc. +# IndicSyllabicCategory-16.0.0.txt +# Date: 2024-04-30, 21:48:21 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # -# For documentation, see UAX #44: Unicode Character Database, -# at https://www.unicode.org/reports/tr44/ +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ # # This file defines the following property: # @@ -37,13 +37,14 @@ # # Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid, # Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati, -# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi, -# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Lepcha, Limbu, -# Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, -# Modi, Multani, Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, -# Phags-pa, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Soyombo, -# Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, -# Tai Viet, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, and +# Gunjala Gondi, Gurmukhi, Gurung Khema, Hanunoo, Javanese, Kaithi, +# Kannada, Kawi, Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, +# Kirat Rai, Lao, Lepcha, Limbu, Mahajani, Makasar, Malayalam, +# Marchen, Masaram Gondi, Meetei Mayek, Modi, Multani, Myanmar, +# Nandinagari, Newa, New Tai Lue, Oriya, Phags-pa, Rejang, +# Saurashtra, Sharada, Siddham, Sinhala, Soyombo, Sundanese, +# Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, Tai Viet, Takri, +# Tamil, Telugu, Thai, Tibetan, Tirhuta, Tulu-Tigalari, and # Zanabazar Square. # # All characters for all other scripts not in that list @@ -119,6 +120,8 @@ A980..A981 ; Bindu # Mn [2] JAVANESE SIGN PANYANGGA..JAVANESE SIGN CECAK 11300..11301 ; Bindu # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU 11302 ; Bindu # Mc GRANTHA SIGN ANUSVARA 1135E..1135F ; Bindu # Lo [2] GRANTHA LETTER VEDIC ANUSVARA..GRANTHA LETTER VEDIC DOUBLE ANUSVARA +113CA ; Bindu # Mc TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC ; Bindu # Mc TULU-TIGALARI SIGN ANUSVARA 11443..11444 ; Bindu # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA 1145F ; Bindu # Lo NEWA LETTER VEDIC ANUSVARA 114BF..114C0 ; Bindu # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA @@ -135,6 +138,8 @@ A980..A981 ; Bindu # Mn [2] JAVANESE SIGN PANYANGGA..JAVANESE SIGN CECAK 11D40 ; Bindu # Mn MASARAM GONDI SIGN ANUSVARA 11D95 ; Bindu # Mn GUNJALA GONDI SIGN ANUSVARA 11F00..11F01 ; Bindu # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +1612D ; Bindu # Mn GURUNG KHEMA SIGN ANUSVARA +16D40..16D41 ; Bindu # Lm [2] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN TONPI # ================================================ @@ -169,6 +174,7 @@ AAF5 ; Visarga # Mc MEETEI MAYEK VOWEL SIGN VISARGA 11102 ; Visarga # Mn CHAKMA SIGN VISARGA 11182 ; Visarga # Mc SHARADA SIGN VISARGA 11303 ; Visarga # Mc GRANTHA SIGN VISARGA +113CD ; Visarga # Mc TULU-TIGALARI SIGN VISARGA 11445 ; Visarga # Mc NEWA SIGN VISARGA 114C1 ; Visarga # Mc TIRHUTA SIGN VISARGA 115BE ; Visarga # Mc SIDDHAM SIGN VISARGA @@ -182,6 +188,7 @@ AAF5 ; Visarga # Mc MEETEI MAYEK VOWEL SIGN VISARGA 11D41 ; Visarga # Mn MASARAM GONDI SIGN VISARGA 11D96 ; Visarga # Mc GUNJALA GONDI SIGN VISARGA 11F03 ; Visarga # Mc KAWI SIGN VISARGA +16D42 ; Visarga # Lm KIRAT RAI SIGN VISARGA # ================================================ @@ -203,6 +210,7 @@ AAF5 ; Visarga # Mc MEETEI MAYEK VOWEL SIGN VISARGA 1BBA ; Avagraha # Lo SUNDANESE AVAGRAHA 111C1 ; Avagraha # Lo SHARADA SIGN AVAGRAHA 1133D ; Avagraha # Lo GRANTHA SIGN AVAGRAHA +113B7 ; Avagraha # Lo TULU-TIGALARI SIGN AVAGRAHA 11447 ; Avagraha # Lo NEWA SIGN AVAGRAHA 114C4 ; Avagraha # Lo TIRHUTA SIGN AVAGRAHA 119E1 ; Avagraha # Lo NANDINAGARI SIGN AVAGRAHA @@ -249,19 +257,21 @@ A9B3 ; Nukta # Mn JAVANESE SIGN CECAK TELU 1183A ; Nukta # Mn DOGRA SIGN NUKTA 11943 ; Nukta # Mn DIVES AKURU SIGN NUKTA 11D42 ; Nukta # Mn MASARAM GONDI SIGN NUKTA +11F5A ; Nukta # Mn KAWI SIGN NUKTA # ================================================ # Indic_Syllabic_Category=Virama -# Virama (killing of inherent vowel in consonant sequence -# or consonant stacker) +# Virama (kills inherent vowel of consonant; may act as a Pure_Killer +# or Invisible_Stacker depending on context) # Only includes characters that can act both as visible killer viramas # and consonant stackers. Separate property values exist for characters -# that can only act as pure killers or only as consonant stackers. +# that can only act as pure killers, only as reordering killers, or only +# as consonant stackers. # [Derivation: (ccc=9) - (InSC=Pure_Killer) - (InSC=Invisible_Stacker) -# - (InSC=Number_Joiner) - 2D7F] +# - (InSC=Reordering_Killer) - (InSC=Number_Joiner) - 2D7F] 094D ; Virama # Mn DEVANAGARI SIGN VIRAMA 09CD ; Virama # Mn BENGALI SIGN VIRAMA @@ -295,8 +305,9 @@ A9C0 ; Virama # Mc JAVANESE PANGKON # Indic_Syllabic_Category=Pure_Killer -# Pure killer (killing of inherent vowel in consonant sequence, -# with no consonant stacking behavior) +# Pure killer (kills inherent vowel of consonant; always visible; +# has no conjuct formation, consonant stacking, or reordering +# behavior) # [Not derivable] @@ -312,24 +323,40 @@ A9C0 ; Virama # Mc JAVANESE PANGKON 17D1 ; Pure_Killer # Mn KHMER SIGN VIRIAM 1A7A ; Pure_Killer # Mn TAI THAM SIGN RA HAAM 1BAA ; Pure_Killer # Mc SUNDANESE SIGN PAMAAEH -1BF2..1BF3 ; Pure_Killer # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN A82C ; Pure_Killer # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA A953 ; Pure_Killer # Mc REJANG VIRAMA ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK 11070 ; Pure_Killer # Mn BRAHMI SIGN OLD TAMIL VIRAMA 11134 ; Pure_Killer # Mn CHAKMA MAAYYAA 112EA ; Pure_Killer # Mn KHUDAWADI SIGN VIRAMA +113CE ; Pure_Killer # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Pure_Killer # Mc TULU-TIGALARI SIGN LOOPED VIRAMA 1172B ; Pure_Killer # Mn AHOM SIGN KILLER 1193D ; Pure_Killer # Mc DIVES AKURU SIGN HALANTA 11A34 ; Pure_Killer # Mn ZANABAZAR SQUARE SIGN VIRAMA 11D44 ; Pure_Killer # Mn MASARAM GONDI SIGN HALANTA 11F41 ; Pure_Killer # Mc KAWI SIGN KILLER +1612F ; Pure_Killer # Mn GURUNG KHEMA SIGN THOLHOMA +16D6B..16D6C ; Pure_Killer # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT + +# ================================================ + +# Indic_Syllabic_Category=Reordering_Killer + +# Reordering killer (kills inherent vowel of consonant; always visible; +# may cause consonant reordering) + +# [Not derivable] + +1BF2..1BF3 ; Reordering_Killer # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # ================================================ # Indic_Syllabic_Category=Invisible_Stacker -# Invisible stacker (invisible consonant stacker virama). +# Invisible stacker (usually kills inherent vowel of consonant; is not visible +# by itself; causes conjunct formation or consonant +# stacking) # # Note that in some scripts, such as Kharoshthi and Masaram Gondi, an invisible # stacker may have a second function, changing the shape and/or location of the @@ -345,6 +372,7 @@ ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK AAF6 ; Invisible_Stacker # Mn MEETEI MAYEK VIRAMA 10A3F ; Invisible_Stacker # Mn KHAROSHTHI VIRAMA 11133 ; Invisible_Stacker # Mn CHAKMA VIRAMA +113D0 ; Invisible_Stacker # Mn TULU-TIGALARI CONJOINER 1193E ; Invisible_Stacker # Mn DIVES AKURU VIRAMA 11A47 ; Invisible_Stacker # Mn ZANABAZAR SQUARE SUBJOINER 11A99 ; Invisible_Stacker # Mn SOYOMBO SUBJOINER @@ -428,6 +456,10 @@ ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 1130F..11310 ; Vowel_Independent # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI 11313..11314 ; Vowel_Independent # Lo [2] GRANTHA LETTER OO..GRANTHA LETTER AU 11360..11361 ; Vowel_Independent # Lo [2] GRANTHA LETTER VOCALIC RR..GRANTHA LETTER VOCALIC LL +11380..11389 ; Vowel_Independent # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Vowel_Independent # Lo TULU-TIGALARI LETTER EE +1138E ; Vowel_Independent # Lo TULU-TIGALARI LETTER AI +11390..11391 ; Vowel_Independent # Lo [2] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER AU 11400..1140D ; Vowel_Independent # Lo [14] NEWA LETTER A..NEWA LETTER AU 11481..1148E ; Vowel_Independent # Lo [14] TIRHUTA LETTER A..TIRHUTA LETTER AU 11580..1158D ; Vowel_Independent # Lo [14] SIDDHAM LETTER A..SIDDHAM LETTER AU @@ -450,6 +482,7 @@ ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 11D67..11D68 ; Vowel_Independent # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI 11D6A..11D6B ; Vowel_Independent # Lo [2] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER AU 11F04..11F10 ; Vowel_Independent # Lo [13] KAWI LETTER A..KAWI LETTER O +16100 ; Vowel_Independent # Lo GURUNG KHEMA LETTER A # ================================================ @@ -655,6 +688,11 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 1134B..1134C ; Vowel_Dependent # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU 11357 ; Vowel_Dependent # Mc GRANTHA AU LENGTH MARK 11362..11363 ; Vowel_Dependent # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; Vowel_Dependent # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Vowel_Dependent # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Vowel_Dependent # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Vowel_Dependent # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C9 ; Vowel_Dependent # Mc [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK 11435..11437 ; Vowel_Dependent # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; Vowel_Dependent # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11440..11441 ; Vowel_Dependent # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -712,6 +750,8 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 11F36..11F3A ; Vowel_Dependent # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F3E..11F3F ; Vowel_Dependent # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F40 ; Vowel_Dependent # Mn KAWI VOWEL SIGN EU +1611E..16129 ; Vowel_Dependent # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +16D63..16D6A ; Vowel_Dependent # Lo [8] KIRAT RAI VOWEL SIGN AA..KIRAT RAI VOWEL SIGN AU # ================================================ @@ -901,6 +941,7 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE 1132A..11330 ; Consonant # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA 11332..11333 ; Consonant # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA 11335..11339 ; Consonant # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +11392..113B5 ; Consonant # Lo [36] TULU-TIGALARI LETTER KA..TULU-TIGALARI LETTER LLLA 1140E..11434 ; Consonant # Lo [39] NEWA LETTER KA..NEWA LETTER HA 1148F..114AF ; Consonant # Lo [33] TIRHUTA LETTER KA..TIRHUTA LETTER HA 1158E..115AE ; Consonant # Lo [33] SIDDHAM LETTER KA..SIDDHAM LETTER HA @@ -922,6 +963,8 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE 11D6C..11D89 ; Consonant # Lo [30] GUNJALA GONDI LETTER YA..GUNJALA GONDI LETTER SA 11EE0..11EF1 ; Consonant # Lo [18] MAKASAR LETTER KA..MAKASAR LETTER A 11F12..11F33 ; Consonant # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +16101..1611D ; Consonant # Lo [29] GURUNG KHEMA LETTER KA..GURUNG KHEMA LETTER SA +16D43..16D62 ; Consonant # Lo [32] KIRAT RAI LETTER A..KIRAT RAI LETTER HA # ================================================ @@ -975,6 +1018,7 @@ ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTE # [Not derivable] 0D4E ; Consonant_Preceding_Repha # Lo MALAYALAM LETTER DOT REPH +113D1 ; Consonant_Preceding_Repha # Lo TULU-TIGALARI REPHA 11941 ; Consonant_Preceding_Repha # Lo DIVES AKURU INITIAL RA 11D46 ; Consonant_Preceding_Repha # Lo MASARAM GONDI REPHA 11F02 ; Consonant_Preceding_Repha # Lo KAWI SIGN REPHA @@ -1046,11 +1090,15 @@ A9BD ; Consonant_Medial # Mn JAVANESE CONSONANT SIGN KERET A9BE..A9BF ; Consonant_Medial # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA AA33..AA34 ; Consonant_Medial # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA AA35..AA36 ; Consonant_Medial # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA -1171D..1171F ; Consonant_Medial # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Consonant_Medial # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Consonant_Medial # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Consonant_Medial # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11940 ; Consonant_Medial # Mc DIVES AKURU MEDIAL YA 11942 ; Consonant_Medial # Mc DIVES AKURU MEDIAL RA 11A3B..11A3E ; Consonant_Medial # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA 11D47 ; Consonant_Medial # Mn MASARAM GONDI RA-KARA +1612A..1612C ; Consonant_Medial # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612E ; Consonant_Medial # Mn GURUNG KHEMA CONSONANT SIGN MEDIAL RA # ================================================ @@ -1156,6 +1204,7 @@ ABEC ; Tone_Mark # Mc MEETEI MAYEK LUM IYEK 0A71 ; Gemination_Mark # Mn GURMUKHI ADDAK 0AFB ; Gemination_Mark # Mn GUJARATI SIGN SHADDA 11237 ; Gemination_Mark # Mn KHOJKI SIGN SHADDA +113D2 ; Gemination_Mark # Mn TULU-TIGALARI GEMINATION MARK 11A98 ; Gemination_Mark # Mn SOYOMBO GEMINATION MARK # ================================================ @@ -1181,6 +1230,7 @@ A8E0..A8F1 ; Cantillation_Mark # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..CO 1123E ; Cantillation_Mark # Mn KHOJKI SIGN SUKUN 11366..1136C ; Cantillation_Mark # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Cantillation_Mark # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113E1..113E2 ; Cantillation_Mark # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA # ================================================ @@ -1318,6 +1368,7 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI 114D0..114D9 ; Number # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Number # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Number # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; Number # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11730..11739 ; Number # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 1173A..1173B ; Number # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY 11950..11959 ; Number # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE @@ -1326,6 +1377,8 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI 11D50..11D59 ; Number # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Number # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE 11F50..11F59 ; Number # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16130..16139 ; Number # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE +16D70..16D79 ; Number # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE # ================================================ @@ -1335,7 +1388,7 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI # script, e.g. in Brahmi) # # Note: These are different from Numbers, in the way that there is no known -# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants. +# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants. # Until such evidence is found, implementations may assume that Brahmi # Joining Numbers only participate in shaping with other Brahmi Joining # Numbers. diff --git a/data/PropList b/data/PropList index 777e8a2..fae2831 100644 --- a/data/PropList +++ b/data/PropList @@ -1,8 +1,8 @@ -# PropList-15.1.0.txt -# Date: 2023-08-01, 21:56:53 GMT -# © 2023 Unicode®, Inc. +# PropList-16.0.0.txt +# Date: 2024-05-31, 18:09:48 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -62,9 +62,10 @@ FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTA FE58 ; Dash # Pd SMALL EM DASH FE63 ; Dash # Pd SMALL HYPHEN-MINUS FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS +10D6E ; Dash # Pd GARAY HYPHEN 10EAD ; Dash # Pd YEZIDI HYPHENATION MARK -# Total code points: 30 +# Total code points: 31 # ================================================ @@ -132,7 +133,8 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET 0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION 070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS 07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK -0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +0830..0835 ; Terminal_Punctuation # Po [6] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION SHIYYAALAA +0837..083E ; Terminal_Punctuation # Po [8] SAMARITAN PUNCTUATION MELODIC QITSA..SAMARITAN PUNCTUATION ANNAAU 085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION 0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA 0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT @@ -149,13 +151,16 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET 1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP 1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK 1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B4E..1B4F ; Terminal_Punctuation # Po [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN 1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA 1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN -1B7D..1B7E ; Terminal_Punctuation # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B7D..1B7F ; Terminal_Punctuation # Po [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK 1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK 1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +2024 ; Terminal_Punctuation # Po ONE DOT LEADER 203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG 2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2CF9..2CFB ; Terminal_Punctuation # Po [3] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN INDIRECT QUESTION MARK 2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK 2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP 2E41 ; Terminal_Punctuation # Po REVERSED COMMA @@ -174,6 +179,8 @@ AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUN AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI +FE12 ; Terminal_Punctuation # Po PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP +FE15..FE16 ; Terminal_Punctuation # Po [2] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK @@ -201,6 +208,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK 112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK +113D4..113D5 ; Terminal_Punctuation # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA 1144B..1144D ; Terminal_Punctuation # Po [3] NEWA DANDA..NEWA COMMA 1145A..1145B ; Terminal_Punctuation # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK 115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR @@ -221,11 +229,12 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP 16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM 16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS +16D6E..16D6F ; Terminal_Punctuation # Po [2] KIRAT RAI DANDA..KIRAT RAI DOUBLE DANDA 16E97..16E98 ; Terminal_Punctuation # Po [2] MEDEFAIDRIN COMMA..MEDEFAIDRIN FULL STOP 1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 278 +# Total code points: 291 # ================================================ @@ -430,6 +439,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L # ================================================ 0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +0363..036F ; Other_Alphabetic # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X 05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG 05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE 05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT @@ -450,6 +460,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A 0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +0897 ; Other_Alphabetic # Mn ARABIC PEPET 08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA 08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN 08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA @@ -634,7 +645,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T 1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1C36 ; Other_Alphabetic # Mn LEPCHA SIGN RAN -1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +1DD3..1DF4 ; Other_Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS 24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA @@ -689,7 +700,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O 10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA 10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69 ; Other_Alphabetic # Mn GARAY VOWEL SIGN E 10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFC ; Other_Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU 11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA 11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA @@ -732,6 +745,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU 11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK 11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; Other_Alphabetic # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Other_Alphabetic # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Other_Alphabetic # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Other_Alphabetic # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Other_Alphabetic # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Other_Alphabetic # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA 11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -761,7 +780,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA 116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II 116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU -1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Other_Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Other_Alphabetic # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Other_Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E @@ -818,6 +839,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11F36..11F3A ; Other_Alphabetic # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F3E..11F3F ; Other_Alphabetic # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F40 ; Other_Alphabetic # Mn KAWI VOWEL SIGN EU +1611E..16129 ; Other_Alphabetic # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Other_Alphabetic # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612E ; Other_Alphabetic # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA 16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -834,7 +858,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1425 +# Total code points: 1495 # ================================================ @@ -849,7 +873,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER 17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Ideographic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; Ideographic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -861,7 +885,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106476 +# Total code points: 106477 # ================================================ @@ -932,6 +956,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA 0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA 0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA +0E3A ; Diacritic # Mn THAI CHARACTER PHINTHU 0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT 0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN 0EBA ; Diacritic # Mn LAO SIGN PALI VIRAMA @@ -955,9 +980,11 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 135D..135F ; Diacritic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK 1714 ; Diacritic # Mn TAGALOG SIGN VIRAMA 1715 ; Diacritic # Mc TAGALOG SIGN PAMUDPOD +1734 ; Diacritic # Mc HANUNOO SIGN PAMUDPOD 17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT 17DD ; Diacritic # Mn KHMER SIGN ATTHACAN 1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A60 ; Diacritic # Mn TAI THAM SIGN SAKOT 1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW @@ -968,6 +995,8 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG 1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH 1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA +1BE6 ; Diacritic # Mn BATAK SIGN TOMPI +1BF2..1BF3 ; Diacritic # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN 1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA 1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -1006,6 +1035,8 @@ A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIF A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Diacritic # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A806 ; Diacritic # Mn SYLOTI NAGRI SIGN HASANTA +A82C ; Diacritic # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU @@ -1039,9 +1070,13 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 10780..10785 ; Diacritic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Diacritic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Diacritic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10A38..10A3A ; Diacritic # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Diacritic # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4E ; Diacritic # Lm GARAY VOWEL LENGTH MARK +10D69..10D6D ; Diacritic # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -1055,10 +1090,16 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA 11236 ; Diacritic # Mn KHOJKI SIGN NUKTA 112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA -1133C ; Diacritic # Mn GRANTHA SIGN NUKTA +1133B..1133C ; Diacritic # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA 1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA 11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113CE ; Diacritic # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Diacritic # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Diacritic # Mn TULU-TIGALARI CONJOINER +113D2 ; Diacritic # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; Diacritic # Lo TULU-TIGALARI SIGN PLUTA +113E1..113E2 ; Diacritic # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11442 ; Diacritic # Mn NEWA SIGN VIRAMA 11446 ; Diacritic # Mn NEWA SIGN NUKTA 114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA @@ -1079,9 +1120,14 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA 11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA 11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +11F41 ; Diacritic # Mc KAWI SIGN KILLER +11F42 ; Diacritic # Mn KAWI CONJOINER +11F5A ; Diacritic # Mn KAWI SIGN NUKTA 13447..13455 ; Diacritic # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1612F ; Diacritic # Mn GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D6B..16D6C ; Diacritic # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FF0..16FF1 ; Diacritic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY @@ -1099,11 +1145,12 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E5EE..1E5EF ; Diacritic # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR 1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1144 +# Total code points: 1178 # ================================================ @@ -1111,6 +1158,8 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON 0640 ; Extender # Lm ARABIC TATWEEL 07FA ; Extender # Lm NKO LAJANYALAN +0A71 ; Extender # Mn GURMUKHI ADDAK +0AFB ; Extender # Mn GUJARATI SIGN SHADDA 0B55 ; Extender # Mn ORIYA SIGN OVERLINE 0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK 0EC6 ; Extender # Lm LAO KO LA @@ -1132,16 +1181,23 @@ AADD ; Extender # Lm TAI VIET SYMBOL SAM AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 10781..10782 ; Extender # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON +10D4E ; Extender # Lm GARAY VOWEL LENGTH MARK +10D6A ; Extender # Mn GARAY CONSONANT GEMINATION MARK +10D6F ; Extender # Lm GARAY REDUPLICATION MARK +11237 ; Extender # Mn KHOJKI SIGN SHADDA 1135D ; Extender # Lo GRANTHA SIGN PLUTA +113D2 ; Extender # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; Extender # Lo TULU-TIGALARI SIGN PLUTA 115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 11A98 ; Extender # Mn SOYOMBO GEMINATION MARK 16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM 16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Extender # Lm OLD CHINESE ITERATION MARK 1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E5EF ; Extender # Mn OL ONAL SIGN IKIR 1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -# Total code points: 50 +# Total code points: 59 # ================================================ @@ -1217,27 +1273,51 @@ FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] .... +23E2..2429 ; Pattern_Syntax # So [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM +242A..243F ; Pattern_Syntax # Cn [22] .. 2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH 244B..245F ; Pattern_Syntax # Cn [21] .. 2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE @@ -1824,4 +1911,18 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT # Total code points: 26 +# ================================================ + +0654..0655 ; Modifier_Combining_Mark # Mn [2] ARABIC HAMZA ABOVE..ARABIC HAMZA BELOW +0658 ; Modifier_Combining_Mark # Mn ARABIC MARK NOON GHUNNA +06DC ; Modifier_Combining_Mark # Mn ARABIC SMALL HIGH SEEN +06E3 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW SEEN +06E7..06E8 ; Modifier_Combining_Mark # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +08CA..08CB ; Modifier_Combining_Mark # Mn [2] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW +08CD..08CF ; Modifier_Combining_Mark # Mn [3] ARABIC SMALL HIGH ZAH..ARABIC LARGE ROUND DOT BELOW +08D3 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW WAW +08F3 ; Modifier_Combining_Mark # Mn ARABIC SMALL HIGH WAW + +# Total code points: 14 + # EOF diff --git a/data/ScriptExtensions b/data/ScriptExtensions index 23141fb..140901a 100644 --- a/data/ScriptExtensions +++ b/data/ScriptExtensions @@ -1,8 +1,8 @@ -# ScriptExtensions-15.1.0.txt -# Date: 2023-02-01, 23:02:24 GMT -# © 2023 Unicode®, Inc. +# ScriptExtensions-16.0.0.txt +# Date: 2024-07-30, 19:38:00 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -21,615 +21,213 @@ # values in that set is not material, but for stability in presentation # it is given here as alphabetical. # -# The Script_Extensions values are presented in sorted order in the file. -# They are sorted first by the number of Script property values in their sets, -# and then alphabetically by first differing Script property value. -# -# Following each distinct Script_Extensions value is the list of code -# points associated with that value, listed in code point order. -# # All code points not explicitly listed for Script_Extensions -# have as their value the corresponding Script property value +# have as their value the corresponding Script property value. # # @missing: 0000..10FFFF;