From 1616feb4901579da80452c95c6e0f732b945c7d5 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Thu, 18 Apr 2024 11:44:31 +0200 Subject: Add uprop_get_jg() --- data/DerivedJoiningGroup | 745 ++++++++++++++++++++++++++++++++++++++++ gen/data-files | 1 + gen/prop/jg | 52 +++ include/unicode/prop.h | 108 ++++++ lib/unicode/prop/uprop_get_jg.c | 230 +++++++++++++ 5 files changed, 1136 insertions(+) create mode 100644 data/DerivedJoiningGroup create mode 100755 gen/prop/jg create mode 100644 lib/unicode/prop/uprop_get_jg.c diff --git a/data/DerivedJoiningGroup b/data/DerivedJoiningGroup new file mode 100644 index 0000000..364847b --- /dev/null +++ b/data/DerivedJoiningGroup @@ -0,0 +1,745 @@ +# DerivedJoiningGroup-15.1.0.txt +# Date: 2023-01-05, 20:34:37 GMT +# © 2023 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ + +# ================================================ + +# Joining Group (listing ArabicShaping.txt, field 3) + +# All code points not explicitly listed for Joining_Group +# have the value No_Joining_Group. 
+ +# @missing: 0000..10FFFF; No_Joining_Group + +# ================================================ + +0639..063A ; Ain # Lo [2] ARABIC LETTER AIN..ARABIC LETTER GHAIN +06A0 ; Ain # Lo ARABIC LETTER AIN WITH THREE DOTS ABOVE +06FC ; Ain # Lo ARABIC LETTER GHAIN WITH DOT BELOW +075D..075F ; Ain # Lo [3] ARABIC LETTER AIN WITH TWO DOTS ABOVE..ARABIC LETTER AIN WITH TWO DOTS VERTICALLY ABOVE +08B3 ; Ain # Lo ARABIC LETTER AIN WITH THREE DOTS BELOW +08C3 ; Ain # Lo ARABIC LETTER GHAIN WITH THREE DOTS ABOVE + +# Total code points: 9 + +# ================================================ + +0710 ; Alaph # Lo SYRIAC LETTER ALAPH + +# Total code points: 1 + +# ================================================ + +0622..0623 ; Alef # Lo [2] ARABIC LETTER ALEF WITH MADDA ABOVE..ARABIC LETTER ALEF WITH HAMZA ABOVE +0625 ; Alef # Lo ARABIC LETTER ALEF WITH HAMZA BELOW +0627 ; Alef # Lo ARABIC LETTER ALEF +0671..0673 ; Alef # Lo [3] ARABIC LETTER ALEF WASLA..ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0675 ; Alef # Lo ARABIC LETTER HIGH HAMZA ALEF +0773..0774 ; Alef # Lo [2] ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE +0870..0882 ; Alef # Lo [19] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC LETTER ALEF WITH ATTACHED LEFT HAMZA + +# Total code points: 29 + +# ================================================ + +0628 ; Beh # Lo ARABIC LETTER BEH +062A..062B ; Beh # Lo [2] ARABIC LETTER TEH..ARABIC LETTER THEH +066E ; Beh # Lo ARABIC LETTER DOTLESS BEH +0679..0680 ; Beh # Lo [8] ARABIC LETTER TTEH..ARABIC LETTER BEHEH +0750..0756 ; Beh # Lo [7] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER BEH WITH SMALL V +08A0..08A1 ; Beh # Lo [2] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER BEH WITH HAMZA ABOVE +08B6..08B8 ; Beh # Lo [3] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER TEH WITH SMALL TEH ABOVE +08BE..08C0 ; Beh # Lo [3] ARABIC LETTER PEH WITH SMALL V..ARABIC LETTER 
TTEH WITH SMALL V + +# Total code points: 27 + +# ================================================ + +0712 ; Beth # Lo SYRIAC LETTER BETH +072D ; Beth # Lo SYRIAC LETTER PERSIAN BHETH + +# Total code points: 2 + +# ================================================ + +062F..0630 ; Dal # Lo [2] ARABIC LETTER DAL..ARABIC LETTER THAL +0688..0690 ; Dal # Lo [9] ARABIC LETTER DDAL..ARABIC LETTER DAL WITH FOUR DOTS ABOVE +06EE ; Dal # Lo ARABIC LETTER DAL WITH INVERTED V +0759..075A ; Dal # Lo [2] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW AND SMALL TAH..ARABIC LETTER DAL WITH INVERTED SMALL V BELOW +08AE ; Dal # Lo ARABIC LETTER DAL WITH THREE DOTS BELOW + +# Total code points: 15 + +# ================================================ + +0715..0716 ; Dalath_Rish # Lo [2] SYRIAC LETTER DALATH..SYRIAC LETTER DOTLESS DALATH RISH +072A ; Dalath_Rish # Lo SYRIAC LETTER RISH +072F ; Dalath_Rish # Lo SYRIAC LETTER PERSIAN DHALATH + +# Total code points: 4 + +# ================================================ + +0725 ; E # Lo SYRIAC LETTER E + +# Total code points: 1 + +# ================================================ + +0641 ; Feh # Lo ARABIC LETTER FEH +06A1..06A6 ; Feh # Lo [6] ARABIC LETTER DOTLESS FEH..ARABIC LETTER PEHEH +0760..0761 ; Feh # Lo [2] ARABIC LETTER FEH WITH TWO DOTS BELOW..ARABIC LETTER FEH WITH THREE DOTS POINTING UPWARDS BELOW +08A4 ; Feh # Lo ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE + +# Total code points: 10 + +# ================================================ + +0724 ; Final_Semkath # Lo SYRIAC LETTER FINAL SEMKATH + +# Total code points: 1 + +# ================================================ + +063B..063C ; Gaf # Lo [2] ARABIC LETTER KEHEH WITH TWO DOTS ABOVE..ARABIC LETTER KEHEH WITH THREE DOTS BELOW +06A9 ; Gaf # Lo ARABIC LETTER KEHEH +06AB ; Gaf # Lo ARABIC LETTER KAF WITH RING +06AF..06B4 ; Gaf # Lo [6] ARABIC LETTER GAF..ARABIC LETTER GAF WITH THREE DOTS ABOVE +0762..0764 ; Gaf # Lo [3] ARABIC LETTER KEHEH WITH DOT 
ABOVE..ARABIC LETTER KEHEH WITH THREE DOTS POINTING UPWARDS BELOW +088D ; Gaf # Lo ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW +08B0 ; Gaf # Lo ARABIC LETTER GAF WITH INVERTED STROKE +08C2 ; Gaf # Lo ARABIC LETTER KEHEH WITH SMALL V +08C8 ; Gaf # Lo ARABIC LETTER GRAF + +# Total code points: 17 + +# ================================================ + +0713..0714 ; Gamal # Lo [2] SYRIAC LETTER GAMAL..SYRIAC LETTER GAMAL GARSHUNI +072E ; Gamal # Lo SYRIAC LETTER PERSIAN GHAMAL + +# Total code points: 3 + +# ================================================ + +062C..062E ; Hah # Lo [3] ARABIC LETTER JEEM..ARABIC LETTER KHAH +0681..0687 ; Hah # Lo [7] ARABIC LETTER HAH WITH HAMZA ABOVE..ARABIC LETTER TCHEHEH +06BF ; Hah # Lo ARABIC LETTER TCHEH WITH DOT ABOVE +0757..0758 ; Hah # Lo [2] ARABIC LETTER HAH WITH TWO DOTS ABOVE..ARABIC LETTER HAH WITH THREE DOTS POINTING UPWARDS BELOW +076E..076F ; Hah # Lo [2] ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH BELOW..ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH AND TWO DOTS +0772 ; Hah # Lo ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH ABOVE +077C ; Hah # Lo ARABIC LETTER HAH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW +088A ; Hah # Lo ARABIC LETTER HAH WITH INVERTED SMALL V BELOW +08A2 ; Hah # Lo ARABIC LETTER JEEM WITH TWO DOTS ABOVE +08C1 ; Hah # Lo ARABIC LETTER TCHEH WITH SMALL V +08C5..08C6 ; Hah # Lo [2] ARABIC LETTER JEEM WITH THREE DOTS ABOVE..ARABIC LETTER JEEM WITH THREE DOTS BELOW + +# Total code points: 22 + +# ================================================ + +06C3 ; Teh_Marbuta_Goal # Lo ARABIC LETTER TEH MARBUTA GOAL + +# Total code points: 1 + +# ================================================ + +0717 ; He # Lo SYRIAC LETTER HE + +# Total code points: 1 + +# ================================================ + +0647 ; Heh # Lo ARABIC LETTER HEH + +# Total code points: 1 + +# ================================================ + +06C1..06C2 ; Heh_Goal # Lo [2] ARABIC LETTER HEH GOAL..ARABIC LETTER HEH GOAL 
WITH HAMZA ABOVE + +# Total code points: 2 + +# ================================================ + +071A ; Heth # Lo SYRIAC LETTER HETH + +# Total code points: 1 + +# ================================================ + +0643 ; Kaf # Lo ARABIC LETTER KAF +06AC..06AE ; Kaf # Lo [3] ARABIC LETTER KAF WITH DOT ABOVE..ARABIC LETTER KAF WITH THREE DOTS BELOW +077F ; Kaf # Lo ARABIC LETTER KAF WITH TWO DOTS ABOVE +08B4 ; Kaf # Lo ARABIC LETTER KAF WITH DOT BELOW + +# Total code points: 6 + +# ================================================ + +071F ; Kaph # Lo SYRIAC LETTER KAPH + +# Total code points: 1 + +# ================================================ + +06BE ; Knotted_Heh # Lo ARABIC LETTER HEH DOACHASHMEE +06FF ; Knotted_Heh # Lo ARABIC LETTER HEH WITH INVERTED V + +# Total code points: 2 + +# ================================================ + +0644 ; Lam # Lo ARABIC LETTER LAM +06B5..06B8 ; Lam # Lo [4] ARABIC LETTER LAM WITH SMALL V..ARABIC LETTER LAM WITH THREE DOTS BELOW +076A ; Lam # Lo ARABIC LETTER LAM WITH BAR +08A6 ; Lam # Lo ARABIC LETTER LAM WITH DOUBLE BAR +08C7 ; Lam # Lo ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE + +# Total code points: 8 + +# ================================================ + +0720 ; Lamadh # Lo SYRIAC LETTER LAMADH + +# Total code points: 1 + +# ================================================ + +0645 ; Meem # Lo ARABIC LETTER MEEM +0765..0766 ; Meem # Lo [2] ARABIC LETTER MEEM WITH DOT ABOVE..ARABIC LETTER MEEM WITH DOT BELOW +08A7 ; Meem # Lo ARABIC LETTER MEEM WITH THREE DOTS ABOVE + +# Total code points: 4 + +# ================================================ + +0721 ; Mim # Lo SYRIAC LETTER MIM + +# Total code points: 1 + +# ================================================ + +0646 ; Noon # Lo ARABIC LETTER NOON +06B9..06BC ; Noon # Lo [4] ARABIC LETTER NOON WITH DOT BELOW..ARABIC LETTER NOON WITH RING +0767..0769 ; Noon # Lo [3] ARABIC LETTER NOON WITH TWO DOTS BELOW..ARABIC LETTER NOON WITH SMALL V +0889 ; Noon # Lo 
ARABIC LETTER NOON WITH INVERTED SMALL V + +# Total code points: 9 + +# ================================================ + +0722 ; Nun # Lo SYRIAC LETTER NUN + +# Total code points: 1 + +# ================================================ + +0726 ; Pe # Lo SYRIAC LETTER PE + +# Total code points: 1 + +# ================================================ + +0642 ; Qaf # Lo ARABIC LETTER QAF +066F ; Qaf # Lo ARABIC LETTER DOTLESS QAF +06A7..06A8 ; Qaf # Lo [2] ARABIC LETTER QAF WITH DOT ABOVE..ARABIC LETTER QAF WITH THREE DOTS ABOVE +08A5 ; Qaf # Lo ARABIC LETTER QAF WITH DOT BELOW +08B5 ; Qaf # Lo ARABIC LETTER QAF WITH DOT BELOW AND NO DOTS ABOVE + +# Total code points: 6 + +# ================================================ + +0729 ; Qaph # Lo SYRIAC LETTER QAPH + +# Total code points: 1 + +# ================================================ + +0631..0632 ; Reh # Lo [2] ARABIC LETTER REH..ARABIC LETTER ZAIN +0691..0699 ; Reh # Lo [9] ARABIC LETTER RREH..ARABIC LETTER REH WITH FOUR DOTS ABOVE +06EF ; Reh # Lo ARABIC LETTER REH WITH INVERTED V +075B ; Reh # Lo ARABIC LETTER REH WITH STROKE +076B..076C ; Reh # Lo [2] ARABIC LETTER REH WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER REH WITH HAMZA ABOVE +0771 ; Reh # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS +08AA ; Reh # Lo ARABIC LETTER REH WITH LOOP +08B2 ; Reh # Lo ARABIC LETTER ZAIN WITH INVERTED V ABOVE +08B9 ; Reh # Lo ARABIC LETTER REH WITH SMALL NOON ABOVE + +# Total code points: 19 + +# ================================================ + +0727 ; Reversed_Pe # Lo SYRIAC LETTER REVERSED PE + +# Total code points: 1 + +# ================================================ + +0635..0636 ; Sad # Lo [2] ARABIC LETTER SAD..ARABIC LETTER DAD +069D..069E ; Sad # Lo [2] ARABIC LETTER SAD WITH TWO DOTS BELOW..ARABIC LETTER SAD WITH THREE DOTS ABOVE +06FB ; Sad # Lo ARABIC LETTER DAD WITH DOT BELOW +08AF ; Sad # Lo ARABIC LETTER SAD WITH THREE DOTS BELOW + +# Total code points: 6 + +# 
================================================ + +0728 ; Sadhe # Lo SYRIAC LETTER SADHE + +# Total code points: 1 + +# ================================================ + +0633..0634 ; Seen # Lo [2] ARABIC LETTER SEEN..ARABIC LETTER SHEEN +069A..069C ; Seen # Lo [3] ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE..ARABIC LETTER SEEN WITH THREE DOTS BELOW AND THREE DOTS ABOVE +06FA ; Seen # Lo ARABIC LETTER SHEEN WITH DOT BELOW +075C ; Seen # Lo ARABIC LETTER SEEN WITH FOUR DOTS ABOVE +076D ; Seen # Lo ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE +0770 ; Seen # Lo ARABIC LETTER SEEN WITH SMALL ARABIC LETTER TAH AND TWO DOTS +077D..077E ; Seen # Lo [2] ARABIC LETTER SEEN WITH EXTENDED ARABIC-INDIC DIGIT FOUR ABOVE..ARABIC LETTER SEEN WITH INVERTED V + +# Total code points: 11 + +# ================================================ + +0723 ; Semkath # Lo SYRIAC LETTER SEMKATH + +# Total code points: 1 + +# ================================================ + +072B ; Shin # Lo SYRIAC LETTER SHIN + +# Total code points: 1 + +# ================================================ + +06AA ; Swash_Kaf # Lo ARABIC LETTER SWASH KAF + +# Total code points: 1 + +# ================================================ + +0637..0638 ; Tah # Lo [2] ARABIC LETTER TAH..ARABIC LETTER ZAH +069F ; Tah # Lo ARABIC LETTER TAH WITH THREE DOTS ABOVE +088B..088C ; Tah # Lo [2] ARABIC LETTER TAH WITH DOT BELOW..ARABIC LETTER TAH WITH THREE DOTS BELOW +08A3 ; Tah # Lo ARABIC LETTER TAH WITH TWO DOTS ABOVE + +# Total code points: 6 + +# ================================================ + +072C ; Taw # Lo SYRIAC LETTER TAW + +# Total code points: 1 + +# ================================================ + +0629 ; Teh_Marbuta # Lo ARABIC LETTER TEH MARBUTA +06C0 ; Teh_Marbuta # Lo ARABIC LETTER HEH WITH YEH ABOVE +06D5 ; Teh_Marbuta # Lo ARABIC LETTER AE + +# Total code points: 3 + +# ================================================ + +071B..071C ; Teth # Lo [2] SYRIAC LETTER TETH..SYRIAC LETTER TETH 
GARSHUNI + +# Total code points: 2 + +# ================================================ + +0624 ; Waw # Lo ARABIC LETTER WAW WITH HAMZA ABOVE +0648 ; Waw # Lo ARABIC LETTER WAW +0676..0677 ; Waw # Lo [2] ARABIC LETTER HIGH HAMZA WAW..ARABIC LETTER U WITH HAMZA ABOVE +06C4..06CB ; Waw # Lo [8] ARABIC LETTER WAW WITH RING..ARABIC LETTER VE +06CF ; Waw # Lo ARABIC LETTER WAW WITH DOT ABOVE +0778..0779 ; Waw # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE +08AB ; Waw # Lo ARABIC LETTER WAW WITH DOT WITHIN + +# Total code points: 16 + +# ================================================ + +0718 ; Syriac_Waw # Lo SYRIAC LETTER WAW + +# Total code points: 1 + +# ================================================ + +0620 ; Yeh # Lo ARABIC LETTER KASHMIRI YEH +0626 ; Yeh # Lo ARABIC LETTER YEH WITH HAMZA ABOVE +0649..064A ; Yeh # Lo [2] ARABIC LETTER ALEF MAKSURA..ARABIC LETTER YEH +0678 ; Yeh # Lo ARABIC LETTER HIGH HAMZA YEH +06D0..06D1 ; Yeh # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW +0777 ; Yeh # Lo ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW +08A8..08A9 ; Yeh # Lo [2] ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +08BA ; Yeh # Lo ARABIC LETTER YEH WITH TWO DOTS BELOW AND SMALL NOON ABOVE + +# Total code points: 11 + +# ================================================ + +06D2..06D3 ; Yeh_Barree # Lo [2] ARABIC LETTER YEH BARREE..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE + +# Total code points: 2 + +# ================================================ + +06CD ; Yeh_With_Tail # Lo ARABIC LETTER YEH WITH TAIL + +# Total code points: 1 + +# ================================================ + +071D ; Yudh # Lo SYRIAC LETTER YUDH + +# Total code points: 1 + +# ================================================ + +071E ; Yudh_He # Lo SYRIAC LETTER YUDH HE + +# Total code points: 1 + +# 
================================================ + +0719 ; Zain # Lo SYRIAC LETTER ZAIN + +# Total code points: 1 + +# ================================================ + +074D ; Zhain # Lo SYRIAC LETTER SOGDIAN ZHAIN + +# Total code points: 1 + +# ================================================ + +074E ; Khaph # Lo SYRIAC LETTER SOGDIAN KHAPH + +# Total code points: 1 + +# ================================================ + +074F ; Fe # Lo SYRIAC LETTER SOGDIAN FE + +# Total code points: 1 + +# ================================================ + +077A..077B ; Burushaski_Yeh_Barree # Lo [2] ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE + +# Total code points: 2 + +# ================================================ + +063D..063F ; Farsi_Yeh # Lo [3] ARABIC LETTER FARSI YEH WITH INVERTED V..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +06CC ; Farsi_Yeh # Lo ARABIC LETTER FARSI YEH +06CE ; Farsi_Yeh # Lo ARABIC LETTER YEH WITH SMALL V +0775..0776 ; Farsi_Yeh # Lo [2] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE + +# Total code points: 7 + +# ================================================ + +06BD ; Nya # Lo ARABIC LETTER NOON WITH THREE DOTS ABOVE + +# Total code points: 1 + +# ================================================ + +08AC ; Rohingya_Yeh # Lo ARABIC LETTER ROHINGYA YEH + +# Total code points: 1 + +# ================================================ + +08B1 ; Straight_Waw # Lo ARABIC LETTER STRAIGHT WAW + +# Total code points: 1 + +# ================================================ + +10AC0 ; Manichaean_Aleph # Lo MANICHAEAN LETTER ALEPH + +# Total code points: 1 + +# ================================================ + +10AD9..10ADA ; Manichaean_Ayin # Lo [2] MANICHAEAN LETTER AYIN..MANICHAEAN LETTER AAYIN + +# Total code points: 2 + +# 
================================================ + +10AC1..10AC2 ; Manichaean_Beth # Lo [2] MANICHAEAN LETTER BETH..MANICHAEAN LETTER BHETH + +# Total code points: 2 + +# ================================================ + +10AC5 ; Manichaean_Daleth # Lo MANICHAEAN LETTER DALETH + +# Total code points: 1 + +# ================================================ + +10AD4 ; Manichaean_Dhamedh # Lo MANICHAEAN LETTER DHAMEDH + +# Total code points: 1 + +# ================================================ + +10AEC ; Manichaean_Five # No MANICHAEAN NUMBER FIVE + +# Total code points: 1 + +# ================================================ + +10AC3..10AC4 ; Manichaean_Gimel # Lo [2] MANICHAEAN LETTER GIMEL..MANICHAEAN LETTER GHIMEL + +# Total code points: 2 + +# ================================================ + +10ACD ; Manichaean_Heth # Lo MANICHAEAN LETTER HETH + +# Total code points: 1 + +# ================================================ + +10AEF ; Manichaean_Hundred # No MANICHAEAN NUMBER ONE HUNDRED + +# Total code points: 1 + +# ================================================ + +10AD0..10AD2 ; Manichaean_Kaph # Lo [3] MANICHAEAN LETTER KAPH..MANICHAEAN LETTER KHAPH + +# Total code points: 3 + +# ================================================ + +10AD3 ; Manichaean_Lamedh # Lo MANICHAEAN LETTER LAMEDH + +# Total code points: 1 + +# ================================================ + +10AD6 ; Manichaean_Mem # Lo MANICHAEAN LETTER MEM + +# Total code points: 1 + +# ================================================ + +10AD7 ; Manichaean_Nun # Lo MANICHAEAN LETTER NUN + +# Total code points: 1 + +# ================================================ + +10AEB ; Manichaean_One # No MANICHAEAN NUMBER ONE + +# Total code points: 1 + +# ================================================ + +10ADB..10ADC ; Manichaean_Pe # Lo [2] MANICHAEAN LETTER PE..MANICHAEAN LETTER FE + +# Total code points: 2 + +# ================================================ + +10ADE..10AE0 ; Manichaean_Qoph # 
Lo [3] MANICHAEAN LETTER QOPH..MANICHAEAN LETTER QHOPH + +# Total code points: 3 + +# ================================================ + +10AE1 ; Manichaean_Resh # Lo MANICHAEAN LETTER RESH + +# Total code points: 1 + +# ================================================ + +10ADD ; Manichaean_Sadhe # Lo MANICHAEAN LETTER SADHE + +# Total code points: 1 + +# ================================================ + +10AD8 ; Manichaean_Samekh # Lo MANICHAEAN LETTER SAMEKH + +# Total code points: 1 + +# ================================================ + +10AE4 ; Manichaean_Taw # Lo MANICHAEAN LETTER TAW + +# Total code points: 1 + +# ================================================ + +10AED ; Manichaean_Ten # No MANICHAEAN NUMBER TEN + +# Total code points: 1 + +# ================================================ + +10ACE ; Manichaean_Teth # Lo MANICHAEAN LETTER TETH + +# Total code points: 1 + +# ================================================ + +10AD5 ; Manichaean_Thamedh # Lo MANICHAEAN LETTER THAMEDH + +# Total code points: 1 + +# ================================================ + +10AEE ; Manichaean_Twenty # No MANICHAEAN NUMBER TWENTY + +# Total code points: 1 + +# ================================================ + +10AC7 ; Manichaean_Waw # Lo MANICHAEAN LETTER WAW + +# Total code points: 1 + +# ================================================ + +10ACF ; Manichaean_Yodh # Lo MANICHAEAN LETTER YODH + +# Total code points: 1 + +# ================================================ + +10AC9..10ACA ; Manichaean_Zayin # Lo [2] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER ZHAYIN + +# Total code points: 2 + +# ================================================ + +08BB ; African_Feh # Lo ARABIC LETTER AFRICAN FEH + +# Total code points: 1 + +# ================================================ + +08BC ; African_Qaf # Lo ARABIC LETTER AFRICAN QAF +08C4 ; African_Qaf # Lo ARABIC LETTER AFRICAN QAF WITH THREE DOTS ABOVE + +# Total code points: 2 + +# 
================================================ + +08BD ; African_Noon # Lo ARABIC LETTER AFRICAN NOON + +# Total code points: 1 + +# ================================================ + +0860 ; Malayalam_Nga # Lo SYRIAC LETTER MALAYALAM NGA + +# Total code points: 1 + +# ================================================ + +0861 ; Malayalam_Ja # Lo SYRIAC LETTER MALAYALAM JA + +# Total code points: 1 + +# ================================================ + +0862 ; Malayalam_Nya # Lo SYRIAC LETTER MALAYALAM NYA + +# Total code points: 1 + +# ================================================ + +0863 ; Malayalam_Tta # Lo SYRIAC LETTER MALAYALAM TTA + +# Total code points: 1 + +# ================================================ + +0864 ; Malayalam_Nna # Lo SYRIAC LETTER MALAYALAM NNA + +# Total code points: 1 + +# ================================================ + +0865 ; Malayalam_Nnna # Lo SYRIAC LETTER MALAYALAM NNNA + +# Total code points: 1 + +# ================================================ + +0866 ; Malayalam_Bha # Lo SYRIAC LETTER MALAYALAM BHA + +# Total code points: 1 + +# ================================================ + +0867 ; Malayalam_Ra # Lo SYRIAC LETTER MALAYALAM RA + +# Total code points: 1 + +# ================================================ + +0868 ; Malayalam_Lla # Lo SYRIAC LETTER MALAYALAM LLA + +# Total code points: 1 + +# ================================================ + +0869 ; Malayalam_Llla # Lo SYRIAC LETTER MALAYALAM LLLA + +# Total code points: 1 + +# ================================================ + +086A ; Malayalam_Ssa # Lo SYRIAC LETTER MALAYALAM SSA + +# Total code points: 1 + +# ================================================ + +10D02 ; Hanifi_Rohingya_Pa # Lo HANIFI ROHINGYA LETTER PA +10D09 ; Hanifi_Rohingya_Pa # Lo HANIFI ROHINGYA LETTER FA +10D1C ; Hanifi_Rohingya_Pa # Lo HANIFI ROHINGYA LETTER VA + +# Total code points: 3 + +# ================================================ + +10D19 ; Hanifi_Rohingya_Kinna_Ya # Lo HANIFI 
ROHINGYA LETTER KINNA YA +10D1E ; Hanifi_Rohingya_Kinna_Ya # Lo HANIFI ROHINGYA VOWEL I +10D20 ; Hanifi_Rohingya_Kinna_Ya # Lo HANIFI ROHINGYA VOWEL E +10D23 ; Hanifi_Rohingya_Kinna_Ya # Lo HANIFI ROHINGYA MARK NA KHONNA + +# Total code points: 4 + +# ================================================ + +0886 ; Thin_Yeh # Lo ARABIC LETTER THIN YEH + +# Total code points: 1 + +# ================================================ + +088E ; Vertical_Tail # Lo ARABIC VERTICAL TAIL + +# Total code points: 1 + +# EOF diff --git a/gen/data-files b/gen/data-files index 9222c10..9da5452 100755 --- a/gen/data-files +++ b/gen/data-files @@ -21,6 +21,7 @@ readonly PATHS=' extracted/DerivedBinaryProperties extracted/DerivedDecompositionType extracted/DerivedEastAsianWidth + extracted/DerivedJoiningGroup extracted/DerivedJoiningType extracted/DerivedLineBreak extracted/DerivedNumericType diff --git a/gen/prop/jg b/gen/prop/jg new file mode 100755 index 0000000..e88442d --- /dev/null +++ b/gen/prop/jg @@ -0,0 +1,52 @@ +#!/bin/sh + +set -e +cd "${0%/*}/../.." +exec >lib/unicode/prop/uprop_get_jg.c + +gawk ' +BEGIN { + FS = "[ ;]+" + + print "/* This file is autogenerated by gen/prop/jg; DO NOT EDIT. 
*/" + print "" + print "#include \"_bsearch.h\"" + print "#include \"rune.h\"" + print "#include \"unicode/prop.h\"" + print "" +} + +/^[A-F0-9]/ { + n = split($1, a, /\.\./) + lo = strtonum("0X" a[1]) + hi = strtonum("0X" a[n]) + + for (i = lo; i <= hi; i++) + props[i] = "JG_" toupper($2) +} + +END { + print "static const struct {" + print "\trune lo, hi;" + print "\tenum uprop_jg val;" + print "} lookup[] = {" + + for (i = 0; i <= 0x10FFFF; i++) { + if (!props[i]) + continue + for (lo = i; props[lo] == props[i + 1]; i++) + ; + printf "\t{RUNE_C(0x%06X), RUNE_C(0x%06X), %s},\n", lo, i, props[i] + } + + print "};" + print "" + print "_MLIB_DEFINE_BSEARCH(enum uprop_jg, lookup, JG_NO_JOINING_GROUP)" + print "" + print "enum uprop_jg" + print "uprop_get_jg(rune ch)" + print "{" + print "\treturn ch < lookup[0].lo ? JG_NO_JOINING_GROUP : mlib_lookup(ch);" + print "}" +} +' data/DerivedJoiningGroup | sed 's/[[:space:]]*$//' diff --git a/include/unicode/prop.h b/include/unicode/prop.h index 61e4441..587d53f 100644 --- a/include/unicode/prop.h +++ b/include/unicode/prop.h @@ -674,6 +674,113 @@ enum uprop_insc { INSC_VOWEL_INDEPENDENT, }; +enum uprop_jg { + JG_NO_JOINING_GROUP = 0, /* No Joining Group */ + JG_AFRICAN_FEH, /* African Feh */ + JG_AFRICAN_NOON, /* African Noon */ + JG_AFRICAN_QAF, /* African Qaf */ + JG_AIN, /* Ain */ + JG_ALAPH, /* Alaph */ + JG_ALEF, /* Alef */ + JG_BEH, /* Beh */ + JG_BETH, /* Beth */ + JG_BURUSHASKI_YEH_BARREE, /* Burushaski Yeh Barree */ + JG_DAL, /* Dal */ + JG_DALATH_RISH, /* Dalath Rish */ + JG_E, /* E */ + JG_FARSI_YEH, /* Farsi Yeh */ + JG_FE, /* Fe */ + JG_FEH, /* Feh */ + JG_FINAL_SEMKATH, /* Final Semkath */ + JG_GAF, /* Gaf */ + JG_GAMAL, /* Gamal */ + JG_HAH, /* Hah */ + JG_HANIFI_ROHINGYA_KINNA_YA, /* Hanifi Rohingya Kinna Ya */ + JG_HANIFI_ROHINGYA_PA, /* Hanifi Rohingya Pa */ + JG_HE, /* He */ + JG_HEH, /* Heh */ + JG_HEH_GOAL, /* Heh Goal */ + JG_HETH, /* Heth */ + JG_KAF, /* Kaf */ + JG_KAPH, /* Kaph */ + JG_KHAPH, /* Khaph */ + 
JG_KNOTTED_HEH, /* Knotted Heh */ + JG_LAM, /* Lam */ + JG_LAMADH, /* Lamadh */ + JG_MALAYALAM_BHA, /* Malayalam Bha */ + JG_MALAYALAM_JA, /* Malayalam Ja */ + JG_MALAYALAM_LLA, /* Malayalam Lla */ + JG_MALAYALAM_LLLA, /* Malayalam Llla */ + JG_MALAYALAM_NGA, /* Malayalam Nga */ + JG_MALAYALAM_NNA, /* Malayalam Nna */ + JG_MALAYALAM_NNNA, /* Malayalam Nnna */ + JG_MALAYALAM_NYA, /* Malayalam Nya */ + JG_MALAYALAM_RA, /* Malayalam Ra */ + JG_MALAYALAM_SSA, /* Malayalam Ssa */ + JG_MALAYALAM_TTA, /* Malayalam Tta */ + JG_MANICHAEAN_ALEPH, /* Manichaean Aleph */ + JG_MANICHAEAN_AYIN, /* Manichaean Ayin */ + JG_MANICHAEAN_BETH, /* Manichaean Beth */ + JG_MANICHAEAN_DALETH, /* Manichaean Daleth */ + JG_MANICHAEAN_DHAMEDH, /* Manichaean Dhamedh */ + JG_MANICHAEAN_FIVE, /* Manichaean Five */ + JG_MANICHAEAN_GIMEL, /* Manichaean Gimel */ + JG_MANICHAEAN_HETH, /* Manichaean Heth */ + JG_MANICHAEAN_HUNDRED, /* Manichaean Hundred */ + JG_MANICHAEAN_KAPH, /* Manichaean Kaph */ + JG_MANICHAEAN_LAMEDH, /* Manichaean Lamedh */ + JG_MANICHAEAN_MEM, /* Manichaean Mem */ + JG_MANICHAEAN_NUN, /* Manichaean Nun */ + JG_MANICHAEAN_ONE, /* Manichaean One */ + JG_MANICHAEAN_PE, /* Manichaean Pe */ + JG_MANICHAEAN_QOPH, /* Manichaean Qoph */ + JG_MANICHAEAN_RESH, /* Manichaean Resh */ + JG_MANICHAEAN_SADHE, /* Manichaean Sadhe */ + JG_MANICHAEAN_SAMEKH, /* Manichaean Samekh */ + JG_MANICHAEAN_TAW, /* Manichaean Taw */ + JG_MANICHAEAN_TEN, /* Manichaean Ten */ + JG_MANICHAEAN_TETH, /* Manichaean Teth */ + JG_MANICHAEAN_THAMEDH, /* Manichaean Thamedh */ + JG_MANICHAEAN_TWENTY, /* Manichaean Twenty */ + JG_MANICHAEAN_WAW, /* Manichaean Waw */ + JG_MANICHAEAN_YODH, /* Manichaean Yodh */ + JG_MANICHAEAN_ZAYIN, /* Manichaean Zayin */ + JG_MEEM, /* Meem */ + JG_MIM, /* Mim */ + JG_NOON, /* Noon */ + JG_NUN, /* Nun */ + JG_NYA, /* Nya */ + JG_PE, /* Pe */ + JG_QAF, /* Qaf */ + JG_QAPH, /* Qaph */ + JG_REH, /* Reh */ + JG_REVERSED_PE, /* Reversed Pe */ + JG_ROHINGYA_YEH, /* Rohingya Yeh */ + 
JG_SAD, /* Sad */ + JG_SADHE, /* Sadhe */ + JG_SEEN, /* Seen */ + JG_SEMKATH, /* Semkath */ + JG_SHIN, /* Shin */ + JG_STRAIGHT_WAW, /* Straight Waw */ + JG_SWASH_KAF, /* Swash Kaf */ + JG_SYRIAC_WAW, /* Syriac Waw */ + JG_TAH, /* Tah */ + JG_TAW, /* Taw */ + JG_TEH_MARBUTA, /* Teh Marbuta */ + JG_TEH_MARBUTA_GOAL, /* Teh Marbuta Goal (Unicode alias: Hamza On Heh Goal) */ + JG_TETH, /* Teth */ + JG_THIN_YEH, /* Thin Yeh */ + JG_VERTICAL_TAIL, /* Vertical Tail */ + JG_WAW, /* Waw */ + JG_YEH, /* Yeh */ + JG_YEH_BARREE, /* Yeh Barree */ + JG_YEH_WITH_TAIL, /* Yeh With Tail */ + JG_YUDH, /* Yudh */ + JG_YUDH_HE, /* Yudh He */ + JG_ZAIN, /* Zain */ + JG_ZHAIN, /* Zhain */ +}; + enum uprop_jt { JT_U = 0, /* Non Joining */ JT_C, /* Join Causing */ @@ -932,6 +1039,7 @@ enum uprop_vo { [[_mlib_pure]] enum uprop_hst uprop_get_hst(rune); [[_mlib_pure]] enum uprop_inpc uprop_get_inpc(rune); [[_mlib_pure]] enum uprop_insc uprop_get_insc(rune); +[[_mlib_pure]] enum uprop_jg uprop_get_jg(rune); [[_mlib_pure]] enum uprop_jt uprop_get_jt(rune); [[_mlib_pure]] enum uprop_lb uprop_get_lb(rune); [[_mlib_pure]] enum uprop_nt uprop_get_nt(rune); diff --git a/lib/unicode/prop/uprop_get_jg.c b/lib/unicode/prop/uprop_get_jg.c new file mode 100644 index 0000000..9c38700 --- /dev/null +++ b/lib/unicode/prop/uprop_get_jg.c @@ -0,0 +1,230 @@ +/* This file is autogenerated by gen/prop/jg; DO NOT EDIT. 
*/ + +#include "_bsearch.h" +#include "rune.h" +#include "unicode/prop.h" +/* Joining_Group lookup table: each entry maps the inclusive rune range [lo, hi] to one joining group, with entries listed in ascending order. NOTE(review): presumably generated by gen/prop/jg from data/DerivedJoiningGroup; confirm before editing by hand. */ +static const struct { + rune lo, hi; + enum uprop_jg val; +} lookup[] = { + {RUNE_C(0x000620), RUNE_C(0x000620), JG_YEH}, + {RUNE_C(0x000622), RUNE_C(0x000623), JG_ALEF}, + {RUNE_C(0x000624), RUNE_C(0x000624), JG_WAW}, + {RUNE_C(0x000625), RUNE_C(0x000625), JG_ALEF}, + {RUNE_C(0x000626), RUNE_C(0x000626), JG_YEH}, + {RUNE_C(0x000627), RUNE_C(0x000627), JG_ALEF}, + {RUNE_C(0x000628), RUNE_C(0x000628), JG_BEH}, + {RUNE_C(0x000629), RUNE_C(0x000629), JG_TEH_MARBUTA}, + {RUNE_C(0x00062A), RUNE_C(0x00062B), JG_BEH}, + {RUNE_C(0x00062C), RUNE_C(0x00062E), JG_HAH}, + {RUNE_C(0x00062F), RUNE_C(0x000630), JG_DAL}, + {RUNE_C(0x000631), RUNE_C(0x000632), JG_REH}, + {RUNE_C(0x000633), RUNE_C(0x000634), JG_SEEN}, + {RUNE_C(0x000635), RUNE_C(0x000636), JG_SAD}, + {RUNE_C(0x000637), RUNE_C(0x000638), JG_TAH}, + {RUNE_C(0x000639), RUNE_C(0x00063A), JG_AIN}, + {RUNE_C(0x00063B), RUNE_C(0x00063C), JG_GAF}, + {RUNE_C(0x00063D), RUNE_C(0x00063F), JG_FARSI_YEH}, + {RUNE_C(0x000641), RUNE_C(0x000641), JG_FEH}, + {RUNE_C(0x000642), RUNE_C(0x000642), JG_QAF}, + {RUNE_C(0x000643), RUNE_C(0x000643), JG_KAF}, + {RUNE_C(0x000644), RUNE_C(0x000644), JG_LAM}, + {RUNE_C(0x000645), RUNE_C(0x000645), JG_MEEM}, + {RUNE_C(0x000646), RUNE_C(0x000646), JG_NOON}, + {RUNE_C(0x000647), RUNE_C(0x000647), JG_HEH}, + {RUNE_C(0x000648), RUNE_C(0x000648), JG_WAW}, + {RUNE_C(0x000649), RUNE_C(0x00064A), JG_YEH}, + {RUNE_C(0x00066E), RUNE_C(0x00066E), JG_BEH}, + {RUNE_C(0x00066F), RUNE_C(0x00066F), JG_QAF}, + {RUNE_C(0x000671), RUNE_C(0x000673), JG_ALEF}, + {RUNE_C(0x000675), RUNE_C(0x000675), JG_ALEF}, + {RUNE_C(0x000676), RUNE_C(0x000677), JG_WAW}, + {RUNE_C(0x000678), RUNE_C(0x000678), JG_YEH}, + {RUNE_C(0x000679), RUNE_C(0x000680), JG_BEH}, + {RUNE_C(0x000681), RUNE_C(0x000687), JG_HAH}, + {RUNE_C(0x000688), RUNE_C(0x000690), JG_DAL}, + {RUNE_C(0x000691), RUNE_C(0x000699), JG_REH}, + {RUNE_C(0x00069A), RUNE_C(0x00069C), JG_SEEN}, + 
{RUNE_C(0x00069D), RUNE_C(0x00069E), JG_SAD}, + {RUNE_C(0x00069F), RUNE_C(0x00069F), JG_TAH}, + {RUNE_C(0x0006A0), RUNE_C(0x0006A0), JG_AIN}, + {RUNE_C(0x0006A1), RUNE_C(0x0006A6), JG_FEH}, + {RUNE_C(0x0006A7), RUNE_C(0x0006A8), JG_QAF}, + {RUNE_C(0x0006A9), RUNE_C(0x0006A9), JG_GAF}, + {RUNE_C(0x0006AA), RUNE_C(0x0006AA), JG_SWASH_KAF}, + {RUNE_C(0x0006AB), RUNE_C(0x0006AB), JG_GAF}, + {RUNE_C(0x0006AC), RUNE_C(0x0006AE), JG_KAF}, + {RUNE_C(0x0006AF), RUNE_C(0x0006B4), JG_GAF}, + {RUNE_C(0x0006B5), RUNE_C(0x0006B8), JG_LAM}, + {RUNE_C(0x0006B9), RUNE_C(0x0006BC), JG_NOON}, + {RUNE_C(0x0006BD), RUNE_C(0x0006BD), JG_NYA}, + {RUNE_C(0x0006BE), RUNE_C(0x0006BE), JG_KNOTTED_HEH}, + {RUNE_C(0x0006BF), RUNE_C(0x0006BF), JG_HAH}, + {RUNE_C(0x0006C0), RUNE_C(0x0006C0), JG_TEH_MARBUTA}, + {RUNE_C(0x0006C1), RUNE_C(0x0006C2), JG_HEH_GOAL}, + {RUNE_C(0x0006C3), RUNE_C(0x0006C3), JG_TEH_MARBUTA_GOAL}, + {RUNE_C(0x0006C4), RUNE_C(0x0006CB), JG_WAW}, + {RUNE_C(0x0006CC), RUNE_C(0x0006CC), JG_FARSI_YEH}, + {RUNE_C(0x0006CD), RUNE_C(0x0006CD), JG_YEH_WITH_TAIL}, + {RUNE_C(0x0006CE), RUNE_C(0x0006CE), JG_FARSI_YEH}, + {RUNE_C(0x0006CF), RUNE_C(0x0006CF), JG_WAW}, + {RUNE_C(0x0006D0), RUNE_C(0x0006D1), JG_YEH}, + {RUNE_C(0x0006D2), RUNE_C(0x0006D3), JG_YEH_BARREE}, + {RUNE_C(0x0006D5), RUNE_C(0x0006D5), JG_TEH_MARBUTA}, + {RUNE_C(0x0006EE), RUNE_C(0x0006EE), JG_DAL}, + {RUNE_C(0x0006EF), RUNE_C(0x0006EF), JG_REH}, + {RUNE_C(0x0006FA), RUNE_C(0x0006FA), JG_SEEN}, + {RUNE_C(0x0006FB), RUNE_C(0x0006FB), JG_SAD}, + {RUNE_C(0x0006FC), RUNE_C(0x0006FC), JG_AIN}, + {RUNE_C(0x0006FF), RUNE_C(0x0006FF), JG_KNOTTED_HEH}, + {RUNE_C(0x000710), RUNE_C(0x000710), JG_ALAPH}, + {RUNE_C(0x000712), RUNE_C(0x000712), JG_BETH}, + {RUNE_C(0x000713), RUNE_C(0x000714), JG_GAMAL}, + {RUNE_C(0x000715), RUNE_C(0x000716), JG_DALATH_RISH}, + {RUNE_C(0x000717), RUNE_C(0x000717), JG_HE}, + {RUNE_C(0x000718), RUNE_C(0x000718), JG_SYRIAC_WAW}, + {RUNE_C(0x000719), RUNE_C(0x000719), JG_ZAIN}, + {RUNE_C(0x00071A), 
RUNE_C(0x00071A), JG_HETH}, + {RUNE_C(0x00071B), RUNE_C(0x00071C), JG_TETH}, + {RUNE_C(0x00071D), RUNE_C(0x00071D), JG_YUDH}, + {RUNE_C(0x00071E), RUNE_C(0x00071E), JG_YUDH_HE}, + {RUNE_C(0x00071F), RUNE_C(0x00071F), JG_KAPH}, + {RUNE_C(0x000720), RUNE_C(0x000720), JG_LAMADH}, + {RUNE_C(0x000721), RUNE_C(0x000721), JG_MIM}, + {RUNE_C(0x000722), RUNE_C(0x000722), JG_NUN}, + {RUNE_C(0x000723), RUNE_C(0x000723), JG_SEMKATH}, + {RUNE_C(0x000724), RUNE_C(0x000724), JG_FINAL_SEMKATH}, + {RUNE_C(0x000725), RUNE_C(0x000725), JG_E}, + {RUNE_C(0x000726), RUNE_C(0x000726), JG_PE}, + {RUNE_C(0x000727), RUNE_C(0x000727), JG_REVERSED_PE}, + {RUNE_C(0x000728), RUNE_C(0x000728), JG_SADHE}, + {RUNE_C(0x000729), RUNE_C(0x000729), JG_QAPH}, + {RUNE_C(0x00072A), RUNE_C(0x00072A), JG_DALATH_RISH}, + {RUNE_C(0x00072B), RUNE_C(0x00072B), JG_SHIN}, + {RUNE_C(0x00072C), RUNE_C(0x00072C), JG_TAW}, + {RUNE_C(0x00072D), RUNE_C(0x00072D), JG_BETH}, + {RUNE_C(0x00072E), RUNE_C(0x00072E), JG_GAMAL}, + {RUNE_C(0x00072F), RUNE_C(0x00072F), JG_DALATH_RISH}, + {RUNE_C(0x00074D), RUNE_C(0x00074D), JG_ZHAIN}, + {RUNE_C(0x00074E), RUNE_C(0x00074E), JG_KHAPH}, + {RUNE_C(0x00074F), RUNE_C(0x00074F), JG_FE}, + {RUNE_C(0x000750), RUNE_C(0x000756), JG_BEH}, + {RUNE_C(0x000757), RUNE_C(0x000758), JG_HAH}, + {RUNE_C(0x000759), RUNE_C(0x00075A), JG_DAL}, + {RUNE_C(0x00075B), RUNE_C(0x00075B), JG_REH}, + {RUNE_C(0x00075C), RUNE_C(0x00075C), JG_SEEN}, + {RUNE_C(0x00075D), RUNE_C(0x00075F), JG_AIN}, + {RUNE_C(0x000760), RUNE_C(0x000761), JG_FEH}, + {RUNE_C(0x000762), RUNE_C(0x000764), JG_GAF}, + {RUNE_C(0x000765), RUNE_C(0x000766), JG_MEEM}, + {RUNE_C(0x000767), RUNE_C(0x000769), JG_NOON}, + {RUNE_C(0x00076A), RUNE_C(0x00076A), JG_LAM}, + {RUNE_C(0x00076B), RUNE_C(0x00076C), JG_REH}, + {RUNE_C(0x00076D), RUNE_C(0x00076D), JG_SEEN}, + {RUNE_C(0x00076E), RUNE_C(0x00076F), JG_HAH}, + {RUNE_C(0x000770), RUNE_C(0x000770), JG_SEEN}, + {RUNE_C(0x000771), RUNE_C(0x000771), JG_REH}, + {RUNE_C(0x000772), RUNE_C(0x000772), 
JG_HAH}, + {RUNE_C(0x000773), RUNE_C(0x000774), JG_ALEF}, + {RUNE_C(0x000775), RUNE_C(0x000776), JG_FARSI_YEH}, + {RUNE_C(0x000777), RUNE_C(0x000777), JG_YEH}, + {RUNE_C(0x000778), RUNE_C(0x000779), JG_WAW}, + {RUNE_C(0x00077A), RUNE_C(0x00077B), JG_BURUSHASKI_YEH_BARREE}, + {RUNE_C(0x00077C), RUNE_C(0x00077C), JG_HAH}, + {RUNE_C(0x00077D), RUNE_C(0x00077E), JG_SEEN}, + {RUNE_C(0x00077F), RUNE_C(0x00077F), JG_KAF}, + {RUNE_C(0x000860), RUNE_C(0x000860), JG_MALAYALAM_NGA}, + {RUNE_C(0x000861), RUNE_C(0x000861), JG_MALAYALAM_JA}, + {RUNE_C(0x000862), RUNE_C(0x000862), JG_MALAYALAM_NYA}, + {RUNE_C(0x000863), RUNE_C(0x000863), JG_MALAYALAM_TTA}, + {RUNE_C(0x000864), RUNE_C(0x000864), JG_MALAYALAM_NNA}, + {RUNE_C(0x000865), RUNE_C(0x000865), JG_MALAYALAM_NNNA}, + {RUNE_C(0x000866), RUNE_C(0x000866), JG_MALAYALAM_BHA}, + {RUNE_C(0x000867), RUNE_C(0x000867), JG_MALAYALAM_RA}, + {RUNE_C(0x000868), RUNE_C(0x000868), JG_MALAYALAM_LLA}, + {RUNE_C(0x000869), RUNE_C(0x000869), JG_MALAYALAM_LLLA}, + {RUNE_C(0x00086A), RUNE_C(0x00086A), JG_MALAYALAM_SSA}, + {RUNE_C(0x000870), RUNE_C(0x000882), JG_ALEF}, + {RUNE_C(0x000886), RUNE_C(0x000886), JG_THIN_YEH}, + {RUNE_C(0x000889), RUNE_C(0x000889), JG_NOON}, + {RUNE_C(0x00088A), RUNE_C(0x00088A), JG_HAH}, + {RUNE_C(0x00088B), RUNE_C(0x00088C), JG_TAH}, + {RUNE_C(0x00088D), RUNE_C(0x00088D), JG_GAF}, + {RUNE_C(0x00088E), RUNE_C(0x00088E), JG_VERTICAL_TAIL}, + {RUNE_C(0x0008A0), RUNE_C(0x0008A1), JG_BEH}, + {RUNE_C(0x0008A2), RUNE_C(0x0008A2), JG_HAH}, + {RUNE_C(0x0008A3), RUNE_C(0x0008A3), JG_TAH}, + {RUNE_C(0x0008A4), RUNE_C(0x0008A4), JG_FEH}, + {RUNE_C(0x0008A5), RUNE_C(0x0008A5), JG_QAF}, + {RUNE_C(0x0008A6), RUNE_C(0x0008A6), JG_LAM}, + {RUNE_C(0x0008A7), RUNE_C(0x0008A7), JG_MEEM}, + {RUNE_C(0x0008A8), RUNE_C(0x0008A9), JG_YEH}, + {RUNE_C(0x0008AA), RUNE_C(0x0008AA), JG_REH}, + {RUNE_C(0x0008AB), RUNE_C(0x0008AB), JG_WAW}, + {RUNE_C(0x0008AC), RUNE_C(0x0008AC), JG_ROHINGYA_YEH}, + {RUNE_C(0x0008AE), RUNE_C(0x0008AE), JG_DAL}, + 
{RUNE_C(0x0008AF), RUNE_C(0x0008AF), JG_SAD}, + {RUNE_C(0x0008B0), RUNE_C(0x0008B0), JG_GAF}, + {RUNE_C(0x0008B1), RUNE_C(0x0008B1), JG_STRAIGHT_WAW}, + {RUNE_C(0x0008B2), RUNE_C(0x0008B2), JG_REH}, + {RUNE_C(0x0008B3), RUNE_C(0x0008B3), JG_AIN}, + {RUNE_C(0x0008B4), RUNE_C(0x0008B4), JG_KAF}, + {RUNE_C(0x0008B5), RUNE_C(0x0008B5), JG_QAF}, + {RUNE_C(0x0008B6), RUNE_C(0x0008B8), JG_BEH}, + {RUNE_C(0x0008B9), RUNE_C(0x0008B9), JG_REH}, + {RUNE_C(0x0008BA), RUNE_C(0x0008BA), JG_YEH}, + {RUNE_C(0x0008BB), RUNE_C(0x0008BB), JG_AFRICAN_FEH}, + {RUNE_C(0x0008BC), RUNE_C(0x0008BC), JG_AFRICAN_QAF}, + {RUNE_C(0x0008BD), RUNE_C(0x0008BD), JG_AFRICAN_NOON}, + {RUNE_C(0x0008BE), RUNE_C(0x0008C0), JG_BEH}, + {RUNE_C(0x0008C1), RUNE_C(0x0008C1), JG_HAH}, + {RUNE_C(0x0008C2), RUNE_C(0x0008C2), JG_GAF}, + {RUNE_C(0x0008C3), RUNE_C(0x0008C3), JG_AIN}, + {RUNE_C(0x0008C4), RUNE_C(0x0008C4), JG_AFRICAN_QAF}, + {RUNE_C(0x0008C5), RUNE_C(0x0008C6), JG_HAH}, + {RUNE_C(0x0008C7), RUNE_C(0x0008C7), JG_LAM}, + {RUNE_C(0x0008C8), RUNE_C(0x0008C8), JG_GAF}, + {RUNE_C(0x010AC0), RUNE_C(0x010AC0), JG_MANICHAEAN_ALEPH}, + {RUNE_C(0x010AC1), RUNE_C(0x010AC2), JG_MANICHAEAN_BETH}, + {RUNE_C(0x010AC3), RUNE_C(0x010AC4), JG_MANICHAEAN_GIMEL}, + {RUNE_C(0x010AC5), RUNE_C(0x010AC5), JG_MANICHAEAN_DALETH}, + {RUNE_C(0x010AC7), RUNE_C(0x010AC7), JG_MANICHAEAN_WAW}, + {RUNE_C(0x010AC9), RUNE_C(0x010ACA), JG_MANICHAEAN_ZAYIN}, + {RUNE_C(0x010ACD), RUNE_C(0x010ACD), JG_MANICHAEAN_HETH}, + {RUNE_C(0x010ACE), RUNE_C(0x010ACE), JG_MANICHAEAN_TETH}, + {RUNE_C(0x010ACF), RUNE_C(0x010ACF), JG_MANICHAEAN_YODH}, + {RUNE_C(0x010AD0), RUNE_C(0x010AD2), JG_MANICHAEAN_KAPH}, + {RUNE_C(0x010AD3), RUNE_C(0x010AD3), JG_MANICHAEAN_LAMEDH}, + {RUNE_C(0x010AD4), RUNE_C(0x010AD4), JG_MANICHAEAN_DHAMEDH}, + {RUNE_C(0x010AD5), RUNE_C(0x010AD5), JG_MANICHAEAN_THAMEDH}, + {RUNE_C(0x010AD6), RUNE_C(0x010AD6), JG_MANICHAEAN_MEM}, + {RUNE_C(0x010AD7), RUNE_C(0x010AD7), JG_MANICHAEAN_NUN}, + {RUNE_C(0x010AD8), RUNE_C(0x010AD8), 
JG_MANICHAEAN_SAMEKH}, + {RUNE_C(0x010AD9), RUNE_C(0x010ADA), JG_MANICHAEAN_AYIN}, + {RUNE_C(0x010ADB), RUNE_C(0x010ADC), JG_MANICHAEAN_PE}, + {RUNE_C(0x010ADD), RUNE_C(0x010ADD), JG_MANICHAEAN_SADHE}, + {RUNE_C(0x010ADE), RUNE_C(0x010AE0), JG_MANICHAEAN_QOPH}, + {RUNE_C(0x010AE1), RUNE_C(0x010AE1), JG_MANICHAEAN_RESH}, + {RUNE_C(0x010AE4), RUNE_C(0x010AE4), JG_MANICHAEAN_TAW}, + {RUNE_C(0x010AEB), RUNE_C(0x010AEB), JG_MANICHAEAN_ONE}, + {RUNE_C(0x010AEC), RUNE_C(0x010AEC), JG_MANICHAEAN_FIVE}, + {RUNE_C(0x010AED), RUNE_C(0x010AED), JG_MANICHAEAN_TEN}, + {RUNE_C(0x010AEE), RUNE_C(0x010AEE), JG_MANICHAEAN_TWENTY}, + {RUNE_C(0x010AEF), RUNE_C(0x010AEF), JG_MANICHAEAN_HUNDRED}, + {RUNE_C(0x010D02), RUNE_C(0x010D02), JG_HANIFI_ROHINGYA_PA}, + {RUNE_C(0x010D09), RUNE_C(0x010D09), JG_HANIFI_ROHINGYA_PA}, + {RUNE_C(0x010D19), RUNE_C(0x010D19), JG_HANIFI_ROHINGYA_KINNA_YA}, + {RUNE_C(0x010D1C), RUNE_C(0x010D1C), JG_HANIFI_ROHINGYA_PA}, + {RUNE_C(0x010D1E), RUNE_C(0x010D1E), JG_HANIFI_ROHINGYA_KINNA_YA}, + {RUNE_C(0x010D20), RUNE_C(0x010D20), JG_HANIFI_ROHINGYA_KINNA_YA}, + {RUNE_C(0x010D23), RUNE_C(0x010D23), JG_HANIFI_ROHINGYA_KINNA_YA}, +}; +/* NOTE(review): presumably defines the mlib_lookup() helper used below as a search over lookup[] that yields JG_NO_JOINING_GROUP when no range matches; confirm in _bsearch.h. */ +_MLIB_DEFINE_BSEARCH(enum uprop_jg, lookup, JG_NO_JOINING_GROUP) +/* Return the joining group recorded for ch. Runes below the first range in lookup[] are answered with JG_NO_JOINING_GROUP directly, without calling mlib_lookup(). */ +enum uprop_jg +uprop_get_jg(rune ch) +{ + return ch < lookup[0].lo ? JG_NO_JOINING_GROUP : mlib_lookup(ch); +} -- cgit v1.2.3