diff options
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | Cargo.lock | 40 | ||||
| -rw-r--r-- | Cargo.toml | 2 | ||||
| -rwxr-xr-x | fetch (renamed from unigen/fetch) | 0 | ||||
| -rw-r--r-- | oryxc/.gitignore | 1 | ||||
| -rw-r--r-- | oryxc/Cargo.toml | 13 | ||||
| -rw-r--r-- | oryxc/build.rs | 192 | ||||
| -rw-r--r-- | oryxc/src/unicode/line_terminator.rs | 136 | ||||
| -rw-r--r-- | oryxc/src/unicode/pattern_white_space.rs | 138 | ||||
| -rw-r--r-- | oryxc/src/unicode/xid_continue.rs | 1008 | ||||
| -rw-r--r-- | oryxc/src/unicode/xid_start.rs | 928 | ||||
| -rw-r--r-- | unigen/Cargo.toml | 7 | ||||
| -rw-r--r-- | unigen/src/main.rs | 279 |
13 files changed, 206 insertions, 2540 deletions
@@ -1,3 +1,3 @@ target/ -unigen/data/ +data/ .idea/ @@ -180,12 +180,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] -name = "lexopt" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "803ec87c9cfb29b9d2633f20cba1f488db3fd53f2158b1024cbefb47ba05d413" - -[[package]] name = "libc" version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -213,7 +207,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] -name = "oryxc" +name = "oryx" version = "0.1.0" dependencies = [ "boxcar", @@ -222,7 +216,6 @@ dependencies = [ "dashmap", "phf", "soa-rs", - "unicode-normalization", "unicode-width", ] @@ -394,49 +387,18 @@ dependencies = [ ] [[package]] -name = "tinyvec" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] -name = "unicode-normalization" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" -dependencies = [ - "tinyvec", -] - -[[package]] name = "unicode-width" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] -name = "unigen" -version = "0.1.0" -dependencies = [ - "lexopt", -] - -[[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1,4 +1,4 @@ [workspace] resolver = "3" default-members = ["oryxc"] -members = ["oryxc", "unigen"] +members = ["oryxc"] diff --git a/oryxc/.gitignore b/oryxc/.gitignore deleted file mode 100644 index ea8c4bf..0000000 --- a/oryxc/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target diff --git a/oryxc/Cargo.toml b/oryxc/Cargo.toml index a7ab1e4..aeeea7d 100644 --- a/oryxc/Cargo.toml +++ b/oryxc/Cargo.toml @@ -1,15 +1,16 @@ [package] -name = "oryxc" +name = "oryx" version = "0.1.0" edition = "2024" [dependencies] boxcar = "0.2.14" -clap = { version = "4", features = ["derive"] } +clap = { version = "4", features = ["derive"] } crossbeam-deque = "0.8.6" dashmap = "6.1.0" +phf = { version = "0.13.1", features = ["macros"] } +soa-rs = "0.9.1" +unicode-width = "0.2.2" + # num-rational = "0.4.2" -phf = { version = "0.13.1", features = ["macros"] } -soa-rs = "0.9.1" -unicode-normalization = "0.1.25" -unicode-width = "0.2.2" +# icu = { version = "2.1.1", features = ["compiled_data"] } diff --git a/oryxc/build.rs b/oryxc/build.rs new file mode 100644 index 0000000..59233c5 --- /dev/null +++ b/oryxc/build.rs @@ -0,0 +1,192 @@ +use std::collections::HashMap; +use std::env; +use std::fs::File; +use std::io::{ + self, + BufRead, + BufReader, + Write, +}; + +const MIN_SHIFT: usize = 1; +const MAX_SHIFT: usize = 22; + +fn main() { + let out_dir = env::var("OUT_DIR").unwrap(); + let root = env::var("CARGO_MANIFEST_DIR").unwrap(); + let data = format!("{root}/../data"); + + println!("cargo:rerun-if-changed={data}/DerivedCoreProperties.txt"); + println!("cargo:rerun-if-changed={data}/PropList.txt"); + + generate_from_file( + &out_dir, + &format!("{data}/DerivedCoreProperties.txt"), + "XID_Start", + "xid_start", + ); + generate_from_file( + &out_dir, + &format!("{data}/DerivedCoreProperties.txt"), + "XID_Continue", + "xid_continue", + ); + generate_from_file( + &out_dir, + &format!("{data}/PropList.txt"), + "Pattern_White_Space", + "pattern_white_space", + ); + generate_from_codepoints( + &out_dir, + &[ + '\u{A}', '\u{B}', '\u{C}', '\u{D}', '\u{85}', '\u{2028}', + '\u{2029}', + ], + "line_terminator", + ); +} + +fn generate_from_file(out_dir: &str, path: &str, prop: &str, name: &str) { + let mut bitmap = vec![false; 0x110000]; + parse_file(path, prop, &mut bitmap) + .unwrap_or_else(|e| panic!("failed to read {path}: {e}")); + write_output(out_dir, name, &bitmap); +} + +fn generate_from_codepoints(out_dir: &str, codepoints: &[char], name: &str) { + let mut bitmap = vec![false; 0x110000]; + for &c in codepoints { + bitmap[c as usize] = true; + } + write_output(out_dir, name, &bitmap); +} + +fn write_output(out_dir: &str, name: &str, bitmap: &[bool]) { + let (shift, lvl1, lvl2) = optimize_tables(bitmap); + let mut f = File::create(format!("{out_dir}/{name}.rs")).unwrap(); + generate_code(&mut f, name, shift, &lvl1, &lvl2); +} + +fn optimize_tables(bitmap: &[bool]) -> (usize, Vec<u16>, Vec<u64>) { + let mut minsz = usize::MAX; + let mut config = (0, Vec::new(), Vec::new()); + + for i in MIN_SHIFT..=MAX_SHIFT { + let (l1, l2) = build_tables(bitmap, i); + let sz = l1.len() * 2 + l2.len() * 8; + if sz < minsz { + minsz = sz; + config = (i, l1, l2); + } + } + + config +} + +fn parse_file(path: &str, prop: &str, bitmap: &mut [bool]) -> io::Result<()> { + let file = File::open(path)?; + let reader = BufReader::new(file); + + for line in reader.lines() { + let line = line?; + let line = line.split('#').next().unwrap_or("").trim(); + if line.is_empty() { + continue; + } + + let parts: Vec<&str> = line.split(';').map(|s| s.trim()).collect(); + if parts.len() < 2 || parts[1] != prop { + continue; + } + + let (beg, end) = if parts[0].contains("..") { + let mut range = parts[0].split(".."); + ( + u32::from_str_radix(range.next().unwrap(), 16).unwrap(), + u32::from_str_radix(range.next().unwrap(), 16).unwrap(), + ) + } else { + let val = u32::from_str_radix(parts[0], 16).unwrap(); + (val, val) + }; + + for cp in beg..=end { + if (cp as usize) < bitmap.len() { + bitmap[cp as usize] = true; + } + } + } + + Ok(()) +} + +fn build_tables(bitmap: &[bool], shift: usize) -> (Vec<u16>, Vec<u64>) { + let blksz = 1 << shift; + let u64s_per_block = (blksz + 63) / 64; + + let mut lvl2: Vec<u64> = Vec::new(); + let mut lvl1: Vec<u16> = Vec::new(); + let mut blkmap: HashMap<Vec<u64>, u16> = HashMap::new(); + + for chunk in bitmap.chunks(blksz) { + let mut blkdata = vec![0u64; u64s_per_block]; + + for (i, &bit) in chunk.iter().enumerate() { + if bit { + let word_idx = i / 64; + let bit_idx = i % 64; + blkdata[word_idx] |= 1 << bit_idx; + } + } + + if let Some(&i) = blkmap.get(&blkdata) { + lvl1.push(i); + } else { + let i = (lvl2.len() / u64s_per_block) as u16; + lvl2.extend_from_slice(&blkdata); + blkmap.insert(blkdata, i); + lvl1.push(i); + } + } + + (lvl1, lvl2) +} + +fn generate_code( + f: &mut impl Write, + prop_name: &str, + shift: usize, + level1: &[u16], + level2: &[u64], +) { + let upper_name = prop_name.to_uppercase(); + let lower_name = prop_name.to_lowercase(); + let block_size = 1 << shift; + let mask = block_size - 1; + let u64s_per_block = (block_size + 63) / 64; + + let pred_name = if lower_name.contains('_') { + format!("{lower_name}_p") + } else { + format!("{lower_name}p") + }; + + writeln!(f, "/* Autogenerated – DO NOT EDIT */").unwrap(); + writeln!(f).unwrap(); + writeln!(f, "static {upper_name}_L1: [u16; {}] = {level1:?};", level1.len()).unwrap(); + writeln!(f, "static {upper_name}_L2: [u64; {}] = {level2:?};", level2.len()).unwrap(); + writeln!(f, "#[inline]").unwrap(); + writeln!(f, "pub fn {pred_name}(c: char) -> bool {{").unwrap(); + writeln!(f, "\tlet cp = c as usize;").unwrap(); + writeln!(f, "\tlet blki = unsafe {{ *{upper_name}_L1.get_unchecked(cp >> {shift}) }} as usize;").unwrap(); + writeln!(f, "\tlet in_blk_offset_p = cp & 0x{mask:X};").unwrap(); + if u64s_per_block == 1 { + writeln!(f, "\tunsafe {{ return ({upper_name}_L2.get_unchecked(blki) & (1 << in_blk_offset_p)) != 0; }}").unwrap(); + } else { + writeln!(f, "\tlet wordi = (blki * {u64s_per_block}) + (in_blk_offset_p >> 6);").unwrap(); + writeln!(f, "\tlet biti = in_blk_offset_p & 0x3F;").unwrap(); + writeln!(f, "\tunsafe {{ return (*{upper_name}_L2.get_unchecked(wordi) & (1 << biti)) != 0; }}").unwrap(); + } + writeln!(f, "}}").unwrap(); +} diff --git a/oryxc/src/unicode/line_terminator.rs b/oryxc/src/unicode/line_terminator.rs index 5c6b052..300bb54 100644 --- a/oryxc/src/unicode/line_terminator.rs +++ b/oryxc/src/unicode/line_terminator.rs @@ -1,135 +1 @@ -/* Autogenerated – DO NOT EDIT */ - -static LINE_TERMINATOR_L1: [u16; 544] = [ - 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -]; -static LINE_TERMINATOR_L2: [u64; 96] = [ - 7168, - 0, - 32, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 3298534883328, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -]; -#[inline] -pub fn line_terminator_p(c: char) -> bool { - let cp = c as usize; - let blki = unsafe { *LINE_TERMINATOR_L1.get_unchecked(cp >> 11) } as usize; - let in_blk_offset_p = cp & 0x7FF; - let wordi = (blki * 32) + (in_blk_offset_p >> 6); - let biti = in_blk_offset_p & 0x3F; - unsafe { - return (*LINE_TERMINATOR_L2.get_unchecked(wordi) & (1 << biti)) != 0; - } -} +include!(concat!(env!("OUT_DIR"), "/line_terminator.rs")); diff --git a/oryxc/src/unicode/pattern_white_space.rs b/oryxc/src/unicode/pattern_white_space.rs index b051e3a..f90db8d 100644 --- a/oryxc/src/unicode/pattern_white_space.rs +++ b/oryxc/src/unicode/pattern_white_space.rs @@ -1,137 +1 @@ -/* Autogenerated – DO NOT EDIT */ - -static PATTERN_WHITE_SPACE_L1: [u16; 544] = [ - 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -]; -static PATTERN_WHITE_SPACE_L2: [u64; 96] = [ - 4294983168, - 0, - 32, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 3298534932480, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -]; -#[inline] -pub fn pattern_white_space_p(c: char) -> bool { - let cp = c as usize; - let blki = - unsafe { *PATTERN_WHITE_SPACE_L1.get_unchecked(cp >> 11) } as usize; - let in_blk_offset_p = cp & 0x7FF; - let wordi = (blki * 32) + (in_blk_offset_p >> 6); - let biti = in_blk_offset_p & 0x3F; - unsafe { - return (*PATTERN_WHITE_SPACE_L2.get_unchecked(wordi) & (1 << biti)) - != 0; - } -} +include!(concat!(env!("OUT_DIR"), "/pattern_white_space.rs")); diff --git a/oryxc/src/unicode/xid_continue.rs b/oryxc/src/unicode/xid_continue.rs index 8fbbce4..571ae40 100644 --- a/oryxc/src/unicode/xid_continue.rs +++ b/oryxc/src/unicode/xid_continue.rs @@ -1,1007 +1 @@ -/* Autogenerated – DO NOT EDIT */ - -static XID_CONTINUE_L1: [u16; 1088] = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 12, 12, 12, 12, 12, 13, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 14, 15, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 9, 9, 9, 9, 9, 9, - 9, 9, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 9, 29, 12, 30, 12, - 12, 31, 32, 9, 9, 9, 9, 9, 9, 33, 9, 34, 35, 12, 12, 12, 12, 12, 12, 12, - 36, 9, 9, 9, 9, 9, 9, 9, 37, 38, 9, 9, 39, 9, 9, 9, 40, 41, 42, 43, 44, 45, - 46, 47, 48, 9, 9, 49, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 50, 12, 12, 12, 12, 51, 12, 12, 12, - 12, 52, 12, 12, 12, 12, 12, 12, 53, 54, 9, 9, 55, 9, 12, 12, 12, 12, 56, - 12, 12, 12, 12, 12, 12, 12, 12, 57, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 58, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, -]; -static XID_CONTINUE_L2: [u64; 944] = [ - 287948901175001088, - 576460745995190270, - 333270770471927808, - 18410715276682199039, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 88094074470339, - 18446744073709551615, - 13321647697761927167, - 18446744056529672128, - 18428729675200069631, - 18446744073709551615, - 18446744073709551615, - 18446744073709550843, - 18446744073709551615, - 18446462598732840959, - 18446744069456527359, - 13835058055282033151, - 2119858418286774, - 18446744069548736512, - 18446678103011885055, - 18446744073709551615, - 11529212845433552895, - 18446744073709486080, - 18446744073709545471, - 1125899906842623, - 2612087783874887679, - 70368744177663, - 18446471390799331327, - 18446744073701228287, - 18446744056529682431, - 18446744073709551615, - 18446462392574410751, - 17565725197581524975, - 5765733215448889759, - 15235112390417287150, - 18014125208779143, - 17576984196650090478, - 18302910150157089727, - 17576984196649951214, - 844217444219295, - 14123225865944680428, - 281200107273671, - 17582050746231021567, - 281265452367327, - 17577547146603651055, - 4221916082617823, - 18446744073709412351, - 18158794964244397535, - 3457638613854978030, - 3658904103781503, - 576460752303423486, - 67076095, - 4611685674830002134, - 4093607775, - 14024213633433600001, - 18446216308128218879, - 2305843009196916703, - 64, - 18446744073709551615, - 18446744073709487103, - 18446744070488326143, - 17870283321406070975, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744070446333439, - 9168765891372858879, - 18446744073701162813, - 18446744073696837631, - 1123704775901183, - 18446744069414649855, - 4557642822898941951, - 18446744073709551614, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446638520593285119, - 18446744069548802046, - 144053615424700415, - 9007197111451647, - 3905461007941631, - 18446744073709551615, - 4394566287359, - 18446744069481674752, - 144115188075855871, - 18446471394825863167, - 18014398509481983, - 1152657619668697087, - 8796093022207936, - 18446480190918885375, - 134153215, - 18446744069683019775, - 11529215043920986111, - 13834777130128311295, - 17588964818943, - 18446744073709551615, - 4494803601399807, - 18446744073709551615, - 4503599627370495, - 72057594037927935, - 4611686018427380735, - 16717361816799217663, - 576460752302833664, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744070475743231, - 4611686017001275199, - 6908521828386340863, - 2295745090394464220, - 9231253336202686464, - 9223934986817634305, - 536805376, - 562821641207808, - 17582049991377026180, - 18446744069414601696, - 511, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 4494940973301759, - 18446498607738650623, - 9223513873854758911, - 9187201948305063935, - 18446744071553646463, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 2251518330118602976, - 18446744073709551614, - 18446744068986765311, - 18446744073709551615, - 18446462598732840928, - 18446744073709551615, - 18446744069414617087, - 18446462598732840960, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 8191, - 4611686018427322368, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 17592185987071, - 13830835930631503871, - 18446744073709551615, - 1125899906842623, - 18446744060816261120, - 18446744073709551615, - 18446744073709550079, - 18446181124293001215, - 18691697672191, - 4503599627370495, - 18446744073709551615, - 16789419406609285183, - 18446532967477018623, - 2305843004919775231, - 18446744073709551615, - 9223372032626884609, - 36028797018963967, - 18194542490348896255, - 18446744073709551615, - 35184368733388807, - 18446602782178705022, - 18446466996645134335, - 18446744073709551615, - 288010473826156543, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446462667452317695, - 1152921504606845055, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446532967477018623, - 18446744073709551615, - 67108863, - 6881498031078244479, - 18446744073709551579, - 1125899906842623, - 18446744073709027328, - 18446744073709551615, - 18446744006063816703, - 18446744073709551615, - 18446744073709551615, - 4611686018427387903, - 18446744073709486080, - 18446744073709355007, - 287948901175001343, - 7036870122864639, - 12288634533233876992, - 18446744073709551615, - 2305843009213693951, - 9799832780635308032, - 18446743936404815870, - 9223372036854775807, - 486341884, - 13258596753222922239, - 1073692671, - 18446744073709551615, - 576460752303423487, - 0, - 9007199254740991, - 0, - 2305843009213693952, - 0, - 0, - 18446744069951455231, - 4295098367, - 18446708893632430079, - 576460752303359999, - 18446744070488326143, - 4128527, - 18446744073709551615, - 18446744073709551615, - 18446466993558126591, - 1152921504591118335, - 18446463698244468735, - 17870001915148894207, - 2016486715970549759, - 4503599627370495, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 36028797018963967, - 1095220854783, - 575897802350002111, - 0, - 10502394331027995967, - 36028792728190975, - 2147483647, - 15762594400829440, - 288230371860938751, - 67108863, - 13907115649320091647, - 0, - 9745789593611923567, - 2305843004918726656, - 536870911, - 549755813631, - 18014398509481983, - 2251795522912255, - 262143, - 0, - 18446744073709551615, - 511, - 2251799813685247, - 2251799813685247, - 287950000686628863, - 18446671780820025343, - 63, - 0, - 0, - 0, - 875211255709695, - 18158513697557840124, - 18446463149025525759, - 18446462598732972031, - 18446462598732841023, - 36028792723996703, - 18446744073709551615, - 9241386160486350975, - 576460752303423487, - 287951100198191108, - 18437736874454810623, - 22517998136787184, - 18446744073709551615, - 402644511, - 13907115649319829503, - 3, - 18446464796682337663, - 287957697268023295, - 18153444948953374703, - 8760701963286943, - 18428729675200023551, - 25770850213, - 18446744073709551615, - 16173172735, - 18446744073709551615, - 67043519, - 0, - 0, - 18392700878181105663, - 1056964609, - 18446744073709551615, - 67043345, - 144115188075855871, - 68719412223, - 287966492958392319, - 127, - 0, - 0, - 576460752303423487, - 0, - 18446744069414584320, - 9223376434901286911, - 17996384110963061375, - 67043343, - 18446740770879700992, - 120208752639, - 9223372036854775807, - 18446744073709486208, - 18446462599336820735, - 144115188075855871, - 0, - 1095216660480, - 0, - 287948909764935679, - 18410715276690587135, - 18445618173869752321, - 36027697507139583, - 0, - 13006395723845991295, - 18446741595580465407, - 18446466992517644287, - 4394019979263, - 0, - 0, - 0, - 36028792723996672, - 14411518807585456127, - 134152199, - 281474976710656, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 67108863, - 0, - 18446744073709551615, - 140737488355327, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 15, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709486080, - 562949953421311, - 281474976710655, - 18446744069418778623, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 576460752303423487, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 127, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 288230376151711743, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 144115188075855871, - 18446466994631868415, - 9223372036854775807, - 8796093022143487, - 36028797018963967, - 16212958624241090575, - 65535, - 0, - 0, - 0, - 0, - 0, - 0, - 287984085547089919, - 0, - 0, - 0, - 18446744073709551615, - 18014398505187016704, - 1048575, - 18446744073709551615, - 18446744073709520895, - 4294934783, - 35747438006370304, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 9223372036858970111, - 2147483647, - 0, - 18446744073709551615, - 2251799813685247, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 8065665457643847680, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 1125934266580991, - 18446463629527547904, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 1152921504606846975, - 0, - 0, - 0, - 0, - 18446744073709551615, - 2305570330330005503, - 1677656575, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 287948901175001088, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446532967477018623, - 127, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 17872504197455282176, - 65970697670631, - 0, - 0, - 28, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073707454463, - 17005555242810474495, - 18446744073709551599, - 8935141660164089791, - 18446744073709419615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446743249075830783, - 17870283321271910397, - 18437736874452713471, - 18446603336221163519, - 18446741874686295551, - 18446744073709539319, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 17906312118425092095, - 9042383626829823, - 281470547525648, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 8660801552383, - 0, - 0, - 0, - 18446471240106377087, - 70368744177663, - 32768, - 0, - 4611439727822766079, - 17407, - 0, - 0, - 0, - 0, - 140737488289792, - 288230376151711743, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 288230376151646208, - 0, - 0, - 0, - 576460752303357952, - 0, - 0, - 0, - 13853072451644162047, - 0, - 0, - 0, - 9223213153129594880, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 8323103, - 18446744073709551615, - 67047423, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 790380184120328175, - 6843210385291930244, - 1152917029519358975, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 287948901175001088, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 4294967295, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744070488326143, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446532967477018623, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446462607322775551, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 1073741823, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 1073741823, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709488127, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 288230376151711743, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 281474976710655, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -]; -#[inline] -pub fn xid_continue_p(c: char) -> bool { - let cp = c as usize; - let blki = unsafe { *XID_CONTINUE_L1.get_unchecked(cp >> 10) } as usize; - let in_blk_offset_p = cp & 0x3FF; - let wordi = (blki * 16) + (in_blk_offset_p >> 6); - let biti = in_blk_offset_p & 0x3F; - unsafe { - return (*XID_CONTINUE_L2.get_unchecked(wordi) & (1 << biti)) != 0; - } -} +include!(concat!(env!("OUT_DIR"), "/xid_continue.rs")); diff --git a/oryxc/src/unicode/xid_start.rs b/oryxc/src/unicode/xid_start.rs index 1c9d9ae..649faa5 100644 --- a/oryxc/src/unicode/xid_start.rs +++ b/oryxc/src/unicode/xid_start.rs @@ -1,927 +1 @@ -/* Autogenerated – DO NOT EDIT */ - -static XID_START_L1: [u16; 1088] = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 12, 12, 12, 12, 12, 13, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 14, 15, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 9, 9, 9, 9, 9, 9, - 9, 9, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 9, 29, 12, 30, 12, - 12, 31, 32, 9, 9, 9, 9, 9, 9, 33, 9, 34, 35, 12, 12, 12, 12, 12, 12, 12, - 36, 9, 9, 9, 9, 9, 9, 9, 37, 38, 9, 9, 39, 9, 9, 9, 9, 9, 40, 9, 41, 42, - 43, 44, 45, 9, 9, 9, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 46, 12, 12, 12, 12, 47, 12, 12, 12, 12, - 48, 12, 12, 12, 12, 12, 12, 49, 50, 9, 9, 51, 9, 12, 12, 12, 12, 52, 12, - 12, 12, 12, 12, 12, 12, 12, 53, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, -]; -static XID_START_L2: [u64; 864] = [ - 68719476736, - 576460745995190270, - 297241973452963840, - 18410715276682199039, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 88094074470339, - 0, - 13321366222785216512, - 18446744056529672000, - 18428729675200069631, - 18446744073709551615, - 18446744073709551615, - 18446744073709550595, - 18446744073709551615, - 18446462598732840959, - 18446744069456527359, - 511, - 2119858418286592, - 18446744069414584320, - 18446392229988665343, - 18446744073709551615, - 11241196188469297151, - 281474976514048, - 18446744073709543424, - 563224831328255, - 301749971126844416, - 1168302407679, - 18446471390564450303, - 18446744069414649599, - 1023, - 2594073385365405680, - 18446181140919287808, - 2577745637692514273, - 1153765945374687232, - 247132830528276448, - 7881300924956672, - 2589004636761079776, - 144115200960823296, - 2589004636760940512, - 562965791113216, - 288167810662516712, - 65536, - 2594071186342010848, - 13807648768, - 2589567586714640353, - 1688864624214016, - 2882303761516978160, - 18158513712597581824, - 3457638613854978016, - 127, - 1688849860263934, - 127, - 2307531515476572118, - 4026531935, - 1, - 35184372088575, - 7936, - 0, - 9223380832947798015, - 18438229877581611008, - 18446744069414600707, - 17870283321406070975, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744070446333439, - 9168765891372858879, - 18446744073701162813, - 18446744073696837631, - 134217727, - 18446744069414649855, - 4557642822898941951, - 18446744073709551614, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446638520593285119, - 18446744069548802046, - 144053615424700415, - 1125897759621119, - 527761286627327, - 4503599627370495, - 276824064, - 18446744069414584320, - 144115188075855871, - 18446469195802607615, - 18014398509481983, - 2147483647, - 8796093022142464, - 18446480190918885375, - 1023, - 18446744069422972927, - 2097151, - 549755813888, - 0, - 4503599627370464, - 8160, - 18158724812380307448, - 274877906943, - 68719476735, - 4611686018360336384, - 16717361816799217663, - 319718190147960832, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744070475743231, - 4611686017001275199, - 6908521828386340863, - 2295745090394464220, - 0, - 9223934986808197120, - 536805376, - 0, - 17582049991377026180, - 18446744069414601696, - 511, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 3509778554814463, - 18446498607738650623, - 141836999983103, - 9187201948305063935, - 2139062143, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 2251241253188403424, - 18446744073709551614, - 18446744068886102015, - 17870283321406128127, - 18446462598732840928, - 18446744073709551615, - 18446744069414617087, - 18446462598732840960, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 8191, - 4611686018427322368, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 13198434443263, - 9223512774343131135, - 18446744070488326143, - 281474976710655, - 18446744060816261120, - 18446744073709551615, - 18446744073709550079, - 18446181124293001215, - 34359736251, - 4503599627370495, - 4503599627370492, - 7564921474075590656, - 18446462873610746880, - 2305843004918726783, - 2251799813685232, - 8935422993945886720, - 2199023255551, - 14159317224157876215, - 4495436853045886975, - 7890092085477381, - 18446602782178705022, - 18446466996645134335, - 18446744073709551615, - 34359738367, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446462667452317695, - 1152921504606845055, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446532967477018623, - 18446744073709551615, - 67108863, - 6881498030004502655, - 18446744073709551579, - 1125899906842623, - 18446744073709027328, - 18446744073709551615, - 18446744006063816703, - 18446744073709551615, - 18446744073709551615, - 4611686018427387903, - 18446744073709486080, - 18446744073709355007, - 287948901175001343, - 0, - 12288634533233819648, - 18446744073709551615, - 2305843009213693951, - 576460743713488896, - 18446743798965862398, - 9223372033633550335, - 486341884, - 13258596753222922239, - 1073692671, - 18446744073709551615, - 576460752303423487, - 0, - 9007199254740991, - 0, - 0, - 0, - 0, - 18446744069951455231, - 131071, - 18446708893632430079, - 18014398509418495, - 18446744070488326143, - 4128527, - 18446744073709551615, - 18446744073709551615, - 18446462599806582783, - 1152921504591118335, - 18446463698244468735, - 17870001915148894207, - 2016486715970549759, - 4503599627370495, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 36028797018963967, - 1095220854783, - 575897802350002111, - 0, - 10502394331027995967, - 36028792728190975, - 2147483647, - 15762594400829440, - 288230371860938751, - 67108863, - 13907115649320091647, - 0, - 18014398491590657, - 2305843004918726656, - 536870911, - 137438953215, - 18014398509481983, - 2251795522912255, - 262143, - 0, - 18446744073709551615, - 511, - 2251799813685247, - 2251799813685247, - 68719476735, - 18446603611099102208, - 63, - 0, - 0, - 0, - 848822976643071, - 252, - 18446463149025525759, - 18446462598732841023, - 18446462598732840963, - 36028792723996703, - 72057594037927928, - 10696049115004928, - 281474976710648, - 2199023190016, - 549755813880, - 20266198323101840, - 2251799813685240, - 335544350, - 9223389629040558079, - 1, - 18446464796682337663, - 2147483647, - 2589004636760940512, - 16643063808, - 54043195528399871, - 655360, - 9007199254740991, - 15032387456, - 281474976710655, - 176, - 0, - 0, - 140737488355327, - 251658240, - 281474976710655, - 16, - 72066390130950143, - 0, - 134217727, - 127, - 0, - 0, - 17592186044415, - 0, - 18446744069414584320, - 9223372041149743103, - 9223653511822045823, - 2, - 18446740770879700992, - 42949804031, - 290482175965394945, - 18446744073441181696, - 18446462599269712895, - 144115188075855871, - 0, - 0, - 0, - 8589934591, - 140737488354815, - 18445618173802708993, - 65535, - 0, - 562949953420159, - 18446741595513421888, - 18446462598749619199, - 268435455, - 0, - 0, - 0, - 2251795518717952, - 4503599627239412, - 0, - 281474976710656, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 67108863, - 0, - 18446744073709551615, - 140737488355327, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 15, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709486080, - 562949953421311, - 281474976710655, - 18446744069414584446, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 576460752303423487, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 127, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1073741823, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 144115188075855871, - 18446462600880324607, - 9223372036854775807, - 70368744112128, - 281474976710655, - 16212958624174047247, - 65535, - 0, - 0, - 0, - 0, - 0, - 0, - 35184372088831, - 0, - 0, - 0, - 18446744073709551615, - 18014398505187016704, - 1048575, - 18446744073709551615, - 67583, - 4294443008, - 34902944356761600, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 9223372036858970111, - 2147483647, - 0, - 18446744073709551615, - 2251799813685247, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 8065665457643847680, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 1125934266580991, - 18446463629527547904, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 1152921504606846975, - 0, - 0, - 0, - 0, - 18446744073709551615, - 2305570330330005503, - 67043839, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073707454463, - 17005555242810474495, - 18446744073709551599, - 8935141660164089791, - 18446744073709419615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446743249075830783, - 17870283321271910397, - 18437736874452713471, - 18446603336221163519, - 18446741874686295551, - 4087, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 8660801552383, - 0, - 0, - 0, - 18446462598732840960, - 70368744177663, - 0, - 0, - 4575692405780512767, - 16384, - 0, - 0, - 0, - 0, - 70368744112128, - 17592186044415, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 17592185978880, - 0, - 0, - 0, - 351843720822784, - 0, - 0, - 0, - 13843853836919242751, - 0, - 0, - 0, - 9223213153129594880, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 31, - 18446744073709551615, - 2063, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 790380184120328175, - 6843210385291930244, - 1152917029519358975, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 4294967295, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744070488326143, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446532967477018623, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446462607322775551, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 1073741823, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 1073741823, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 18446744073709488127, - 18446744073709551615, - 18446744073709551615, - 18446744073709551615, - 288230376151711743, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -]; -#[inline] -pub fn xid_start_p(c: char) -> bool { - let cp = c as usize; - let blki = unsafe { *XID_START_L1.get_unchecked(cp >> 10) } as usize; - let in_blk_offset_p = cp & 0x3FF; - let wordi = (blki * 16) + (in_blk_offset_p >> 6); - let biti = in_blk_offset_p & 0x3F; - unsafe { - return (*XID_START_L2.get_unchecked(wordi) & (1 << biti)) != 0; - } -} +include!(concat!(env!("OUT_DIR"), "/xid_start.rs")); diff --git a/unigen/Cargo.toml b/unigen/Cargo.toml deleted file mode 100644 index 587afb7..0000000 --- a/unigen/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "unigen" -version = "0.1.0" -edition = "2024" - -[dependencies] -lexopt = "0.3.2" diff --git a/unigen/src/main.rs b/unigen/src/main.rs deleted file mode 100644 index bedabc6..0000000 --- a/unigen/src/main.rs +++ /dev/null @@ -1,279 +0,0 @@ -use std::collections::HashMap; -use std::ffi::{ - OsStr, - OsString, -}; -use std::fs::File; -use std::io::{ - self, - BufRead, - BufReader, -}; -use std::path::Path; -use std::sync::OnceLock; -use std::vec::Vec; -use std::{ - env, - process, -}; - -const MIN_SHIFT: usize = 1; -const MAX_SHIFT: usize = 22; - -#[derive(Default)] -struct Flags { - codepoints: Option<Vec<char>>, - help: bool, -} - -impl Flags { - fn parse() -> Result<(Flags, Vec<String>), lexopt::Error> { - use lexopt::prelude::*; - - let mut rest = Vec::with_capacity(env::args().len() - 1); - let mut flags = Flags::default(); - let mut parser = lexopt::Parser::from_env(); - parser.set_short_equals(false); - - while let Some(arg) = parser.next()? { - match arg { - Short('c') | Long("codepoints") => { - fn hex_to_char(s: &str) -> char { - return u32::from_str_radix(s, 16).map_or_else( - |e| { - eprintln!("{}: {s}: {e}", progname().display()); - process::exit(1); - }, - |n| { - char::from_u32(n).unwrap_or_else(|| { - eprintln!( - "{}: {s}: invalid codepoint", - progname().display() - ); - process::exit(1); - }) - }, - ); - } - - flags.codepoints = Some( - parser - .value()? - .to_str() - .unwrap_or_else(|| { - eprintln!( - "{}: unable to parse argument to -c/--codepoints", - progname().display() - ); - process::exit(1); - }) - .split(',') - .map(hex_to_char) - .collect(), - ); - }, - Short('h') | Long("help") => flags.help = true, - Value(v) => rest.push(v.into_string()?), - _ => return Err(arg.unexpected()), - } - } - - return Ok((flags, rest)); - } -} - -fn progname() -> &'static OsString { - static ARGV0: OnceLock<OsString> = OnceLock::new(); - return ARGV0.get_or_init(|| { - let default = OsStr::new("oryxc"); - let s = env::args_os().next().unwrap_or(default.into()); - return Path::new(&s).file_name().unwrap_or(default).to_os_string(); - }); -} - -fn usage() { - eprintln!( - concat!( - "Usage: {0} data-file property-name\n", - " {0} -c codepoints name\n", - " {0} -h", - ), - progname().display() - ); -} - -fn main() -> io::Result<()> { - let (flags, rest) = match Flags::parse() { - Ok(v) => v, - Err(e) => { - eprintln!("{}: {e}", progname().display()); - usage(); - process::exit(1); - }, - }; - - if flags.help { - usage(); - process::exit(0); - } - - if (flags.codepoints.is_none() && rest.len() != 2) - || (flags.codepoints.is_some() && rest.len() != 1) - { - usage(); - process::exit(1); - } - - let mut bitmap = vec![false; 0x110000]; - let name = match flags.codepoints { - Some(vec) => { - vec.iter().for_each(|c| bitmap[*c as usize] = true); - &rest[0] - }, - None => { - parse_file(&rest[0], &rest[1], &mut bitmap)?; - &rest[1] - }, - }; - let (shift, lvl1, lvl2) = optimize_tables(&bitmap); - write_tables(name, shift, &lvl1, &lvl2); - return Ok(()); -} - -fn optimize_tables(bitmap: &[bool]) -> (usize, Vec<u16>, Vec<u64>) { - let mut minsz = usize::MAX; - let mut config = (0, Vec::new(), Vec::new()); - - for i in MIN_SHIFT..=MAX_SHIFT { - let (l1, l2) = build_tables(bitmap, i); - let sz = l1.len() * 2 + l2.len() * 8; - if sz < minsz { - minsz = sz; - config = (i, l1, l2); - } - } - - return config; -} - -fn parse_file<P: AsRef<Path>>( - path: P, - prop: &str, - bitmap: &mut [bool], -) -> io::Result<()> { - let file = File::open(path)?; - let reader = BufReader::new(file); - - for line in reader.lines() { - let line = line?; - let line = line.split('#').next().unwrap_or("").trim(); - if line.is_empty() { - continue; - } - - let parts: Vec<&str> = line.split(';').map(|s| s.trim()).collect(); - if parts.len() < 2 || parts[1] != prop { - continue; - } - - let (beg, end) = if parts[0].contains("..") { - let mut range = parts[0].split(".."); - ( - u32::from_str_radix(range.next().unwrap(), 16).unwrap(), - u32::from_str_radix(range.next().unwrap(), 16).unwrap(), - ) - } else { - let val = u32::from_str_radix(parts[0], 16).unwrap(); - (val, val) - }; - - for cp in beg..=end { - if (cp as usize) < bitmap.len() { - bitmap[cp as usize] = true; - } - } - } - return Ok(()); -} - -fn build_tables(bitmap: &[bool], shift: usize) -> (Vec<u16>, Vec<u64>) { - let blksz = 1 << shift; - let u64s_per_block = (blksz + 63) / 64; - - let mut lvl2: Vec<u64> = Vec::new(); - let mut lvl1: Vec<u16> = Vec::new(); - let mut blkmap: HashMap<Vec<u64>, u16> = HashMap::new(); - - for chunk in bitmap.chunks(blksz) { - let mut blkdata = vec![0u64; u64s_per_block]; - - for (i, &bit) in chunk.iter().enumerate() { - if bit { - let word_idx = i / 64; - let bit_idx = i % 64; - blkdata[word_idx] |= 1 << bit_idx; - } - } - - if let Some(&i) = blkmap.get(&blkdata) { - lvl1.push(i); - } else { - let i = (lvl2.len() / u64s_per_block) as u16; - lvl2.extend_from_slice(&blkdata); - blkmap.insert(blkdata, i); - lvl1.push(i); - } - } - - return (lvl1, lvl2); -} - -fn write_tables(prop_name: &str, shift: usize, level1: &[u16], level2: &[u64]) { - let upper_name = prop_name.to_uppercase(); - let lower_name = prop_name.to_lowercase(); - let block_size = 1 << shift; - let mask = block_size - 1; - let u64s_per_block = (block_size + 63) / 64; - - println!("/* Autogenerated – DO NOT EDIT */\n"); - print!( - "static {upper_name}_L1: [u16; {}] = {level1:?};", - level1.len() - ); - print!( - "static {upper_name}_L2: [u64; {}] = {level2:?};", - level2.len() - ); - - let pred_name = if lower_name.contains('_') { - format!("{lower_name}_p") - } else { - format!("{lower_name}p") - }; - - print!( - "#[inline] - pub fn {pred_name}(c: char) -> bool {{ - let cp = c as usize; - let blki = unsafe {{ *{upper_name}_L1.get_unchecked(cp >> {shift}) }} as usize; - let in_blk_offset_p = cp & 0x{mask:X};" - ); - - if u64s_per_block == 1 { - print!( - " unsafe {{ - return ({upper_name}_L2.get_unchecked(blki) & (1 << in_blk_offset_p)) != 0; - }}" - ); - } else { - print!( - "let wordi = (blki * {u64s_per_block}) + (in_blk_offset_p >> 6); - let biti = in_blk_offset_p & 0x3F; - unsafe {{ - return (*{upper_name}_L2.get_unchecked(wordi) & (1 << biti)) != 0; - }}" - ); - } - - print!("}}"); -} |