summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--Cargo.lock40
-rw-r--r--Cargo.toml2
-rwxr-xr-xfetch (renamed from unigen/fetch)0
-rw-r--r--oryxc/.gitignore1
-rw-r--r--oryxc/Cargo.toml13
-rw-r--r--oryxc/build.rs192
-rw-r--r--oryxc/src/unicode/line_terminator.rs136
-rw-r--r--oryxc/src/unicode/pattern_white_space.rs138
-rw-r--r--oryxc/src/unicode/xid_continue.rs1008
-rw-r--r--oryxc/src/unicode/xid_start.rs928
-rw-r--r--unigen/Cargo.toml7
-rw-r--r--unigen/src/main.rs279
13 files changed, 206 insertions, 2540 deletions
diff --git a/.gitignore b/.gitignore
index 72e2ff5..b42a62c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
target/
-unigen/data/
+data/
.idea/
diff --git a/Cargo.lock b/Cargo.lock
index b835f73..0a66b5c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -180,12 +180,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
-name = "lexopt"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "803ec87c9cfb29b9d2633f20cba1f488db3fd53f2158b1024cbefb47ba05d413"
-
-[[package]]
name = "libc"
version = "0.2.182"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -213,7 +207,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
-name = "oryxc"
+name = "oryx"
version = "0.1.0"
dependencies = [
"boxcar",
@@ -222,7 +216,6 @@ dependencies = [
"dashmap",
"phf",
"soa-rs",
- "unicode-normalization",
"unicode-width",
]
@@ -394,49 +387,18 @@ dependencies = [
]
[[package]]
-name = "tinyvec"
-version = "1.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
-dependencies = [
- "tinyvec_macros",
-]
-
-[[package]]
-name = "tinyvec_macros"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
-
-[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
-name = "unicode-normalization"
-version = "0.1.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
-dependencies = [
- "tinyvec",
-]
-
-[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
-name = "unigen"
-version = "0.1.0"
-dependencies = [
- "lexopt",
-]
-
-[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index cbe097e..4221c83 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,4 +1,4 @@
[workspace]
resolver = "3"
default-members = ["oryxc"]
-members = ["oryxc", "unigen"]
+members = ["oryxc"]
diff --git a/unigen/fetch b/fetch
index 46c02f9..46c02f9 100755
--- a/unigen/fetch
+++ b/fetch
diff --git a/oryxc/.gitignore b/oryxc/.gitignore
deleted file mode 100644
index ea8c4bf..0000000
--- a/oryxc/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-/target
diff --git a/oryxc/Cargo.toml b/oryxc/Cargo.toml
index a7ab1e4..aeeea7d 100644
--- a/oryxc/Cargo.toml
+++ b/oryxc/Cargo.toml
@@ -1,15 +1,16 @@
[package]
-name = "oryxc"
+name = "oryx"
version = "0.1.0"
edition = "2024"
[dependencies]
boxcar = "0.2.14"
-clap = { version = "4", features = ["derive"] }
+clap = { version = "4", features = ["derive"] }
crossbeam-deque = "0.8.6"
dashmap = "6.1.0"
+phf = { version = "0.13.1", features = ["macros"] }
+soa-rs = "0.9.1"
+unicode-width = "0.2.2"
+
# num-rational = "0.4.2"
-phf = { version = "0.13.1", features = ["macros"] }
-soa-rs = "0.9.1"
-unicode-normalization = "0.1.25"
-unicode-width = "0.2.2"
+# icu = { version = "2.1.1", features = ["compiled_data"] }
diff --git a/oryxc/build.rs b/oryxc/build.rs
new file mode 100644
index 0000000..59233c5
--- /dev/null
+++ b/oryxc/build.rs
@@ -0,0 +1,192 @@
+use std::collections::HashMap;
+use std::env;
+use std::fs::File;
+use std::io::{
+ self,
+ BufRead,
+ BufReader,
+ Write,
+};
+
+const MIN_SHIFT: usize = 1;
+const MAX_SHIFT: usize = 22;
+
+/// Build-script entry point.
+///
+/// Reads `OUT_DIR` and `CARGO_MANIFEST_DIR` from the environment, tells Cargo
+/// to rerun the script when either Unicode data file changes, and then emits
+/// one generated lookup module per property into `OUT_DIR`.
+fn main() {
+    let out_dir = env::var("OUT_DIR").unwrap();
+    let data = format!("{}/../data", env::var("CARGO_MANIFEST_DIR").unwrap());
+
+    let derived = format!("{data}/DerivedCoreProperties.txt");
+    let prop_list = format!("{data}/PropList.txt");
+
+    for input in [&derived, &prop_list] {
+        println!("cargo:rerun-if-changed={input}");
+    }
+
+    // (data file, property name inside that file, generated module name)
+    let file_backed = [
+        (&derived, "XID_Start", "xid_start"),
+        (&derived, "XID_Continue", "xid_continue"),
+        (&prop_list, "Pattern_White_Space", "pattern_white_space"),
+    ];
+    for (path, prop, name) in file_backed {
+        generate_from_file(&out_dir, path, prop, name);
+    }
+
+    // This set is listed by hand here rather than read from a data file.
+    const LINE_TERMINATORS: [char; 7] = [
+        '\u{A}', '\u{B}', '\u{C}', '\u{D}', '\u{85}', '\u{2028}', '\u{2029}',
+    ];
+    generate_from_codepoints(&out_dir, &LINE_TERMINATORS, "line_terminator");
+}
+
+/// Generates `{out_dir}/{name}.rs` for the property `prop` as listed in the
+/// data file at `path`.
+///
+/// # Panics
+///
+/// Panics with the offending path if the data file cannot be parsed.
+fn generate_from_file(out_dir: &str, path: &str, prop: &str, name: &str) {
+    // One flag per code point in the Unicode range 0..=0x10FFFF.
+    let mut members = vec![false; 0x110000];
+    if let Err(e) = parse_file(path, prop, &mut members) {
+        panic!("failed to read {path}: {e}");
+    }
+    write_output(out_dir, name, &members);
+}
+
+/// Generates `{out_dir}/{name}.rs` for an explicitly listed set of code
+/// points instead of a property read from a data file.
+fn generate_from_codepoints(out_dir: &str, codepoints: &[char], name: &str) {
+    // One flag per code point in the Unicode range 0..=0x10FFFF.
+    let mut members = vec![false; 0x110000];
+    codepoints.iter().for_each(|&c| members[c as usize] = true);
+    write_output(out_dir, name, &members);
+}
+
+/// Picks the most compact table layout for `bitmap` and writes the generated
+/// lookup code to `{out_dir}/{name}.rs`.
+///
+/// # Panics
+///
+/// Panics, naming the output path, if the file cannot be created or flushed.
+fn write_output(out_dir: &str, name: &str, bitmap: &[bool]) {
+    let (shift, lvl1, lvl2) = optimize_tables(bitmap);
+
+    let path = format!("{out_dir}/{name}.rs");
+    let file = File::create(&path)
+        .unwrap_or_else(|e| panic!("failed to create {path}: {e}"));
+
+    // The table dumps are formatted element by element; buffering keeps that
+    // from turning into a large number of tiny writes on the file.
+    let mut out = io::BufWriter::new(file);
+    generate_code(&mut out, name, shift, &lvl1, &lvl2);
+
+    // Flush explicitly: an error during the implicit flush on drop is ignored.
+    out.flush()
+        .unwrap_or_else(|e| panic!("failed to write {path}: {e}"));
+}
+
+/// Tries every block shift in `MIN_SHIFT..=MAX_SHIFT` and returns the
+/// `(shift, level-1 table, level-2 table)` with the smallest combined size in
+/// bytes (2 per level-1 entry, 8 per level-2 word).
+///
+/// On a tie the smallest shift wins. If the shift range were empty the result
+/// is `(0, [], [])`.
+fn optimize_tables(bitmap: &[bool]) -> (usize, Vec<u16>, Vec<u64>) {
+    (MIN_SHIFT..=MAX_SHIFT)
+        .map(|shift| {
+            let (l1, l2) = build_tables(bitmap, shift);
+            (shift, l1, l2)
+        })
+        // `min_by_key` keeps the first of several equal minima.
+        .min_by_key(|(_, l1, l2)| l1.len() * 2 + l2.len() * 8)
+        .unwrap_or_default()
+}
+
+/// Sets `bitmap[cp]` for every code point `cp` that the data file at `path`
+/// assigns to the property `prop`.
+///
+/// Each relevant line has the form `XXXX ; Property` or
+/// `XXXX..YYYY ; Property` (hexadecimal, inclusive range). Everything after a
+/// `#` is a comment. Blank lines, lines with fewer than two `;`-separated
+/// fields and lines for other properties are skipped. Code points at or
+/// beyond `bitmap.len()` are ignored.
+///
+/// # Errors
+///
+/// Returns the underlying I/O error if the file cannot be opened or read, and
+/// an `InvalidData` error if a code point field of a matching line is not
+/// valid hexadecimal.
+fn parse_file(path: &str, prop: &str, bitmap: &mut [bool]) -> io::Result<()> {
+    /// Parses one hexadecimal code point field.
+    fn parse_hex(field: &str) -> io::Result<usize> {
+        usize::from_str_radix(field, 16).map_err(|e| {
+            io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("invalid code point {field:?}: {e}"),
+            )
+        })
+    }
+
+    let reader = BufReader::new(File::open(path)?);
+
+    for line in reader.lines() {
+        let line = line?;
+        // Drop the trailing comment, if any.
+        let line = line.split('#').next().unwrap_or("").trim();
+        if line.is_empty() {
+            continue;
+        }
+
+        let mut fields = line.split(';').map(str::trim);
+        let (Some(range), Some(name)) = (fields.next(), fields.next()) else {
+            continue;
+        };
+        if name != prop {
+            continue;
+        }
+
+        let (beg, end) = match range.split_once("..") {
+            Some((lo, hi)) => (parse_hex(lo)?, parse_hex(hi)?),
+            None => {
+                let cp = parse_hex(range)?;
+                (cp, cp)
+            }
+        };
+
+        // Clamp the inclusive range to the bitmap and fill it in one go.
+        let stop = end.saturating_add(1).min(bitmap.len());
+        if beg < stop {
+            bitmap[beg..stop].fill(true);
+        }
+    }
+
+    Ok(())
+}
+
+/// Compresses `bitmap` into a two-level lookup table with blocks of
+/// `1 << shift` entries.
+///
+/// The bitmap is cut into consecutive blocks, each packed into
+/// `ceil(block_size / 64)` words with bit `i % 64` of word `i / 64` standing
+/// for entry `i` of the block. Identical blocks are stored once.
+///
+/// Returns `(level1, level2)`: `level1[k]` is the index of the stored block
+/// for the `k`-th chunk of the bitmap, and `level2` is the concatenation of
+/// the distinct packed blocks in first-seen order.
+///
+/// # Panics
+///
+/// Panics if there are more distinct blocks than a `u16` index can address,
+/// instead of silently truncating the index.
+fn build_tables(bitmap: &[bool], shift: usize) -> (Vec<u16>, Vec<u64>) {
+    let blksz = 1usize << shift;
+    let words_per_block = blksz.div_ceil(64);
+
+    let mut lvl1 = Vec::new();
+    let mut lvl2: Vec<u64> = Vec::new();
+    let mut seen: HashMap<Vec<u64>, u16> = HashMap::new();
+
+    for chunk in bitmap.chunks(blksz) {
+        // A short final chunk is implicitly padded with zero bits.
+        let mut words = vec![0u64; words_per_block];
+        for (i, _) in chunk.iter().enumerate().filter(|&(_, &bit)| bit) {
+            words[i / 64] |= 1u64 << (i % 64);
+        }
+
+        let index = match seen.get(&words) {
+            Some(&index) => index,
+            None => {
+                let index = u16::try_from(lvl2.len() / words_per_block)
+                    .unwrap_or_else(|_| {
+                        panic!(
+                            "shift {shift}: too many distinct blocks for a u16 block index"
+                        )
+                    });
+                lvl2.extend_from_slice(&words);
+                seen.insert(words, index);
+                index
+            }
+        };
+        lvl1.push(index);
+    }
+
+    (lvl1, lvl2)
+}
+
+/// Writes the Rust source of a two-level table lookup for `prop_name` to `f`.
+///
+/// The output consists of the two static tables `{NAME}_L1` and `{NAME}_L2`
+/// followed by a predicate `fn(c: char) -> bool`. The predicate is called
+/// `{name}_p` when the lower-cased name contains an underscore and `{name}p`
+/// otherwise. `shift` is the block shift the tables were built with; it
+/// determines the level-1 index, the in-block mask and whether a block spans
+/// one or several 64-bit words.
+///
+/// # Panics
+///
+/// Panics if writing to `f` fails.
+fn generate_code(
+    f: &mut impl Write,
+    prop_name: &str,
+    shift: usize,
+    level1: &[u16],
+    level2: &[u64],
+) {
+    let table = prop_name.to_uppercase();
+    let ident = prop_name.to_lowercase();
+    let block_size = 1i32 << shift;
+    let mask = block_size - 1;
+    let words_per_block = (block_size + 63) / 64;
+
+    let predicate = match ident.contains('_') {
+        true => format!("{ident}_p"),
+        false => format!("{ident}p"),
+    };
+
+    let mut lines = vec![
+        "/* Autogenerated – DO NOT EDIT */".to_string(),
+        String::new(),
+        format!("static {table}_L1: [u16; {}] = {level1:?};", level1.len()),
+        format!("static {table}_L2: [u64; {}] = {level2:?};", level2.len()),
+        "#[inline]".to_string(),
+        format!("pub fn {predicate}(c: char) -> bool {{"),
+        "\tlet cp = c as usize;".to_string(),
+        format!("\tlet blki = unsafe {{ *{table}_L1.get_unchecked(cp >> {shift}) }} as usize;"),
+        format!("\tlet in_blk_offset_p = cp & 0x{mask:X};"),
+    ];
+
+    if words_per_block == 1 {
+        // A block fits in a single word, so the block index is the word index.
+        lines.push(format!(
+            "\tunsafe {{ return ({table}_L2.get_unchecked(blki) & (1 << in_blk_offset_p)) != 0; }}"
+        ));
+    } else {
+        lines.push(format!(
+            "\tlet wordi = (blki * {words_per_block}) + (in_blk_offset_p >> 6);"
+        ));
+        lines.push("\tlet biti = in_blk_offset_p & 0x3F;".to_string());
+        lines.push(format!(
+            "\tunsafe {{ return (*{table}_L2.get_unchecked(wordi) & (1 << biti)) != 0; }}"
+        ));
+    }
+    lines.push("}".to_string());
+
+    for line in &lines {
+        writeln!(f, "{line}").unwrap();
+    }
+}
diff --git a/oryxc/src/unicode/line_terminator.rs b/oryxc/src/unicode/line_terminator.rs
index 5c6b052..300bb54 100644
--- a/oryxc/src/unicode/line_terminator.rs
+++ b/oryxc/src/unicode/line_terminator.rs
@@ -1,135 +1 @@
-/* Autogenerated – DO NOT EDIT */
-
-static LINE_TERMINATOR_L1: [u16; 544] = [
- 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-];
-static LINE_TERMINATOR_L2: [u64; 96] = [
- 7168,
- 0,
- 32,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 3298534883328,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
-];
-#[inline]
-pub fn line_terminator_p(c: char) -> bool {
- let cp = c as usize;
- let blki = unsafe { *LINE_TERMINATOR_L1.get_unchecked(cp >> 11) } as usize;
- let in_blk_offset_p = cp & 0x7FF;
- let wordi = (blki * 32) + (in_blk_offset_p >> 6);
- let biti = in_blk_offset_p & 0x3F;
- unsafe {
- return (*LINE_TERMINATOR_L2.get_unchecked(wordi) & (1 << biti)) != 0;
- }
-}
+include!(concat!(env!("OUT_DIR"), "/line_terminator.rs"));
diff --git a/oryxc/src/unicode/pattern_white_space.rs b/oryxc/src/unicode/pattern_white_space.rs
index b051e3a..f90db8d 100644
--- a/oryxc/src/unicode/pattern_white_space.rs
+++ b/oryxc/src/unicode/pattern_white_space.rs
@@ -1,137 +1 @@
-/* Autogenerated – DO NOT EDIT */
-
-static PATTERN_WHITE_SPACE_L1: [u16; 544] = [
- 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-];
-static PATTERN_WHITE_SPACE_L2: [u64; 96] = [
- 4294983168,
- 0,
- 32,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 3298534932480,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
-];
-#[inline]
-pub fn pattern_white_space_p(c: char) -> bool {
- let cp = c as usize;
- let blki =
- unsafe { *PATTERN_WHITE_SPACE_L1.get_unchecked(cp >> 11) } as usize;
- let in_blk_offset_p = cp & 0x7FF;
- let wordi = (blki * 32) + (in_blk_offset_p >> 6);
- let biti = in_blk_offset_p & 0x3F;
- unsafe {
- return (*PATTERN_WHITE_SPACE_L2.get_unchecked(wordi) & (1 << biti))
- != 0;
- }
-}
+include!(concat!(env!("OUT_DIR"), "/pattern_white_space.rs"));
diff --git a/oryxc/src/unicode/xid_continue.rs b/oryxc/src/unicode/xid_continue.rs
index 8fbbce4..571ae40 100644
--- a/oryxc/src/unicode/xid_continue.rs
+++ b/oryxc/src/unicode/xid_continue.rs
@@ -1,1007 +1 @@
-/* Autogenerated – DO NOT EDIT */
-
-static XID_CONTINUE_L1: [u16; 1088] = [
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 12, 12, 12, 12, 12, 13, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 14, 15, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 9, 9, 9, 9, 9, 9,
- 9, 9, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 9, 29, 12, 30, 12,
- 12, 31, 32, 9, 9, 9, 9, 9, 9, 33, 9, 34, 35, 12, 12, 12, 12, 12, 12, 12,
- 36, 9, 9, 9, 9, 9, 9, 9, 37, 38, 9, 9, 39, 9, 9, 9, 40, 41, 42, 43, 44, 45,
- 46, 47, 48, 9, 9, 49, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 50, 12, 12, 12, 12, 51, 12, 12, 12,
- 12, 52, 12, 12, 12, 12, 12, 12, 53, 54, 9, 9, 55, 9, 12, 12, 12, 12, 56,
- 12, 12, 12, 12, 12, 12, 12, 12, 57, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 58, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
-];
-static XID_CONTINUE_L2: [u64; 944] = [
- 287948901175001088,
- 576460745995190270,
- 333270770471927808,
- 18410715276682199039,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 88094074470339,
- 18446744073709551615,
- 13321647697761927167,
- 18446744056529672128,
- 18428729675200069631,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709550843,
- 18446744073709551615,
- 18446462598732840959,
- 18446744069456527359,
- 13835058055282033151,
- 2119858418286774,
- 18446744069548736512,
- 18446678103011885055,
- 18446744073709551615,
- 11529212845433552895,
- 18446744073709486080,
- 18446744073709545471,
- 1125899906842623,
- 2612087783874887679,
- 70368744177663,
- 18446471390799331327,
- 18446744073701228287,
- 18446744056529682431,
- 18446744073709551615,
- 18446462392574410751,
- 17565725197581524975,
- 5765733215448889759,
- 15235112390417287150,
- 18014125208779143,
- 17576984196650090478,
- 18302910150157089727,
- 17576984196649951214,
- 844217444219295,
- 14123225865944680428,
- 281200107273671,
- 17582050746231021567,
- 281265452367327,
- 17577547146603651055,
- 4221916082617823,
- 18446744073709412351,
- 18158794964244397535,
- 3457638613854978030,
- 3658904103781503,
- 576460752303423486,
- 67076095,
- 4611685674830002134,
- 4093607775,
- 14024213633433600001,
- 18446216308128218879,
- 2305843009196916703,
- 64,
- 18446744073709551615,
- 18446744073709487103,
- 18446744070488326143,
- 17870283321406070975,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744070446333439,
- 9168765891372858879,
- 18446744073701162813,
- 18446744073696837631,
- 1123704775901183,
- 18446744069414649855,
- 4557642822898941951,
- 18446744073709551614,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446638520593285119,
- 18446744069548802046,
- 144053615424700415,
- 9007197111451647,
- 3905461007941631,
- 18446744073709551615,
- 4394566287359,
- 18446744069481674752,
- 144115188075855871,
- 18446471394825863167,
- 18014398509481983,
- 1152657619668697087,
- 8796093022207936,
- 18446480190918885375,
- 134153215,
- 18446744069683019775,
- 11529215043920986111,
- 13834777130128311295,
- 17588964818943,
- 18446744073709551615,
- 4494803601399807,
- 18446744073709551615,
- 4503599627370495,
- 72057594037927935,
- 4611686018427380735,
- 16717361816799217663,
- 576460752302833664,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744070475743231,
- 4611686017001275199,
- 6908521828386340863,
- 2295745090394464220,
- 9231253336202686464,
- 9223934986817634305,
- 536805376,
- 562821641207808,
- 17582049991377026180,
- 18446744069414601696,
- 511,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 4494940973301759,
- 18446498607738650623,
- 9223513873854758911,
- 9187201948305063935,
- 18446744071553646463,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 2251518330118602976,
- 18446744073709551614,
- 18446744068986765311,
- 18446744073709551615,
- 18446462598732840928,
- 18446744073709551615,
- 18446744069414617087,
- 18446462598732840960,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 8191,
- 4611686018427322368,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 17592185987071,
- 13830835930631503871,
- 18446744073709551615,
- 1125899906842623,
- 18446744060816261120,
- 18446744073709551615,
- 18446744073709550079,
- 18446181124293001215,
- 18691697672191,
- 4503599627370495,
- 18446744073709551615,
- 16789419406609285183,
- 18446532967477018623,
- 2305843004919775231,
- 18446744073709551615,
- 9223372032626884609,
- 36028797018963967,
- 18194542490348896255,
- 18446744073709551615,
- 35184368733388807,
- 18446602782178705022,
- 18446466996645134335,
- 18446744073709551615,
- 288010473826156543,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446462667452317695,
- 1152921504606845055,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446532967477018623,
- 18446744073709551615,
- 67108863,
- 6881498031078244479,
- 18446744073709551579,
- 1125899906842623,
- 18446744073709027328,
- 18446744073709551615,
- 18446744006063816703,
- 18446744073709551615,
- 18446744073709551615,
- 4611686018427387903,
- 18446744073709486080,
- 18446744073709355007,
- 287948901175001343,
- 7036870122864639,
- 12288634533233876992,
- 18446744073709551615,
- 2305843009213693951,
- 9799832780635308032,
- 18446743936404815870,
- 9223372036854775807,
- 486341884,
- 13258596753222922239,
- 1073692671,
- 18446744073709551615,
- 576460752303423487,
- 0,
- 9007199254740991,
- 0,
- 2305843009213693952,
- 0,
- 0,
- 18446744069951455231,
- 4295098367,
- 18446708893632430079,
- 576460752303359999,
- 18446744070488326143,
- 4128527,
- 18446744073709551615,
- 18446744073709551615,
- 18446466993558126591,
- 1152921504591118335,
- 18446463698244468735,
- 17870001915148894207,
- 2016486715970549759,
- 4503599627370495,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 36028797018963967,
- 1095220854783,
- 575897802350002111,
- 0,
- 10502394331027995967,
- 36028792728190975,
- 2147483647,
- 15762594400829440,
- 288230371860938751,
- 67108863,
- 13907115649320091647,
- 0,
- 9745789593611923567,
- 2305843004918726656,
- 536870911,
- 549755813631,
- 18014398509481983,
- 2251795522912255,
- 262143,
- 0,
- 18446744073709551615,
- 511,
- 2251799813685247,
- 2251799813685247,
- 287950000686628863,
- 18446671780820025343,
- 63,
- 0,
- 0,
- 0,
- 875211255709695,
- 18158513697557840124,
- 18446463149025525759,
- 18446462598732972031,
- 18446462598732841023,
- 36028792723996703,
- 18446744073709551615,
- 9241386160486350975,
- 576460752303423487,
- 287951100198191108,
- 18437736874454810623,
- 22517998136787184,
- 18446744073709551615,
- 402644511,
- 13907115649319829503,
- 3,
- 18446464796682337663,
- 287957697268023295,
- 18153444948953374703,
- 8760701963286943,
- 18428729675200023551,
- 25770850213,
- 18446744073709551615,
- 16173172735,
- 18446744073709551615,
- 67043519,
- 0,
- 0,
- 18392700878181105663,
- 1056964609,
- 18446744073709551615,
- 67043345,
- 144115188075855871,
- 68719412223,
- 287966492958392319,
- 127,
- 0,
- 0,
- 576460752303423487,
- 0,
- 18446744069414584320,
- 9223376434901286911,
- 17996384110963061375,
- 67043343,
- 18446740770879700992,
- 120208752639,
- 9223372036854775807,
- 18446744073709486208,
- 18446462599336820735,
- 144115188075855871,
- 0,
- 1095216660480,
- 0,
- 287948909764935679,
- 18410715276690587135,
- 18445618173869752321,
- 36027697507139583,
- 0,
- 13006395723845991295,
- 18446741595580465407,
- 18446466992517644287,
- 4394019979263,
- 0,
- 0,
- 0,
- 36028792723996672,
- 14411518807585456127,
- 134152199,
- 281474976710656,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 67108863,
- 0,
- 18446744073709551615,
- 140737488355327,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 15,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709486080,
- 562949953421311,
- 281474976710655,
- 18446744069418778623,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 576460752303423487,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 127,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 288230376151711743,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 144115188075855871,
- 18446466994631868415,
- 9223372036854775807,
- 8796093022143487,
- 36028797018963967,
- 16212958624241090575,
- 65535,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 287984085547089919,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18014398505187016704,
- 1048575,
- 18446744073709551615,
- 18446744073709520895,
- 4294934783,
- 35747438006370304,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 9223372036858970111,
- 2147483647,
- 0,
- 18446744073709551615,
- 2251799813685247,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 8065665457643847680,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 1125934266580991,
- 18446463629527547904,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 1152921504606846975,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 2305570330330005503,
- 1677656575,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 287948901175001088,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446532967477018623,
- 127,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 17872504197455282176,
- 65970697670631,
- 0,
- 0,
- 28,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073707454463,
- 17005555242810474495,
- 18446744073709551599,
- 8935141660164089791,
- 18446744073709419615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446743249075830783,
- 17870283321271910397,
- 18437736874452713471,
- 18446603336221163519,
- 18446741874686295551,
- 18446744073709539319,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 17906312118425092095,
- 9042383626829823,
- 281470547525648,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 8660801552383,
- 0,
- 0,
- 0,
- 18446471240106377087,
- 70368744177663,
- 32768,
- 0,
- 4611439727822766079,
- 17407,
- 0,
- 0,
- 0,
- 0,
- 140737488289792,
- 288230376151711743,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 288230376151646208,
- 0,
- 0,
- 0,
- 576460752303357952,
- 0,
- 0,
- 0,
- 13853072451644162047,
- 0,
- 0,
- 0,
- 9223213153129594880,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 8323103,
- 18446744073709551615,
- 67047423,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 790380184120328175,
- 6843210385291930244,
- 1152917029519358975,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 287948901175001088,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 4294967295,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744070488326143,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446532967477018623,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446462607322775551,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 1073741823,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 1073741823,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709488127,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 288230376151711743,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 281474976710655,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
-];
-#[inline]
-pub fn xid_continue_p(c: char) -> bool {
- let cp = c as usize;
- let blki = unsafe { *XID_CONTINUE_L1.get_unchecked(cp >> 10) } as usize;
- let in_blk_offset_p = cp & 0x3FF;
- let wordi = (blki * 16) + (in_blk_offset_p >> 6);
- let biti = in_blk_offset_p & 0x3F;
- unsafe {
- return (*XID_CONTINUE_L2.get_unchecked(wordi) & (1 << biti)) != 0;
- }
-}
+include!(concat!(env!("OUT_DIR"), "/xid_continue.rs"));
diff --git a/oryxc/src/unicode/xid_start.rs b/oryxc/src/unicode/xid_start.rs
index 1c9d9ae..649faa5 100644
--- a/oryxc/src/unicode/xid_start.rs
+++ b/oryxc/src/unicode/xid_start.rs
@@ -1,927 +1 @@
-/* Autogenerated – DO NOT EDIT */
-
-static XID_START_L1: [u16; 1088] = [
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 12, 12, 12, 12, 12, 13, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 14, 15, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 9, 9, 9, 9, 9, 9,
- 9, 9, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 9, 29, 12, 30, 12,
- 12, 31, 32, 9, 9, 9, 9, 9, 9, 33, 9, 34, 35, 12, 12, 12, 12, 12, 12, 12,
- 36, 9, 9, 9, 9, 9, 9, 9, 37, 38, 9, 9, 39, 9, 9, 9, 9, 9, 40, 9, 41, 42,
- 43, 44, 45, 9, 9, 9, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 46, 12, 12, 12, 12, 47, 12, 12, 12, 12,
- 48, 12, 12, 12, 12, 12, 12, 49, 50, 9, 9, 51, 9, 12, 12, 12, 12, 52, 12,
- 12, 12, 12, 12, 12, 12, 12, 53, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
-];
-static XID_START_L2: [u64; 864] = [
- 68719476736,
- 576460745995190270,
- 297241973452963840,
- 18410715276682199039,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 88094074470339,
- 0,
- 13321366222785216512,
- 18446744056529672000,
- 18428729675200069631,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709550595,
- 18446744073709551615,
- 18446462598732840959,
- 18446744069456527359,
- 511,
- 2119858418286592,
- 18446744069414584320,
- 18446392229988665343,
- 18446744073709551615,
- 11241196188469297151,
- 281474976514048,
- 18446744073709543424,
- 563224831328255,
- 301749971126844416,
- 1168302407679,
- 18446471390564450303,
- 18446744069414649599,
- 1023,
- 2594073385365405680,
- 18446181140919287808,
- 2577745637692514273,
- 1153765945374687232,
- 247132830528276448,
- 7881300924956672,
- 2589004636761079776,
- 144115200960823296,
- 2589004636760940512,
- 562965791113216,
- 288167810662516712,
- 65536,
- 2594071186342010848,
- 13807648768,
- 2589567586714640353,
- 1688864624214016,
- 2882303761516978160,
- 18158513712597581824,
- 3457638613854978016,
- 127,
- 1688849860263934,
- 127,
- 2307531515476572118,
- 4026531935,
- 1,
- 35184372088575,
- 7936,
- 0,
- 9223380832947798015,
- 18438229877581611008,
- 18446744069414600707,
- 17870283321406070975,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744070446333439,
- 9168765891372858879,
- 18446744073701162813,
- 18446744073696837631,
- 134217727,
- 18446744069414649855,
- 4557642822898941951,
- 18446744073709551614,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446638520593285119,
- 18446744069548802046,
- 144053615424700415,
- 1125897759621119,
- 527761286627327,
- 4503599627370495,
- 276824064,
- 18446744069414584320,
- 144115188075855871,
- 18446469195802607615,
- 18014398509481983,
- 2147483647,
- 8796093022142464,
- 18446480190918885375,
- 1023,
- 18446744069422972927,
- 2097151,
- 549755813888,
- 0,
- 4503599627370464,
- 8160,
- 18158724812380307448,
- 274877906943,
- 68719476735,
- 4611686018360336384,
- 16717361816799217663,
- 319718190147960832,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744070475743231,
- 4611686017001275199,
- 6908521828386340863,
- 2295745090394464220,
- 0,
- 9223934986808197120,
- 536805376,
- 0,
- 17582049991377026180,
- 18446744069414601696,
- 511,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 3509778554814463,
- 18446498607738650623,
- 141836999983103,
- 9187201948305063935,
- 2139062143,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 2251241253188403424,
- 18446744073709551614,
- 18446744068886102015,
- 17870283321406128127,
- 18446462598732840928,
- 18446744073709551615,
- 18446744069414617087,
- 18446462598732840960,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 8191,
- 4611686018427322368,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 13198434443263,
- 9223512774343131135,
- 18446744070488326143,
- 281474976710655,
- 18446744060816261120,
- 18446744073709551615,
- 18446744073709550079,
- 18446181124293001215,
- 34359736251,
- 4503599627370495,
- 4503599627370492,
- 7564921474075590656,
- 18446462873610746880,
- 2305843004918726783,
- 2251799813685232,
- 8935422993945886720,
- 2199023255551,
- 14159317224157876215,
- 4495436853045886975,
- 7890092085477381,
- 18446602782178705022,
- 18446466996645134335,
- 18446744073709551615,
- 34359738367,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446462667452317695,
- 1152921504606845055,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446532967477018623,
- 18446744073709551615,
- 67108863,
- 6881498030004502655,
- 18446744073709551579,
- 1125899906842623,
- 18446744073709027328,
- 18446744073709551615,
- 18446744006063816703,
- 18446744073709551615,
- 18446744073709551615,
- 4611686018427387903,
- 18446744073709486080,
- 18446744073709355007,
- 287948901175001343,
- 0,
- 12288634533233819648,
- 18446744073709551615,
- 2305843009213693951,
- 576460743713488896,
- 18446743798965862398,
- 9223372033633550335,
- 486341884,
- 13258596753222922239,
- 1073692671,
- 18446744073709551615,
- 576460752303423487,
- 0,
- 9007199254740991,
- 0,
- 0,
- 0,
- 0,
- 18446744069951455231,
- 131071,
- 18446708893632430079,
- 18014398509418495,
- 18446744070488326143,
- 4128527,
- 18446744073709551615,
- 18446744073709551615,
- 18446462599806582783,
- 1152921504591118335,
- 18446463698244468735,
- 17870001915148894207,
- 2016486715970549759,
- 4503599627370495,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 36028797018963967,
- 1095220854783,
- 575897802350002111,
- 0,
- 10502394331027995967,
- 36028792728190975,
- 2147483647,
- 15762594400829440,
- 288230371860938751,
- 67108863,
- 13907115649320091647,
- 0,
- 18014398491590657,
- 2305843004918726656,
- 536870911,
- 137438953215,
- 18014398509481983,
- 2251795522912255,
- 262143,
- 0,
- 18446744073709551615,
- 511,
- 2251799813685247,
- 2251799813685247,
- 68719476735,
- 18446603611099102208,
- 63,
- 0,
- 0,
- 0,
- 848822976643071,
- 252,
- 18446463149025525759,
- 18446462598732841023,
- 18446462598732840963,
- 36028792723996703,
- 72057594037927928,
- 10696049115004928,
- 281474976710648,
- 2199023190016,
- 549755813880,
- 20266198323101840,
- 2251799813685240,
- 335544350,
- 9223389629040558079,
- 1,
- 18446464796682337663,
- 2147483647,
- 2589004636760940512,
- 16643063808,
- 54043195528399871,
- 655360,
- 9007199254740991,
- 15032387456,
- 281474976710655,
- 176,
- 0,
- 0,
- 140737488355327,
- 251658240,
- 281474976710655,
- 16,
- 72066390130950143,
- 0,
- 134217727,
- 127,
- 0,
- 0,
- 17592186044415,
- 0,
- 18446744069414584320,
- 9223372041149743103,
- 9223653511822045823,
- 2,
- 18446740770879700992,
- 42949804031,
- 290482175965394945,
- 18446744073441181696,
- 18446462599269712895,
- 144115188075855871,
- 0,
- 0,
- 0,
- 8589934591,
- 140737488354815,
- 18445618173802708993,
- 65535,
- 0,
- 562949953420159,
- 18446741595513421888,
- 18446462598749619199,
- 268435455,
- 0,
- 0,
- 0,
- 2251795518717952,
- 4503599627239412,
- 0,
- 281474976710656,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 67108863,
- 0,
- 18446744073709551615,
- 140737488355327,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 15,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709486080,
- 562949953421311,
- 281474976710655,
- 18446744069414584446,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 576460752303423487,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 127,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1073741823,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 144115188075855871,
- 18446462600880324607,
- 9223372036854775807,
- 70368744112128,
- 281474976710655,
- 16212958624174047247,
- 65535,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 35184372088831,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18014398505187016704,
- 1048575,
- 18446744073709551615,
- 67583,
- 4294443008,
- 34902944356761600,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 9223372036858970111,
- 2147483647,
- 0,
- 18446744073709551615,
- 2251799813685247,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 8065665457643847680,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 1125934266580991,
- 18446463629527547904,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 1152921504606846975,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 2305570330330005503,
- 67043839,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073707454463,
- 17005555242810474495,
- 18446744073709551599,
- 8935141660164089791,
- 18446744073709419615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446743249075830783,
- 17870283321271910397,
- 18437736874452713471,
- 18446603336221163519,
- 18446741874686295551,
- 4087,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 8660801552383,
- 0,
- 0,
- 0,
- 18446462598732840960,
- 70368744177663,
- 0,
- 0,
- 4575692405780512767,
- 16384,
- 0,
- 0,
- 0,
- 0,
- 70368744112128,
- 17592186044415,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 17592185978880,
- 0,
- 0,
- 0,
- 351843720822784,
- 0,
- 0,
- 0,
- 13843853836919242751,
- 0,
- 0,
- 0,
- 9223213153129594880,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 31,
- 18446744073709551615,
- 2063,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 790380184120328175,
- 6843210385291930244,
- 1152917029519358975,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 4294967295,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744070488326143,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446532967477018623,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446462607322775551,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 1073741823,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 1073741823,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709488127,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 288230376151711743,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
-];
-#[inline]
-pub fn xid_start_p(c: char) -> bool {
- let cp = c as usize;
- let blki = unsafe { *XID_START_L1.get_unchecked(cp >> 10) } as usize;
- let in_blk_offset_p = cp & 0x3FF;
- let wordi = (blki * 16) + (in_blk_offset_p >> 6);
- let biti = in_blk_offset_p & 0x3F;
- unsafe {
- return (*XID_START_L2.get_unchecked(wordi) & (1 << biti)) != 0;
- }
-}
+include!(concat!(env!("OUT_DIR"), "/xid_start.rs"));
diff --git a/unigen/Cargo.toml b/unigen/Cargo.toml
deleted file mode 100644
index 587afb7..0000000
--- a/unigen/Cargo.toml
+++ /dev/null
@@ -1,7 +0,0 @@
-[package]
-name = "unigen"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-lexopt = "0.3.2"
diff --git a/unigen/src/main.rs b/unigen/src/main.rs
deleted file mode 100644
index bedabc6..0000000
--- a/unigen/src/main.rs
+++ /dev/null
@@ -1,279 +0,0 @@
-use std::collections::HashMap;
-use std::ffi::{
- OsStr,
- OsString,
-};
-use std::fs::File;
-use std::io::{
- self,
- BufRead,
- BufReader,
-};
-use std::path::Path;
-use std::sync::OnceLock;
-use std::vec::Vec;
-use std::{
- env,
- process,
-};
-
-const MIN_SHIFT: usize = 1;
-const MAX_SHIFT: usize = 22;
-
-#[derive(Default)]
-struct Flags {
- codepoints: Option<Vec<char>>,
- help: bool,
-}
-
-impl Flags {
- fn parse() -> Result<(Flags, Vec<String>), lexopt::Error> {
- use lexopt::prelude::*;
-
- let mut rest = Vec::with_capacity(env::args().len() - 1);
- let mut flags = Flags::default();
- let mut parser = lexopt::Parser::from_env();
- parser.set_short_equals(false);
-
- while let Some(arg) = parser.next()? {
- match arg {
- Short('c') | Long("codepoints") => {
- fn hex_to_char(s: &str) -> char {
- return u32::from_str_radix(s, 16).map_or_else(
- |e| {
- eprintln!("{}: {s}: {e}", progname().display());
- process::exit(1);
- },
- |n| {
- char::from_u32(n).unwrap_or_else(|| {
- eprintln!(
- "{}: {s}: invalid codepoint",
- progname().display()
- );
- process::exit(1);
- })
- },
- );
- }
-
- flags.codepoints = Some(
- parser
- .value()?
- .to_str()
- .unwrap_or_else(|| {
- eprintln!(
- "{}: unable to parse argument to -c/--codepoints",
- progname().display()
- );
- process::exit(1);
- })
- .split(',')
- .map(hex_to_char)
- .collect(),
- );
- },
- Short('h') | Long("help") => flags.help = true,
- Value(v) => rest.push(v.into_string()?),
- _ => return Err(arg.unexpected()),
- }
- }
-
- return Ok((flags, rest));
- }
-}
-
-fn progname() -> &'static OsString {
- static ARGV0: OnceLock<OsString> = OnceLock::new();
- return ARGV0.get_or_init(|| {
- let default = OsStr::new("oryxc");
- let s = env::args_os().next().unwrap_or(default.into());
- return Path::new(&s).file_name().unwrap_or(default).to_os_string();
- });
-}
-
-fn usage() {
- eprintln!(
- concat!(
- "Usage: {0} data-file property-name\n",
- " {0} -c codepoints name\n",
- " {0} -h",
- ),
- progname().display()
- );
-}
-
-fn main() -> io::Result<()> {
- let (flags, rest) = match Flags::parse() {
- Ok(v) => v,
- Err(e) => {
- eprintln!("{}: {e}", progname().display());
- usage();
- process::exit(1);
- },
- };
-
- if flags.help {
- usage();
- process::exit(0);
- }
-
- if (flags.codepoints.is_none() && rest.len() != 2)
- || (flags.codepoints.is_some() && rest.len() != 1)
- {
- usage();
- process::exit(1);
- }
-
- let mut bitmap = vec![false; 0x110000];
- let name = match flags.codepoints {
- Some(vec) => {
- vec.iter().for_each(|c| bitmap[*c as usize] = true);
- &rest[0]
- },
- None => {
- parse_file(&rest[0], &rest[1], &mut bitmap)?;
- &rest[1]
- },
- };
- let (shift, lvl1, lvl2) = optimize_tables(&bitmap);
- write_tables(name, shift, &lvl1, &lvl2);
- return Ok(());
-}
-
-fn optimize_tables(bitmap: &[bool]) -> (usize, Vec<u16>, Vec<u64>) {
- let mut minsz = usize::MAX;
- let mut config = (0, Vec::new(), Vec::new());
-
- for i in MIN_SHIFT..=MAX_SHIFT {
- let (l1, l2) = build_tables(bitmap, i);
- let sz = l1.len() * 2 + l2.len() * 8;
- if sz < minsz {
- minsz = sz;
- config = (i, l1, l2);
- }
- }
-
- return config;
-}
-
-fn parse_file<P: AsRef<Path>>(
- path: P,
- prop: &str,
- bitmap: &mut [bool],
-) -> io::Result<()> {
- let file = File::open(path)?;
- let reader = BufReader::new(file);
-
- for line in reader.lines() {
- let line = line?;
- let line = line.split('#').next().unwrap_or("").trim();
- if line.is_empty() {
- continue;
- }
-
- let parts: Vec<&str> = line.split(';').map(|s| s.trim()).collect();
- if parts.len() < 2 || parts[1] != prop {
- continue;
- }
-
- let (beg, end) = if parts[0].contains("..") {
- let mut range = parts[0].split("..");
- (
- u32::from_str_radix(range.next().unwrap(), 16).unwrap(),
- u32::from_str_radix(range.next().unwrap(), 16).unwrap(),
- )
- } else {
- let val = u32::from_str_radix(parts[0], 16).unwrap();
- (val, val)
- };
-
- for cp in beg..=end {
- if (cp as usize) < bitmap.len() {
- bitmap[cp as usize] = true;
- }
- }
- }
- return Ok(());
-}
-
-fn build_tables(bitmap: &[bool], shift: usize) -> (Vec<u16>, Vec<u64>) {
- let blksz = 1 << shift;
- let u64s_per_block = (blksz + 63) / 64;
-
- let mut lvl2: Vec<u64> = Vec::new();
- let mut lvl1: Vec<u16> = Vec::new();
- let mut blkmap: HashMap<Vec<u64>, u16> = HashMap::new();
-
- for chunk in bitmap.chunks(blksz) {
- let mut blkdata = vec![0u64; u64s_per_block];
-
- for (i, &bit) in chunk.iter().enumerate() {
- if bit {
- let word_idx = i / 64;
- let bit_idx = i % 64;
- blkdata[word_idx] |= 1 << bit_idx;
- }
- }
-
- if let Some(&i) = blkmap.get(&blkdata) {
- lvl1.push(i);
- } else {
- let i = (lvl2.len() / u64s_per_block) as u16;
- lvl2.extend_from_slice(&blkdata);
- blkmap.insert(blkdata, i);
- lvl1.push(i);
- }
- }
-
- return (lvl1, lvl2);
-}
-
-fn write_tables(prop_name: &str, shift: usize, level1: &[u16], level2: &[u64]) {
- let upper_name = prop_name.to_uppercase();
- let lower_name = prop_name.to_lowercase();
- let block_size = 1 << shift;
- let mask = block_size - 1;
- let u64s_per_block = (block_size + 63) / 64;
-
- println!("/* Autogenerated – DO NOT EDIT */\n");
- print!(
- "static {upper_name}_L1: [u16; {}] = {level1:?};",
- level1.len()
- );
- print!(
- "static {upper_name}_L2: [u64; {}] = {level2:?};",
- level2.len()
- );
-
- let pred_name = if lower_name.contains('_') {
- format!("{lower_name}_p")
- } else {
- format!("{lower_name}p")
- };
-
- print!(
- "#[inline]
- pub fn {pred_name}(c: char) -> bool {{
- let cp = c as usize;
- let blki = unsafe {{ *{upper_name}_L1.get_unchecked(cp >> {shift}) }} as usize;
- let in_blk_offset_p = cp & 0x{mask:X};"
- );
-
- if u64s_per_block == 1 {
- print!(
- " unsafe {{
- return ({upper_name}_L2.get_unchecked(blki) & (1 << in_blk_offset_p)) != 0;
- }}"
- );
- } else {
- print!(
- "let wordi = (blki * {u64s_per_block}) + (in_blk_offset_p >> 6);
- let biti = in_blk_offset_p & 0x3F;
- unsafe {{
- return (*{upper_name}_L2.get_unchecked(wordi) & (1 << biti)) != 0;
- }}"
- );
- }
-
- print!("}}");
-}