summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 3
-rw-r--r-- oryxc/.gitignore 1
-rw-r--r-- oryxc/Cargo.lock 277
-rw-r--r-- oryxc/Cargo.toml 13
l--------- oryxc/rustfmt.toml 1
-rw-r--r-- oryxc/src/compiler.rs 136
-rw-r--r-- oryxc/src/errors.rs 67
-rw-r--r-- oryxc/src/intern.rs 68
-rw-r--r-- oryxc/src/lexer.rs 427
-rw-r--r-- oryxc/src/main.rs 95
-rw-r--r-- oryxc/src/parser.rs 544
-rw-r--r-- oryxc/src/size.rs 3
-rw-r--r-- oryxc/src/unicode/default_ignorable_code_point.rs 363
-rw-r--r-- oryxc/src/unicode/line_terminator.rs 135
-rw-r--r-- oryxc/src/unicode/mod.rs 11
-rw-r--r-- oryxc/src/unicode/pattern_white_space.rs 137
-rw-r--r-- oryxc/src/unicode/xid_continue.rs 1007
-rw-r--r-- oryxc/src/unicode/xid_start.rs 927
-rw-r--r-- rustfmt.toml 14
-rw-r--r-- test.x 33
-rw-r--r-- unigen/Cargo.lock 16
-rw-r--r-- unigen/Cargo.toml 7
-rwxr-xr-x unigen/fetch 19
l--------- unigen/rustfmt.toml 1
-rw-r--r-- unigen/src/main.rs 278
25 files changed, 4583 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..987ff3e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+oryxc/target/
+unigen/data/
+unigen/target/
diff --git a/oryxc/.gitignore b/oryxc/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/oryxc/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/oryxc/Cargo.lock b/oryxc/Cargo.lock
new file mode 100644
index 0000000..5514afa
--- /dev/null
+++ b/oryxc/Cargo.lock
@@ -0,0 +1,277 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "bitflags"
+version = "2.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "dashmap"
+version = "6.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+ "hashbrown",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+
+[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+
+[[package]]
+name = "lexopt"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c5d9b5843e8c9311ff602e6bd50855015e99e75159c2c54fe104cfac241f552"
+
+[[package]]
+name = "libc"
+version = "0.2.181"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+
+[[package]]
+name = "oryx"
+version = "0.1.0"
+dependencies = [
+ "crossbeam-deque",
+ "dashmap",
+ "lexopt",
+ "phf",
+ "soa-rs",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "phf"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
+dependencies = [
+ "phf_macros",
+ "phf_shared",
+ "serde",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
+dependencies = [
+ "fastrand",
+ "phf_shared",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
+dependencies = [
+ "siphasher",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "siphasher"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
+
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
+[[package]]
+name = "soa-rs"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf20e367c4676b712916633bc735e97d18cf4d7c2a88b0e29a43446790d029d6"
+dependencies = [
+ "soa-rs-derive",
+]
+
+[[package]]
+name = "soa-rs-derive"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3de734b144ae71c89c91cdc45de3bd22e99e4efb1098e2635797fc0ee0566172"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.115"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e"
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
diff --git a/oryxc/Cargo.toml b/oryxc/Cargo.toml
new file mode 100644
index 0000000..88464ca
--- /dev/null
+++ b/oryxc/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "oryx"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+crossbeam-deque = "0.8.6"
+dashmap = "6.1.0"
+# icu = { version = "2.1.1", features = ["compiled_data"] }
+lexopt = "0.1.0"
+# num-rational = "0.4.2"
+phf = { version = "0.13.1", features = ["macros"] }
+soa-rs = "0.9.1"
diff --git a/oryxc/rustfmt.toml b/oryxc/rustfmt.toml
new file mode 120000
index 0000000..39f97b0
--- /dev/null
+++ b/oryxc/rustfmt.toml
@@ -0,0 +1 @@
+../rustfmt.toml
\ No newline at end of file
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
new file mode 100644
index 0000000..05e275f
--- /dev/null
+++ b/oryxc/src/compiler.rs
@@ -0,0 +1,136 @@
+use std::ffi::OsString;
+use std::iter::IntoIterator;
+use std::sync::Arc;
+use std::sync::atomic::{
+ AtomicUsize,
+ Ordering,
+};
+use std::vec::Vec;
+use std::{
+ panic,
+ thread,
+};
+
+use crossbeam_deque::{
+ Injector,
+ Steal,
+ Stealer,
+ Worker,
+};
+use dashmap::DashMap;
+
+use crate::Flags;
+
+/* Identifier of a source file.  `start` assigns these from the position of
+   each path in its input. */
+#[derive(Clone, Copy, Eq, Hash, PartialEq)]
+pub struct FileId(u32);
+
+/* Per-file data stored in `CompilerState::files` */
+pub struct FileData {
+ name: OsString, /* Path as it was handed to `start` */
+}
+
+/* A unit of work carried by the work queues */
+pub enum Job {
+ LexAndParse { file: FileId },
+ TypeCheck { file: FileId },
+}
+
+/* State shared between all worker threads */
+pub struct CompilerState {
+ pub files: DashMap<FileId, FileData>,
+ pub globalq: Injector<Job>, /* Seeded by `start` with one job per file */
+ pub njobs: AtomicUsize, /* Jobs not yet completed; workers exit at 0 */
+ pub flags: Flags,
+}
+
+/* Register every path as a source file, queue one `LexAndParse` job per
+   file, and run `flags.threads` worker threads until all jobs are done.
+
+   Blocks until every worker has exited.  If a worker panicked, the panic
+   is re-raised on the calling thread.
+
+   Panics if more than `u32::MAX` paths are supplied, since file IDs are
+   32 bits wide. */
+pub fn start<T>(paths: T, flags: Flags)
+where
+    T: IntoIterator<Item = OsString>,
+{
+    let state = Arc::new(CompilerState {
+        files: DashMap::new(),
+        globalq: Injector::new(),
+        njobs: AtomicUsize::new(0),
+        flags,
+    });
+    for (i, path) in paths.into_iter().enumerate() {
+        /* A checked conversion; ‘as’ would silently wrap and alias IDs */
+        let id = FileId(
+            u32::try_from(i).expect("more than u32::MAX source files"),
+        );
+        /* ‘path’ is already owned, so it is moved rather than cloned */
+        state.files.insert(id, FileData { name: path });
+        state.njobs.fetch_add(1, Ordering::SeqCst);
+        state.globalq.push(Job::LexAndParse { file: id });
+    }
+
+    /* One local queue per worker, plus a stealer handle for each queue */
+    let mut workers = Vec::with_capacity(flags.threads);
+    let mut stealers = Vec::with_capacity(flags.threads);
+    for _ in 0..flags.threads {
+        let w = Worker::new_fifo();
+        stealers.push(w.stealer());
+        workers.push(w);
+    }
+
+    let stealer_view: Arc<[_]> = Arc::from(stealers);
+
+    /* Spawn every worker before joining any of them */
+    let threads: Vec<_> = workers
+        .into_iter()
+        .enumerate()
+        .map(|(id, w)| {
+            let stealer_view = Arc::clone(&stealer_view);
+            let state = Arc::clone(&state);
+            thread::spawn(move || worker_loop(id, w, stealer_view, state))
+        })
+        .collect();
+
+    for t in threads {
+        if let Err(e) = t.join() {
+            panic::resume_unwind(e);
+        }
+    }
+}
+
+/* Body of a worker thread: keep taking jobs until the shared count of
+   outstanding jobs reaches zero.
+
+   `queue` is this worker's local queue, `stealers` are the handles onto
+   every worker's queue, and `state` holds the global queue and the job
+   counter. */
+fn worker_loop(
+    id: usize,
+    queue: Worker<Job>,
+    stealers: Arc<[Stealer<Job>]>,
+    state: Arc<CompilerState>,
+) {
+    /* The worker index is not used by any job yet */
+    let _ = id;
+
+    while state.njobs.load(Ordering::SeqCst) != 0 {
+        let Some(job) = find_task(&queue, &state.globalq, &stealers) else {
+            /* Nothing to do right now; let another thread run */
+            thread::yield_now();
+            continue;
+        };
+
+        /* Variants are path-qualified and listed exhaustively so that a
+           newly added job kind is a compile error here.  Neither job has
+           an implementation yet. */
+        match job {
+            Job::LexAndParse { file: _ } => {},
+            Job::TypeCheck { file: _ } => {},
+        }
+
+        state.njobs.fetch_sub(1, Ordering::SeqCst);
+    }
+}
+
+/* Find the next job for a worker.  Sources are tried in order: the
+   worker's own queue, then the global queue, then every stealer in turn.
+   Returns None when all of them report empty. */
+fn find_task(
+    localq: &Worker<Job>,
+    globalq: &Injector<Job>,
+    stealers: &Arc<[Stealer<Job>]>,
+) -> Option<Job> {
+    /* Repeat a steal attempt for as long as it asks to be retried */
+    fn settle<F>(mut attempt: F) -> Option<Job>
+    where
+        F: FnMut() -> Steal<Job>,
+    {
+        loop {
+            match attempt() {
+                Steal::Retry => {},
+                Steal::Success(job) => return Some(job),
+                Steal::Empty => return None,
+            }
+        }
+    }
+
+    localq
+        .pop()
+        .or_else(|| settle(|| globalq.steal_batch_and_pop(localq)))
+        .or_else(|| {
+            stealers
+                .iter()
+                .find_map(|s| settle(|| s.steal_batch_and_pop(localq)))
+        })
+}
diff --git a/oryxc/src/errors.rs b/oryxc/src/errors.rs
new file mode 100644
index 0000000..b3e6013
--- /dev/null
+++ b/oryxc/src/errors.rs
@@ -0,0 +1,67 @@
+use std::ffi::{
+ OsStr,
+ OsString,
+};
+use std::fmt::Display;
+use std::ops::Deref;
+use std::path::Path;
+use std::sync::OnceLock;
+use std::{
+ env,
+ process,
+};
+
+/* Name of the running program, for use as a prefix in diagnostics.
+
+   This is the final path component of the first command-line argument,
+   or “oryxc” when there is no first argument or it has no final
+   component.  It is computed on first use and cached. */
+pub fn progname() -> &'static OsString {
+    static ARGV0: OnceLock<OsString> = OnceLock::new();
+    ARGV0.get_or_init(|| {
+        let fallback = OsStr::new("oryxc");
+        match env::args_os().next() {
+            Some(argv0) => Path::new(&argv0)
+                .file_name()
+                .unwrap_or(fallback)
+                .to_os_string(),
+            None => fallback.to_os_string(),
+        }
+    })
+}
+
+#[macro_export]
+macro_rules! warn {
+ ($err:expr, $fmt:literal, $($arg:tt)*) => {{
+ use crate::errors::progname;
+ let _ = eprintln!("{}: {}: {}", progname().display(),
+ format_args!($fmt, $($arg)*), $err);
+ }};
+
+ ($err:expr, $fmt:literal) => {{
+ warn!($err, $fmt,);
+ }};
+
+ ($err:expr) => {{
+ use crate::errors::progname;
+ let _ = eprintln!("{}: {}", progname().display(), $err);
+ }};
+}
+
+/* Print a diagnostic exactly as `warn!` does, then exit the process with
+   status 1.  Accepts the same three argument forms as `warn!`.
+
+   Every path is spelled through `$crate` (or from the root `::std`) so
+   the expansion does not depend on what the calling module has
+   imported. */
+#[macro_export]
+macro_rules! err {
+    ($err:expr, $fmt:literal, $($arg:tt)*) => {{
+        $crate::warn!($err, $fmt, $($arg)*);
+        ::std::process::exit(1);
+    }};
+
+    ($err:expr, $fmt:literal) => {{
+        $crate::err!($err, $fmt,);
+    }};
+
+    ($err:expr) => {{
+        $crate::warn!($err);
+        ::std::process::exit(1);
+    }};
+}
+
+pub fn err_at_position<T, S>(filename: T, s: S) -> !
+where
+ T: Deref<Target = OsStr>,
+ S: Display,
+{
+ eprintln!("{}: \x1b[31;1mError:\x1b[0m {}", filename.display(), s);
+ process::exit(1);
+}
diff --git a/oryxc/src/intern.rs b/oryxc/src/intern.rs
new file mode 100644
index 0000000..3ab91cf
--- /dev/null
+++ b/oryxc/src/intern.rs
@@ -0,0 +1,68 @@
+use std::hash;
+
+use dashmap;
+use icu::normalizer;
+
+/* Handle for an interned string; an index into `Interner::store` */
+#[repr(transparent)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct Key(u32);
+
+/* String interner.  `map` finds the key of a string that has already been
+   interned, and `store` maps a key back to its string. */
+pub struct Interner<'a> {
+ map: dashmap::DashMap<UniStr<'a>, Key>,
+ store: Vec<&'a str>,
+}
+
+/* String wrapper whose Hash and PartialEq implementations work on the
+   NFKD-normalized form of the text rather than on its raw bytes */
+#[derive(Eq)]
+pub struct UniStr<'a>(pub &'a str);
+
+impl hash::Hash for UniStr<'_> {
+    /* Feed the characters of the NFKD-normalized text to the hasher.
+       Text that is entirely ASCII is hashed as-is, without running the
+       normalizer. */
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        let text = self.0;
+        if !text.is_ascii() {
+            let nfkd = normalizer::DecomposingNormalizer::new_nfkd();
+            for c in nfkd.normalize_iter(text.chars()) {
+                c.hash(state);
+            }
+            return;
+        }
+        for c in text.chars() {
+            c.hash(state);
+        }
+    }
+}
+
+impl PartialEq for UniStr<'_> {
+    /* Two strings are equal when their NFKD-normalized character
+       sequences are equal.  A side that is entirely ASCII is compared
+       directly, without running it through the normalizer. */
+    fn eq(&self, other: &Self) -> bool {
+        let (lhs, rhs) = (self.0, other.0);
+        let (lhs_ascii, rhs_ascii) = (lhs.is_ascii(), rhs.is_ascii());
+
+        if lhs_ascii && rhs_ascii {
+            return lhs == rhs;
+        }
+
+        let nfkd = normalizer::DecomposingNormalizer::new_nfkd();
+        if lhs_ascii {
+            lhs.chars().eq(nfkd.normalize_iter(rhs.chars()))
+        } else if rhs_ascii {
+            rhs.chars().eq(nfkd.normalize_iter(lhs.chars()))
+        } else {
+            nfkd.normalize_iter(lhs.chars())
+                .eq(nfkd.normalize_iter(rhs.chars()))
+        }
+    }
+}
+
+impl<'a> Interner<'a> {
+    /* Create an empty interner */
+    pub fn new() -> Self {
+        return Interner {
+            map: dashmap::DashMap::new(),
+            store: Vec::new(),
+        };
+    }
+
+    /* Return the string that was interned as `key`.
+
+       The result borrows from the interned text itself (`'a`), not from
+       the interner.  Panics if `key` is out of range for this
+       interner. */
+    pub fn get(&self, key: Key) -> &'a str {
+        return self.store[key.0 as usize];
+    }
+
+    /* Intern `value` and return its key.  A string that compares equal
+       (as a `UniStr`) to one already interned gets the existing key; the
+       first spelling seen is the one kept in the store.
+
+       Panics if more than `u32::MAX` distinct strings are interned. */
+    pub fn intern(&mut self, value: &'a str) -> Key {
+        let store = &mut self.store;
+        /* The entry API hashes and looks up the string only once */
+        return *self.map.entry(UniStr(value)).or_insert_with(|| {
+            let key = Key(u32::try_from(store.len())
+                .expect("interned more than u32::MAX strings"));
+            store.push(value);
+            key
+        });
+    }
+}
diff --git a/oryxc/src/lexer.rs b/oryxc/src/lexer.rs
new file mode 100644
index 0000000..531593d
--- /dev/null
+++ b/oryxc/src/lexer.rs
@@ -0,0 +1,427 @@
+use std::ffi::OsStr;
+use std::fmt::Display;
+use std::{
+ iter,
+ mem,
+ str,
+};
+
+use phf;
+use soa_rs::{
+ self,
+ Soars,
+};
+
+use crate::{
+ errors,
+ size,
+ unicode,
+};
+
+/* Kind of a lexed token.
+
+   Every single-character token uses the ASCII value of its character as
+   its discriminant.  The remaining kinds are numbered upwards from the
+   value after ‘~’. */
+#[repr(u8)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum TokenType {
+    Eof = 0,
+    Ampersand = b'&',
+    AngleL = b'<',
+    AngleR = b'>',
+    Asterisk = b'*',
+    Bar = b'|',
+    BraceL = b'{',
+    BraceR = b'}',
+    BracketL = b'[',
+    BracketR = b']',
+    Caret = b'^',
+    Comma = b',',
+    Equals = b'=',
+    Exclamation = b'!',
+    Minus = b'-',
+    ParenL = b'(',
+    ParenR = b')',
+    Plus = b'+',
+    Semicolon = b';',
+    Slash = b'/',
+    Tilde = b'~',
+    AmpersandTilde,
+    AngleL2,
+    AngleL3,
+    AngleR2,
+    AngleR3,
+    Ellipsis,
+    Identifier,
+    KeywordDef,
+    KeywordFunc,
+    KeywordReturn,
+    Number,
+    String,
+}
+
+impl TokenType {
+    /* Tokens that count as “literals” for the lexer's adjacency rule:
+       two of them may not directly follow one another.
+
+       NOTE(review): KeywordReturn is not in this set although the other
+       two keywords are; confirm that this is intended. */
+    pub fn literalp(&self) -> bool {
+        matches!(
+            self,
+            Self::Identifier
+                | Self::KeywordDef
+                | Self::KeywordFunc
+                | Self::Number
+                | Self::String
+        )
+    }
+
+    /* Tokens that start an expression */
+    pub fn exprp(&self) -> bool {
+        matches!(
+            self,
+            Self::Ampersand
+                | Self::Caret
+                | Self::Exclamation
+                | Self::Identifier
+                | Self::KeywordFunc
+                | Self::Minus
+                | Self::Number
+                | Self::ParenL
+                | Self::Plus
+                | Self::String
+                | Self::Tilde
+        )
+    }
+}
+
+/* A single token: its kind and the slice of the source text it covers */
+#[derive(Soars)]
+#[soa_derive(Debug)]
+pub struct Token<'a> {
+ pub kind: TokenType,
+ pub view: &'a str,
+}
+
+/* Result of `tokenize`: the tokens together with the text and filename
+   they were lexed from */
+pub struct TokenizedBuffer<'a> {
+ pub tokens: soa_rs::Soa<Token<'a>>,
+ pub buffer: &'a str,
+ pub filename: Option<&'a OsStr>,
+}
+
+/* Cursor state of the lexer.  `pos_a` and `pos_b` are byte offsets into
+   `string` around the character most recently returned by `next`. */
+struct LexerContext<'a> {
+ pos_a: usize, /* Pos [a]fter char */
+ pos_b: usize, /* Pos [b]efore char */
+ chars: iter::Peekable<str::Chars<'a>>,
+ string: &'a str,
+ filename: Option<&'a OsStr>, /* Reported as “-” in errors when None */
+ expect_punct_p: bool, /* Set when the previous token was a literal */
+}
+
+impl<'a> LexerContext<'a> {
+    /* Create a context positioned at the start of `string` */
+    fn new(filename: Option<&'a OsStr>, string: &'a str) -> Self {
+        Self {
+            string,
+            filename,
+            chars: string.chars().peekable(),
+            pos_a: 0,
+            pos_b: 0,
+            expect_punct_p: false,
+        }
+    }
+
+    /* Consume one character.  Afterwards `pos_b` is the byte offset at
+       which it starts and `pos_a` the offset just past it.  The
+       positions are left alone at end of input. */
+    #[inline(always)]
+    fn next(&mut self) -> Option<char> {
+        self.chars.next().map(|c| {
+            self.pos_b = self.pos_a;
+            self.pos_a += c.len_utf8();
+            c
+        })
+    }
+
+    /* Look at the next character without consuming it */
+    #[inline(always)]
+    fn peek(&mut self) -> Option<char> {
+        self.chars.peek().copied()
+    }
+
+    /* Report a fatal error against the current file (“-” when there is
+       no filename) and exit */
+    fn err_at_position<S>(&self, s: S) -> !
+    where
+        S: Display,
+    {
+        let name = match self.filename {
+            Some(name) => name,
+            None => OsStr::new("-"),
+        };
+        errors::err_at_position(name, s)
+    }
+
+    /* Fail if the token about to be lexed directly follows a literal */
+    #[inline(always)]
+    fn literal_spacing_guard(&self) {
+        if !self.expect_punct_p {
+            return;
+        }
+        self.err_at_position(
+            "Two literals may not be directly adjacent to each other",
+        );
+    }
+}
+
+/* Reserved words and their token kinds.  `tokenize_identifier` looks up
+   every identifier-shaped token here. */
+static KEYWORDS: phf::Map<&'static str, TokenType> = phf::phf_map! {
+ "def" => TokenType::KeywordDef,
+ "func" => TokenType::KeywordFunc,
+ "return" => TokenType::KeywordReturn,
+};
+
+/* Split the source text `s` into tokens.
+
+   `filename` is only used in error messages.  The returned buffer always
+   ends with a single Eof token.  Any lexical error is reported through
+   `LexerContext::err_at_position`, which exits the process. */
+pub fn tokenize<'a>(
+    filename: Option<&'a OsStr>,
+    s: &'a str,
+) -> TokenizedBuffer<'a> {
+    let mut toks = soa_rs::Soa::<Token>::with_capacity(size::kibibytes(10));
+    let mut ctx = LexerContext::new(filename, s);
+
+    while let Some(c) = ctx.next() {
+        /* Byte range [i, j) of the character that was just consumed */
+        let (i, j) = (ctx.pos_b, ctx.pos_a);
+        if let Some(tok) = match c {
+            '/' if ctx.peek() == Some('*') => {
+                skip_comment(&mut ctx);
+                ctx.expect_punct_p = false;
+                None
+            },
+            '<' if ctx.peek() == Some('<') => {
+                ctx.next(); /* Consume ‘<’ */
+                let kind = if ctx.peek() == Some('<') {
+                    ctx.next(); /* Consume ‘<’ */
+                    TokenType::AngleL3
+                } else {
+                    TokenType::AngleL2
+                };
+                Some(Token {
+                    kind,
+                    view: &s[i..ctx.pos_a],
+                })
+            },
+            '>' if ctx.peek() == Some('>') => {
+                ctx.next(); /* Consume ‘>’ */
+                let kind = if ctx.peek() == Some('>') {
+                    ctx.next(); /* Consume ‘>’ */
+                    TokenType::AngleR3
+                } else {
+                    TokenType::AngleR2
+                };
+                Some(Token {
+                    kind,
+                    view: &s[i..ctx.pos_a],
+                })
+            },
+            '&' if ctx.peek() == Some('~') => {
+                ctx.next(); /* Consume ‘~’ */
+                Some(Token {
+                    kind: TokenType::AmpersandTilde,
+                    view: &s[i..ctx.pos_a],
+                })
+            },
+            /* ‘…’ is not ASCII, so it cannot go through the byte cast
+               below: U+2026 would truncate to 0x26, which is ‘&’ */
+            '…' => Some(Token {
+                kind: TokenType::Ellipsis,
+                view: &s[i..j],
+            }),
+            '!' | '&' | '(' | ')' | '*' | '+' | ',' | '-' | '/' | ';' | '<'
+            | '=' | '>' | '[' | ']' | '^' | '{' | '|' | '}' | '~' => {
+                Some(Token {
+                    /* SAFETY: TokenType is #[repr(u8)], and every
+                       character matched by this arm is ASCII and is the
+                       explicit discriminant of one of its variants. */
+                    kind: unsafe { mem::transmute(c as u8) },
+                    view: &s[i..j],
+                })
+            },
+            '#' => {
+                ctx.literal_spacing_guard();
+                Some(tokenize_number_based(&mut ctx))
+            },
+            '0'..='9' => {
+                ctx.literal_spacing_guard();
+                Some(tokenize_number(&mut ctx, "0123456789"))
+            },
+            '"' => {
+                ctx.literal_spacing_guard();
+                Some(tokenize_string(&mut ctx))
+            },
+            _ if unicode::xid_start_p(c) => {
+                ctx.literal_spacing_guard();
+                Some(tokenize_identifier(&mut ctx))
+            },
+            _ if unicode::pattern_white_space_p(c) => {
+                if !unicode::default_ignorable_code_point_p(c) {
+                    ctx.expect_punct_p = false;
+                }
+                None
+            },
+            c => {
+                let msg = format!("Invalid character ‘{c}’");
+                ctx.err_at_position(msg.as_str());
+            },
+        } {
+            ctx.expect_punct_p = tok.kind.literalp();
+            toks.push(tok);
+        }
+    }
+
+    /* The Eof token views the last character of the input, or nothing at
+       all when the input is empty.  Subtracting one byte from the length
+       would underflow on empty input and could land in the middle of a
+       multi-byte character. */
+    let eof_start = s.char_indices().next_back().map_or(0, |(i, _)| i);
+    toks.push(Token {
+        kind: TokenType::Eof,
+        view: &s[eof_start..],
+    });
+    return TokenizedBuffer {
+        tokens: toks,
+        buffer: s,
+        filename,
+    };
+}
+
+/* Skip a block comment, which may contain nested comments.  The caller
+   has consumed the opening ‘/’ and the ‘*’ is next.  Reaching the end of
+   input before the comment is closed is a fatal error. */
+fn skip_comment<'a>(ctx: &mut LexerContext<'a>) {
+    ctx.next(); /* Consume ‘*’ */
+    let mut depth = 1;
+    loop {
+        let Some(c) = ctx.next() else {
+            ctx.err_at_position("Unterminated comment");
+        };
+        match (c, ctx.peek()) {
+            /* A nested comment opens */
+            ('/', Some('*')) => {
+                ctx.next(); /* Consume ‘*’ */
+                depth += 1;
+            },
+            /* The innermost open comment closes */
+            ('*', Some('/')) => {
+                ctx.next(); /* Consume ‘/’ */
+                depth -= 1;
+                if depth == 0 {
+                    return;
+                }
+            },
+            _ => {},
+        }
+    }
+}
+
+/* Lex a number with an explicit base prefix: ‘#b’, ‘#o’, ‘#d’ or ‘#x’.
+   The caller has consumed the ‘#’.  The view of the returned token
+   starts at the ‘#’. */
+fn tokenize_number_based<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
+    let start = ctx.pos_b;
+
+    let Some(spec) = ctx.next() else {
+        ctx.err_at_position("Expected number base specifier after ‘#’");
+    };
+    let (alphabet, base) = match spec {
+        'b' => ("01", "binary"),
+        'o' => ("01234567", "octal"),
+        'd' => ("0123456789", "decimal"),
+        'x' => ("0123456789ABCDEF", "hexadecimal"),
+        c => {
+            let msg = format!("Invalid number base specifier ‘{c}’");
+            ctx.err_at_position(msg.as_str());
+        },
+    };
+
+    /* The number itself must begin with a digit of the chosen base */
+    let Some(c) = ctx.next() else {
+        ctx.err_at_position("Expected number after base specifier");
+    };
+    if !alphabet.contains(c) {
+        let msg = format!("Invalid {base} digit ‘{c}’");
+        ctx.err_at_position(msg.as_str());
+    }
+
+    let number = tokenize_number(ctx, alphabet);
+    Token {
+        kind: number.kind,
+        view: &ctx.string[start..ctx.pos_a],
+    }
+}
+
+/* Lex a number whose first digit the caller has already consumed.
+   `alphabet` is the set of valid digits.  An optional fractional part
+   (‘.’) and an optional exponent (‘e’) may follow the integer part. */
+fn tokenize_number<'a>(
+    ctx: &mut LexerContext<'a>,
+    alphabet: &'static str,
+) -> Token<'a> {
+    let start = ctx.pos_b;
+    span_raw_number(ctx, alphabet, true);
+
+    /* Fractional part; the digits after the ‘.’ are optional */
+    if ctx.peek() == Some('.') {
+        ctx.next();
+        if matches!(ctx.peek(), Some(c) if alphabet.contains(c)) {
+            span_raw_number(ctx, alphabet, false);
+        }
+    }
+
+    /* Exponential part; at least one digit must follow the ‘e’ */
+    if ctx.peek() == Some('e') {
+        ctx.next();
+        span_raw_number(ctx, alphabet, false);
+    }
+
+    Token {
+        kind: TokenType::Number,
+        view: &ctx.string[start..ctx.pos_a],
+    }
+}
+
+/* Consume a run of digits from `alphabet`, in which single ‘'’
+   characters may separate digits.
+
+   When `first_digit_lexed_p` is false the run has not been started yet,
+   so the next character must itself be a digit.  Two separators in a
+   row, or a separator at the end of the run, are fatal errors. */
+fn span_raw_number<'a>(
+    ctx: &mut LexerContext<'a>,
+    alphabet: &'static str,
+    first_digit_lexed_p: bool,
+) {
+    /* Name of the number base implied by the size of the alphabet */
+    fn base_name(alphabet: &str) -> &'static str {
+        match alphabet.len() {
+            2 => "binary",
+            8 => "octal",
+            10 => "decimal",
+            16 => "hexadecimal",
+            _ => unreachable!(),
+        }
+    }
+
+    if !first_digit_lexed_p {
+        match ctx.next() {
+            Some(c) if alphabet.contains(c) => {},
+            Some(c) => {
+                let base = base_name(alphabet);
+                let msg = format!("Invalid {base} digit ‘{c}’");
+                ctx.err_at_position(msg.as_str());
+            },
+            None => {
+                let base = base_name(alphabet);
+                let msg = format!(
+                    "Expected {base} digit but reached end-of-file instead"
+                );
+                ctx.err_at_position(msg.as_str());
+            },
+        }
+    }
+
+    /* True while the character consumed last was a separator */
+    let mut pending_sep_p = false;
+    loop {
+        match ctx.peek() {
+            Some('\'') if pending_sep_p => ctx.err_at_position(
+                "Multiple concurrent digit separators in numeric literal",
+            ),
+            Some('\'') => pending_sep_p = true,
+            Some(c) if alphabet.contains(c) => pending_sep_p = false,
+            _ => break,
+        }
+        ctx.next();
+    }
+
+    if pending_sep_p {
+        ctx.err_at_position(
+            "Numeric literals may not end with a digit separator",
+        );
+    }
+}
+
+/* Lex a string literal.  The caller has consumed the opening quote.  The
+   token's view includes both quotes.  Nothing inside the string is
+   treated specially: the first ‘"’ encountered ends it. */
+fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
+    let start = ctx.pos_b;
+    while let Some(c) = ctx.next() {
+        if c == '"' {
+            return Token {
+                kind: TokenType::String,
+                view: &ctx.string[start..ctx.pos_a],
+            };
+        }
+    }
+    ctx.err_at_position("Unterminated string")
+}
+
+/* Lex an identifier or keyword.  The caller has consumed the first
+   character.  The result is a keyword token when the text is a key of
+   KEYWORDS, and an Identifier otherwise. */
+fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token<'a> {
+    let start = ctx.pos_b;
+    while let Some(c) = ctx.peek() {
+        if !unicode::xid_continue_p(c) {
+            break;
+        }
+        ctx.next();
+    }
+    let view = &ctx.string[start..ctx.pos_a];
+    let kind = KEYWORDS
+        .get(view)
+        .copied()
+        .unwrap_or(TokenType::Identifier);
+    Token { kind, view }
+}
diff --git a/oryxc/src/main.rs b/oryxc/src/main.rs
new file mode 100644
index 0000000..298093d
--- /dev/null
+++ b/oryxc/src/main.rs
@@ -0,0 +1,95 @@
+#![allow(unsafe_op_in_unsafe_fn)]
+
+mod compiler;
+mod errors;
+mod lexer;
+mod parser;
+mod size;
+mod unicode;
+
+use std::ffi::OsString;
+use std::{
+ env,
+ fs,
+ process,
+ thread,
+};
+
+use lexopt;
+
+/* Command-line options, as filled in by `Flags::parse` */
+#[derive(Clone, Copy, Default)]
+pub struct Flags {
+ pub debug_lexer: bool, /* -l, --debug-lexer */
+ pub debug_parser: bool, /* -p, --debug-parser */
+ pub help: bool, /* -h, --help */
+ pub threads: usize, /* -t, --threads; never 0 after `Flags::parse` */
+}
+
+impl Flags {
+    /* Parse the process's command line.
+
+       Returns the flags together with the positional arguments, in the
+       order they were given.  A thread count of 0 is rejected with a
+       fatal error.  When no thread count is given it defaults to the
+       available parallelism, or to 1 (with a warning) if that cannot be
+       determined. */
+    fn parse() -> Result<(Flags, Vec<OsString>), lexopt::Error> {
+        use lexopt::prelude::*;
+
+        let mut flags = Flags::default();
+        let mut rest = Vec::with_capacity(env::args().len());
+        let mut parser = lexopt::Parser::from_env();
+
+        while let Some(arg) = parser.next()? {
+            match arg {
+                Short('h') | Long("help") => flags.help = true,
+                Short('l') | Long("debug-lexer") => flags.debug_lexer = true,
+                Short('p') | Long("debug-parser") => flags.debug_parser = true,
+                Short('t') | Long("threads") => {
+                    let n: usize = parser.value()?.parse()?;
+                    if n == 0 {
+                        err!("thread count must be greater than 0");
+                    }
+                    flags.threads = n;
+                },
+                Value(v) => rest.push(v),
+                _ => return Err(arg.unexpected()),
+            }
+        }
+
+        /* Still 0 here means that -t was never given */
+        if flags.threads == 0 {
+            flags.threads = match thread::available_parallelism() {
+                Ok(n) => n.get(),
+                Err(e) => {
+                    warn!(e, "failed to get thread count");
+                    1
+                },
+            };
+        }
+
+        Ok((flags, rest))
+    }
+}
+
+/* Print a usage summary to standard error.
+
+   NOTE(review): `Flags::parse` also collects positional arguments, which
+   `main` passes to `compiler::start`; they are not mentioned here. */
+fn usage() {
+ eprintln!(
+ concat!("Usage: {0} [-lp] [-t threads]\n", " {0} -h"),
+ errors::progname().display()
+ );
+}
+
+/* Entry point: parse the command line, handle -h, and hand the
+   positional arguments to the compiler */
+fn main() {
+    let (flags, rest) = Flags::parse().unwrap_or_else(|e| {
+        warn!(e);
+        usage();
+        process::exit(1)
+    });
+
+    if flags.help {
+        usage();
+        process::exit(0);
+    }
+
+    compiler::start(rest, flags);
+    // let tokbuf = lexer::tokenize(Some(file), s.as_str());
+    // let (ast, extra_data) = parser::parse(&tokbuf);
+
+    // if flags.debug_lexer {
+    // tokbuf.tokens.iter().for_each(|t| println!("{t:?}"));
+    // }
+}
diff --git a/oryxc/src/parser.rs b/oryxc/src/parser.rs
new file mode 100644
index 0000000..212d0db
--- /dev/null
+++ b/oryxc/src/parser.rs
@@ -0,0 +1,544 @@
+use std::ffi::OsStr;
+use std::fmt::Display;
+use std::mem::ManuallyDrop;
+use std::vec::Vec;
+
+use soa_rs::{
+ Soa,
+ Soars,
+};
+
+use crate::lexer::{
+ TokenType,
+ TokenizedBuffer,
+};
+use crate::{
+ errors,
+ size,
+};
+
+/* Lowest binding power; parse_expr(MIN_PREC) parses a complete expression */
+const MIN_PREC: i64 = 0;
+/* Binding power parse_expr() uses for the operand of a prefix operator; it
+   equals the highest value getprec() returns */
+const MAX_PREC: i64 = 6;
+
+/// Kind tag of an [`AstNode`].
+///
+/// The pair in each trailing comment describes what the node's two `sub`
+/// slots (`SubNodes.0`, `SubNodes.1`) hold; `_` marks an unused slot and
+/// `extra-data` an index into the parser's extra-data vector.
+///
+/// NOTE(review): the parser in this file stores no sub-nodes for `Number`
+/// and `String` (their token index lives in `AstNode::tok`) — confirm
+/// whether the `(token, _)` annotations below are still intended.
+#[repr(u8)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum AstType {
+ Assign, /* (ident-token, expression) */
+ Block, /* (extra-data, _) */
+ Dereference, /* (lhs, _) */
+ FunCall, /* (expression, extra-data) */
+ FunProto, /* (extra-data, _) */
+ Function, /* (prototype, body) */
+ Identifier, /* (_, _) */
+ MultiDefBind, /* (extra-data, _) */
+ Number, /* (token, _) */
+ Pointer, /* (rhs, _) */
+ Return, /* (extra-data, _) */
+ String, /* (token, _) */
+ Type, /* (type-data, _) */
+ UnaryOperator, /* (rhs, _) */
+ BinaryOperator, /* (lhs, rhs) */
+}
+
+/// The two child slots of an [`AstNode`]; what each slot indexes depends on
+/// the node's [`AstType`].
+#[derive(Clone, Copy, Debug)]
+pub struct SubNodes(u32, u32);
+
+impl Default for SubNodes {
+ /// Both slots set to `u32::MAX`, the same value the parser writes into a
+ /// slot it leaves unused.
+ fn default() -> Self {
+ return Self(u32::MAX, u32::MAX);
+ }
+}
+
+/// One node of the syntax tree.  The parser keeps its nodes in a
+/// `Soa<AstNode>` and refers to them by `u32` index.
+#[derive(Soars)]
+#[soa_derive(Debug)]
+pub struct AstNode {
+ /// What kind of construct this node represents.
+ pub kind: AstType,
+ /// Index of the node's main token in the token buffer.
+ pub tok: u32,
+ /// Child slots; see the per-variant notes on [`AstType`].
+ pub sub: SubNodes,
+}
+
+/// Payload of a `MultiDefBind` node: the declared names and their
+/// initialisers.
+pub struct DeclData {
+ /* Identifier token index paired with the type’s node index, or u32::MAX
+    when no type was written */
+ lhs: Vec<(u32, u32)>, /* (ident, type) tuple */
+ /* Node indices of the expressions after ‘=’ */
+ rhs: Vec<u32>,
+}
+
+/// Payload of a `FunCall` node.
+pub struct FunCallData {
+ /* Node indices of the argument expressions */
+ args: Vec<u32>,
+}
+
+/// Payload of a `FunProto` node that has a parameter list.
+pub struct FunProtoData {
+ /* Same layout as DeclData.lhs */
+ args: Vec<(u32, u32)>, /* (ident, type) tuple */
+ /* Node indices of the return-type expressions; empty when none given */
+ ret: Vec<u32>,
+}
+
+/// Payload of a `Block` node.
+pub struct BlockData {
+ /* Node indices of the statements, in source order */
+ stmts: Vec<u32>,
+}
+
+/// Payload of a `Return` node.
+pub struct ReturnData {
+ /* Node indices of the returned expressions */
+ exprs: Vec<u32>,
+}
+
+/// Out-of-line payload for AST nodes that need more than two sub-slots.
+///
+/// The union does not record which field is active; the parser pairs each
+/// entry with a node whose [`AstType`] implies it (`Block` → `block`,
+/// `MultiDefBind` → `decl`, `FunCall` → `funcall`, `FunProto` →
+/// `funproto`, `Return` → `r#return`).
+///
+/// Every field is wrapped in `ManuallyDrop`, so dropping an `ExtraData`
+/// does not free the vectors inside it.
+pub union ExtraData {
+ block: ManuallyDrop<BlockData>,
+ decl: ManuallyDrop<DeclData>,
+ funcall: ManuallyDrop<FunCallData>,
+ funproto: ManuallyDrop<FunProtoData>,
+ r#return: ManuallyDrop<ReturnData>,
+}
+
+/// Parser state over one tokenized buffer.
+struct Parser<'a, 'b> {
+ /* Nodes created so far; node indices returned by the parse_* methods
+    point into this */
+ ast: Soa<AstNode>,
+ /* Payloads referenced from node sub-slots */
+ extra_data: Vec<ExtraData>,
+ /* The tokens being parsed */
+ tokbuf: &'a TokenizedBuffer<'b>,
+ /* Index of the current token in tokbuf */
+ cursor: u32,
+ /* Shared temporary stack used by parse_decl_list() and
+    parse_expr_list(); each call removes what it pushed before returning */
+ scratch: Vec<u32>,
+}
+
+impl<'a, 'b> Parser<'a, 'b> {
+ /// Creates a parser positioned at the first token of `tokbuf`, with the
+ /// node, extra-data and scratch storage preallocated.
+ fn new(tokbuf: &'a TokenizedBuffer<'b>) -> Self {
+     let ast = Soa::with_capacity(size::kibibytes(10));
+     let extra_data = Vec::with_capacity(size::kibibytes(1));
+     let scratch = Vec::with_capacity(64);
+     return Self {
+         ast,
+         extra_data,
+         tokbuf,
+         cursor: 0,
+         scratch,
+     };
+ }
+
+ /// Returns the kind of the token under the cursor without moving it.
+ #[inline(always)]
+ fn get(&self) -> TokenType {
+     let kinds = self.tokbuf.tokens.kind();
+     /* No bounds check is performed here.  NOTE(review): this relies on
+        the cursor never passing the last token — presumably the buffer
+        always ends in an Eof token that the parser stops on; confirm
+        against the lexer */
+     return unsafe { *kinds.get_unchecked(self.cursor as usize) };
+ }
+
+ /// Moves the cursor forward one token and returns the kind of the token
+ /// it lands on.
+ #[inline(always)]
+ fn next(&mut self) -> TokenType {
+     self.cursor += 1;
+     let landed_on = self.get();
+     return landed_on;
+ }
+
+ /// Returns the kind of the current token, then moves the cursor past it.
+ #[inline(always)]
+ fn get_n_move(&mut self) -> TokenType {
+     let current = self.get();
+     self.cursor += 1;
+     return current;
+ }
+
+ /// Appends `n` to the AST and returns its index.
+ #[inline(always)]
+ fn new_node(&mut self, n: AstNode) -> u32 {
+     /* The length before the push is the index the new node will get */
+     let index = self.ast.len() as u32;
+     self.ast.push(n);
+     return index;
+ }
+
+ /// Appends `d` to the extra-data vector and returns its index.
+ #[inline(always)]
+ fn new_extra_data(&mut self, d: ExtraData) -> u32 {
+     /* The length before the push is the index the new entry will get */
+     let index = self.extra_data.len() as u32;
+     self.extra_data.push(d);
+     return index;
+ }
+
+ fn err_at_position<T>(&self, i: u32, s: T) -> !
+ where
+ T: Display,
+ {
+ errors::err_at_position(
+ self.tokbuf.filename.unwrap_or(OsStr::new("-")),
+ s,
+ );
+ }
+
+ /// Parses one top-level item.  Only `def` statements are accepted; at
+ /// Eof this does nothing, and any other token is reported as an error.
+ fn parse_toplevel(&mut self) {
+     let kind = self.get();
+     if kind == TokenType::Eof {
+         return;
+     }
+     if kind != TokenType::KeywordDef {
+         let msg =
+             format!("Expected top-level statement but got {:?}", kind);
+         self.err_at_position(self.cursor, msg.as_str());
+     }
+     self.parse_def();
+ }
+
+ /// Parses one statement — a `def`, a `return`, or an expression
+ /// statement — and returns the index of its node.  `return` and
+ /// expression statements must end in a semicolon.
+ fn parse_stmt(&mut self) -> u32 {
+     let kind = self.get();
+
+     if kind == TokenType::KeywordDef {
+         return self.parse_def();
+     }
+
+     if kind == TokenType::KeywordReturn {
+         let main_tok = self.cursor;
+         self.next(); /* Step over ‘return’ */
+         let exprs = self.parse_expr_list();
+         let terminator = self.get_n_move();
+         if terminator != TokenType::Semicolon {
+             self.err_at_position(
+                 self.cursor - 1,
+                 "Expected semicolon after return statement",
+             );
+         }
+         let payload = ExtraData {
+             r#return: ManuallyDrop::new(ReturnData { exprs }),
+         };
+         let i = self.new_extra_data(payload);
+         return self.new_node(AstNode {
+             kind: AstType::Return,
+             tok: main_tok,
+             sub: SubNodes(i, u32::MAX),
+         });
+     }
+
+     if kind.exprp() {
+         let expr = self.parse_expr(MIN_PREC);
+         let terminator = self.get_n_move();
+         if terminator != TokenType::Semicolon {
+             self.err_at_position(
+                 self.cursor - 1,
+                 "Expected semicolon after expression",
+             );
+         }
+         return expr;
+     }
+
+     let msg = format!("Expected statement but got {:?}", kind);
+     self.err_at_position(self.cursor, msg.as_str());
+ }
+
+ /// Parses `def <decl-list> = <expr-list> ;` and returns the index of the
+ /// resulting `MultiDefBind` node.  Both lists must be non-empty.
+ fn parse_def(&mut self) -> u32 {
+     let main_tok = self.cursor;
+
+     let keyword = self.get_n_move();
+     if keyword != TokenType::KeywordDef {
+         self.err_at_position(self.cursor - 1, "Expected ‘def’");
+     }
+
+     let lhs = self.parse_decl_list();
+     if lhs.is_empty() {
+         self.err_at_position(main_tok, "Expected an identifier");
+     }
+
+     let equals = self.get_n_move();
+     if equals != TokenType::Equals {
+         self.err_at_position(self.cursor - 1, "Expected ‘=’");
+     }
+
+     let rhs = self.parse_expr_list();
+     if rhs.is_empty() {
+         self.err_at_position(
+             self.cursor - 1,
+             "Expected expression after ‘=’",
+         );
+     }
+
+     let terminator = self.get_n_move();
+     if terminator != TokenType::Semicolon {
+         self.err_at_position(self.cursor - 1, "Expected semicolon");
+     }
+
+     let payload = ExtraData {
+         decl: ManuallyDrop::new(DeclData { lhs, rhs }),
+     };
+     let i = self.new_extra_data(payload);
+     return self.new_node(AstNode {
+         kind: AstType::MultiDefBind,
+         tok: main_tok,
+         sub: SubNodes(i, u32::MAX),
+     });
+ }
+
+ /// Parses a function prototype starting at the `func` keyword and
+ /// returns the index of its `FunProto` node.
+ ///
+ /// Without a `(` after the keyword the node carries no extra data.
+ /// Otherwise a parenthesised parameter list is parsed, optionally
+ /// followed by either a parenthesised list of return types or a single
+ /// return-type expression.
+ fn parse_func_proto(&mut self) -> u32 {
+     let main_tok = self.cursor;
+
+     /* Step over ‘func’; with no ‘(’ there are no params and no return */
+     let after_keyword = self.next();
+     if after_keyword != TokenType::ParenL {
+         return self.new_node(AstNode {
+             kind: AstType::FunProto,
+             tok: main_tok,
+             sub: SubNodes(u32::MAX, u32::MAX),
+         });
+     }
+
+     self.next(); /* Step over ‘(’ */
+     let args = self.parse_decl_list();
+     let closer = self.get_n_move();
+     if closer != TokenType::ParenR {
+         self.err_at_position(
+             self.cursor - 1,
+             "Expected closing parenthesis",
+         );
+     }
+
+     let kind = self.get();
+     let ret = if kind == TokenType::ParenL {
+         self.next(); /* Step over ‘(’ */
+         let types = self.parse_expr_list();
+         if self.get_n_move() != TokenType::ParenR {
+             self.err_at_position(
+                 self.cursor - 1,
+                 "Expected closing parenthesis",
+             );
+         }
+         types
+     } else if kind.exprp() {
+         // TODO: A heap allocation for a single element is wasteful;
+         // optimize for the small cases (or use an arena?)
+         vec![self.parse_expr(MIN_PREC)]
+     } else {
+         Vec::new() /* Doesn’t allocate */
+     };
+
+     let payload = ExtraData {
+         funproto: ManuallyDrop::new(FunProtoData { args, ret }),
+     };
+     let i = self.new_extra_data(payload);
+     return self.new_node(AstNode {
+         kind: AstType::FunProto,
+         tok: main_tok,
+         sub: SubNodes(i, u32::MAX),
+     });
+ }
+
+ /// Parses `{ <stmt>* }` and returns the index of the `Block` node.
+ fn parse_block(&mut self) -> u32 {
+     let main_tok = self.cursor;
+
+     let opener = self.get_n_move();
+     if opener != TokenType::BraceL {
+         self.err_at_position(self.cursor - 1, "Expected opening brace");
+     }
+
+     let mut stmts = Vec::<u32>::with_capacity(64);
+     loop {
+         if self.get() == TokenType::BraceR {
+             break;
+         }
+         let stmt = self.parse_stmt();
+         stmts.push(stmt);
+     }
+     self.next(); /* Step over ‘}’ */
+
+     let payload = ExtraData {
+         block: ManuallyDrop::new(BlockData { stmts }),
+     };
+     let i = self.new_extra_data(payload);
+     return self.new_node(AstNode {
+         kind: AstType::Block,
+         tok: main_tok,
+         sub: SubNodes(i, u32::MAX),
+     });
+ }
+
+ /// Parses a comma-separated declaration list such as `a, b T, c U` and
+ /// returns one `(identifier token index, type node index)` pair per
+ /// name.
+ ///
+ /// A type expression applies to every name since the previous type
+ /// (so `a, b T` gives both `a` and `b` the type `T`); names that never
+ /// receive a type keep `u32::MAX` in the second slot.  Parsing stops at
+ /// the first token that cannot continue the list, which is left for
+ /// the caller.  An empty vector means no identifier was found.
+ fn parse_decl_list(&mut self) -> Vec<(u32, u32)> {
+     let scratch_beg = self.scratch.len();
+     /* Names pushed since the last type expression */
+     let mut nuntyped = 0usize;
+
+     while self.get() == TokenType::Identifier {
+         /* One (ident, type) pair, flattened; the type is filled in later */
+         self.scratch.push(self.cursor);
+         self.scratch.push(u32::MAX);
+         nuntyped += 1;
+
+         match self.next() {
+             TokenType::Comma => {
+                 self.next();
+             },
+             t if t.exprp() => {
+                 let k = self.parse_expr(MIN_PREC);
+                 /* Type slots are every second element counting back
+                    from the end of the scratch stack */
+                 for slot in
+                     self.scratch.iter_mut().rev().step_by(2).take(nuntyped)
+                 {
+                     *slot = k;
+                 }
+                 nuntyped = 0;
+
+                 /* A comma after the type continues the list; without
+                    this, ‘a T, b U’ would stop after ‘a T’ */
+                 if self.get() != TokenType::Comma {
+                     break;
+                 }
+                 self.next();
+             },
+             _ => break,
+         };
+     }
+
+     let pairs = self.scratch[scratch_beg..]
+         .chunks_exact(2)
+         .map(|pair| (pair[0], pair[1]))
+         .collect();
+     self.scratch.truncate(scratch_beg);
+     return pairs;
+ }
+
+ /// Parses zero or more comma-separated expressions and returns their
+ /// node indices in source order.  A trailing comma is accepted.
+ fn parse_expr_list(&mut self) -> Vec<u32> {
+     let scratch_beg = self.scratch.len();
+
+     loop {
+         if !self.get().exprp() {
+             break;
+         }
+         let expr = self.parse_expr(MIN_PREC);
+         self.scratch.push(expr);
+         if self.get() != TokenType::Comma {
+             break;
+         }
+         self.next(); /* Step over ‘,’ */
+     }
+
+     /* Hand back only what this call pushed */
+     let exprs: Vec<u32> = self.scratch.drain(scratch_beg..).collect();
+     return exprs;
+ }
+
+ /// Parses one expression by precedence climbing and returns the index
+ /// of its root node.
+ ///
+ /// `minprec` is the lowest binding power an infix or postfix operator
+ /// must have to be absorbed into this expression: pass `MIN_PREC` for a
+ /// complete expression, `MAX_PREC` for the operand of a prefix operator.
+ ///
+ /// Binary operators of equal precedence associate to the left.  The
+ /// postfix forms (call `f(…)` and dereference `p^`) bind tightest.
+ fn parse_expr(&mut self, minprec: i64) -> u32 {
+     /* Binding power of ‘t’ when it follows an operand, or -1 when ‘t’
+        cannot continue an expression */
+     fn getprec(t: TokenType) -> i64 {
+         match t {
+             /* Postfix: call and dereference.  Caret needs an entry
+                here, otherwise the dereference arm below is never
+                reached */
+             TokenType::ParenL | TokenType::Caret => MAX_PREC,
+             TokenType::Ampersand
+             | TokenType::AmpersandTilde
+             | TokenType::AngleL2
+             | TokenType::AngleL3
+             | TokenType::AngleR2
+             | TokenType::AngleR3
+             | TokenType::Asterisk
+             | TokenType::Slash => 5,
+             TokenType::Bar
+             | TokenType::Minus
+             | TokenType::Plus
+             | TokenType::Tilde => 4,
+             TokenType::AngleL | TokenType::AngleR => 3,
+             _ => -1,
+         }
+     }
+
+     /* Operand: a leaf, a prefix form, a parenthesised expression or a
+        function */
+     let mut lhs = match self.get() {
+         TokenType::Identifier => {
+             self.next();
+             self.new_node(AstNode {
+                 kind: AstType::Identifier,
+                 tok: self.cursor - 1,
+                 sub: SubNodes::default(),
+             })
+         },
+         TokenType::Number => {
+             self.next();
+             self.new_node(AstNode {
+                 kind: AstType::Number,
+                 tok: self.cursor - 1,
+                 sub: SubNodes::default(),
+             })
+         },
+         TokenType::String => {
+             self.next();
+             self.new_node(AstNode {
+                 kind: AstType::String,
+                 tok: self.cursor - 1,
+                 sub: SubNodes::default(),
+             })
+         },
+         TokenType::Ampersand
+         | TokenType::Exclamation
+         | TokenType::Minus
+         | TokenType::Plus
+         | TokenType::Tilde => {
+             let i = self.cursor;
+             self.next();
+             /* MAX_PREC: the operand absorbs postfix forms only */
+             let operand = self.parse_expr(MAX_PREC);
+             self.new_node(AstNode {
+                 kind: AstType::UnaryOperator,
+                 tok: i,
+                 sub: SubNodes(operand, u32::MAX),
+             })
+         },
+         TokenType::ParenL => {
+             self.next();
+             let k = self.parse_expr(MIN_PREC);
+             if self.get() != TokenType::ParenR {
+                 self.err_at_position(
+                     self.cursor,
+                     "Expected closing parenthesis",
+                 );
+             }
+             self.next(); /* Consume ‘)’ */
+             k
+         },
+         TokenType::Caret => {
+             let tok = self.cursor;
+             self.next();
+             let k = self.parse_expr(MAX_PREC);
+             self.new_node(AstNode {
+                 kind: AstType::Pointer,
+                 tok,
+                 sub: SubNodes(k, u32::MAX),
+             })
+         },
+         TokenType::KeywordFunc => {
+             let tok = self.cursor;
+             let proto = self.parse_func_proto();
+             /* A prototype followed by a block is a function; on its
+                own it is left as a bare prototype node */
+             if self.get() == TokenType::BraceL {
+                 let body = self.parse_block();
+                 self.new_node(AstNode {
+                     kind: AstType::Function,
+                     tok,
+                     sub: SubNodes(proto, body),
+                 })
+             } else {
+                 proto
+             }
+         },
+         _ => self.err_at_position(self.cursor, "Expected expression"),
+     };
+
+     /* Fold in infix and postfix operators that bind at least as tightly
+        as ‘minprec’ */
+     loop {
+         let tok = self.get();
+         let prec = getprec(tok);
+         if prec < minprec {
+             break;
+         }
+
+         lhs = match tok {
+             /* Binop */
+             TokenType::Ampersand
+             | TokenType::AmpersandTilde
+             | TokenType::AngleL2
+             | TokenType::AngleL3
+             | TokenType::AngleR2
+             | TokenType::AngleR3
+             | TokenType::Asterisk
+             | TokenType::Slash
+             | TokenType::Bar
+             | TokenType::Minus
+             | TokenType::Plus
+             | TokenType::Tilde
+             | TokenType::AngleL
+             | TokenType::AngleR => {
+                 let i = self.cursor;
+                 self.next();
+                 /* The right operand may only absorb operators that bind
+                    strictly tighter, so ‘a - b - c’ groups as
+                    ‘(a - b) - c’ */
+                 let rhs = self.parse_expr(prec + 1);
+                 self.new_node(AstNode {
+                     kind: AstType::BinaryOperator,
+                     tok: i,
+                     sub: SubNodes(lhs, rhs),
+                 })
+             },
+
+             /* Dereference */
+             TokenType::Caret => {
+                 self.next();
+                 self.new_node(AstNode {
+                     kind: AstType::Dereference,
+                     tok: self.cursor - 1,
+                     sub: SubNodes(lhs, u32::MAX),
+                 })
+             },
+
+             /* Funcall */
+             TokenType::ParenL => {
+                 let tok = self.cursor;
+                 self.next();
+                 let args = self.parse_expr_list();
+                 if self.get_n_move() != TokenType::ParenR {
+                     self.err_at_position(self.cursor - 1, "Expected ‘)’");
+                 }
+                 let i = self.new_extra_data(ExtraData {
+                     funcall: ManuallyDrop::new(FunCallData { args }),
+                 });
+                 self.new_node(AstNode {
+                     kind: AstType::FunCall,
+                     tok,
+                     sub: SubNodes(lhs, i),
+                 })
+             },
+
+             _ => break,
+         };
+     }
+
+     return lhs;
+ }
+}
+
+/// Parses every top-level item in `tokbuf` until the Eof token and returns
+/// the AST nodes together with the extra-data vector their sub-slots
+/// index into.
+pub fn parse(tokbuf: &TokenizedBuffer) -> (Soa<AstNode>, Vec<ExtraData>) {
+    let mut parser = Parser::new(tokbuf);
+    loop {
+        if parser.get() == TokenType::Eof {
+            break;
+        }
+        parser.parse_toplevel();
+    }
+    let Parser { ast, extra_data, .. } = parser;
+    return (ast, extra_data);
+}
diff --git a/oryxc/src/size.rs b/oryxc/src/size.rs
new file mode 100644
index 0000000..4f639c2
--- /dev/null
+++ b/oryxc/src/size.rs
@@ -0,0 +1,3 @@
+/// Converts a number of kibibytes to the equivalent number of bytes
+/// (`n * 1024`).
+pub const fn kibibytes(n: usize) -> usize {
+    const BYTES_PER_KIBIBYTE: usize = 1024;
+    return n * BYTES_PER_KIBIBYTE;
+}
diff --git a/oryxc/src/unicode/default_ignorable_code_point.rs b/oryxc/src/unicode/default_ignorable_code_point.rs
new file mode 100644
index 0000000..b900a3b
--- /dev/null
+++ b/oryxc/src/unicode/default_ignorable_code_point.rs
@@ -0,0 +1,363 @@
+/* Autogenerated – DO NOT EDIT */
+
+static DEFAULT_IGNORABLE_CODE_POINT_L1: [u16; 544] = [
+ 0, 1, 2, 3, 4, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 7, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+];
+static DEFAULT_IGNORABLE_CODE_POINT_L2: [u64; 320] = [
+ 0,
+ 0,
+ 35184372088832,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 32768,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 268435456,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 6442450944,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 13510798882111488,
+ 0,
+ 63488,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 136339441907712,
+ 281470681743360,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 68719476736,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 65535,
+ 0,
+ 0,
+ 9223372036854775808,
+ 0,
+ 0,
+ 4294967296,
+ 143833713099145216,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 64424509440,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 574208952489738240,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+];
+/// Looks `c` up in the two-level DEFAULT_IGNORABLE_CODE_POINT bitmap:
+/// the L1 table maps each 2048-code-point block to a block number, and
+/// the L2 table holds 32 64-bit words of membership bits per block.
+/// Presumably generated from the Unicode Default_Ignorable_Code_Point
+/// property — confirm against the generator.
+#[inline]
+pub fn default_ignorable_code_point_p(c: char) -> bool {
+    let cp = c as usize;
+    let offset = cp & 0x7FF; /* Position inside the 2048-wide block */
+
+    /* SAFETY: a char is at most 0x10FFFF, so cp >> 11 is at most 543, and
+       the L1 table has 544 entries */
+    let blki =
+        unsafe { *DEFAULT_IGNORABLE_CODE_POINT_L1.get_unchecked(cp >> 11) }
+            as usize;
+    let wordi = blki * 32 + (offset >> 6);
+    let mask = 1u64 << (offset & 0x3F);
+
+    /* SAFETY: the L1 entries are at most 9, so wordi is at most
+       9 * 32 + 31 = 319, and the L2 table has 320 words */
+    let word =
+        unsafe { *DEFAULT_IGNORABLE_CODE_POINT_L2.get_unchecked(wordi) };
+    return (word & mask) != 0;
+}
diff --git a/oryxc/src/unicode/line_terminator.rs b/oryxc/src/unicode/line_terminator.rs
new file mode 100644
index 0000000..e30e031
--- /dev/null
+++ b/oryxc/src/unicode/line_terminator.rs
@@ -0,0 +1,135 @@
+/* Autogenerated – DO NOT EDIT */
+
+static LINE_TERMINATOR_L1: [u16; 544] = [
+ 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+];
+static LINE_TERMINATOR_L2: [u64; 96] = [
+ 15360,
+ 0,
+ 32,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 3298534883328,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+];
+/// Looks `c` up in the two-level LINE_TERMINATOR bitmap: the L1 table
+/// maps each 2048-code-point block to a block number, and the L2 table
+/// holds 32 64-bit words of membership bits per block.
+#[inline]
+pub fn line_terminator_p(c: char) -> bool {
+    let cp = c as usize;
+    let offset = cp & 0x7FF; /* Position inside the 2048-wide block */
+
+    /* SAFETY: a char is at most 0x10FFFF, so cp >> 11 is at most 543, and
+       the L1 table has 544 entries */
+    let blki = unsafe { *LINE_TERMINATOR_L1.get_unchecked(cp >> 11) } as usize;
+    let wordi = blki * 32 + (offset >> 6);
+    let mask = 1u64 << (offset & 0x3F);
+
+    /* SAFETY: the L1 entries are at most 2, so wordi is at most
+       2 * 32 + 31 = 95, and the L2 table has 96 words */
+    let word = unsafe { *LINE_TERMINATOR_L2.get_unchecked(wordi) };
+    return (word & mask) != 0;
+}
diff --git a/oryxc/src/unicode/mod.rs b/oryxc/src/unicode/mod.rs
new file mode 100644
index 0000000..2fbdcb3
--- /dev/null
+++ b/oryxc/src/unicode/mod.rs
@@ -0,0 +1,11 @@
+pub mod default_ignorable_code_point;
+pub mod line_terminator;
+pub mod pattern_white_space;
+pub mod xid_continue;
+pub mod xid_start;
+
+pub use default_ignorable_code_point::default_ignorable_code_point_p;
+pub use line_terminator::line_terminator_p;
+pub use pattern_white_space::pattern_white_space_p;
+pub use xid_continue::xid_continue_p;
+pub use xid_start::xid_start_p;
diff --git a/oryxc/src/unicode/pattern_white_space.rs b/oryxc/src/unicode/pattern_white_space.rs
new file mode 100644
index 0000000..b051e3a
--- /dev/null
+++ b/oryxc/src/unicode/pattern_white_space.rs
@@ -0,0 +1,137 @@
+/* Autogenerated – DO NOT EDIT */
+
+static PATTERN_WHITE_SPACE_L1: [u16; 544] = [
+ 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+];
+static PATTERN_WHITE_SPACE_L2: [u64; 96] = [
+ 4294983168,
+ 0,
+ 32,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 3298534932480,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+];
+/// Looks `c` up in the two-level PATTERN_WHITE_SPACE bitmap: the L1
+/// table maps each 2048-code-point block to a block number, and the L2
+/// table holds 32 64-bit words of membership bits per block.
+#[inline]
+pub fn pattern_white_space_p(c: char) -> bool {
+    let cp = c as usize;
+    let offset = cp & 0x7FF; /* Position inside the 2048-wide block */
+
+    /* SAFETY: a char is at most 0x10FFFF, so cp >> 11 is at most 543, and
+       the L1 table has 544 entries */
+    let blki =
+        unsafe { *PATTERN_WHITE_SPACE_L1.get_unchecked(cp >> 11) } as usize;
+    let wordi = blki * 32 + (offset >> 6);
+    let mask = 1u64 << (offset & 0x3F);
+
+    /* SAFETY: the L1 entries are at most 2, so wordi is at most
+       2 * 32 + 31 = 95, and the L2 table has 96 words */
+    let word = unsafe { *PATTERN_WHITE_SPACE_L2.get_unchecked(wordi) };
+    return (word & mask) != 0;
+}
diff --git a/oryxc/src/unicode/xid_continue.rs b/oryxc/src/unicode/xid_continue.rs
new file mode 100644
index 0000000..8fbbce4
--- /dev/null
+++ b/oryxc/src/unicode/xid_continue.rs
@@ -0,0 +1,1007 @@
+/* Autogenerated – DO NOT EDIT */
+
+static XID_CONTINUE_L1: [u16; 1088] = [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 12, 12, 12, 12, 12, 13, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 14, 15, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 9, 9, 9, 9, 9, 9,
+ 9, 9, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 9, 29, 12, 30, 12,
+ 12, 31, 32, 9, 9, 9, 9, 9, 9, 33, 9, 34, 35, 12, 12, 12, 12, 12, 12, 12,
+ 36, 9, 9, 9, 9, 9, 9, 9, 37, 38, 9, 9, 39, 9, 9, 9, 40, 41, 42, 43, 44, 45,
+ 46, 47, 48, 9, 9, 49, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 50, 12, 12, 12, 12, 51, 12, 12, 12,
+ 12, 52, 12, 12, 12, 12, 12, 12, 53, 54, 9, 9, 55, 9, 12, 12, 12, 12, 56,
+ 12, 12, 12, 12, 12, 12, 12, 12, 57, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 58, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+];
+static XID_CONTINUE_L2: [u64; 944] = [
+ 287948901175001088,
+ 576460745995190270,
+ 333270770471927808,
+ 18410715276682199039,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 88094074470339,
+ 18446744073709551615,
+ 13321647697761927167,
+ 18446744056529672128,
+ 18428729675200069631,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709550843,
+ 18446744073709551615,
+ 18446462598732840959,
+ 18446744069456527359,
+ 13835058055282033151,
+ 2119858418286774,
+ 18446744069548736512,
+ 18446678103011885055,
+ 18446744073709551615,
+ 11529212845433552895,
+ 18446744073709486080,
+ 18446744073709545471,
+ 1125899906842623,
+ 2612087783874887679,
+ 70368744177663,
+ 18446471390799331327,
+ 18446744073701228287,
+ 18446744056529682431,
+ 18446744073709551615,
+ 18446462392574410751,
+ 17565725197581524975,
+ 5765733215448889759,
+ 15235112390417287150,
+ 18014125208779143,
+ 17576984196650090478,
+ 18302910150157089727,
+ 17576984196649951214,
+ 844217444219295,
+ 14123225865944680428,
+ 281200107273671,
+ 17582050746231021567,
+ 281265452367327,
+ 17577547146603651055,
+ 4221916082617823,
+ 18446744073709412351,
+ 18158794964244397535,
+ 3457638613854978030,
+ 3658904103781503,
+ 576460752303423486,
+ 67076095,
+ 4611685674830002134,
+ 4093607775,
+ 14024213633433600001,
+ 18446216308128218879,
+ 2305843009196916703,
+ 64,
+ 18446744073709551615,
+ 18446744073709487103,
+ 18446744070488326143,
+ 17870283321406070975,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744070446333439,
+ 9168765891372858879,
+ 18446744073701162813,
+ 18446744073696837631,
+ 1123704775901183,
+ 18446744069414649855,
+ 4557642822898941951,
+ 18446744073709551614,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446638520593285119,
+ 18446744069548802046,
+ 144053615424700415,
+ 9007197111451647,
+ 3905461007941631,
+ 18446744073709551615,
+ 4394566287359,
+ 18446744069481674752,
+ 144115188075855871,
+ 18446471394825863167,
+ 18014398509481983,
+ 1152657619668697087,
+ 8796093022207936,
+ 18446480190918885375,
+ 134153215,
+ 18446744069683019775,
+ 11529215043920986111,
+ 13834777130128311295,
+ 17588964818943,
+ 18446744073709551615,
+ 4494803601399807,
+ 18446744073709551615,
+ 4503599627370495,
+ 72057594037927935,
+ 4611686018427380735,
+ 16717361816799217663,
+ 576460752302833664,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744070475743231,
+ 4611686017001275199,
+ 6908521828386340863,
+ 2295745090394464220,
+ 9231253336202686464,
+ 9223934986817634305,
+ 536805376,
+ 562821641207808,
+ 17582049991377026180,
+ 18446744069414601696,
+ 511,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 4494940973301759,
+ 18446498607738650623,
+ 9223513873854758911,
+ 9187201948305063935,
+ 18446744071553646463,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 2251518330118602976,
+ 18446744073709551614,
+ 18446744068986765311,
+ 18446744073709551615,
+ 18446462598732840928,
+ 18446744073709551615,
+ 18446744069414617087,
+ 18446462598732840960,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 8191,
+ 4611686018427322368,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 17592185987071,
+ 13830835930631503871,
+ 18446744073709551615,
+ 1125899906842623,
+ 18446744060816261120,
+ 18446744073709551615,
+ 18446744073709550079,
+ 18446181124293001215,
+ 18691697672191,
+ 4503599627370495,
+ 18446744073709551615,
+ 16789419406609285183,
+ 18446532967477018623,
+ 2305843004919775231,
+ 18446744073709551615,
+ 9223372032626884609,
+ 36028797018963967,
+ 18194542490348896255,
+ 18446744073709551615,
+ 35184368733388807,
+ 18446602782178705022,
+ 18446466996645134335,
+ 18446744073709551615,
+ 288010473826156543,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446462667452317695,
+ 1152921504606845055,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446532967477018623,
+ 18446744073709551615,
+ 67108863,
+ 6881498031078244479,
+ 18446744073709551579,
+ 1125899906842623,
+ 18446744073709027328,
+ 18446744073709551615,
+ 18446744006063816703,
+ 18446744073709551615,
+ 18446744073709551615,
+ 4611686018427387903,
+ 18446744073709486080,
+ 18446744073709355007,
+ 287948901175001343,
+ 7036870122864639,
+ 12288634533233876992,
+ 18446744073709551615,
+ 2305843009213693951,
+ 9799832780635308032,
+ 18446743936404815870,
+ 9223372036854775807,
+ 486341884,
+ 13258596753222922239,
+ 1073692671,
+ 18446744073709551615,
+ 576460752303423487,
+ 0,
+ 9007199254740991,
+ 0,
+ 2305843009213693952,
+ 0,
+ 0,
+ 18446744069951455231,
+ 4295098367,
+ 18446708893632430079,
+ 576460752303359999,
+ 18446744070488326143,
+ 4128527,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446466993558126591,
+ 1152921504591118335,
+ 18446463698244468735,
+ 17870001915148894207,
+ 2016486715970549759,
+ 4503599627370495,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 36028797018963967,
+ 1095220854783,
+ 575897802350002111,
+ 0,
+ 10502394331027995967,
+ 36028792728190975,
+ 2147483647,
+ 15762594400829440,
+ 288230371860938751,
+ 67108863,
+ 13907115649320091647,
+ 0,
+ 9745789593611923567,
+ 2305843004918726656,
+ 536870911,
+ 549755813631,
+ 18014398509481983,
+ 2251795522912255,
+ 262143,
+ 0,
+ 18446744073709551615,
+ 511,
+ 2251799813685247,
+ 2251799813685247,
+ 287950000686628863,
+ 18446671780820025343,
+ 63,
+ 0,
+ 0,
+ 0,
+ 875211255709695,
+ 18158513697557840124,
+ 18446463149025525759,
+ 18446462598732972031,
+ 18446462598732841023,
+ 36028792723996703,
+ 18446744073709551615,
+ 9241386160486350975,
+ 576460752303423487,
+ 287951100198191108,
+ 18437736874454810623,
+ 22517998136787184,
+ 18446744073709551615,
+ 402644511,
+ 13907115649319829503,
+ 3,
+ 18446464796682337663,
+ 287957697268023295,
+ 18153444948953374703,
+ 8760701963286943,
+ 18428729675200023551,
+ 25770850213,
+ 18446744073709551615,
+ 16173172735,
+ 18446744073709551615,
+ 67043519,
+ 0,
+ 0,
+ 18392700878181105663,
+ 1056964609,
+ 18446744073709551615,
+ 67043345,
+ 144115188075855871,
+ 68719412223,
+ 287966492958392319,
+ 127,
+ 0,
+ 0,
+ 576460752303423487,
+ 0,
+ 18446744069414584320,
+ 9223376434901286911,
+ 17996384110963061375,
+ 67043343,
+ 18446740770879700992,
+ 120208752639,
+ 9223372036854775807,
+ 18446744073709486208,
+ 18446462599336820735,
+ 144115188075855871,
+ 0,
+ 1095216660480,
+ 0,
+ 287948909764935679,
+ 18410715276690587135,
+ 18445618173869752321,
+ 36027697507139583,
+ 0,
+ 13006395723845991295,
+ 18446741595580465407,
+ 18446466992517644287,
+ 4394019979263,
+ 0,
+ 0,
+ 0,
+ 36028792723996672,
+ 14411518807585456127,
+ 134152199,
+ 281474976710656,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 67108863,
+ 0,
+ 18446744073709551615,
+ 140737488355327,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 15,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709486080,
+ 562949953421311,
+ 281474976710655,
+ 18446744069418778623,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 576460752303423487,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 127,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 288230376151711743,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 144115188075855871,
+ 18446466994631868415,
+ 9223372036854775807,
+ 8796093022143487,
+ 36028797018963967,
+ 16212958624241090575,
+ 65535,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 287984085547089919,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18014398505187016704,
+ 1048575,
+ 18446744073709551615,
+ 18446744073709520895,
+ 4294934783,
+ 35747438006370304,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 9223372036858970111,
+ 2147483647,
+ 0,
+ 18446744073709551615,
+ 2251799813685247,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 8065665457643847680,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 1125934266580991,
+ 18446463629527547904,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 1152921504606846975,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 2305570330330005503,
+ 1677656575,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 287948901175001088,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446532967477018623,
+ 127,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 17872504197455282176,
+ 65970697670631,
+ 0,
+ 0,
+ 28,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073707454463,
+ 17005555242810474495,
+ 18446744073709551599,
+ 8935141660164089791,
+ 18446744073709419615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446743249075830783,
+ 17870283321271910397,
+ 18437736874452713471,
+ 18446603336221163519,
+ 18446741874686295551,
+ 18446744073709539319,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 17906312118425092095,
+ 9042383626829823,
+ 281470547525648,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 8660801552383,
+ 0,
+ 0,
+ 0,
+ 18446471240106377087,
+ 70368744177663,
+ 32768,
+ 0,
+ 4611439727822766079,
+ 17407,
+ 0,
+ 0,
+ 0,
+ 0,
+ 140737488289792,
+ 288230376151711743,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 288230376151646208,
+ 0,
+ 0,
+ 0,
+ 576460752303357952,
+ 0,
+ 0,
+ 0,
+ 13853072451644162047,
+ 0,
+ 0,
+ 0,
+ 9223213153129594880,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 8323103,
+ 18446744073709551615,
+ 67047423,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 790380184120328175,
+ 6843210385291930244,
+ 1152917029519358975,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 287948901175001088,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 4294967295,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744070488326143,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446532967477018623,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446462607322775551,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 1073741823,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 1073741823,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709488127,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 288230376151711743,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 281474976710655,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+];
+#[inline]
+pub fn xid_continue_p(c: char) -> bool {
+ let cp = c as usize;
+ let blki = unsafe { *XID_CONTINUE_L1.get_unchecked(cp >> 10) } as usize;
+ let in_blk_offset_p = cp & 0x3FF;
+ let wordi = (blki * 16) + (in_blk_offset_p >> 6);
+ let biti = in_blk_offset_p & 0x3F;
+ unsafe {
+ return (*XID_CONTINUE_L2.get_unchecked(wordi) & (1 << biti)) != 0;
+ }
+}
diff --git a/oryxc/src/unicode/xid_start.rs b/oryxc/src/unicode/xid_start.rs
new file mode 100644
index 0000000..1c9d9ae
--- /dev/null
+++ b/oryxc/src/unicode/xid_start.rs
@@ -0,0 +1,927 @@
+/* Autogenerated – DO NOT EDIT */
+
+static XID_START_L1: [u16; 1088] = [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 12, 12, 12, 12, 12, 13, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 14, 15, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 9, 9, 9, 9, 9, 9,
+ 9, 9, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 9, 29, 12, 30, 12,
+ 12, 31, 32, 9, 9, 9, 9, 9, 9, 33, 9, 34, 35, 12, 12, 12, 12, 12, 12, 12,
+ 36, 9, 9, 9, 9, 9, 9, 9, 37, 38, 9, 9, 39, 9, 9, 9, 9, 9, 40, 9, 41, 42,
+ 43, 44, 45, 9, 9, 9, 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 46, 12, 12, 12, 12, 47, 12, 12, 12, 12,
+ 48, 12, 12, 12, 12, 12, 12, 49, 50, 9, 9, 51, 9, 12, 12, 12, 12, 52, 12,
+ 12, 12, 12, 12, 12, 12, 12, 53, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+];
+static XID_START_L2: [u64; 864] = [
+ 68719476736,
+ 576460745995190270,
+ 297241973452963840,
+ 18410715276682199039,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 88094074470339,
+ 0,
+ 13321366222785216512,
+ 18446744056529672000,
+ 18428729675200069631,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709550595,
+ 18446744073709551615,
+ 18446462598732840959,
+ 18446744069456527359,
+ 511,
+ 2119858418286592,
+ 18446744069414584320,
+ 18446392229988665343,
+ 18446744073709551615,
+ 11241196188469297151,
+ 281474976514048,
+ 18446744073709543424,
+ 563224831328255,
+ 301749971126844416,
+ 1168302407679,
+ 18446471390564450303,
+ 18446744069414649599,
+ 1023,
+ 2594073385365405680,
+ 18446181140919287808,
+ 2577745637692514273,
+ 1153765945374687232,
+ 247132830528276448,
+ 7881300924956672,
+ 2589004636761079776,
+ 144115200960823296,
+ 2589004636760940512,
+ 562965791113216,
+ 288167810662516712,
+ 65536,
+ 2594071186342010848,
+ 13807648768,
+ 2589567586714640353,
+ 1688864624214016,
+ 2882303761516978160,
+ 18158513712597581824,
+ 3457638613854978016,
+ 127,
+ 1688849860263934,
+ 127,
+ 2307531515476572118,
+ 4026531935,
+ 1,
+ 35184372088575,
+ 7936,
+ 0,
+ 9223380832947798015,
+ 18438229877581611008,
+ 18446744069414600707,
+ 17870283321406070975,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744070446333439,
+ 9168765891372858879,
+ 18446744073701162813,
+ 18446744073696837631,
+ 134217727,
+ 18446744069414649855,
+ 4557642822898941951,
+ 18446744073709551614,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446638520593285119,
+ 18446744069548802046,
+ 144053615424700415,
+ 1125897759621119,
+ 527761286627327,
+ 4503599627370495,
+ 276824064,
+ 18446744069414584320,
+ 144115188075855871,
+ 18446469195802607615,
+ 18014398509481983,
+ 2147483647,
+ 8796093022142464,
+ 18446480190918885375,
+ 1023,
+ 18446744069422972927,
+ 2097151,
+ 549755813888,
+ 0,
+ 4503599627370464,
+ 8160,
+ 18158724812380307448,
+ 274877906943,
+ 68719476735,
+ 4611686018360336384,
+ 16717361816799217663,
+ 319718190147960832,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744070475743231,
+ 4611686017001275199,
+ 6908521828386340863,
+ 2295745090394464220,
+ 0,
+ 9223934986808197120,
+ 536805376,
+ 0,
+ 17582049991377026180,
+ 18446744069414601696,
+ 511,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 3509778554814463,
+ 18446498607738650623,
+ 141836999983103,
+ 9187201948305063935,
+ 2139062143,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 2251241253188403424,
+ 18446744073709551614,
+ 18446744068886102015,
+ 17870283321406128127,
+ 18446462598732840928,
+ 18446744073709551615,
+ 18446744069414617087,
+ 18446462598732840960,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 8191,
+ 4611686018427322368,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 13198434443263,
+ 9223512774343131135,
+ 18446744070488326143,
+ 281474976710655,
+ 18446744060816261120,
+ 18446744073709551615,
+ 18446744073709550079,
+ 18446181124293001215,
+ 34359736251,
+ 4503599627370495,
+ 4503599627370492,
+ 7564921474075590656,
+ 18446462873610746880,
+ 2305843004918726783,
+ 2251799813685232,
+ 8935422993945886720,
+ 2199023255551,
+ 14159317224157876215,
+ 4495436853045886975,
+ 7890092085477381,
+ 18446602782178705022,
+ 18446466996645134335,
+ 18446744073709551615,
+ 34359738367,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446462667452317695,
+ 1152921504606845055,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446532967477018623,
+ 18446744073709551615,
+ 67108863,
+ 6881498030004502655,
+ 18446744073709551579,
+ 1125899906842623,
+ 18446744073709027328,
+ 18446744073709551615,
+ 18446744006063816703,
+ 18446744073709551615,
+ 18446744073709551615,
+ 4611686018427387903,
+ 18446744073709486080,
+ 18446744073709355007,
+ 287948901175001343,
+ 0,
+ 12288634533233819648,
+ 18446744073709551615,
+ 2305843009213693951,
+ 576460743713488896,
+ 18446743798965862398,
+ 9223372033633550335,
+ 486341884,
+ 13258596753222922239,
+ 1073692671,
+ 18446744073709551615,
+ 576460752303423487,
+ 0,
+ 9007199254740991,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744069951455231,
+ 131071,
+ 18446708893632430079,
+ 18014398509418495,
+ 18446744070488326143,
+ 4128527,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446462599806582783,
+ 1152921504591118335,
+ 18446463698244468735,
+ 17870001915148894207,
+ 2016486715970549759,
+ 4503599627370495,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 36028797018963967,
+ 1095220854783,
+ 575897802350002111,
+ 0,
+ 10502394331027995967,
+ 36028792728190975,
+ 2147483647,
+ 15762594400829440,
+ 288230371860938751,
+ 67108863,
+ 13907115649320091647,
+ 0,
+ 18014398491590657,
+ 2305843004918726656,
+ 536870911,
+ 137438953215,
+ 18014398509481983,
+ 2251795522912255,
+ 262143,
+ 0,
+ 18446744073709551615,
+ 511,
+ 2251799813685247,
+ 2251799813685247,
+ 68719476735,
+ 18446603611099102208,
+ 63,
+ 0,
+ 0,
+ 0,
+ 848822976643071,
+ 252,
+ 18446463149025525759,
+ 18446462598732841023,
+ 18446462598732840963,
+ 36028792723996703,
+ 72057594037927928,
+ 10696049115004928,
+ 281474976710648,
+ 2199023190016,
+ 549755813880,
+ 20266198323101840,
+ 2251799813685240,
+ 335544350,
+ 9223389629040558079,
+ 1,
+ 18446464796682337663,
+ 2147483647,
+ 2589004636760940512,
+ 16643063808,
+ 54043195528399871,
+ 655360,
+ 9007199254740991,
+ 15032387456,
+ 281474976710655,
+ 176,
+ 0,
+ 0,
+ 140737488355327,
+ 251658240,
+ 281474976710655,
+ 16,
+ 72066390130950143,
+ 0,
+ 134217727,
+ 127,
+ 0,
+ 0,
+ 17592186044415,
+ 0,
+ 18446744069414584320,
+ 9223372041149743103,
+ 9223653511822045823,
+ 2,
+ 18446740770879700992,
+ 42949804031,
+ 290482175965394945,
+ 18446744073441181696,
+ 18446462599269712895,
+ 144115188075855871,
+ 0,
+ 0,
+ 0,
+ 8589934591,
+ 140737488354815,
+ 18445618173802708993,
+ 65535,
+ 0,
+ 562949953420159,
+ 18446741595513421888,
+ 18446462598749619199,
+ 268435455,
+ 0,
+ 0,
+ 0,
+ 2251795518717952,
+ 4503599627239412,
+ 0,
+ 281474976710656,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 67108863,
+ 0,
+ 18446744073709551615,
+ 140737488355327,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 15,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709486080,
+ 562949953421311,
+ 281474976710655,
+ 18446744069414584446,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 576460752303423487,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 127,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1073741823,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 144115188075855871,
+ 18446462600880324607,
+ 9223372036854775807,
+ 70368744112128,
+ 281474976710655,
+ 16212958624174047247,
+ 65535,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 35184372088831,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18014398505187016704,
+ 1048575,
+ 18446744073709551615,
+ 67583,
+ 4294443008,
+ 34902944356761600,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 9223372036858970111,
+ 2147483647,
+ 0,
+ 18446744073709551615,
+ 2251799813685247,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 8065665457643847680,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 1125934266580991,
+ 18446463629527547904,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 1152921504606846975,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 2305570330330005503,
+ 67043839,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073707454463,
+ 17005555242810474495,
+ 18446744073709551599,
+ 8935141660164089791,
+ 18446744073709419615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446743249075830783,
+ 17870283321271910397,
+ 18437736874452713471,
+ 18446603336221163519,
+ 18446741874686295551,
+ 4087,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 8660801552383,
+ 0,
+ 0,
+ 0,
+ 18446462598732840960,
+ 70368744177663,
+ 0,
+ 0,
+ 4575692405780512767,
+ 16384,
+ 0,
+ 0,
+ 0,
+ 0,
+ 70368744112128,
+ 17592186044415,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 17592185978880,
+ 0,
+ 0,
+ 0,
+ 351843720822784,
+ 0,
+ 0,
+ 0,
+ 13843853836919242751,
+ 0,
+ 0,
+ 0,
+ 9223213153129594880,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 31,
+ 18446744073709551615,
+ 2063,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 790380184120328175,
+ 6843210385291930244,
+ 1152917029519358975,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 4294967295,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744070488326143,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446532967477018623,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446462607322775551,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 1073741823,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 1073741823,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709488127,
+ 18446744073709551615,
+ 18446744073709551615,
+ 18446744073709551615,
+ 288230376151711743,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+];
+/// Reports whether `c` has its bit set in the two-level `XID_START` bitmap.
+///
+/// The code point is split in two: `cp >> 10` indexes `XID_START_L1`, whose
+/// entry names a block of 16 words in `XID_START_L2`, and the low 10 bits
+/// select a single bit inside that block.
+#[inline]
+pub fn xid_start_p(c: char) -> bool {
+	const BLOCK_SHIFT: usize = 10;
+	const WORDS_PER_BLOCK: usize = 16;
+
+	let cp = c as usize;
+	let offset = cp & ((1 << BLOCK_SHIFT) - 1);
+
+	// SAFETY: NOTE(review): both lookups are unchecked.  They are in
+	// bounds only if XID_START_L1 has an entry for every `cp >> 10` (at
+	// least 0x110000 >> 10 = 1088 entries) and every entry names a
+	// complete 16-word block of XID_START_L2.  The tables are defined
+	// above this function and are presumably generated to satisfy this;
+	// confirm.
+	let block =
+		unsafe { *XID_START_L1.get_unchecked(cp >> BLOCK_SHIFT) } as usize;
+	let word = unsafe {
+		*XID_START_L2.get_unchecked(block * WORDS_PER_BLOCK + (offset >> 6))
+	};
+	return ((word >> (offset & 0x3F)) & 1) != 0;
+}
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..8632490
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,14 @@
+comment_width = 73
+wrap_comments = true
+edition = "2024"
+enum_discrim_align_threshold = 20
+hard_tabs = true
+imports_layout = "Vertical"
+match_block_trailing_comma = true
+max_width = 80
+imports_granularity = "Module"
+newline_style = "Unix"
+group_imports = "StdExternalCrate"
+struct_field_align_threshold = 20
+style_edition = "2024"
+unstable_features = true
diff --git a/test.x b/test.x
new file mode 100644
index 0000000..f5d52f0
--- /dev/null
+++ b/test.x
@@ -0,0 +1,33 @@
+/* Sample source file.  It contains a $foreign declaration, block
+ * comments, an identifier ending in a prime mark, a '#b'-prefixed numeric
+ * literal and a func value passed as a call argument.
+ * NOTE(review): presumably an ad-hoc input for the oryxc lexer and parser
+ * rather than a runnable program (slices_sort and my_slice are never
+ * defined in this file); confirm. */
+
+def puts = $foreign("puts", func(s ^u8));
+
+/*
+def foo = func {
+ let my_string =
+ \ This is my line
+ \ this is a second line
+ \ etc.
+ ;
+ puts(my_string);
+}
+*/
+
+/* def add = func(dst *vec($N), v, u vec($N))
+ * $poke(operator.addeq)
+ * {
+ * loop (i: 0...N)
+ * dst[i] = v[i] + u[i];
+ * }; */
+
+def main′ = func {
+ puts("Hello, sailor!");
+ some_func(#b10.1100'1001e11);
+ slices_sort(my_slice, func(x, y int) int {
+ return x - y;
+ });
+};
+
+def some_func = func(n u32) u32 { return n * 2; };
+
+/* def MY_FLOAT = union { f f64; n u64; } { n = 0x482DEF }.f */
+
+def main = func { main′(); };
diff --git a/unigen/Cargo.lock b/unigen/Cargo.lock
new file mode 100644
index 0000000..5a36f70
--- /dev/null
+++ b/unigen/Cargo.lock
@@ -0,0 +1,16 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "lexopt"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c5d9b5843e8c9311ff602e6bd50855015e99e75159c2c54fe104cfac241f552"
+
+[[package]]
+name = "unigen"
+version = "0.1.0"
+dependencies = [
+ "lexopt",
+]
diff --git a/unigen/Cargo.toml b/unigen/Cargo.toml
new file mode 100644
index 0000000..e0c6b4d
--- /dev/null
+++ b/unigen/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "unigen"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+lexopt = "0.1.0"
diff --git a/unigen/fetch b/unigen/fetch
new file mode 100755
index 0000000..46c02f9
--- /dev/null
+++ b/unigen/fetch
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+set -e
+
+cd "${0%/*}"
+trap 'rm -f UCD.zip' EXIT
+
+set -x
+mkdir -p data
+curl -LO https://www.unicode.org/Public/zipped/latest/UCD.zip
+unzip -od data UCD.zip
+
+# XID_Start and XID_Continue additions
+cat <<-EOF >>data/DerivedCoreProperties.txt
+0024 ; XID_Start # Pc DOLLAR SIGN
+005F ; XID_Start # Pc LOW LINE
+2032..2034 ; XID_Continue # Po [3] PRIME..TRIPLE PRIME
+2057 ; XID_Continue # Po QUADRUPLE PRIME
+EOF
diff --git a/unigen/rustfmt.toml b/unigen/rustfmt.toml
new file mode 120000
index 0000000..39f97b0
--- /dev/null
+++ b/unigen/rustfmt.toml
@@ -0,0 +1 @@
+../rustfmt.toml \ No newline at end of file
diff --git a/unigen/src/main.rs b/unigen/src/main.rs
new file mode 100644
index 0000000..4851fa5
--- /dev/null
+++ b/unigen/src/main.rs
@@ -0,0 +1,278 @@
+use std::collections::HashMap;
+use std::ffi::{
+ OsStr,
+ OsString,
+};
+use std::fs::File;
+use std::io::{
+ self,
+ BufRead,
+ BufReader,
+};
+use std::path::Path;
+use std::sync::OnceLock;
+use std::vec::Vec;
+use std::{
+ env,
+ process,
+};
+
+/// Smallest block-size exponent tried by `optimize_tables`; a block
+/// covers `1 << shift` code points.
+const MIN_SHIFT: usize = 1;
+/// Largest block-size exponent tried by `optimize_tables`.
+const MAX_SHIFT: usize = 22;
+
+/// Options recognised on the command line.
+#[derive(Default)]
+struct Flags {
+ /// Code points listed with `-c`/`--codepoints`; `None` when the option
+ /// was not given.
+ codepoints: Option<Vec<char>>,
+ /// Set when `-h`/`--help` was given.
+ help: bool,
+}
+
+impl Flags {
+	/// Parses the process's command-line arguments.
+	///
+	/// Returns the recognised option flags together with the positional
+	/// arguments in the order they were given.  Errors reported by
+	/// `lexopt` (an unexpected option, a missing option value, a
+	/// positional that is not valid Unicode) are returned to the caller.
+	/// A malformed `-c`/`--codepoints` value instead prints a diagnostic
+	/// and exits with status 1.
+	fn parse() -> Result<(Flags, Vec<String>), lexopt::Error> {
+		use lexopt::prelude::*;
+
+		/// Converts one hexadecimal code point such as `"2032"` to a
+		/// `char`, exiting with a diagnostic when it is not valid
+		/// hexadecimal or not a Unicode scalar value.
+		fn hex_to_char(s: &str) -> char {
+			let n = u32::from_str_radix(s, 16).unwrap_or_else(|e| {
+				eprintln!("{}: {s}: {e}", progname().display());
+				process::exit(1);
+			});
+			return char::from_u32(n).unwrap_or_else(|| {
+				eprintln!("{}: {s}: invalid codepoint", progname().display());
+				process::exit(1);
+			});
+		}
+
+		// At most every argument except argv[0] is a positional.  Only the
+		// count is needed, so the non-validating `args_os` suffices, and
+		// `saturating_sub` tolerates an empty argument vector, where
+		// `len() - 1` would underflow.
+		let mut rest =
+			Vec::with_capacity(env::args_os().len().saturating_sub(1));
+		let mut flags = Flags::default();
+		let mut parser = lexopt::Parser::from_env();
+
+		while let Some(arg) = parser.next()? {
+			match arg {
+				Short('c') | Long("codepoints") => {
+					let value = parser.value()?;
+					let list = value.to_str().unwrap_or_else(|| {
+						eprintln!(
+							"{}: unable to parse argument to -c/--codepoints",
+							progname().display()
+						);
+						process::exit(1);
+					});
+					// Comma-separated list of hexadecimal code points.
+					flags.codepoints =
+						Some(list.split(',').map(hex_to_char).collect());
+				},
+				Short('h') | Long("help") => flags.help = true,
+				Value(v) => rest.push(v.into_string()?),
+				_ => return Err(arg.unexpected()),
+			}
+		}
+
+		return Ok((flags, rest));
+	}
+}
+
+/// Returns the name this program was invoked as, for use as a prefix in
+/// diagnostics.
+///
+/// The value is the final path component of `argv[0]`, computed on first
+/// use and cached for the rest of the process.  When `argv[0]` is absent
+/// or has no file-name component, the crate's own name, `"unigen"`, is
+/// used instead.
+fn progname() -> &'static OsString {
+	static ARGV0: OnceLock<OsString> = OnceLock::new();
+	return ARGV0.get_or_init(|| {
+		let default = OsStr::new("unigen");
+		return env::args_os()
+			.next()
+			.as_deref()
+			.and_then(|argv0| Path::new(argv0).file_name())
+			.unwrap_or(default)
+			.to_os_string();
+	});
+}
+
+/// Prints the command synopsis to standard error.
+fn usage() {
+ // `{0}` is replaced by the program name in each of the three forms.
+ eprintln!(
+ concat!(
+ "Usage: {0} data-file property-name\n",
+ " {0} -c codepoints name\n",
+ " {0} -h",
+ ),
+ progname().display()
+ );
+}
+
+/// Entry point: builds a bitmap of the selected code points and prints
+/// the generated lookup tables for it.
+///
+/// The code points come either from the `-c` list (one positional: the
+/// table name) or from a data file filtered by property name (two
+/// positionals: data file, property name).  An argument error prints the
+/// usage text and exits with status 1; an I/O error from reading the data
+/// file is returned.
+fn main() -> io::Result<()> {
+	let (flags, rest) = Flags::parse().unwrap_or_else(|e| {
+		eprintln!("{}: {e}", progname().display());
+		usage();
+		process::exit(1);
+	});
+
+	if flags.help {
+		usage();
+		process::exit(0);
+	}
+
+	// With -c only the name remains; otherwise both the data file and
+	// the property name are required.
+	let expected = if flags.codepoints.is_some() { 1 } else { 2 };
+	if rest.len() != expected {
+		usage();
+		process::exit(1);
+	}
+
+	// One flag per code point in U+0000..=U+10FFFF.
+	let mut bitmap = vec![false; 0x110000];
+	let name = if let Some(codepoints) = flags.codepoints {
+		for c in codepoints {
+			bitmap[c as usize] = true;
+		}
+		&rest[0]
+	} else {
+		parse_file(&rest[0], &rest[1], &mut bitmap)?;
+		&rest[1]
+	};
+
+	let (shift, lvl1, lvl2) = optimize_tables(&bitmap);
+	write_tables(name, shift, &lvl1, &lvl2);
+	return Ok(());
+}
+
+/// Builds the tables for every block-size exponent in
+/// `MIN_SHIFT..=MAX_SHIFT` and keeps the smallest result.
+///
+/// Returns `(shift, level-1 table, level-2 table)`.  When several shifts
+/// give the same size, the lowest one wins.
+fn optimize_tables(bitmap: &[bool]) -> (usize, Vec<u16>, Vec<u64>) {
+	return (MIN_SHIFT..=MAX_SHIFT)
+		.map(|shift| {
+			let (lvl1, lvl2) = build_tables(bitmap, shift);
+			(shift, lvl1, lvl2)
+		})
+		// Footprint in bytes: 2 per u16 index plus 8 per u64 word.
+		// `min_by_key` keeps the first of several equal minima.
+		.min_by_key(|(_, lvl1, lvl2)| lvl1.len() * 2 + lvl2.len() * 8)
+		.unwrap_or_default();
+}
+
+/// Marks in `bitmap` every code point that the data file at `path`
+/// assigns the property `prop`.
+///
+/// Each line has the form `CODE ; Property` or `FIRST..LAST ; Property`
+/// with hexadecimal code points; text after `#` is ignored, as are blank
+/// lines, lines with fewer than two `;`-separated fields and lines naming
+/// another property.  Code points at or beyond `bitmap.len()` are
+/// silently dropped.
+///
+/// # Errors
+///
+/// Returns the underlying error when the file cannot be opened or read,
+/// and an `InvalidData` error when a matching line holds a code point
+/// that is not valid hexadecimal.
+fn parse_file<P: AsRef<Path>>(
+	path: P,
+	prop: &str,
+	bitmap: &mut [bool],
+) -> io::Result<()> {
+	/// Parses one hexadecimal code-point field.
+	fn parse_hex(field: &str) -> io::Result<usize> {
+		let field = field.trim();
+		return match u32::from_str_radix(field, 16) {
+			// Out-of-range values are clamped by the caller anyway.
+			Ok(cp) => Ok(usize::try_from(cp).unwrap_or(usize::MAX)),
+			Err(e) => Err(io::Error::new(
+				io::ErrorKind::InvalidData,
+				format!("{field}: invalid code point: {e}"),
+			)),
+		};
+	}
+
+	let reader = BufReader::new(File::open(path)?);
+
+	for line in reader.lines() {
+		let line = line?;
+		// Everything from '#' onwards is commentary.
+		let line = line.split('#').next().unwrap_or("").trim();
+		if line.is_empty() {
+			continue;
+		}
+
+		let mut fields = line.split(';').map(str::trim);
+		let (Some(range), Some(name)) = (fields.next(), fields.next()) else {
+			continue;
+		};
+		if name != prop {
+			continue;
+		}
+
+		let (beg, end) = match range.split_once("..") {
+			Some((first, last)) => (parse_hex(first)?, parse_hex(last)?),
+			None => {
+				let cp = parse_hex(range)?;
+				(cp, cp)
+			},
+		};
+
+		// Fill the part of the range that lies inside the bitmap in one
+		// step instead of visiting every code point individually.
+		if beg < bitmap.len() && beg <= end {
+			let last = end.min(bitmap.len() - 1);
+			bitmap[beg..=last].fill(true);
+		}
+	}
+	return Ok(());
+}
+
+/// Splits `bitmap` into blocks of `1 << shift` flags and deduplicates
+/// them into a two-level table.
+///
+/// Returns `(level 1, level 2)`.  Level 2 holds each distinct block once,
+/// packed into `u64` words with the lowest bit first; level 1 has one
+/// entry per block of `bitmap`, giving the position of that block's
+/// contents in level 2, counted in whole blocks.
+fn build_tables(bitmap: &[bool], shift: usize) -> (Vec<u16>, Vec<u64>) {
+	use std::collections::hash_map::Entry;
+
+	let bits_per_block = 1usize << shift;
+	let words_per_block = bits_per_block.div_ceil(64);
+
+	let mut index: Vec<u16> = Vec::new();
+	let mut words: Vec<u64> = Vec::new();
+	let mut seen: HashMap<Vec<u64>, u16> = HashMap::new();
+
+	for chunk in bitmap.chunks(bits_per_block) {
+		let mut block = vec![0u64; words_per_block];
+		for (pos, _) in chunk.iter().enumerate().filter(|&(_, &set)| set) {
+			block[pos / 64] |= 1 << (pos % 64);
+		}
+
+		let id = match seen.entry(block) {
+			Entry::Occupied(slot) => *slot.get(),
+			Entry::Vacant(slot) => {
+				// Next free block number; the cast keeps only the low
+				// 16 bits.
+				let id = (words.len() / words_per_block) as u16;
+				words.extend_from_slice(slot.key());
+				*slot.insert(id)
+			},
+		};
+		index.push(id);
+	}
+
+	return (index, words);
+}
+
+/// Prints generated Rust source for the lookup tables to standard output.
+///
+/// The output consists of a header comment, the statics
+/// `{UPPER}_L1: [u16; _]` and `{UPPER}_L2: [u64; _]` (`UPPER` being
+/// `prop_name` in upper case) filled from `level1` and `level2`, and an
+/// `#[inline]` predicate taking a `char` and returning `bool` that reads
+/// them with `get_unchecked`.
+///
+/// `shift` is the block-size exponent: the emitted code uses
+/// `cp >> shift` as the level-1 index and the low `shift` bits of the
+/// code point as the offset within the block.
+fn write_tables(prop_name: &str, shift: usize, level1: &[u16], level2: &[u64]) {
+ let upper_name = prop_name.to_uppercase();
+ let lower_name = prop_name.to_lowercase();
+ let block_size = 1 << shift;
+ let mask = block_size - 1;
+ // Number of u64 words needed to hold one block of `block_size` bits.
+ let u64s_per_block = (block_size + 63) / 64;
+
+ println!("/* Autogenerated – DO NOT EDIT */\n");
+ // NOTE(review): `print!` adds no newline, so the emitted items run
+ // together; presumably the output is reformatted afterwards (e.g. by
+ // rustfmt). Confirm.
+ print!(
+ "static {upper_name}_L1: [u16; {}] = {level1:?};",
+ level1.len()
+ );
+ print!(
+ "static {upper_name}_L2: [u64; {}] = {level2:?};",
+ level2.len()
+ );
+
+ // Predicate name: `<lower>_p` when the name already contains an
+ // underscore, `<lower>p` otherwise.
+ let pred_name = if lower_name.contains('_') {
+ format!("{lower_name}_p")
+ } else {
+ format!("{lower_name}p")
+ };
+
+ print!(
+ "#[inline]
+ pub fn {pred_name}(c: char) -> bool {{
+ let cp = c as usize;
+ let blki = unsafe {{ *{upper_name}_L1.get_unchecked(cp >> {shift}) }} as usize;
+ let in_blk_offset_p = cp & 0x{mask:X};"
+ );
+
+ // A block that fits in one word is addressed by `blki` directly;
+ // otherwise the word inside the block has to be selected first.
+ if u64s_per_block == 1 {
+ print!(
+ " unsafe {{
+ return ({upper_name}_L2.get_unchecked(blki) & (1 << in_blk_offset_p)) != 0;
+ }}"
+ );
+ } else {
+ print!(
+ "let wordi = (blki * {u64s_per_block}) + (in_blk_offset_p >> 6);
+ let biti = in_blk_offset_p & 0x3F;
+ unsafe {{
+ return (*{upper_name}_L2.get_unchecked(wordi) & (1 << biti)) != 0;
+ }}"
+ );
+ }
+
+ print!("}}");
+}