Significantly improve lexer error reporting

author: Thomas Voss <mail@thomasvoss.com> 2026-03-02 16:27:17 +0100
committer: Thomas Voss <mail@thomasvoss.com> 2026-03-02 18:37:57 +0100
commit: f09e816aee0513031656734cc3cded9827e0b22b (patch)
tree: 06edab862eceb829dd235c1f39265f26b15ed386 /oryxc
parent: 048a4e588c01f65c94d7a6d93c555ca11e0230ae (diff)
7 files changed, 306 insertions, 524 deletions
diff --git a/oryxc/Cargo.lock b/oryxc/Cargo.lock
index 5514afa..eeaf054 100644
--- a/oryxc/Cargo.lock
+++ b/oryxc/Cargo.lock
@@ -101,6 +101,7 @@ dependencies = [
  "lexopt",
  "phf",
  "soa-rs",
+ "unicode-width",
 ]
 
 [[package]]
@@ -271,6 +272,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e"
 
 [[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
+[[package]]
 name = "windows-link"
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/oryxc/Cargo.toml b/oryxc/Cargo.toml
index 88464ca..984d910 100644
--- a/oryxc/Cargo.toml
+++ b/oryxc/Cargo.toml
@@ -11,3 +11,4 @@ lexopt = "0.1.0"
 # num-rational = "0.4.2"
 phf = { version = "0.13.1", features = ["macros"] }
 soa-rs = "0.9.1"
+unicode-width = "0.2.2"
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
index f3de028..1e539ce 100644
--- a/oryxc/src/compiler.rs
+++ b/oryxc/src/compiler.rs
@@ -14,6 +14,7 @@ use std::vec::Vec;
 use std::{
 	fs,
 	panic,
+	process,
 	thread,
 };
 
@@ -26,6 +27,7 @@ use crossbeam_deque::{
 use dashmap::DashMap;
 use soa_rs::Soa;
 
+use crate::errors::OryxError;
 use crate::lexer::Token;
 use crate::parser::AstNode;
 use crate::{
@@ -111,7 +113,7 @@ where
 		let stealer_view = Arc::clone(&stealer_view);
 		let state = Arc::clone(&state);
 		threads.push(thread::spawn(move || {
-			worker_loop(id, w, stealer_view, state);
+			worker_loop(id, state, w, stealer_view);
 		}));
 	}
 
@@ -120,11 +122,24 @@ where
 	}
 }
 
+fn emit_errors<T>(state: Arc<CompilerState>, file: FileId, errors: T)
+where
+	T: IntoIterator<Item = OryxError>,
+{
+	let (name, buffer) = {
+		let fdata = state.files.get(&file).unwrap();
+		(fdata.name.clone(), fdata.buffer.clone())
+	};
+	for e in errors.into_iter() {
+		e.report(name.as_ref(), buffer.as_ref());
+	}
+}
+
 fn worker_loop(
 	id: usize,
+	state: Arc<CompilerState>,
 	queue: Worker<Job>,
 	stealers: Arc<[Stealer<Job>]>,
-	state: Arc<CompilerState>,
 ) {
 	loop {
 		if state.njobs.load(Ordering::SeqCst) == 0 {
@@ -140,9 +155,12 @@ fn worker_loop(
 						(fdata.name.clone(), fdata.buffer.clone())
 					};
 					let (name, buffer) = (name.as_ref(), buffer.as_ref());
-					let tokens = match lexer::tokenize(name, buffer) {
+					let tokens = match lexer::tokenize(buffer) {
 						Ok(xs) => xs,
-						Err(errs) => todo!(),
+						Err(e) => {
+							emit_errors(state.clone(), file, vec![e]);
+							process::exit(1);
+						},
 					};
 
 					if state.flags.debug_lexer {
diff --git a/oryxc/src/errors.rs b/oryxc/src/errors.rs
index b3e6013..b9b5955 100644
--- a/oryxc/src/errors.rs
+++ b/oryxc/src/errors.rs
@@ -1,8 +1,15 @@
+use std::borrow::Cow;
+use std::convert::AsRef;
+use std::error::Error;
 use std::ffi::{
 	OsStr,
 	OsString,
 };
-use std::fmt::Display;
+use std::fmt::{
+	self,
+	Display,
+	Formatter,
+};
 use std::ops::Deref;
 use std::path::Path;
 use std::sync::OnceLock;
@@ -11,6 +18,13 @@ use std::{
 	process,
 };
 
+use unicode_width::UnicodeWidthStr;
+
+use crate::unicode;
+
+const TAB_AS_SPACES: &'static str = "    ";
+const TABSIZE: usize = TAB_AS_SPACES.len();
+
 pub fn progname() -> &'static OsString {
 	static ARGV0: OnceLock<OsString> = OnceLock::new();
 	return ARGV0.get_or_init(|| {
@@ -65,3 +79,109 @@ where
 	eprintln!("{}: \x1b[31;1mError:\x1b[0m {}", filename.display(), s);
 	process::exit(1);
 }
+
+#[derive(Debug)]
+pub struct OryxError {
+	pub span: (usize, usize),
+	pub msg:  Cow<'static, str>,
+}
+
+impl OryxError {
+	pub fn new<T>(beg: usize, end: usize, msg: T) -> Self
+	where
+		T: Into<Cow<'static, str>>,
+	{
+		return Self {
+			span: (beg, end),
+			msg:  msg.into(),
+		};
+	}
+
+	pub fn report<Tf, Tb>(&self, filename: &Tf, buffer: &Tb)
+	where
+		Tf: AsRef<OsStr>,
+		Tb: AsRef<str>,
+	{
+		fn nspaces(n: i32) -> i32 {
+			return match () {
+				() if n < 10000 => 6,
+				() if n < 100000 => 7,
+				() if n < 1000000 => 8,
+				() if n < 10000000 => 9,
+				() if n < 100000000 => 10,
+				() if n < 1000000000 => 11,
+				() => 12,
+			};
+		}
+
+		let buffer = buffer.as_ref();
+		let (mut line, mut linebeg, mut lineend) = (1, 0, buffer.len());
+		for (i, c) in buffer.char_indices() {
+			if unicode::line_terminator_p(c) {
+				if i >= self.span.0 {
+					lineend = i;
+					break;
+				}
+				line += 1;
+				linebeg = i + c.len_utf8();
+			}
+		}
+
+		let (spanbeg, spanend) = (self.span.0, self.span.1.min(lineend));
+
+		let errbeg = new_string_with_spaces(&buffer[linebeg..spanbeg]);
+		let errmid = new_string_with_spaces(&buffer[spanbeg..spanend]);
+		let errend = new_string_with_spaces(&buffer[spanend..lineend]);
+
+		let errmid = match errmid.len() {
+			0 => "_".to_string(),
+			_ => errmid,
+		};
+
+		/* TODO: Do tab math */
+		let col = errbeg.width() + 1;
+
+		const FNAMEBEG: &str = "\x1b[37;1m";
+		const ERRORBEG: &str = "\x1b[31;1m";
+		const FMTEND: &str = "\x1b[0m";
+
+		eprintln!(
+			"{FNAMEBEG}{}:{line}:{col}:{FMTEND} {ERRORBEG}error:{FMTEND} {self}",
+			filename.as_ref().display()
+		);
+		eprintln!(" {line:>4} │ {errbeg}{ERRORBEG}{errmid}{FMTEND}{errend}");
+		for _ in 0..nspaces(line) {
+			eprint!(" ");
+		}
+		eprint!("│ ");
+		for _ in 1..col {
+			eprint!(" ");
+		}
+		eprint!("{ERRORBEG}");
+		for _ in 0..errmid.width().max(1) {
+			eprint!("^");
+		}
+		eprint!("{FMTEND}");
+		eprintln!();
+	}
+}
+
+fn new_string_with_spaces(s: &str) -> String {
+	let ntabs = s.bytes().filter(|b| *b == b'\t').count();
+	let mut buf = String::with_capacity(s.len() + ntabs * (TABSIZE - 1));
+	for c in s.chars() {
+		match c {
+			'\t' => buf.push_str(TAB_AS_SPACES),
+			_ => buf.push(c),
+		}
+	}
+	return buf;
+}
+
+impl Display for OryxError {
+	fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+		return write!(f, "{}", self.msg);
+	}
+}
+
+impl Error for OryxError {}
diff --git a/oryxc/src/lexer.rs b/oryxc/src/lexer.rs
index 2e1a21c..6f05a9c 100644
--- a/oryxc/src/lexer.rs
+++ b/oryxc/src/lexer.rs
@@ -1,13 +1,9 @@
-use std::borrow::Cow;
-use std::ffi::OsStr;
-use std::fmt::Display;
 use std::iter::Peekable;
 use std::mem;
 use std::str::{
 	self,
 	Chars,
 };
-use std::vec::Vec;
 
 use phf;
 use soa_rs::{
@@ -15,10 +11,8 @@ use soa_rs::{
 	Soars,
 };
 
-use crate::{
-	errors,
-	unicode,
-};
+use crate::errors::OryxError;
+use crate::unicode;
 
 #[allow(dead_code)]
 #[repr(u8)]
@@ -60,17 +54,6 @@ pub enum TokenType {
 }
 
 impl TokenType {
-	pub fn literalp(&self) -> bool {
-		return match self {
-			Self::Identifier
-			| Self::KeywordDef
-			| Self::KeywordFunc
-			| Self::Number
-			| Self::String => true,
-			_ => false,
-		};
-	}
-
 	/* Tokens that start an expression */
 	pub fn exprp(&self) -> bool {
 		return match self {
@@ -100,41 +83,20 @@ pub struct Token {
 	pub view: Span,
 }
 
-pub struct Error {
-	pub pos: usize,
-	pub msg: Cow<'static, str>,
-}
-
-impl Error {
-	fn new<T>(pos: usize, msg: T) -> Self
-	where
-		T: Into<Cow<'static, str>>,
-	{
-		return Self {
-			pos,
-			msg: msg.into(),
-		};
-	}
-}
-
 struct LexerContext<'a> {
-	pos_a:          usize, /* Pos [a]fter char */
-	pos_b:          usize, /* Pos [b]efore char */
-	chars:          Peekable<Chars<'a>>,
-	string:         &'a str,
-	filename:       &'a OsStr,
-	expect_punct_p: bool,
+	pos_a:  usize, /* Pos [a]fter char */
+	pos_b:  usize, /* Pos [b]efore char */
+	chars:  Peekable<Chars<'a>>,
+	string: &'a str,
 }
 
 impl<'a> LexerContext<'a> {
-	fn new(filename: &'a OsStr, string: &'a str) -> Self {
+	fn new(string: &'a str) -> Self {
 		return Self {
 			pos_a: 0,
 			pos_b: 0,
 			chars: string.chars().peekable(),
 			string,
-			filename,
-			expect_punct_p: false,
 		};
 	}
 
@@ -150,22 +112,6 @@ impl<'a> LexerContext<'a> {
 	fn peek(&mut self) -> Option<char> {
 		return self.chars.peek().copied();
 	}
-
-	fn err_at_position<S>(&self, s: S) -> !
-	where
-		S: Display,
-	{
-		errors::err_at_position(self.filename, s);
-	}
-
-	#[inline(always)]
-	fn literal_spacing_guard(&self) {
-		if self.expect_punct_p {
-			self.err_at_position(
-				"Two literals may not be directly adjacent to each other",
-			);
-		}
-	}
 }
 
 static KEYWORDS: phf::Map<&'static str, TokenType> = phf::phf_map! {
@@ -174,16 +120,15 @@ static KEYWORDS: phf::Map<&'static str, TokenType> = phf::phf_map! {
 	"return" => TokenType::KeywordReturn,
 };
 
-pub fn tokenize(filename: &OsStr, s: &str) -> Result<Soa<Token>, Vec<Error>> {
+pub fn tokenize(s: &str) -> Result<Soa<Token>, OryxError> {
 	let mut toks = Soa::<Token>::with_capacity(s.len() / 2);
-	let mut ctx = LexerContext::new(filename, s);
+	let mut ctx = LexerContext::new(s);
 
 	while let Some(c) = ctx.next() {
 		let (i, j) = (ctx.pos_b, ctx.pos_a);
 		if let Some(tok) = match c {
 			'/' if ctx.peek().is_some_and(|c| c == '*') => {
-				skip_comment(&mut ctx);
-				ctx.expect_punct_p = false;
+				skip_comment(&mut ctx)?;
 				None
 			},
 			'<' if ctx.peek().is_some_and(|c| c == '<') => {
@@ -226,34 +171,19 @@ pub fn tokenize(filename: &OsStr, s: &str) -> Result<Soa<Token>, Vec<Error>> {
 					view: Span(i, j),
 				})
 			},
-			'#' => {
-				ctx.literal_spacing_guard();
-				Some(tokenize_number_based(&mut ctx))
-			},
-			'0'..='9' => {
-				ctx.literal_spacing_guard();
-				Some(tokenize_number(&mut ctx, "0123456789"))
-			},
-			'"' => {
-				ctx.literal_spacing_guard();
-				Some(tokenize_string(&mut ctx))
-			},
-			_ if unicode::xid_start_p(c) => {
-				ctx.literal_spacing_guard();
-				Some(tokenize_identifier(&mut ctx))
-			},
-			_ if unicode::pattern_white_space_p(c) => {
-				if !unicode::default_ignorable_code_point_p(c) {
-					ctx.expect_punct_p = false;
-				}
-				None
-			},
+			'#' => Some(tokenize_number_based(&mut ctx)?),
+			'0'..='9' => Some(tokenize_number(&mut ctx, "0123456789")?),
+			'"' => Some(tokenize_string(&mut ctx)?),
+			_ if unicode::xid_start_p(c) => Some(tokenize_identifier(&mut ctx)),
+			_ if unicode::pattern_white_space_p(c) => None,
 			c => {
-				let msg = format!("Invalid character ‘{c}’");
-				ctx.err_at_position(msg.as_str());
+				return Err(OryxError::new(
+					i,
+					j,
+					format!("Invalid character ‘{c}’"),
+				));
 			},
 		} {
-			ctx.expect_punct_p = tok.kind.literalp();
 			toks.push(tok);
 		}
 	}
@@ -265,7 +195,8 @@ pub fn tokenize(filename: &OsStr, s: &str) -> Result<Soa<Token>, Vec<Error>> {
 	return Ok(toks);
 }
 
-fn skip_comment<'a>(ctx: &mut LexerContext<'a>) {
+fn skip_comment<'a>(ctx: &mut LexerContext<'a>) -> Result<(), OryxError> {
+	let beg = ctx.pos_b;
 	ctx.next(); /* Consume ‘*’ */
 	let mut depth = 1;
 	while let Some(c) = ctx.next() {
@@ -278,118 +209,169 @@ fn skip_comment<'a>(ctx: &mut LexerContext<'a>) {
 				depth -= 1;
 				ctx.next(); /* Consume ‘/’ */
 				if depth == 0 {
-					return;
+					return Ok(());
 				}
 			},
 			_ => {},
 		};
 	}
-	ctx.err_at_position("Unterminated comment");
+	return Err(OryxError::new(beg, ctx.pos_a, "Unterminated comment"));
 }
 
-fn tokenize_number_based<'a>(ctx: &mut LexerContext<'a>) -> Token {
+fn tokenize_number_based<'a>(
+	ctx: &mut LexerContext<'a>,
+) -> Result<Token, OryxError> {
 	let i = ctx.pos_b;
 	let alphabet = match ctx.next() {
 		Some('b') => "01",
 		Some('o') => "01234567",
 		Some('d') => "0123456789",
 		Some('x') => "0123456789ABCDEF",
-		Some(c) => {
-			let msg = format!("Invalid number base specifier ‘{c}’");
-			ctx.err_at_position(msg.as_str());
+		Some(c @ 'B') | Some(c @ 'O') | Some(c @ 'D') | Some(c @ 'X') => {
+			return Err(OryxError::new(
+				ctx.pos_b,
+				ctx.pos_a,
+				format!(
+					"Invalid number base specifier ‘{c}’, did you mean ‘{}’?",
+					c.to_ascii_lowercase()
+				),
+			));
+		},
+		Some(c) if c.is_alphanumeric() => {
+			return Err(OryxError::new(
+				ctx.pos_b,
+				ctx.pos_a,
+				format!("Invalid number base specifier ‘{c}’"),
+			));
+		},
+		_ => {
+			return Err(OryxError::new(
+				i,
+				i + 1,
+				"Expected number base specifier after ‘#’",
+			));
 		},
-		None => ctx.err_at_position("Expected number base specifier after ‘#’"),
 	};
+
+	let (beg, end) = (ctx.pos_b, ctx.pos_a);
 	let mut tok = match ctx.next() {
-		Some(c) if alphabet.contains(c) => tokenize_number(ctx, alphabet),
-		Some(c) => {
-			let base = match alphabet.len() {
-				2 => "binary",
-				8 => "octal",
-				10 => "decimal",
-				16 => "hexadecimal",
-				_ => unreachable!(),
-			};
-			let msg = format!("Invalid {base} digit ‘{c}’");
-			ctx.err_at_position(msg.as_str());
+		Some(c) if alphabet.contains(c) => tokenize_number(ctx, alphabet)?,
+		Some(c) if alphabet.len() == 16 && c.is_ascii_hexdigit() => {
+			return Err(OryxError::new(
+				ctx.pos_b,
+				ctx.pos_a,
+				format!("Hexadecimal digits must be uppercase"),
+			));
+		},
+		Some(c) if c.is_alphanumeric() => {
+			let base = base2str(alphabet.len());
+			return Err(OryxError::new(
+				ctx.pos_b,
+				ctx.pos_a,
+				format!("Invalid {base} digit ‘{c}’"),
+			));
+		},
+		Some('\'') => {
+			return Err(OryxError::new(
+				ctx.pos_b,
+				ctx.pos_a,
+				format!(
+					"Numeric literals may not begin with a digit separator"
+				),
+			));
+		},
+		_ => {
+			let base = base2str(alphabet.len());
+			return Err(OryxError::new(
+				beg,
+				end,
+				format!("Expected {base} digit after base specifier"),
+			));
 		},
-		None => ctx.err_at_position("Expected number after base specifier"),
 	};
 	tok.view = Span(i, ctx.pos_a);
-	return tok;
+	return Ok(tok);
 }
 
 fn tokenize_number<'a>(
 	ctx: &mut LexerContext<'a>,
 	alphabet: &'static str,
-) -> Token {
+) -> Result<Token, OryxError> {
 	let i = ctx.pos_b;
-	span_raw_number(ctx, alphabet, true);
+	span_raw_number(ctx, alphabet, true)?;
 
 	/* Fractional part */
 	if ctx.peek().is_some_and(|c| c == '.') {
 		ctx.next();
 		if ctx.peek().is_some_and(|c| alphabet.contains(c)) {
-			span_raw_number(ctx, alphabet, false);
+			span_raw_number(ctx, alphabet, false)?;
 		}
 	}
 
 	/* Exponential part */
 	if ctx.peek().is_some_and(|c| c == 'e') {
 		ctx.next();
-		span_raw_number(ctx, alphabet, false);
+		if ctx.peek().is_some_and(|c| c == '+' || c == '-') {
+			ctx.next();
+		}
+		span_raw_number(ctx, alphabet, false)?;
 	}
 
-	return Token {
+	return Ok(Token {
 		kind: TokenType::Number,
 		view: Span(i, ctx.pos_a),
-	};
+	});
 }
 
 fn span_raw_number<'a>(
 	ctx: &mut LexerContext<'a>,
 	alphabet: &'static str,
 	first_digit_lexed_p: bool,
-) {
+) -> Result<(), OryxError> {
 	if !first_digit_lexed_p {
 		match ctx.next() {
 			Some(c) if alphabet.contains(c) => c,
-			Some(c) => {
-				let base = match alphabet.len() {
-					2 => "binary",
-					8 => "octal",
-					10 => "decimal",
-					16 => "hexadecimal",
-					_ => unreachable!(),
-				};
-				let msg = format!("Invalid {base} digit ‘{c}’");
-				ctx.err_at_position(msg.as_str());
+			Some(c) if alphabet.len() == 16 && c.is_ascii_hexdigit() => {
+				return Err(OryxError::new(
+					ctx.pos_b,
+					ctx.pos_a,
+					format!("Hexadecimal digits must be uppercase"),
+				));
 			},
-			None => {
-				let base = match alphabet.len() {
-					2 => "binary",
-					8 => "octal",
-					10 => "decimal",
-					16 => "hexadecimal",
-					_ => unreachable!(),
-				};
-				let msg = format!(
-					"Expected {base} digit but reached end-of-file instead"
-				);
-				ctx.err_at_position(msg.as_str());
+			Some(c) if c.is_alphanumeric() => {
+				let base = base2str(alphabet.len());
+				return Err(OryxError::new(
+					ctx.pos_b,
+					ctx.pos_a,
+					format!("Invalid {base} digit ‘{c}’"),
+				));
+			},
+			_ => {
+				let base = base2str(alphabet.len());
+				return Err(OryxError::new(
+					ctx.pos_b,
+					ctx.pos_a,
+					format!("Expected {base} digit"),
+				));
 			},
 		};
 	}
 
+	let (mut beg, mut end) = (0, 0);
 	let mut last_was_apos_p = false;
 	while let Some(c) = ctx.peek() {
 		match c {
-			'\'' if last_was_apos_p => ctx.err_at_position(
-				"Multiple concurrent digit separators in numeric literal",
-			),
+			'\'' if last_was_apos_p => {
+				return Err(OryxError::new(
+					ctx.pos_b,
+					ctx.pos_a + 1,
+					"Numeric literals may not have adjacent digit separators",
+				));
+			},
 			'\'' => {
 				last_was_apos_p = true;
 				ctx.next();
+				(beg, end) = (ctx.pos_b, ctx.pos_a);
 			},
 			_ if alphabet.contains(c) => {
 				last_was_apos_p = false;
@@ -400,27 +382,36 @@ fn span_raw_number<'a>(
 	}
 
 	if last_was_apos_p {
-		ctx.err_at_position(
+		return Err(OryxError::new(
+			beg,
+			end,
 			"Numeric literals may not end with a digit separator",
-		);
+		));
 	}
+
+	return Ok(());
 }
 
-fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Token {
+fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Result<Token, OryxError> {
 	let i = ctx.pos_b;
+
 	loop {
-		if let Some(c) = ctx.next() {
-			if c == '"' {
-				break;
-			}
-		} else {
-			ctx.err_at_position("Unterminated string");
+		match ctx.next() {
+			Some(c) if c == '"' => break,
+			Some(_) => {},
+			None => {
+				return Err(OryxError::new(
+					i,
+					ctx.pos_a,
+					"Unterminated string literal",
+				));
+			},
 		}
 	}
-	return Token {
+	return Ok(Token {
 		kind: TokenType::String,
 		view: Span(i, ctx.pos_a),
-	};
+	});
 }
 
 fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token {
@@ -435,3 +426,13 @@ fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token {
 	};
 	return Token { kind, view };
 }
+
+fn base2str(n: usize) -> &'static str {
+	return match n {
+		2 => "binary",
+		8 => "octal",
+		10 => "decimal",
+		16 => "hexadecimal",
+		_ => unreachable!(),
+	};
+}
diff --git a/oryxc/src/unicode/default_ignorable_code_point.rs b/oryxc/src/unicode/default_ignorable_code_point.rs
deleted file mode 100644
index b900a3b..0000000
--- a/oryxc/src/unicode/default_ignorable_code_point.rs
+++ /dev/null
@@ -1,363 +0,0 @@
-/* Autogenerated – DO NOT EDIT */
-
-static DEFAULT_IGNORABLE_CODE_POINT_L1: [u16; 544] = [
-	0, 1, 2, 3, 4, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 7, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-];
-static DEFAULT_IGNORABLE_CODE_POINT_L2: [u64; 320] = [
-	0,
-	0,
-	35184372088832,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	32768,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	268435456,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	6442450944,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	13510798882111488,
-	0,
-	63488,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	136339441907712,
-	281470681743360,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	68719476736,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	65535,
-	0,
-	0,
-	9223372036854775808,
-	0,
-	0,
-	4294967296,
-	143833713099145216,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	64424509440,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	574208952489738240,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-	18446744073709551615,
-];
-#[inline]
-pub fn default_ignorable_code_point_p(c: char) -> bool {
-	let cp = c as usize;
-	let blki =
-		unsafe { *DEFAULT_IGNORABLE_CODE_POINT_L1.get_unchecked(cp >> 11) }
-			as usize;
-	let in_blk_offset_p = cp & 0x7FF;
-	let wordi = (blki * 32) + (in_blk_offset_p >> 6);
-	let biti = in_blk_offset_p & 0x3F;
-	unsafe {
-		return (*DEFAULT_IGNORABLE_CODE_POINT_L2.get_unchecked(wordi)
-			& (1 << biti))
-			!= 0;
-	}
-}
diff --git a/oryxc/src/unicode/mod.rs b/oryxc/src/unicode/mod.rs
index 2fbdcb3..e4a719e 100644
--- a/oryxc/src/unicode/mod.rs
+++ b/oryxc/src/unicode/mod.rs
@@ -1,10 +1,8 @@
-pub mod default_ignorable_code_point;
 pub mod line_terminator;
 pub mod pattern_white_space;
 pub mod xid_continue;
 pub mod xid_start;
 
-pub use default_ignorable_code_point::default_ignorable_code_point_p;
 pub use line_terminator::line_terminator_p;
 pub use pattern_white_space::pattern_white_space_p;
 pub use xid_continue::xid_continue_p;
author	Thomas Voss <mail@thomasvoss.com>	2026-03-02 16:27:17 +0100
committer	Thomas Voss <mail@thomasvoss.com>	2026-03-02 18:37:57 +0100
commit	f09e816aee0513031656734cc3cded9827e0b22b (patch)
tree	06edab862eceb829dd235c1f39265f26b15ed386 /oryxc
parent	048a4e588c01f65c94d7a6d93c555ca11e0230ae (diff)