summaryrefslogtreecommitdiff
path: root/oryxc/src
diff options
context:
space:
mode:
Diffstat (limited to 'oryxc/src')
-rw-r--r--oryxc/src/compiler.rs26
-rw-r--r--oryxc/src/errors.rs122
-rw-r--r--oryxc/src/lexer.rs309
-rw-r--r--oryxc/src/unicode/default_ignorable_code_point.rs363
-rw-r--r--oryxc/src/unicode/mod.rs2
5 files changed, 298 insertions, 524 deletions
diff --git a/oryxc/src/compiler.rs b/oryxc/src/compiler.rs
index f3de028..1e539ce 100644
--- a/oryxc/src/compiler.rs
+++ b/oryxc/src/compiler.rs
@@ -14,6 +14,7 @@ use std::vec::Vec;
use std::{
fs,
panic,
+ process,
thread,
};
@@ -26,6 +27,7 @@ use crossbeam_deque::{
use dashmap::DashMap;
use soa_rs::Soa;
+use crate::errors::OryxError;
use crate::lexer::Token;
use crate::parser::AstNode;
use crate::{
@@ -111,7 +113,7 @@ where
let stealer_view = Arc::clone(&stealer_view);
let state = Arc::clone(&state);
threads.push(thread::spawn(move || {
- worker_loop(id, w, stealer_view, state);
+ worker_loop(id, state, w, stealer_view);
}));
}
@@ -120,11 +122,24 @@ where
}
}
+fn emit_errors<T>(state: Arc<CompilerState>, file: FileId, errors: T)
+where
+ T: IntoIterator<Item = OryxError>,
+{
+ let (name, buffer) = {
+ let fdata = state.files.get(&file).unwrap();
+ (fdata.name.clone(), fdata.buffer.clone())
+ };
+ for e in errors.into_iter() {
+ e.report(name.as_ref(), buffer.as_ref());
+ }
+}
+
fn worker_loop(
id: usize,
+ state: Arc<CompilerState>,
queue: Worker<Job>,
stealers: Arc<[Stealer<Job>]>,
- state: Arc<CompilerState>,
) {
loop {
if state.njobs.load(Ordering::SeqCst) == 0 {
@@ -140,9 +155,12 @@ fn worker_loop(
(fdata.name.clone(), fdata.buffer.clone())
};
let (name, buffer) = (name.as_ref(), buffer.as_ref());
- let tokens = match lexer::tokenize(name, buffer) {
+ let tokens = match lexer::tokenize(buffer) {
Ok(xs) => xs,
- Err(errs) => todo!(),
+ Err(e) => {
+ emit_errors(state.clone(), file, vec![e]);
+ process::exit(1);
+ },
};
if state.flags.debug_lexer {
diff --git a/oryxc/src/errors.rs b/oryxc/src/errors.rs
index b3e6013..b9b5955 100644
--- a/oryxc/src/errors.rs
+++ b/oryxc/src/errors.rs
@@ -1,8 +1,15 @@
+use std::borrow::Cow;
+use std::convert::AsRef;
+use std::error::Error;
use std::ffi::{
OsStr,
OsString,
};
-use std::fmt::Display;
+use std::fmt::{
+ self,
+ Display,
+ Formatter,
+};
use std::ops::Deref;
use std::path::Path;
use std::sync::OnceLock;
@@ -11,6 +18,13 @@ use std::{
process,
};
+use unicode_width::UnicodeWidthStr;
+
+use crate::unicode;
+
+const TAB_AS_SPACES: &'static str = " ";
+const TABSIZE: usize = TAB_AS_SPACES.len();
+
pub fn progname() -> &'static OsString {
static ARGV0: OnceLock<OsString> = OnceLock::new();
return ARGV0.get_or_init(|| {
@@ -65,3 +79,109 @@ where
eprintln!("{}: \x1b[31;1mError:\x1b[0m {}", filename.display(), s);
process::exit(1);
}
+
+#[derive(Debug)]
+pub struct OryxError {
+ pub span: (usize, usize),
+ pub msg: Cow<'static, str>,
+}
+
+impl OryxError {
+ pub fn new<T>(beg: usize, end: usize, msg: T) -> Self
+ where
+ T: Into<Cow<'static, str>>,
+ {
+ return Self {
+ span: (beg, end),
+ msg: msg.into(),
+ };
+ }
+
+ pub fn report<Tf, Tb>(&self, filename: &Tf, buffer: &Tb)
+ where
+ Tf: AsRef<OsStr>,
+ Tb: AsRef<str>,
+ {
+ fn nspaces(n: i32) -> i32 {
+ return match () {
+ () if n < 10000 => 6,
+ () if n < 100000 => 7,
+ () if n < 1000000 => 8,
+ () if n < 10000000 => 9,
+ () if n < 100000000 => 10,
+ () if n < 1000000000 => 11,
+ () => 12,
+ };
+ }
+
+ let buffer = buffer.as_ref();
+ let (mut line, mut linebeg, mut lineend) = (1, 0, buffer.len());
+ for (i, c) in buffer.char_indices() {
+ if unicode::line_terminator_p(c) {
+ if i >= self.span.0 {
+ lineend = i;
+ break;
+ }
+ line += 1;
+ linebeg = i + c.len_utf8();
+ }
+ }
+
+ let (spanbeg, spanend) = (self.span.0, self.span.1.min(lineend));
+
+ let errbeg = new_string_with_spaces(&buffer[linebeg..spanbeg]);
+ let errmid = new_string_with_spaces(&buffer[spanbeg..spanend]);
+ let errend = new_string_with_spaces(&buffer[spanend..lineend]);
+
+ let errmid = match errmid.len() {
+ 0 => "_".to_string(),
+ _ => errmid,
+ };
+
+ /* TODO: Do tab math */
+ let col = errbeg.width() + 1;
+
+ const FNAMEBEG: &str = "\x1b[37;1m";
+ const ERRORBEG: &str = "\x1b[31;1m";
+ const FMTEND: &str = "\x1b[0m";
+
+ eprintln!(
+ "{FNAMEBEG}{}:{line}:{col}:{FMTEND} {ERRORBEG}error:{FMTEND} {self}",
+ filename.as_ref().display()
+ );
+ eprintln!(" {line:>4} │ {errbeg}{ERRORBEG}{errmid}{FMTEND}{errend}");
+ for _ in 0..nspaces(line) {
+ eprint!(" ");
+ }
+ eprint!("│ ");
+ for _ in 1..col {
+ eprint!(" ");
+ }
+ eprint!("{ERRORBEG}");
+ for _ in 0..errmid.width().max(1) {
+ eprint!("^");
+ }
+ eprint!("{FMTEND}");
+ eprintln!();
+ }
+}
+
+fn new_string_with_spaces(s: &str) -> String {
+ let ntabs = s.bytes().filter(|b| *b == b'\t').count();
+ let mut buf = String::with_capacity(s.len() + ntabs * (TABSIZE - 1));
+ for c in s.chars() {
+ match c {
+ '\t' => buf.push_str(TAB_AS_SPACES),
+ _ => buf.push(c),
+ }
+ }
+ return buf;
+}
+
+impl Display for OryxError {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ return write!(f, "{}", self.msg);
+ }
+}
+
+impl Error for OryxError {}
diff --git a/oryxc/src/lexer.rs b/oryxc/src/lexer.rs
index 2e1a21c..6f05a9c 100644
--- a/oryxc/src/lexer.rs
+++ b/oryxc/src/lexer.rs
@@ -1,13 +1,9 @@
-use std::borrow::Cow;
-use std::ffi::OsStr;
-use std::fmt::Display;
use std::iter::Peekable;
use std::mem;
use std::str::{
self,
Chars,
};
-use std::vec::Vec;
use phf;
use soa_rs::{
@@ -15,10 +11,8 @@ use soa_rs::{
Soars,
};
-use crate::{
- errors,
- unicode,
-};
+use crate::errors::OryxError;
+use crate::unicode;
#[allow(dead_code)]
#[repr(u8)]
@@ -60,17 +54,6 @@ pub enum TokenType {
}
impl TokenType {
- pub fn literalp(&self) -> bool {
- return match self {
- Self::Identifier
- | Self::KeywordDef
- | Self::KeywordFunc
- | Self::Number
- | Self::String => true,
- _ => false,
- };
- }
-
/* Tokens that start an expression */
pub fn exprp(&self) -> bool {
return match self {
@@ -100,41 +83,20 @@ pub struct Token {
pub view: Span,
}
-pub struct Error {
- pub pos: usize,
- pub msg: Cow<'static, str>,
-}
-
-impl Error {
- fn new<T>(pos: usize, msg: T) -> Self
- where
- T: Into<Cow<'static, str>>,
- {
- return Self {
- pos,
- msg: msg.into(),
- };
- }
-}
-
struct LexerContext<'a> {
- pos_a: usize, /* Pos [a]fter char */
- pos_b: usize, /* Pos [b]efore char */
- chars: Peekable<Chars<'a>>,
- string: &'a str,
- filename: &'a OsStr,
- expect_punct_p: bool,
+ pos_a: usize, /* Pos [a]fter char */
+ pos_b: usize, /* Pos [b]efore char */
+ chars: Peekable<Chars<'a>>,
+ string: &'a str,
}
impl<'a> LexerContext<'a> {
- fn new(filename: &'a OsStr, string: &'a str) -> Self {
+ fn new(string: &'a str) -> Self {
return Self {
pos_a: 0,
pos_b: 0,
chars: string.chars().peekable(),
string,
- filename,
- expect_punct_p: false,
};
}
@@ -150,22 +112,6 @@ impl<'a> LexerContext<'a> {
fn peek(&mut self) -> Option<char> {
return self.chars.peek().copied();
}
-
- fn err_at_position<S>(&self, s: S) -> !
- where
- S: Display,
- {
- errors::err_at_position(self.filename, s);
- }
-
- #[inline(always)]
- fn literal_spacing_guard(&self) {
- if self.expect_punct_p {
- self.err_at_position(
- "Two literals may not be directly adjacent to each other",
- );
- }
- }
}
static KEYWORDS: phf::Map<&'static str, TokenType> = phf::phf_map! {
@@ -174,16 +120,15 @@ static KEYWORDS: phf::Map<&'static str, TokenType> = phf::phf_map! {
"return" => TokenType::KeywordReturn,
};
-pub fn tokenize(filename: &OsStr, s: &str) -> Result<Soa<Token>, Vec<Error>> {
+pub fn tokenize(s: &str) -> Result<Soa<Token>, OryxError> {
let mut toks = Soa::<Token>::with_capacity(s.len() / 2);
- let mut ctx = LexerContext::new(filename, s);
+ let mut ctx = LexerContext::new(s);
while let Some(c) = ctx.next() {
let (i, j) = (ctx.pos_b, ctx.pos_a);
if let Some(tok) = match c {
'/' if ctx.peek().is_some_and(|c| c == '*') => {
- skip_comment(&mut ctx);
- ctx.expect_punct_p = false;
+ skip_comment(&mut ctx)?;
None
},
'<' if ctx.peek().is_some_and(|c| c == '<') => {
@@ -226,34 +171,19 @@ pub fn tokenize(filename: &OsStr, s: &str) -> Result<Soa<Token>, Vec<Error>> {
view: Span(i, j),
})
},
- '#' => {
- ctx.literal_spacing_guard();
- Some(tokenize_number_based(&mut ctx))
- },
- '0'..='9' => {
- ctx.literal_spacing_guard();
- Some(tokenize_number(&mut ctx, "0123456789"))
- },
- '"' => {
- ctx.literal_spacing_guard();
- Some(tokenize_string(&mut ctx))
- },
- _ if unicode::xid_start_p(c) => {
- ctx.literal_spacing_guard();
- Some(tokenize_identifier(&mut ctx))
- },
- _ if unicode::pattern_white_space_p(c) => {
- if !unicode::default_ignorable_code_point_p(c) {
- ctx.expect_punct_p = false;
- }
- None
- },
+ '#' => Some(tokenize_number_based(&mut ctx)?),
+ '0'..='9' => Some(tokenize_number(&mut ctx, "0123456789")?),
+ '"' => Some(tokenize_string(&mut ctx)?),
+ _ if unicode::xid_start_p(c) => Some(tokenize_identifier(&mut ctx)),
+ _ if unicode::pattern_white_space_p(c) => None,
c => {
- let msg = format!("Invalid character ‘{c}’");
- ctx.err_at_position(msg.as_str());
+ return Err(OryxError::new(
+ i,
+ j,
+ format!("Invalid character ‘{c}’"),
+ ));
},
} {
- ctx.expect_punct_p = tok.kind.literalp();
toks.push(tok);
}
}
@@ -265,7 +195,8 @@ pub fn tokenize(filename: &OsStr, s: &str) -> Result<Soa<Token>, Vec<Error>> {
return Ok(toks);
}
-fn skip_comment<'a>(ctx: &mut LexerContext<'a>) {
+fn skip_comment<'a>(ctx: &mut LexerContext<'a>) -> Result<(), OryxError> {
+ let beg = ctx.pos_b;
ctx.next(); /* Consume ‘*’ */
let mut depth = 1;
while let Some(c) = ctx.next() {
@@ -278,118 +209,169 @@ fn skip_comment<'a>(ctx: &mut LexerContext<'a>) {
depth -= 1;
ctx.next(); /* Consume ‘/’ */
if depth == 0 {
- return;
+ return Ok(());
}
},
_ => {},
};
}
- ctx.err_at_position("Unterminated comment");
+ return Err(OryxError::new(beg, ctx.pos_a, "Unterminated comment"));
}
-fn tokenize_number_based<'a>(ctx: &mut LexerContext<'a>) -> Token {
+fn tokenize_number_based<'a>(
+ ctx: &mut LexerContext<'a>,
+) -> Result<Token, OryxError> {
let i = ctx.pos_b;
let alphabet = match ctx.next() {
Some('b') => "01",
Some('o') => "01234567",
Some('d') => "0123456789",
Some('x') => "0123456789ABCDEF",
- Some(c) => {
- let msg = format!("Invalid number base specifier ‘{c}’");
- ctx.err_at_position(msg.as_str());
+ Some(c @ 'B') | Some(c @ 'O') | Some(c @ 'D') | Some(c @ 'X') => {
+ return Err(OryxError::new(
+ ctx.pos_b,
+ ctx.pos_a,
+ format!(
+ "Invalid number base specifier ‘{c}’, did you mean ‘{}’?",
+ c.to_ascii_lowercase()
+ ),
+ ));
+ },
+ Some(c) if c.is_alphanumeric() => {
+ return Err(OryxError::new(
+ ctx.pos_b,
+ ctx.pos_a,
+ format!("Invalid number base specifier ‘{c}’"),
+ ));
+ },
+ _ => {
+ return Err(OryxError::new(
+ i,
+ i + 1,
+ "Expected number base specifier after ‘#’",
+ ));
},
- None => ctx.err_at_position("Expected number base specifier after ‘#’"),
};
+
+ let (beg, end) = (ctx.pos_b, ctx.pos_a);
let mut tok = match ctx.next() {
- Some(c) if alphabet.contains(c) => tokenize_number(ctx, alphabet),
- Some(c) => {
- let base = match alphabet.len() {
- 2 => "binary",
- 8 => "octal",
- 10 => "decimal",
- 16 => "hexadecimal",
- _ => unreachable!(),
- };
- let msg = format!("Invalid {base} digit ‘{c}’");
- ctx.err_at_position(msg.as_str());
+ Some(c) if alphabet.contains(c) => tokenize_number(ctx, alphabet)?,
+ Some(c) if alphabet.len() == 16 && c.is_ascii_hexdigit() => {
+ return Err(OryxError::new(
+ ctx.pos_b,
+ ctx.pos_a,
+ format!("Hexadecimal digits must be uppercase"),
+ ));
+ },
+ Some(c) if c.is_alphanumeric() => {
+ let base = base2str(alphabet.len());
+ return Err(OryxError::new(
+ ctx.pos_b,
+ ctx.pos_a,
+ format!("Invalid {base} digit ‘{c}’"),
+ ));
+ },
+ Some('\'') => {
+ return Err(OryxError::new(
+ ctx.pos_b,
+ ctx.pos_a,
+ format!(
+ "Numeric literals may not begin with a digit separator"
+ ),
+ ));
+ },
+ _ => {
+ let base = base2str(alphabet.len());
+ return Err(OryxError::new(
+ beg,
+ end,
+ format!("Expected {base} digit after base specifier"),
+ ));
},
- None => ctx.err_at_position("Expected number after base specifier"),
};
tok.view = Span(i, ctx.pos_a);
- return tok;
+ return Ok(tok);
}
fn tokenize_number<'a>(
ctx: &mut LexerContext<'a>,
alphabet: &'static str,
-) -> Token {
+) -> Result<Token, OryxError> {
let i = ctx.pos_b;
- span_raw_number(ctx, alphabet, true);
+ span_raw_number(ctx, alphabet, true)?;
/* Fractional part */
if ctx.peek().is_some_and(|c| c == '.') {
ctx.next();
if ctx.peek().is_some_and(|c| alphabet.contains(c)) {
- span_raw_number(ctx, alphabet, false);
+ span_raw_number(ctx, alphabet, false)?;
}
}
/* Exponential part */
if ctx.peek().is_some_and(|c| c == 'e') {
ctx.next();
- span_raw_number(ctx, alphabet, false);
+ if ctx.peek().is_some_and(|c| c == '+' || c == '-') {
+ ctx.next();
+ }
+ span_raw_number(ctx, alphabet, false)?;
}
- return Token {
+ return Ok(Token {
kind: TokenType::Number,
view: Span(i, ctx.pos_a),
- };
+ });
}
fn span_raw_number<'a>(
ctx: &mut LexerContext<'a>,
alphabet: &'static str,
first_digit_lexed_p: bool,
-) {
+) -> Result<(), OryxError> {
if !first_digit_lexed_p {
match ctx.next() {
Some(c) if alphabet.contains(c) => c,
- Some(c) => {
- let base = match alphabet.len() {
- 2 => "binary",
- 8 => "octal",
- 10 => "decimal",
- 16 => "hexadecimal",
- _ => unreachable!(),
- };
- let msg = format!("Invalid {base} digit ‘{c}’");
- ctx.err_at_position(msg.as_str());
+ Some(c) if alphabet.len() == 16 && c.is_ascii_hexdigit() => {
+ return Err(OryxError::new(
+ ctx.pos_b,
+ ctx.pos_a,
+ format!("Hexadecimal digits must be uppercase"),
+ ));
},
- None => {
- let base = match alphabet.len() {
- 2 => "binary",
- 8 => "octal",
- 10 => "decimal",
- 16 => "hexadecimal",
- _ => unreachable!(),
- };
- let msg = format!(
- "Expected {base} digit but reached end-of-file instead"
- );
- ctx.err_at_position(msg.as_str());
+ Some(c) if c.is_alphanumeric() => {
+ let base = base2str(alphabet.len());
+ return Err(OryxError::new(
+ ctx.pos_b,
+ ctx.pos_a,
+ format!("Invalid {base} digit ‘{c}’"),
+ ));
+ },
+ _ => {
+ let base = base2str(alphabet.len());
+ return Err(OryxError::new(
+ ctx.pos_b,
+ ctx.pos_a,
+ format!("Expected {base} digit"),
+ ));
},
};
}
+ let (mut beg, mut end) = (0, 0);
let mut last_was_apos_p = false;
while let Some(c) = ctx.peek() {
match c {
- '\'' if last_was_apos_p => ctx.err_at_position(
- "Multiple concurrent digit separators in numeric literal",
- ),
+ '\'' if last_was_apos_p => {
+ return Err(OryxError::new(
+ ctx.pos_b,
+ ctx.pos_a + 1,
+                        "Numeric literals may not have adjacent digit separators",
+ ));
+ },
'\'' => {
last_was_apos_p = true;
ctx.next();
+ (beg, end) = (ctx.pos_b, ctx.pos_a);
},
_ if alphabet.contains(c) => {
last_was_apos_p = false;
@@ -400,27 +382,36 @@ fn span_raw_number<'a>(
}
if last_was_apos_p {
- ctx.err_at_position(
+ return Err(OryxError::new(
+ beg,
+ end,
"Numeric literals may not end with a digit separator",
- );
+ ));
}
+
+ return Ok(());
}
-fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Token {
+fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Result<Token, OryxError> {
let i = ctx.pos_b;
+
loop {
- if let Some(c) = ctx.next() {
- if c == '"' {
- break;
- }
- } else {
- ctx.err_at_position("Unterminated string");
+ match ctx.next() {
+ Some(c) if c == '"' => break,
+ Some(_) => {},
+ None => {
+ return Err(OryxError::new(
+ i,
+ ctx.pos_a,
+ "Unterminated string literal",
+ ));
+ },
}
}
- return Token {
+ return Ok(Token {
kind: TokenType::String,
view: Span(i, ctx.pos_a),
- };
+ });
}
fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token {
@@ -435,3 +426,13 @@ fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token {
};
return Token { kind, view };
}
+
+fn base2str(n: usize) -> &'static str {
+ return match n {
+ 2 => "binary",
+ 8 => "octal",
+ 10 => "decimal",
+ 16 => "hexadecimal",
+ _ => unreachable!(),
+ };
+}
diff --git a/oryxc/src/unicode/default_ignorable_code_point.rs b/oryxc/src/unicode/default_ignorable_code_point.rs
deleted file mode 100644
index b900a3b..0000000
--- a/oryxc/src/unicode/default_ignorable_code_point.rs
+++ /dev/null
@@ -1,363 +0,0 @@
-/* Autogenerated – DO NOT EDIT */
-
-static DEFAULT_IGNORABLE_CODE_POINT_L1: [u16; 544] = [
- 0, 1, 2, 3, 4, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 7, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-];
-static DEFAULT_IGNORABLE_CODE_POINT_L2: [u64; 320] = [
- 0,
- 0,
- 35184372088832,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 32768,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 268435456,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 6442450944,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 13510798882111488,
- 0,
- 63488,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 136339441907712,
- 281470681743360,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 68719476736,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 65535,
- 0,
- 0,
- 9223372036854775808,
- 0,
- 0,
- 4294967296,
- 143833713099145216,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 64424509440,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 574208952489738240,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
- 18446744073709551615,
-];
-#[inline]
-pub fn default_ignorable_code_point_p(c: char) -> bool {
- let cp = c as usize;
- let blki =
- unsafe { *DEFAULT_IGNORABLE_CODE_POINT_L1.get_unchecked(cp >> 11) }
- as usize;
- let in_blk_offset_p = cp & 0x7FF;
- let wordi = (blki * 32) + (in_blk_offset_p >> 6);
- let biti = in_blk_offset_p & 0x3F;
- unsafe {
- return (*DEFAULT_IGNORABLE_CODE_POINT_L2.get_unchecked(wordi)
- & (1 << biti))
- != 0;
- }
-}
diff --git a/oryxc/src/unicode/mod.rs b/oryxc/src/unicode/mod.rs
index 2fbdcb3..e4a719e 100644
--- a/oryxc/src/unicode/mod.rs
+++ b/oryxc/src/unicode/mod.rs
@@ -1,10 +1,8 @@
-pub mod default_ignorable_code_point;
pub mod line_terminator;
pub mod pattern_white_space;
pub mod xid_continue;
pub mod xid_start;
-pub use default_ignorable_code_point::default_ignorable_code_point_p;
pub use line_terminator::line_terminator_p;
pub use pattern_white_space::pattern_white_space_p;
pub use xid_continue::xid_continue_p;