summary | refs | log | tree | commit | diff
path: root/oryxc/build.rs
diff options
context:
space:
mode:
Diffstat (limited to 'oryxc/build.rs')
-rw-r--r-- oryxc/build.rs 164
1 files changed, 82 insertions, 82 deletions
diff --git a/oryxc/build.rs b/oryxc/build.rs
index 8de0327..9bb3497 100644
--- a/oryxc/build.rs
+++ b/oryxc/build.rs
@@ -1,28 +1,21 @@
-const NAMES: &[&str] = &[
- "xid_start",
- "xid_continue",
- "pattern_white_space",
- "line_terminator",
-];
-
fn main() {
- use std::env;
-
- let out_dir = env::var("OUT_DIR").unwrap();
- let root = env::var("CARGO_MANIFEST_DIR").unwrap();
- let generated = format!("{root}/generated");
-
#[cfg(feature = "fetch")]
- fetch::run(&out_dir, &generated);
-
- #[cfg(not(feature = "fetch"))]
- fallback::run(&out_dir, &generated);
+ {
+ let root = std::env::var("CARGO_MANIFEST_DIR").unwrap();
+ let out_dir = format!("{root}/src/unicode");
+ fetch::run(&out_dir);
+ }
}
#[cfg(feature = "fetch")]
mod fetch {
use std::collections::HashMap;
- use std::fs;
+ use std::env;
+ use std::fs::{
+ self,
+ File,
+ OpenOptions,
+ };
use std::io::{
self,
BufRead,
@@ -32,13 +25,22 @@ mod fetch {
Write,
};
+ use zip::ZipArchive;
+
const MIN_SHIFT: usize = 1;
const MAX_SHIFT: usize = 22;
const UCD_URL: &str =
"https://www.unicode.org/Public/zipped/latest/UCD.zip";
- pub fn run(out_dir: &str, generated: &str) {
- let data = format!("{out_dir}/data");
+ const NAMES: &[&str] = &[
+ "line_terminator",
+ "pattern_white_space",
+ "xid_continue",
+ "xid_start",
+ ];
+
+ pub fn run(out_dir: &str) {
+ let data = format!("{}/data", env::var("CARGO_MANIFEST_DIR").unwrap());
let derived = format!("{data}/DerivedCoreProperties.txt");
let proplist = format!("{data}/PropList.txt");
@@ -52,18 +54,19 @@ mod fetch {
ureq::get(UCD_URL)
.call()
.expect("failed to download UCD.zip")
+ .into_body()
.into_reader()
.read_to_end(&mut bytes)
.expect("failed to read UCD.zip");
fs::create_dir_all(&data).unwrap();
- zip::ZipArchive::new(Cursor::new(bytes))
+ ZipArchive::new(Cursor::new(bytes))
.expect("failed to open UCD.zip")
.extract(&data)
.expect("failed to extract UCD.zip");
// XID_Start and XID_Continue additions
- let mut f = fs::OpenOptions::new()
+ let mut f = OpenOptions::new()
.append(true)
.open(&derived)
.expect("failed to open DerivedCoreProperties.txt");
@@ -77,14 +80,10 @@ mod fetch {
.unwrap();
}
- generate_from_file(out_dir, &derived, "XID_Start", "xid_start");
- generate_from_file(out_dir, &derived, "XID_Continue", "xid_continue");
- generate_from_file(
- out_dir,
- &proplist,
- "Pattern_White_Space",
- "pattern_white_space",
- );
+ fs::create_dir_all(out_dir).unwrap();
+ generate_from_file(out_dir, &derived, "XID_Start");
+ generate_from_file(out_dir, &derived, "XID_Continue");
+ generate_from_file(out_dir, &proplist, "Pattern_White_Space");
generate_from_codepoints(
out_dir,
&[
@@ -93,25 +92,23 @@ mod fetch {
],
"line_terminator",
);
+ generate_mod_rs(out_dir);
+ }
- // Keep generated/ in sync so it can be committed as a fallback
- fs::create_dir_all(generated).unwrap();
- for name in super::NAMES {
- fs::copy(
- format!("{out_dir}/{name}.rs"),
- format!("{generated}/{name}.rs"),
- )
- .unwrap_or_else(|e| {
- panic!("failed to copy {name}.rs to generated/: {e}")
- });
+ fn generate_mod_rs(out_dir: &str) {
+ let mut f = File::create(format!("{out_dir}/mod.rs")).unwrap();
+ writeln!(f, "/* Autogenerated – DO NOT EDIT */\n").unwrap();
+ for &name in NAMES {
+ writeln!(f, "pub mod {name};").unwrap();
+ writeln!(f, "pub use {name}::{};", mkpredname(name)).unwrap();
}
}
- fn generate_from_file(out_dir: &str, path: &str, prop: &str, name: &str) {
+ fn generate_from_file(out_dir: &str, path: &str, prop: &str) {
let mut bitmap = vec![false; 0x110000];
parse_file(path, prop, &mut bitmap)
.unwrap_or_else(|e| panic!("failed to read {path}: {e}"));
- write_output(out_dir, name, &bitmap);
+ write_output(out_dir, prop.to_lowercase().as_str(), &bitmap);
}
fn generate_from_codepoints(
@@ -128,8 +125,8 @@ mod fetch {
fn write_output(out_dir: &str, name: &str, bitmap: &[bool]) {
let (shift, lvl1, lvl2) = optimize_tables(bitmap);
- let mut f = fs::File::create(format!("{out_dir}/{name}.rs")).unwrap();
- generate_code(&mut f, name, shift, &lvl1, &lvl2);
+ let mut f = File::create(format!("{out_dir}/{name}.rs")).unwrap();
+ generate_code(&mut f, name, shift, &lvl1, &lvl2).unwrap();
}
fn optimize_tables(bitmap: &[bool]) -> (usize, Vec<u16>, Vec<u64>) {
@@ -145,7 +142,7 @@ mod fetch {
}
}
- config
+ return config;
}
fn parse_file(
@@ -153,7 +150,7 @@ mod fetch {
prop: &str,
bitmap: &mut [bool],
) -> io::Result<()> {
- let file = fs::File::open(path)?;
+ let file = File::open(path)?;
let reader = BufReader::new(file);
for line in reader.lines() {
@@ -186,7 +183,7 @@ mod fetch {
}
}
- Ok(())
+ return Ok(());
}
fn build_tables(bitmap: &[bool], shift: usize) -> (Vec<u16>, Vec<u64>) {
@@ -218,7 +215,7 @@ mod fetch {
}
}
- (lvl1, lvl2)
+ return (lvl1, lvl2);
}
fn generate_code(
@@ -227,63 +224,66 @@ mod fetch {
shift: usize,
level1: &[u16],
level2: &[u64],
- ) {
+ ) -> io::Result<()> {
let upper_name = prop_name.to_uppercase();
let lower_name = prop_name.to_lowercase();
let block_size = 1 << shift;
let mask = block_size - 1;
let u64s_per_block = (block_size + 63) / 64;
+ let pred_name = mkpredname(&lower_name);
- let pred_name = if lower_name.contains('_') {
- format!("{lower_name}_p")
+ let biggest_i = level1.into_iter().max().unwrap().clone();
+ let l1type = if biggest_i <= u8::MAX as u16 {
+ "u8"
} else {
- format!("{lower_name}p")
+ "u16"
};
- writeln!(f, "/* Autogenerated – DO NOT EDIT */").unwrap();
- writeln!(f).unwrap();
+ writeln!(f, "/* Autogenerated – DO NOT EDIT */")?;
+ writeln!(f)?;
writeln!(
f,
- "static {upper_name}_L1: [u16; {}] = {level1:?};",
+ "static {upper_name}_L1: [{l1type}; {}] = {level1:?};",
level1.len()
- )
- .unwrap();
+ )?;
writeln!(
f,
"static {upper_name}_L2: [u64; {}] = {level2:?};",
level2.len()
- )
- .unwrap();
- writeln!(f, "#[inline]").unwrap();
- writeln!(f, "pub fn {pred_name}(c: char) -> bool {{").unwrap();
- writeln!(f, "\tlet cp = c as usize;").unwrap();
- writeln!(f, "\tlet blki = unsafe {{ *{upper_name}_L1.get_unchecked(cp >> {shift}) }} as usize;").unwrap();
- writeln!(f, "\tlet in_blk_offset_p = cp & 0x{mask:X};").unwrap();
+ )?;
+ writeln!(f, "#[inline]")?;
+ writeln!(f, "pub fn {pred_name}(c: char) -> bool {{")?;
+ writeln!(f, "\tlet cp = c as usize;")?;
+ writeln!(
+ f,
+ "\tlet blki = unsafe {{ *{upper_name}_L1.get_unchecked(cp >> {shift}) }} as usize;"
+ )?;
+ writeln!(f, "\tlet in_blk_offset_p = cp & 0x{mask:X};")?;
if u64s_per_block == 1 {
- writeln!(f, "\tunsafe {{ return ({upper_name}_L2.get_unchecked(blki) & (1 << in_blk_offset_p)) != 0; }}").unwrap();
+ writeln!(
+ f,
+ "\tunsafe {{ return ({upper_name}_L2.get_unchecked(blki) & (1 << in_blk_offset_p)) != 0; }}"
+ )?;
} else {
writeln!(
f,
"\tlet wordi = (blki * {u64s_per_block}) + (in_blk_offset_p >> 6);"
- )
- .unwrap();
- writeln!(f, "\tlet biti = in_blk_offset_p & 0x3F;").unwrap();
- writeln!(f, "\tunsafe {{ return (*{upper_name}_L2.get_unchecked(wordi) & (1 << biti)) != 0; }}").unwrap();
+ )?;
+ writeln!(f, "\tlet biti = in_blk_offset_p & 0x3F;")?;
+ writeln!(
+ f,
+ "\tunsafe {{ return (*{upper_name}_L2.get_unchecked(wordi) & (1 << biti)) != 0; }}"
+ )?;
}
- writeln!(f, "}}").unwrap();
+ return writeln!(f, "}}");
}
-}
-
-#[cfg(not(feature = "fetch"))]
-mod fallback {
- use std::fs;
- pub fn run(out_dir: &str, generated: &str) {
- for name in super::NAMES {
- let src = format!("{generated}/{name}.rs");
- println!("cargo:rerun-if-changed={src}");
- fs::copy(&src, format!("{out_dir}/{name}.rs"))
- .unwrap_or_else(|e| panic!("failed to copy {src}: {e}"));
- }
+ fn mkpredname<S: AsRef<str>>(s: S) -> String {
+ let s = s.as_ref();
+ return if s.contains('_') {
+ format!("{s}_p")
+ } else {
+ format!("{s}p")
+ };
}
}