From c77b9644712f93f529309751e06d59e22dbb5090 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Wed, 11 Mar 2026 01:37:08 +0100 Subject: Rework build.rs --- oryxc/build.rs | 164 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 82 insertions(+), 82 deletions(-) (limited to 'oryxc/build.rs') diff --git a/oryxc/build.rs b/oryxc/build.rs index 8de0327..9bb3497 100644 --- a/oryxc/build.rs +++ b/oryxc/build.rs @@ -1,28 +1,21 @@ -const NAMES: &[&str] = &[ - "xid_start", - "xid_continue", - "pattern_white_space", - "line_terminator", -]; - fn main() { - use std::env; - - let out_dir = env::var("OUT_DIR").unwrap(); - let root = env::var("CARGO_MANIFEST_DIR").unwrap(); - let generated = format!("{root}/generated"); - #[cfg(feature = "fetch")] - fetch::run(&out_dir, &generated); - - #[cfg(not(feature = "fetch"))] - fallback::run(&out_dir, &generated); + { + let root = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let out_dir = format!("{root}/src/unicode"); + fetch::run(&out_dir); + } } #[cfg(feature = "fetch")] mod fetch { use std::collections::HashMap; - use std::fs; + use std::env; + use std::fs::{ + self, + File, + OpenOptions, + }; use std::io::{ self, BufRead, @@ -32,13 +25,22 @@ mod fetch { Write, }; + use zip::ZipArchive; + const MIN_SHIFT: usize = 1; const MAX_SHIFT: usize = 22; const UCD_URL: &str = "https://www.unicode.org/Public/zipped/latest/UCD.zip"; - pub fn run(out_dir: &str, generated: &str) { - let data = format!("{out_dir}/data"); + const NAMES: &[&str] = &[ + "line_terminator", + "pattern_white_space", + "xid_continue", + "xid_start", + ]; + + pub fn run(out_dir: &str) { + let data = format!("{}/data", env::var("CARGO_MANIFEST_DIR").unwrap()); let derived = format!("{data}/DerivedCoreProperties.txt"); let proplist = format!("{data}/PropList.txt"); @@ -52,18 +54,19 @@ mod fetch { ureq::get(UCD_URL) .call() .expect("failed to download UCD.zip") + .into_body() .into_reader() .read_to_end(&mut bytes) .expect("failed to read UCD.zip"); fs::create_dir_all(&data).unwrap(); - zip::ZipArchive::new(Cursor::new(bytes)) + ZipArchive::new(Cursor::new(bytes)) .expect("failed to open UCD.zip") .extract(&data) .expect("failed to extract UCD.zip"); // XID_Start and XID_Continue additions - let mut f = fs::OpenOptions::new() + let mut f = OpenOptions::new() .append(true) .open(&derived) .expect("failed to open DerivedCoreProperties.txt"); @@ -77,14 +80,10 @@ mod fetch { .unwrap(); } - generate_from_file(out_dir, &derived, "XID_Start", "xid_start"); - generate_from_file(out_dir, &derived, "XID_Continue", "xid_continue"); - generate_from_file( - out_dir, - &proplist, - "Pattern_White_Space", - "pattern_white_space", - ); + fs::create_dir_all(out_dir).unwrap(); + generate_from_file(out_dir, &derived, "XID_Start"); + generate_from_file(out_dir, &derived, "XID_Continue"); + generate_from_file(out_dir, &proplist, "Pattern_White_Space"); generate_from_codepoints( out_dir, &[ @@ -93,25 +92,23 @@ mod fetch { ], "line_terminator", ); + generate_mod_rs(out_dir); + } - // Keep generated/ in sync so it can be committed as a fallback - fs::create_dir_all(generated).unwrap(); - for name in super::NAMES { - fs::copy( - format!("{out_dir}/{name}.rs"), - format!("{generated}/{name}.rs"), - ) - .unwrap_or_else(|e| { - panic!("failed to copy {name}.rs to generated/: {e}") - }); + fn generate_mod_rs(out_dir: &str) { + let mut f = File::create(format!("{out_dir}/mod.rs")).unwrap(); + writeln!(f, "/* Autogenerated – DO NOT EDIT */\n").unwrap(); + for &name in NAMES { + writeln!(f, "pub mod {name};").unwrap(); + writeln!(f, "pub use {name}::{};", mkpredname(name)).unwrap(); } } - fn generate_from_file(out_dir: &str, path: &str, prop: &str, name: &str) { + fn generate_from_file(out_dir: &str, path: &str, prop: &str) { let mut bitmap = vec![false; 0x110000]; parse_file(path, prop, &mut bitmap) .unwrap_or_else(|e| panic!("failed to read {path}: {e}")); - write_output(out_dir, name, &bitmap); + write_output(out_dir, prop.to_lowercase().as_str(), &bitmap); } fn generate_from_codepoints( @@ -128,8 +125,8 @@ mod fetch { fn write_output(out_dir: &str, name: &str, bitmap: &[bool]) { let (shift, lvl1, lvl2) = optimize_tables(bitmap); - let mut f = fs::File::create(format!("{out_dir}/{name}.rs")).unwrap(); - generate_code(&mut f, name, shift, &lvl1, &lvl2); + let mut f = File::create(format!("{out_dir}/{name}.rs")).unwrap(); + generate_code(&mut f, name, shift, &lvl1, &lvl2).unwrap(); } fn optimize_tables(bitmap: &[bool]) -> (usize, Vec, Vec) { @@ -145,7 +142,7 @@ mod fetch { } } - config + return config; } fn parse_file( @@ -153,7 +150,7 @@ mod fetch { prop: &str, bitmap: &mut [bool], ) -> io::Result<()> { - let file = fs::File::open(path)?; + let file = File::open(path)?; let reader = BufReader::new(file); for line in reader.lines() { @@ -186,7 +183,7 @@ mod fetch { } } - Ok(()) + return Ok(()); } fn build_tables(bitmap: &[bool], shift: usize) -> (Vec, Vec) { @@ -218,7 +215,7 @@ mod fetch { } } - (lvl1, lvl2) + return (lvl1, lvl2); } fn generate_code( @@ -227,63 +224,66 @@ mod fetch { shift: usize, level1: &[u16], level2: &[u64], - ) { + ) -> io::Result<()> { let upper_name = prop_name.to_uppercase(); let lower_name = prop_name.to_lowercase(); let block_size = 1 << shift; let mask = block_size - 1; let u64s_per_block = (block_size + 63) / 64; + let pred_name = mkpredname(&lower_name); - let pred_name = if lower_name.contains('_') { - format!("{lower_name}_p") + let biggest_i = level1.into_iter().max().unwrap().clone(); + let l1type = if biggest_i <= u8::MAX as u16 { + "u8" } else { - format!("{lower_name}p") + "u16" }; - writeln!(f, "/* Autogenerated – DO NOT EDIT */").unwrap(); - writeln!(f).unwrap(); + writeln!(f, "/* Autogenerated – DO NOT EDIT */")?; + writeln!(f)?; writeln!( f, - "static {upper_name}_L1: [u16; {}] = {level1:?};", + "static {upper_name}_L1: [{l1type}; {}] = {level1:?};", level1.len() - ) - .unwrap(); + )?; writeln!( f, "static {upper_name}_L2: [u64; {}] = {level2:?};", level2.len() - ) - .unwrap(); - writeln!(f, "#[inline]").unwrap(); - writeln!(f, "pub fn {pred_name}(c: char) -> bool {{").unwrap(); - writeln!(f, "\tlet cp = c as usize;").unwrap(); - writeln!(f, "\tlet blki = unsafe {{ *{upper_name}_L1.get_unchecked(cp >> {shift}) }} as usize;").unwrap(); - writeln!(f, "\tlet in_blk_offset_p = cp & 0x{mask:X};").unwrap(); + )?; + writeln!(f, "#[inline]")?; + writeln!(f, "pub fn {pred_name}(c: char) -> bool {{")?; + writeln!(f, "\tlet cp = c as usize;")?; + writeln!( + f, + "\tlet blki = unsafe {{ *{upper_name}_L1.get_unchecked(cp >> {shift}) }} as usize;" + )?; + writeln!(f, "\tlet in_blk_offset_p = cp & 0x{mask:X};")?; if u64s_per_block == 1 { - writeln!(f, "\tunsafe {{ return ({upper_name}_L2.get_unchecked(blki) & (1 << in_blk_offset_p)) != 0; }}").unwrap(); + writeln!( + f, + "\tunsafe {{ return ({upper_name}_L2.get_unchecked(blki) & (1 << in_blk_offset_p)) != 0; }}" + )?; } else { writeln!( f, "\tlet wordi = (blki * {u64s_per_block}) + (in_blk_offset_p >> 6);" - ) - .unwrap(); - writeln!(f, "\tlet biti = in_blk_offset_p & 0x3F;").unwrap(); - writeln!(f, "\tunsafe {{ return (*{upper_name}_L2.get_unchecked(wordi) & (1 << biti)) != 0; }}").unwrap(); + )?; + writeln!(f, "\tlet biti = in_blk_offset_p & 0x3F;")?; + writeln!( + f, + "\tunsafe {{ return (*{upper_name}_L2.get_unchecked(wordi) & (1 << biti)) != 0; }}" + )?; } - writeln!(f, "}}").unwrap(); + return writeln!(f, "}}"); } -} - -#[cfg(not(feature = "fetch"))] -mod fallback { - use std::fs; - pub fn run(out_dir: &str, generated: &str) { - for name in super::NAMES { - let src = format!("{generated}/{name}.rs"); - println!("cargo:rerun-if-changed={src}"); - fs::copy(&src, format!("{out_dir}/{name}.rs")) - .unwrap_or_else(|e| panic!("failed to copy {src}: {e}")); - } + fn mkpredname>(s: S) -> String { + let s = s.as_ref(); + return if s.contains('_') { + format!("{s}_p") + } else { + format!("{s}p") + }; } } -- cgit v1.2.3