From da65ee39162d0323321340b2a9cef9a013ad36ef Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Sun, 29 Mar 2026 23:09:46 +0200
Subject: Beginning sema work

---
 oryxc/src/sema/mod.rs       |   3 +
 oryxc/src/sema/typecheck.rs | 243 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 246 insertions(+)
 create mode 100644 oryxc/src/sema/mod.rs
 create mode 100644 oryxc/src/sema/typecheck.rs

(limited to 'oryxc/src/sema')

diff --git a/oryxc/src/sema/mod.rs b/oryxc/src/sema/mod.rs
new file mode 100644
index 0000000..9d55486
--- /dev/null
+++ b/oryxc/src/sema/mod.rs
@@ -0,0 +1,3 @@
+mod typecheck;
+
+pub use typecheck::*;
diff --git a/oryxc/src/sema/typecheck.rs b/oryxc/src/sema/typecheck.rs
new file mode 100644
index 0000000..27c3e31
--- /dev/null
+++ b/oryxc/src/sema/typecheck.rs
@@ -0,0 +1,243 @@
+use crate::compiler::{
+    CompilerState,
+    FileData,
+};
+use crate::errors::OryxError;
+use crate::parser::{
+    self,
+    AstType,
+};
+use crate::prelude::*;
+use crate::unistr::UniStr;
+
+/// Typechecks a multi-symbol definition such as ‘def a, b int = x, y;’.
+///
+/// Every declared identifier is looked up in the symbol table of the scope
+/// SCOPE_ID and marked as ‘resolving’.  The expressions on the right-hand
+/// side are then typechecked, with multi-value types flattened into their
+/// component types, and matched one-to-one against the declared symbols.
+/// On success each symbol is marked as ‘resolved’ and given its type.
+///
+/// # Errors
+///
+/// Returns an [`OryxError`] if
+/// - a symbol is already being resolved or is poisoned,
+/// - the number of values differs from the number of symbols,
+/// - an explicit type annotation is not a type expression,
+/// - a value does not implicitly cast to the annotated type, or
+/// - typechecking any of the involved expressions fails.
+///
+/// # Panics
+///
+/// Panics if the AST, tokens, or scopes of FDATA are unavailable, if a
+/// declared identifier is missing from the symbol table, or if a symbol
+/// is in a state other than unresolved, resolving, or poisoned.
+pub fn typecheck_multi_def_bind(
+    c_state: &CompilerState,
+    fdata: &FileData,
+    scope_id: ScopeId,
+    node: u32,
+) -> Result<(), OryxError> {
+    let ast = fdata.ast.get().unwrap();
+    let tokens = fdata.tokens.get().unwrap();
+    let scope = &fdata.scopes.get().unwrap()[scope_id.into()];
+
+    let SubNodes(decls, exprs) = ast.nodes.sub()[node as usize];
+    let (decls, exprs) = (decls as usize, exprs as usize);
+    let nidents = ast.extra[decls] as usize;
+    let nexprs = ast.extra[exprs] as usize;
+
+    /* Interns the identifier spelled by the token ITOK */
+    let intern_ident = |itok: usize| {
+        let view = tokens.view()[itok];
+        /* SAFETY(review): the round trip through a raw pointer detaches
+         * the string slice from the borrow of ‘fdata.buffer’.  This is
+         * only sound if the buffer outlives every use of the interned
+         * string; confirm against the interner. */
+        let s =
+            UniStr(unsafe { &*(&fdata.buffer[view.0..view.1] as *const str) });
+        c_state.ident_intr.intern(s)
+    };
+
+    /* Mark our identifiers as ‘resolving’ */
+    let mut circularp = false;
+    for i in 0..nidents {
+        let itok = ast.extra[decls + 1 + i * 2] as usize;
+        let id = intern_ident(itok);
+        let mut sym = scope.symtab.get_mut(&id).unwrap();
+        match sym.state {
+            ResolutionState::Unresolved => {
+                sym.state = ResolutionState::Resolving;
+            },
+            /* Already in flight or known bad; poison it and report */
+            ResolutionState::Resolving | ResolutionState::Poisoned => {
+                sym.state = ResolutionState::Poisoned;
+                circularp = true;
+            },
+            _ => unreachable!(),
+        }
+    }
+
+    if circularp {
+        /* NOTE(review): symbols moved to ‘resolving’ above are not reset
+         * when this function returns an error; confirm that this is
+         * intended */
+        return Err(OryxError::new(
+            tokens.view()[ast.nodes.tok()[node as usize] as usize],
+            "circular dependency of multiple symbol definition",
+        ));
+    }
+
+    let mut types = Vec::with_capacity(nexprs);
+    for i in 0..nexprs {
+        let expr = ast.extra[exprs + 1 + i];
+        let id = typecheck_expr(c_state, fdata, expr)?;
+        match c_state.type_intr.get(id) {
+            /* A multi-value expression contributes one type per value */
+            OryxType::MultiValue(xs) => {
+                types.extend(xs.into_iter().copied());
+            },
+            _ => types.push(id),
+        }
+    }
+
+    if types.len() != nidents {
+        return Err(OryxError::new(
+            tokens.view()[ast.nodes.tok()[node as usize] as usize],
+            format!(
+                "attempted to assign {} values to {} symbols",
+                types.len(),
+                nidents
+            ),
+        ));
+    }
+
+    /* The check above guarantees one type per declared symbol */
+    for (i, &expr_type) in types.iter().enumerate() {
+        let itok = ast.extra[decls + 1 + i * 2] as usize;
+        let tnode = ast.extra[decls + 2 + i * 2];
+
+        let alleged_type = if tnode == u32::MAX {
+            /* Inferred type */
+            TypeId::INVALID
+        } else {
+            /* In ‘def a, b int = x, y;’, the type node for A and B is the
+             * same, so we need to make sure we only typecheck it once. */
+            let id = match ast.types[tnode as usize].get() {
+                TypeId::INVALID => typecheck_expr(c_state, fdata, tnode)?,
+                n => n,
+            };
+
+            let OryxType::Type(id) = c_state.type_intr.get(id) else {
+                return Err(OryxError::new(
+                    tokens.view()[ast.nodes.tok()[tnode as usize] as usize],
+                    "expected a type expression",
+                ));
+            };
+            *id
+        };
+
+        let vtype = if alleged_type == TypeId::INVALID {
+            expr_type
+        } else if type_implicitly_casts_p(c_state, expr_type, alleged_type) {
+            alleged_type
+        } else {
+            return Err(OryxError::new(
+                tokens.view()[itok],
+                "expression type is not compatible with the explicitly provided symbol type",
+            ));
+        };
+
+        let id = intern_ident(itok);
+        let mut sym = scope.symtab.get_mut(&id).unwrap();
+        sym.state = ResolutionState::Resolved;
+        sym.vtype = vtype;
+    }
+
+    Ok(())
+}
+
+/// Computes the type of the expression NODE and records it in the AST.
+///
+/// The computed type is stored in ‘ast.types[node]’ before it is returned.
+///
+/// # Errors
+///
+/// Returns an [`OryxError`] if a non-pointer expression is dereferenced, or
+/// if typechecking a subexpression fails.
+///
+/// # Panics
+///
+/// Panics if the AST or tokens of FDATA are unavailable, if NODE is of a
+/// kind that is not handled below, or if it is of an expression kind that
+/// is still marked ‘todo!()’.
+fn typecheck_expr(
+    c_state: &CompilerState,
+    fdata: &FileData,
+    node: u32,
+) -> Result<TypeId, OryxError> {
+    let ast = fdata.ast.get().unwrap();
+    let tokens = fdata.tokens.get().unwrap();
+
+    /* Break the build when the parser’s expression-kind count changes, so
+     * that the match below gets revisited */
+    const {
+        assert!(parser::NEXPRKINDS == 10, "Missing expression case");
+    };
+    let expr_type = match ast.nodes.kind()[node as usize] {
+        AstType::BinaryOperator => todo!(),
+        AstType::Dereference => {
+            let pointer = ast.nodes.sub()[node as usize].0;
+            let ptype = typecheck_expr(c_state, fdata, pointer)?;
+            let &OryxType::Pointer { base } = c_state.type_intr.get(ptype)
+            else {
+                let tok = ast.nodes.tok()[pointer as usize] as usize;
+                /* TODO: Name the offending type in the message once types
+                 * can be rendered as text */
+                return Err(OryxError::new(
+                    tokens.view()[tok],
+                    "cannot dereference a non-pointer type",
+                ));
+            };
+            base
+        },
+        AstType::FunCall => todo!(),
+        AstType::FunProto => todo!(),
+        AstType::Function => todo!(),
+        AstType::Identifier => todo!(),
+        AstType::Number => c_state.type_intr.intern(OryxType::UNumber),
+        AstType::Pointer => {
+            let pointee = ast.nodes.sub()[node as usize].0;
+            let base = typecheck_expr(c_state, fdata, pointee)?;
+            c_state.type_intr.intern(OryxType::Pointer { base })
+        },
+        AstType::String => todo!(),
+        AstType::UnaryOperator => todo!(),
+        _ => unreachable!(),
+    };
+
+    ast.types[node as usize].set(expr_type);
+    Ok(expr_type)
+}
+
+/// Reports whether a value of type FROM may be used where a value of type
+/// TO is expected without an explicit cast.
+///
+/// A type always casts to itself.  Beyond that, ‘UNumber’ casts to any
+/// ‘Integer’ type and ‘UBoolean’ casts to any ‘Boolean’ type.
+fn type_implicitly_casts_p(
+    c_state: &CompilerState,
+    from: TypeId,
+    to: TypeId,
+) -> bool {
+    if from == to {
+        return true;
+    }
+
+    /* TODO: Handle other types */
+    matches!(
+        (c_state.type_intr.get(from), c_state.type_intr.get(to)),
+        (OryxType::UNumber, OryxType::Integer { .. })
+            | (OryxType::UBoolean, OryxType::Boolean { .. })
+    )
+}
-- 
cgit v1.2.3