use std::borrow::Cow;
use std::ffi::OsStr;
use std::fmt::Display;
use std::vec::Vec;
use std::{
	iter,
	mem,
	str,
};

use phf;
use soa_rs::{
	Soa,
	Soars,
};

use crate::{
	errors,
	unicode,
};

/* Every kind of token the lexer can produce.

   Each single-character punctuation token uses the ASCII code of its
   character as its discriminant (the enum is repr(u8)); the remaining
   variants have no explicit value and are therefore numbered sequentially
   after Tilde. */
#[allow(dead_code)]
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenType {
	/* End of input */
	Eof         = 0,

	/* Single-character punctuation */
	Ampersand   = b'&',
	AngleL      = b'<',
	AngleR      = b'>',
	Asterisk    = b'*',
	Bar         = b'|',
	BraceL      = b'{',
	BraceR      = b'}',
	BracketL    = b'[',
	BracketR    = b']',
	Caret       = b'^',
	Comma       = b',',
	Equals      = b'=',
	Exclamation = b'!',
	Minus       = b'-',
	ParenL      = b'(',
	ParenR      = b')',
	Plus        = b'+',
	Semicolon   = b';',
	Slash       = b'/',
	Tilde       = b'~',

	/* Punctuation that has no single-byte representation */
	AmpersandTilde, /* ‘&~’ */
	AngleL2,        /* ‘<<’ */
	AngleL3,        /* ‘<<<’ */
	AngleR2,        /* ‘>>’ */
	AngleR3,        /* ‘>>>’ */
	Ellipsis,       /* ‘…’ */

	/* Identifiers, keywords, and literals */
	Identifier,
	KeywordDef,
	KeywordFunc,
	KeywordReturn,
	Number,
	String,
}

impl TokenType {
	/* Tokens the lexer treats as “literals” for its adjacency check: after
	   one of these, another literal may not follow without something in
	   between.
	   NOTE(review): KeywordReturn is not in this set while KeywordDef and
	   KeywordFunc are — confirm that this is intentional. */
	pub fn literalp(&self) -> bool {
		return matches!(
			self,
			Self::Identifier
				| Self::KeywordDef
				| Self::KeywordFunc
				| Self::Number
				| Self::String
		);
	}

	/* Tokens that can begin an expression: the prefix operators listed
	   below, an opening parenthesis, ‘func’, identifiers, and number and
	   string literals */
	pub fn exprp(&self) -> bool {
		return matches!(
			self,
			Self::Ampersand
				| Self::Caret
				| Self::Exclamation
				| Self::Identifier
				| Self::KeywordFunc
				| Self::Minus
				| Self::Number
				| Self::ParenL
				| Self::Plus
				| Self::String
				| Self::Tilde
		);
	}
}

/* A region of the source text given as a pair of byte offsets (start, end).
   The lexer slices the source with ‘start..end’, so the end offset is
   exclusive. */
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Span(usize, usize);

/* A single lexed token.  The Soars derive lets tokens be stored in a
   Soa<Token>, which is what tokenize() returns. */
#[derive(Soars)]
#[soa_derive(Debug)]
pub struct Token {
	/* What sort of token this is */
	pub kind: TokenType,
	/* Where in the source text the token was found */
	pub view: Span,
}

/* A lexing error; the element type of the error list in tokenize()'s
   return type */
pub struct Error {
	/* Position the error refers to
	   NOTE(review): presumably a byte offset like the ones in Span — confirm */
	pub pos: usize,
	/* Message text; either a static string or an owned, formatted one */
	pub msg: Cow<'static, str>,
}

impl Error {
	/* Construct an Error for position ‘pos’.  ‘msg’ may be anything that
	   converts into a Cow<'static, str>, such as a string literal (borrowed)
	   or a String (owned). */
	fn new<T: Into<Cow<'static, str>>>(pos: usize, msg: T) -> Self {
		let msg = msg.into();
		return Self { pos, msg };
	}
}

/* Mutable state shared by the lexing functions while a single source text
   is being tokenized */
struct LexerContext<'a> {
	/* Byte offsets surrounding the most recently consumed character: pos_b
	   is where it starts and pos_a is where it ends.  Both are 0 before
	   anything has been consumed. */
	pos_a:          usize, /* Pos [a]fter char */
	pos_b:          usize, /* Pos [b]efore char */
	/* Iterator over the characters that have not been consumed yet */
	chars:          iter::Peekable<str::Chars<'a>>,
	/* The complete source text; sliced when looking up keywords */
	string:         &'a str,
	/* Name of the file being lexed; only used when reporting errors */
	filename:       &'a OsStr,
	/* True when the previous token was a literal and nothing has reset the
	   flag since, meaning that another literal here is an error */
	expect_punct_p: bool,
}

impl<'a> LexerContext<'a> {
	/* Create a context positioned at the very start of ‘string’ */
	fn new(filename: &'a OsStr, string: &'a str) -> Self {
		let chars = string.chars().peekable();
		return Self {
			filename,
			string,
			chars,
			pos_b: 0,
			pos_a: 0,
			expect_punct_p: false,
		};
	}

	/* Consume and return the next character, updating pos_b and pos_a to
	   the byte offsets of its start and end.  Returns None (and leaves the
	   offsets untouched) at the end of the input. */
	#[inline(always)]
	fn next(&mut self) -> Option<char> {
		return self.chars.next().map(|c| {
			self.pos_b = self.pos_a;
			self.pos_a += c.len_utf8();
			c
		});
	}

	/* Return the next character without consuming it */
	#[inline(always)]
	fn peek(&mut self) -> Option<char> {
		let upcoming = self.chars.peek();
		return upcoming.copied();
	}

	/* Hand the message ‘s’ and the file name to errors::err_at_position();
	   never returns.
	   NOTE(review): no offset is passed along despite the name — confirm
	   that errors::err_at_position() does not need one. */
	fn err_at_position<S: Display>(&self, s: S) -> ! {
		errors::err_at_position(self.filename, s);
	}

	/* Called right before a literal is lexed: fail if the previous token
	   was a literal too and nothing has reset the flag since */
	#[inline(always)]
	fn literal_spacing_guard(&self) {
		if !self.expect_punct_p {
			return;
		}
		self.err_at_position(
			"Two literals may not be directly adjacent to each other",
		);
	}
}

/* Reserved words and the token kinds they lex to.  tokenize_identifier()
   looks every identifier up in this map; anything not listed here is a
   plain TokenType::Identifier. */
static KEYWORDS: phf::Map<&'static str, TokenType> = phf::phf_map! {
	"def" => TokenType::KeywordDef,
	"func" => TokenType::KeywordFunc,
	"return" => TokenType::KeywordReturn,
};

pub fn tokenize(filename: &OsStr, s: &str) -> Result<Soa<Token>, Vec<Error>> {
	let mut toks = Soa::<Token>::with_capacity(s.len() / 2);
	let mut ctx = LexerContext::new(filename, s);

	while let Some(c) = ctx.next() {
		let (i, j) = (ctx.pos_b, ctx.pos_a);
		if let Some(tok) = match c {
			'/' if ctx.peek().is_some_and(|c| c == '*') => {
				skip_comment(&mut ctx);
				ctx.expect_punct_p = false;
				None
			},
			'<' if ctx.peek().is_some_and(|c| c == '<') => {
				ctx.next(); /* Consume ‘<’ */
				let kind = if ctx.peek().is_some_and(|c| c == '<') {
					ctx.next(); /* Consume ‘<’ */
					TokenType::AngleL3
				} else {
					TokenType::AngleL2
				};
				Some(Token {
					kind,
					view: Span(i, ctx.pos_a),
				})
			},
			'>' if ctx.peek().is_some_and(|c| c == '>') => {
				ctx.next(); /* Consume ‘>’ */
				let kind = if ctx.peek().is_some_and(|c| c == '>') {
					ctx.next(); /* Consume ‘>’ */
					TokenType::AngleR3
				} else {
					TokenType::AngleR2
				};
				Some(Token {
					kind,
					view: Span(i, ctx.pos_a),
				})
			},
			'&' if ctx.peek().is_some_and(|c| c == '~') => {
				ctx.next(); /* Consume ‘~’ */
				Some(Token {
					kind: TokenType::AmpersandTilde,
					view: Span(i, j + 1),
				})
			},
			'!' | '&' | '(' | ')' | '*' | '+' | ',' | '-' | '/' | ';' | '<'
			| '=' | '>' | '[' | ']' | '^' | '{' | '|' | '}' | '~' | '…' => {
				Some(Token {
					kind: unsafe { mem::transmute(c as u8) },
					view: Span(i, j),
				})
			},
			'#' => {
				ctx.literal_spacing_guard();
				Some(tokenize_number_based(&mut ctx))
			},
			'0'..='9' => {
				ctx.literal_spacing_guard();
				Some(tokenize_number(&mut ctx, "0123456789"))
			},
			'"' => {
				ctx.literal_spacing_guard();
				Some(tokenize_string(&mut ctx))
			},
			_ if unicode::xid_start_p(c) => {
				ctx.literal_spacing_guard();
				Some(tokenize_identifier(&mut ctx))
			},
			_ if unicode::pattern_white_space_p(c) => {
				if !unicode::default_ignorable_code_point_p(c) {
					ctx.expect_punct_p = false;
				}
				None
			},
			c => {
				let msg = format!("Invalid character ‘{c}’");
				ctx.err_at_position(msg.as_str());
			},
		} {
			ctx.expect_punct_p = tok.kind.literalp();
			toks.push(tok);
		}
	}

	toks.push(Token {
		kind: TokenType::Eof,
		view: Span(s.len() - 1, s.len()),
	});
	return Ok(toks);
}

/* Skip a block comment whose opening ‘/’ has already been consumed.
   Comments nest, so every inner ‘/*’ must be matched by its own ‘*/’.
   Reaching the end of the input inside a comment is an error. */
fn skip_comment<'a>(ctx: &mut LexerContext<'a>) {
	ctx.next(); /* Consume ‘*’ */
	let mut nesting = 1;
	loop {
		let Some(c) = ctx.next() else {
			ctx.err_at_position("Unterminated comment")
		};
		match (c, ctx.peek()) {
			('/', Some('*')) => {
				ctx.next(); /* Consume ‘*’ */
				nesting += 1;
			},
			('*', Some('/')) => {
				ctx.next(); /* Consume ‘/’ */
				nesting -= 1;
				if nesting == 0 {
					return;
				}
			},
			_ => {},
		};
	}
}

fn tokenize_number_based<'a>(ctx: &mut LexerContext<'a>) -> Token {
	let i = ctx.pos_b;
	let alphabet = match ctx.next() {
		Some('b') => "01",
		Some('o') => "01234567",
		Some('d') => "0123456789",
		Some('x') => "0123456789ABCDEF",
		Some(c) => {
			let msg = format!("Invalid number base specifier ‘{c}’");
			ctx.err_at_position(msg.as_str());
		},
		None => ctx.err_at_position("Expected number base specifier after ‘#’"),
	};
	let mut tok = match ctx.next() {
		Some(c) if alphabet.contains(c) => tokenize_number(ctx, alphabet),
		Some(c) => {
			let base = match alphabet.len() {
				2 => "binary",
				8 => "octal",
				10 => "decimal",
				16 => "hexadecimal",
				_ => unreachable!(),
			};
			let msg = format!("Invalid {base} digit ‘{c}’");
			ctx.err_at_position(msg.as_str());
		},
		None => ctx.err_at_position("Expected number after base specifier"),
	};
	tok.view = Span(i, ctx.pos_a);
	return tok;
}

/* Lex a numeric literal whose first digit has already been consumed.
   ‘alphabet’ holds the characters that count as digits.  After the integer
   part an optional ‘.’ (with or without digits after it) and an optional
   ‘e’ followed by at least one digit are accepted. */
fn tokenize_number<'a>(
	ctx: &mut LexerContext<'a>,
	alphabet: &'static str,
) -> Token {
	let start = ctx.pos_b;
	span_raw_number(ctx, alphabet, true);

	/* Fractional part; the ‘.’ is consumed even when no digit follows */
	if ctx.peek() == Some('.') {
		ctx.next();
		if matches!(ctx.peek(), Some(d) if alphabet.contains(d)) {
			span_raw_number(ctx, alphabet, false);
		}
	}

	/* Exponential part; here at least one digit is mandatory */
	if ctx.peek() == Some('e') {
		ctx.next();
		span_raw_number(ctx, alphabet, false);
	}

	let view = Span(start, ctx.pos_a);
	return Token {
		kind: TokenType::Number,
		view,
	};
}

fn span_raw_number<'a>(
	ctx: &mut LexerContext<'a>,
	alphabet: &'static str,
	first_digit_lexed_p: bool,
) {
	if !first_digit_lexed_p {
		match ctx.next() {
			Some(c) if alphabet.contains(c) => c,
			Some(c) => {
				let base = match alphabet.len() {
					2 => "binary",
					8 => "octal",
					10 => "decimal",
					16 => "hexadecimal",
					_ => unreachable!(),
				};
				let msg = format!("Invalid {base} digit ‘{c}’");
				ctx.err_at_position(msg.as_str());
			},
			None => {
				let base = match alphabet.len() {
					2 => "binary",
					8 => "octal",
					10 => "decimal",
					16 => "hexadecimal",
					_ => unreachable!(),
				};
				let msg = format!(
					"Expected {base} digit but reached end-of-file instead"
				);
				ctx.err_at_position(msg.as_str());
			},
		};
	}

	let mut last_was_apos_p = false;
	while let Some(c) = ctx.peek() {
		match c {
			'\'' if last_was_apos_p => ctx.err_at_position(
				"Multiple concurrent digit separators in numeric literal",
			),
			'\'' => {
				last_was_apos_p = true;
				ctx.next();
			},
			_ if alphabet.contains(c) => {
				last_was_apos_p = false;
				ctx.next();
			},
			_ => break,
		};
	}

	if last_was_apos_p {
		ctx.err_at_position(
			"Numeric literals may not end with a digit separator",
		);
	}
}

/* Lex a string literal whose opening ‘"’ has already been consumed.
   Everything up to and including the next ‘"’ belongs to the string; no
   escape sequences are recognized.  The returned span includes both
   quotes.  Reaching the end of the input first is an error. */
fn tokenize_string<'a>(ctx: &mut LexerContext<'a>) -> Token {
	let start = ctx.pos_b;
	loop {
		match ctx.next() {
			Some('"') => break,
			Some(_) => {},
			None => ctx.err_at_position("Unterminated string"),
		};
	}
	let view = Span(start, ctx.pos_a);
	return Token {
		kind: TokenType::String,
		view,
	};
}

/* Lex an identifier or keyword whose first character has already been
   consumed.  All following XID_Continue characters are added to it; the
   resulting text is then looked up in KEYWORDS to decide between a keyword
   token and a plain identifier. */
fn tokenize_identifier<'a>(ctx: &mut LexerContext<'a>) -> Token {
	let start = ctx.pos_b;
	while let Some(c) = ctx.peek() {
		if !unicode::xid_continue_p(c) {
			break;
		}
		ctx.next();
	}
	let end = ctx.pos_a;

	let kind = KEYWORDS
		.get(&ctx.string[start..end])
		.copied()
		.unwrap_or(TokenType::Identifier);
	return Token {
		kind,
		view: Span(start, end),
	};
}