module token

import scanner { Scanner }
import constants

// ===== Tokenizer =====

// Token is a compact token record: the byte offset where the token begins
// in the input, plus its classification tag. Packed so a token fits in
// 5 bytes; the token's text is recovered from the input via `start`.
@[packed]
pub struct Token {
pub:
	start u32
	tag   TokenType
}

// TokenType classifies a Token. Operator tokens are split by length
// (`operator`, `operator_len2`, `operator_len3`) so consumers can slice
// the source text without re-scanning.
pub enum TokenType as u8 {
	eof
	nil
	string
	number
	decimal
	true
	false
	table_start
	table_end
	identifier
	comma
	newline
	equal
	dot
	angle_bracket_left
	angle_bracket_right
	keyword
	operator
	operator_len2
	operator_len3
}

// Tokenizer embeds Scanner (cursor, frame stack, matching primitives) and
// layers token recognition on top of it.
pub struct Tokenizer {
	Scanner
}

// next returns the byte at the current position and advances past it,
// or `none` when the input is exhausted.
fn (mut t Tokenizer) next() ?u8 {
	pos := t.cur_pos()
	// FIX: was `pos > t.input.len`, which still admitted pos == input.len and
	// relied solely on end_of_input() to prevent an out-of-bounds read on
	// `t.input[pos]` below. `>=` makes this guard protect the index directly.
	if t.end_of_input() || pos >= t.input.len {
		return none
	}
	c := t.input[pos]
	t.advance(1) // 1 char
	return c
}

// parse_all consumes leading whitespace and returns the next token,
// dispatching to the specialized parser for strings, numbers, identifiers
// and operators. Returns an `.eof` token at end of input.
pub fn (mut t Tokenizer) parse_all() !Token {
	t.skip_ws()
	start_index := t.push_frame()!
	c := t.any() or { return Token{start_index, .eof} }
	match true {
		c == `"` {
			// Put the quote back and let parse_str consume the whole literal.
			t.rollback()
			t.commit_frame()
			return t.parse_str()
		}
		c in '0123456789'.bytes() {
			t.rollback()
			t.commit_frame()
			return t.parse_num()
		}
		c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'.bytes() {
			t.rollback()
			t.commit_frame()
			return t.parse_id()
		}
		c == `{` {
			return Token{start_index, .table_start}
		}
		c == `}` {
			return Token{start_index, .table_end}
		}
		c == `[` {
			return Token{start_index, .angle_bracket_left}
		}
		c == `]` {
			return Token{start_index, .angle_bracket_right}
		}
		c == `\n` { // LF
			return Token{start_index, .newline}
		}
		else {
			// FIX: CRLF handling was gated behind `$if windows`, a
			// compile-time check on the HOST platform. Line endings are a
			// property of the INPUT (a file written on Windows can be read
			// anywhere), and an unhandled `\r` previously fell through to
			// parse_op and raised "invalid operator ''". Handle CRLF
			// unconditionally at runtime instead.
			if c == `\r` && t.term(`\n`) { // CRLF
				return Token{start_index, .newline}
			}
			t.rollback()
			t.commit_frame()
			return t.parse_op()
		}
	}
}

// parse_op greedily consumes a run of operator characters and classifies
// the run by its exact text. Unknown runs are a syntax error.
fn (mut t Tokenizer) parse_op() !Token {
	start_index := t.push_frame()!
	mut s := []u8{len: 0, cap: 32, init: 0}
	for t.one_of('+-*/%^<>=~#.$,?') {
		s << t.last()!
	}
	match s.bytestr() {
		'+', '-', '*', '/', '%', '^', '#', '=' {
			return Token{start_index, .operator}
		}
		'==', '~=', '<=', '>=', '<', '>', '..' {
			return Token{start_index, .operator_len2}
		}
		'...' {
			return Token{start_index, .operator_len3}
		}
		'.' {
			return Token{start_index, .dot}
		}
		',' {
			return Token{start_index, .comma}
		}
		else {
			return t.syntax_error("invalid operator '${s.bytestr()}'")
		}
	}
}

// parse_id consumes an identifier ([A-Za-z_][A-Za-z0-9_]*) and maps the
// reserved words `nil`/`true`/`false` and the entries of constants.keywords
// to their dedicated token types; everything else is `.identifier`.
fn (mut t Tokenizer) parse_id() !Token {
	start_index := t.push_frame()!
	mut s := []u8{len: 0, cap: 32, init: 0}
	// First character must be a letter or underscore.
	if t.range(`a`, `z`) || t.range(`A`, `Z`) || t.term(`_`) {
		s << t.last() or { return t.syntax_error('invalid identifier') }
	} else {
		return t.syntax_error('invalid identifier')
	}
	// Subsequent characters may also be digits.
	for t.range(`a`, `z`) || t.range(`A`, `Z`) || t.range(`0`, `9`) || t.term(`_`) {
		s << t.last() or { break }
	}
	return match s.bytestr() {
		'nil' {
			Token{start_index, .nil}
		}
		'true' {
			Token{start_index, .true}
		}
		'false' {
			Token{start_index, .false}
		}
		else {
			if s.bytestr() in constants.keywords {
				return Token{start_index, .keyword}
			}
			return Token{start_index, .identifier}
		}
	}
}

// parse_num consumes an integer or decimal literal. A literal must start
// with a digit; at most one '.' is allowed and it must be followed by at
// least one digit. The tag distinguishes `.number` from `.decimal`.
fn (mut t Tokenizer) parse_num() !Token {
	start_index := t.push_frame()!
	mut s := []u8{len: 0, cap: 32, init: 0}
	if t.one_of('0123456789') {
		s << t.last()!
	} else {
		return t.syntax_error('invalid number')
	}
	mut is_decimal := false
	loop: for {
		if t.one_of('0123456789') {
			s << t.last() or { break loop }
			continue
		}
		if t.term(`.`) {
			// A second '.' inside one literal is malformed.
			if is_decimal {
				return t.syntax_error('invalid number')
			}
			s << `.`
			// The '.' must be followed by a digit.
			// NOTE(review): this means a number immediately followed by the
			// `..` operator (e.g. `1..2`) is rejected here, since the first
			// `.` is already consumed — confirm this is intended.
			if !t.one_of('0123456789') {
				return t.syntax_error('invalid number')
			}
			is_decimal = true
			s << t.last() or { break loop }
			continue
		}
		break
	}
	// FIX: removed the dead `if s.len == 0` check — the mandatory leading
	// digit above guarantees `s` holds at least one byte by this point.
	return Token{
		start: start_index
		tag: if is_decimal { .decimal } else { .number }
	}
}

// parse_str consumes a double-quoted string literal. Backslash escapes one
// character (so `\"` does not terminate the string); reaching end of input
// before the closing quote is a syntax error.
fn (mut t Tokenizer) parse_str() !Token {
	start_index := t.push_frame()!
	if !t.term(`"`) {
		return t.syntax_error('invalid string')
	}
	for !t.end_of_input() {
		if t.term(`"`) {
			return Token{start_index, .string}
		}
		if t.term(`\\`) {
			// Skip the escaped character so an escaped quote does not end
			// the literal.
			// TODO: validate the escape sequence instead of skipping blindly.
			t.advance(1)
			continue
		}
		t.advance(1) // 1 char
	}
	return t.syntax_error('uncompleted string literal')
}