233 lines
4.0 KiB
V
233 lines
4.0 KiB
V
module token
|
|
import scanner { Scanner }
|
|
import constants
|
|
|
|
// ===== Tokenizer =====
|
|
|
|
// Token is a compact lexeme record: the byte offset where the lexeme starts
// in the input plus its classification tag. Packed so it occupies 5 bytes
// (u32 + u8); the lexeme text itself is recovered from the input via `start`.
@[packed]
pub struct Token {
pub:
	// byte offset into the tokenizer input where this token begins
	start u32
	// classification of the token (see TokenType)
	tag TokenType
}
|
|
|
|
// TokenType tags every token the tokenizer can produce. Backed by u8 so a
// packed Token stays 5 bytes. Do not reorder variants: their numeric values
// are part of the packed Token layout.
pub enum TokenType as u8 {
	eof // end of input
	nil // `nil` literal
	string // double-quoted string literal
	number // integer literal
	decimal // number literal containing a fractional part
	true // `true` literal
	false // `false` literal
	table_start // `{`
	table_end // `}`
	identifier // plain identifier: [A-Za-z_][A-Za-z0-9_]*
	comma // `,`
	newline // LF (CRLF is collapsed to one newline on windows builds)
	equal // NOTE(review): not produced by parse_op in this file ('=' is tagged .operator) — confirm producer
	dot // a lone `.`
	angle_bracket_left // `[` — NOTE(review): name says "angle" but parse_all emits this for square brackets
	angle_bracket_right // `]`
	keyword // identifier present in constants.keywords
	operator // single-char operator: + - * / % ^ # =
	operator_len2 // two-char operator: == ~= <= >= .. (also `<` and `>`, despite the name)
	operator_len3 // three-char operator: ...
}
|
|
|
|
// Tokenizer embeds Scanner, inheriting its cursor and frame primitives
// (cur_pos, advance, push_frame, rollback, commit_frame, term, one_of, ...).
// It adds the token-level parsing methods below.
pub struct Tokenizer {
	Scanner
}
|
|
|
|
// next returns the byte at the current scanner position and advances the
// cursor by one, or none when the input is exhausted.
fn (mut t Tokenizer) next() ?u8 {
	pos := t.cur_pos()
	// Bound check must be `>=`, not `>`: with `>` a pos equal to input.len
	// would slip through and `t.input[pos]` would index out of bounds
	// whenever end_of_input() disagrees with cur_pos().
	if t.end_of_input() || pos >= t.input.len {
		return none
	}
	c := t.input[pos]
	t.advance(1) // 1 char
	return c
}
|
|
|
|
// parse_all reads the next token from the input, skipping leading
// whitespace first. Returns an .eof token when the input is exhausted.
//
// Dispatch peeks a single byte with any(): single-byte tokens are returned
// directly, while multi-byte tokens (strings, numbers, identifiers,
// operators) roll the peeked byte back and commit the frame so the
// dedicated parser re-reads the token from its first byte.
pub fn (mut t Tokenizer) parse_all() !Token {
	t.skip_ws()

	start_index := t.push_frame()!

	// peek the first significant byte; none left means end of input
	c := t.any() or {
		return Token{ start_index, .eof }
	}

	match true {
		c == `"` {
			// un-consume the quote and hand off to the string parser
			t.rollback()
			t.commit_frame()
			return t.parse_str()
		}
		c in "0123456789".bytes() {
			// un-consume the digit and hand off to the number parser
			t.rollback()
			t.commit_frame()
			return t.parse_num()
		}
		c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".bytes() {
			// un-consume the letter/underscore and hand off to the identifier parser
			t.rollback()
			t.commit_frame()
			return t.parse_id()
		}
		c == `{` {
			return Token{ start_index, .table_start }
		}
		c == `}` {
			return Token{ start_index, .table_end }
		}
		c == `[` {
			// NOTE(review): square brackets are tagged angle_bracket_* — misnomer
			return Token{ start_index, .angle_bracket_left }
		}
		c == `]` {
			return Token{ start_index, .angle_bracket_right }
		}
		c == 10 { // LF
			return Token{ start_index, .newline }
		}
		else {
			$if windows {
				// CR followed by LF collapses into a single newline token
				if c == 13 && t.term(10) { // CRLF
					return Token{ start_index, .newline }
				}
			}

			// anything else: re-read from the start as an operator
			t.rollback()
			t.commit_frame()
			return t.parse_op()
		}
	}
}
|
|
|
|
// parse_op greedily consumes a run of operator characters and classifies
// the resulting string as a dot, comma, or 1/2/3-char operator token.
// Anything not in the recognized sets is a syntax error.
fn (mut t Tokenizer) parse_op() !Token {
	start_index := t.push_frame()!

	// collect every consecutive operator character
	mut chars := []u8{len: 0, cap: 32, init: 0}
	for t.one_of("+-*/%^<>=~#.$,?") {
		chars << t.last()!
	}

	op := chars.bytestr()
	if op == "." {
		return Token{ start_index, .dot }
	}
	if op == "," {
		return Token{ start_index, .comma }
	}
	if op in ["+", "-", "*", "/", "%", "^", "#", "="] {
		return Token{ start_index, .operator }
	}
	// NOTE(review): `<` and `>` are single chars but tagged .operator_len2,
	// matching the original classification
	if op in ["==", "~=", "<=", ">=", "<", ">", ".."] {
		return Token{ start_index, .operator_len2 }
	}
	if op == "..." {
		return Token{ start_index, .operator_len3 }
	}
	return t.syntax_error("invalid operator '${op}'")
}
|
|
|
|
// parse_id consumes an identifier ([A-Za-z_][A-Za-z0-9_]*) and classifies it
// as a nil/true/false literal, a language keyword (constants.keywords), or a
// plain identifier. Errors if the first character is not a letter/underscore.
fn (mut t Tokenizer) parse_id() !Token {
	start_index := t.push_frame()!

	mut s := []u8{len: 0, cap: 32, init: 0}

	// first char: letter or underscore only (no leading digit)
	if t.range(`a`, `z`) || t.range(`A`, `Z`) || t.term(`_`) {
		s << t.last() or { return t.syntax_error("invalid identifier") }
	} else {
		return t.syntax_error("invalid identifier")
	}

	// remaining chars: letters, digits, underscores
	for t.range(`a`, `z`) || t.range(`A`, `Z`) || t.range(`0`, `9`) || t.term(`_`) {
		s << t.last() or { break }
	}

	// convert once: the original called s.bytestr() twice (match scrutinee
	// and keyword lookup), re-encoding the buffer each time
	word := s.bytestr()
	return match word {
		"nil" { Token{ start_index, .nil } }
		"true" { Token{ start_index, .true } }
		"false" { Token{ start_index, .false } }
		else {
			if word in constants.keywords {
				return Token{ start_index, .keyword }
			}
			return Token{ start_index, .identifier }
		}
	}
}
|
|
|
|
// parse_num consumes an integer or decimal literal and returns .number or
// .decimal. A decimal is digits followed by exactly one `.` that must itself
// be followed by at least one digit — `1.` and `1.2.3` are rejected.
fn (mut t Tokenizer) parse_num() !Token {
	start_index := t.push_frame()!

	mut s := []u8{len: 0, cap: 32, init: 0}

	// a number must start with a digit
	if t.one_of("0123456789") {
		s << t.last()!
	} else {
		return t.syntax_error("invalid number")
	}

	mut is_decimal := false

	loop:
	for {
		// keep consuming digits
		if t.one_of("0123456789") {
			s << t.last() or { break loop }
			continue
		}

		if t.term(`.`) {
			// a second dot in the same literal is malformed
			if is_decimal {
				return t.syntax_error("invalid number")
			}
			s << `.`

			// the dot must be followed by at least one digit
			if !t.one_of("0123456789") {
				return t.syntax_error("invalid number")
			}

			is_decimal = true
			s << t.last() or { break loop }

			continue
		}
		break
	}

	// s always holds at least the leading digit here (the first branch
	// either pushed one or returned an error), so the original
	// `if s.len == 0` guard was unreachable and has been removed.
	return Token{
		start: start_index,
		tag: if is_decimal { .decimal } else { .number },
	}
}
|
|
|
|
// parse_str consumes a double-quoted string literal including the closing
// quote and returns a .string token. A backslash causes the following byte
// to be skipped (so `\"` does not terminate the string); the escape is not
// validated or decoded here. Errors if the opening quote is missing or the
// input ends before a closing quote is found.
fn (mut t Tokenizer) parse_str() !Token {
	start_index := t.push_frame()!

	if !t.term(`"`) {
		return t.syntax_error("invalid string")
	}

	for !t.end_of_input() {
		if t.term(`"`) {
			return Token{ start_index, .string }
		}
		if t.term(`\\`) {
			// skip the byte after a backslash so an escaped quote
			// cannot close the string
			// TODO: improve this — escapes are skipped blindly, not decoded
			t.advance(1)
			continue
		}
		t.advance(1) // 1 char
	}

	return t.syntax_error("uncompleted string literal")
}
|