module scanner

// ===== Scanner =====

// Scanner walks over `input` byte by byte while keeping a stack of positions
// ("frames"). A frame is opened with `push_frame`, abandoned with `pop_frame`
// and merged into its parent with `commit_frame`.
pub struct Scanner {
pub:
	input string // text being scanned
pub mut:
	max_pos u32 // highest position ever reached through `advance`
	stack   []u32 = []u32{len: 256, cap: 256, init: 0} // one position per frame
	frame   u32 // index of the active frame inside `stack`
mut:
	lc int = 1 // line counter, reported as `line` by `syntax_error`
	rc int // counter reported as `row`; grows in `advance`, reset by `skip_ws` on a line break
}

// cur_pos returns the position stored in the active frame.
pub fn (mut s Scanner) cur_pos() u32 {
	return s.stack[s.frame]
}

// advance moves the active frame forward by `delta` positions, adds `delta`
// to the row counter and raises `max_pos` if the new position exceeds it.
pub fn (mut s Scanner) advance(delta u32) {
	s.rc += int(delta)
	s.stack[s.frame] += delta
	if s.max_pos < s.stack[s.frame] {
		s.max_pos = s.stack[s.frame]
	}
}

// push_frame opens a new frame that starts at the position of the current one
// and returns that position. The stack grows by 50% when it is full.
// Returns an error when the frame index exceeds the input length.
@[manualfree]
pub fn (mut s Scanner) push_frame() !u32 {
	s.frame += 1
	if s.frame == s.stack.len {
		new_size := s.stack.len + (s.stack.len >> 1)
		mut new_stack := []u32{len: new_size, cap: new_size, init: 0}
		// Copy every existing frame position element by element; the old
		// stack holds exactly `s.stack.len` valid entries at this point.
		for i, pos in s.stack {
			new_stack[i] = pos
		}
		unsafe { s.stack.free() }
		s.stack = new_stack
	}
	// NOTE(review): the frame index is compared against the input length and
	// the frame stays incremented on this error path - confirm this is intended.
	if s.frame > s.input.len {
		return error('Buffer too small')
	}
	s.stack[s.frame] = s.stack[s.frame - 1]
	return s.cur_pos()
}

// pop_frame drops the active frame and returns to its parent.
// It does nothing when already at the bottom frame.
pub fn (mut s Scanner) pop_frame() {
	if s.frame >= 1 {
		s.frame -= 1
	}
}

// commit_frame closes the active frame and copies its position into the
// parent frame. It does nothing when already at the bottom frame.
pub fn (mut s Scanner) commit_frame() {
	if s.frame == 0 {
		// No parent to commit into; `s.frame -= 1` would wrap around.
		return
	}
	s.frame -= 1
	s.stack[s.frame] = s.stack[s.frame + 1]
}

// free releases the frame stack.
@[unsafe]
pub fn (mut s Scanner) free() {
	s.stack.free()
}

// reset frees the frame stack and restores the counters, the frame index,
// `max_pos` and a fresh 256-entry stack. `input` is left untouched.
@[manualfree]
pub fn (mut s Scanner) reset() {
	unsafe { s.stack.free() }
	s.lc = 1
	s.rc = 0
	s.frame = 0
	s.max_pos = 0
	s.stack = []u32{len: 256, cap: 256, init: 0}
}

// SyntaxError is an error carrying the scanner counters and a message.
pub struct SyntaxError {
	Error
	line int
	row  int
	m    string
}

// msg formats the error message together with its line and row.
pub fn (err SyntaxError) msg() string {
	return "SyntaxError: ${err.m} -- at line ${err.line}-${err.row}"
}

// syntax_error builds a SyntaxError with message `m` from the current
// line and row counters.
pub fn (mut s Scanner) syntax_error(m string) SyntaxError {
	return SyntaxError{
		line: s.lc
		row:  s.rc
		m:    m
	}
}

// rollback resets the active frame to its parent's position, or to 0 for the
// bottom frame.
pub fn (mut s Scanner) rollback() {
	// do not touch
	s.stack[s.frame] = if s.frame > 0 { s.stack[s.frame - 1] } else { 0 }
}

// ===== Char matching =====

// end_of_input reports whether the current position is at or past the end
// of the input.
pub fn (mut s Scanner) end_of_input() bool {
	return s.cur_pos() >= s.input.len
}

// term consumes the current byte if it equals `c`.
// Returns true when a byte was consumed.
pub fn (mut s Scanner) term(c u8) bool {
	if s.end_of_input() || s.input[s.cur_pos()] != c {
		return false
	}
	s.advance(1) // 1 char
	return true
}

// one_of consumes the current byte if it is one of the bytes of `str`.
// Returns true when a byte was consumed.
pub fn (mut s Scanner) one_of(str string) bool {
	if s.end_of_input() || s.input[s.cur_pos()] !in str.bytes() {
		return false
	}
	s.advance(1) // 1 char
	return true
}

// none_of consumes the current byte if it is not one of the bytes of `str`.
// Returns true when a byte was consumed.
pub fn (mut s Scanner) none_of(str string) bool {
	if s.end_of_input() || s.input[s.cur_pos()] in str.bytes() {
		return false
	}
	s.advance(1) // 1 char
	return true
}

// range consumes the current byte if it lies in the inclusive range
// `low`..`high`. Returns true when a byte was consumed.
pub fn (mut s Scanner) range(low u8, high u8) bool {
	if s.end_of_input() {
		return false
	}
	c := s.input[s.cur_pos()]
	if !(low <= c && c <= high) {
		return false
	}
	s.advance(1) // 1 char
	return true
}

// ===== Token extraction =====

// save_stash returns the input between `from` and the current position.
pub fn (mut s Scanner) save_stash(from u32) string {
	return s.input[from..s.cur_pos()]
}

// skip_ws consumes spaces, tabs, CR and LF, updating the line and row
// counters on every line break.
pub fn (mut s Scanner) skip_ws() {
	for !s.end_of_input() {
		$if windows {
			if s.input[s.cur_pos()] == u8(13) { // eg: WINDOWS ONLY
				s.lc += 1
				s.rc = 0 // reset rows
				s.advance(1) // skip CR
				if !s.end_of_input() && s.input[s.cur_pos()] == u8(10) {
					// The LF belongs to the same CRLF line break: consume it
					// here so the LF branch below does not count a second line.
					s.rc = 0 // reset rows
					s.advance(1) // skip LF
				}
				continue
			}
		}
		if s.input[s.cur_pos()] == u8(10) { // eg: LINUX ONLY
			s.lc += 1
			s.rc = 0 // reset rows
		}
		if s.input[s.cur_pos()] in ' \r\n\t'.bytes() {
			s.advance(1)
			continue
		}
		break
	}
}

// last returns the byte just before the current position.
// Returns an error when the position is past the end of the input or when
// nothing has been consumed yet.
pub fn (mut s Scanner) last() !u8 {
	pos := s.cur_pos()
	if pos > s.input.len {
		return error('Buffer too small')
	}
	if pos == 0 {
		// `pos - 1` would wrap around on an unsigned value.
		return error('No character consumed yet')
	}
	return s.input[pos - 1]
}

// pred consumes the current byte if `func` returns true for it.
// Returns true when a byte was consumed.
pub fn (mut s Scanner) pred(func fn (u8) bool) bool {
	if s.end_of_input() || !func(s.input[s.cur_pos()]) {
		return false
	}
	s.advance(1) // 1 char
	return true
}

// many consumes `str` if the input at the current position starts with it.
// Returns true when the whole string was consumed.
pub fn (mut s Scanner) many(str string) bool {
	if s.input.len < s.cur_pos() + u32(str.len) {
		return false
	}
	if str != s.input[s.cur_pos()..s.cur_pos() + u32(str.len)] {
		return false
	}
	s.advance(u32(str.len))
	return true
}

// any consumes and returns the current byte, or `none` at the end of input.
pub fn (mut s Scanner) any() ?u8 {
	if s.end_of_input() {
		return none
	}
	c := s.input[s.cur_pos()]
	s.advance(1) // 1 char
	return c
}