vua/scanner.v
2025-06-24 03:00:44 -05:00

207 lines
3.8 KiB
V

module scanner
// ===== Scanner =====
// Scanner walks over `input` byte by byte while keeping a stack of positions
// ("frames") so that callers can open, commit, or abandon nested attempts.
pub struct Scanner {
pub:
// The text being scanned; publicly readable, not publicly mutable.
input string
pub mut:
// Highest position that `advance` has ever reached.
max_pos u32
// One position per frame; starts with 256 zero-initialised slots.
stack []u32 = []u32{len: 256, cap: 256, init: 0}
// Index into `stack` of the frame currently in use.
frame u32
mut:
// Line counter, starts at 1; reported as `line` in SyntaxError.
lc int = 1
// Counter of characters advanced since the last reset; reported as `row` in SyntaxError.
rc int
}
// cur_pos returns the position stored in the frame currently in use.
pub fn (mut s Scanner) cur_pos() u32 {
	pos := s.stack[s.frame]
	return pos
}
// advance moves the current frame's position forward by `delta`, adds the same
// amount to the column counter, and raises `max_pos` when the new position is
// the furthest one reached so far.
pub fn (mut s Scanner) advance(delta u32) {
	s.rc += int(delta)
	s.stack[s.frame] += delta
	reached := s.stack[s.frame]
	if reached > s.max_pos {
		s.max_pos = reached
	}
}
// push_frame opens a new frame that starts at the position of the frame below
// it and returns that position.
//
// When the new frame index reaches the end of the frame stack, the stack is
// replaced by one that is 50% larger and the old one is freed manually.
//
// Returns the error 'Buffer too small' when the frame index exceeds the input
// length. Note that `frame` has already been incremented when that happens.
@[manualfree]
pub fn (mut s Scanner) push_frame() !u32 {
	s.frame += 1
	if s.frame == s.stack.len {
		old_len := s.stack.len
		new_size := old_len + (old_len >> 1)
		new_stack := []u32{len: new_size, cap: new_size, init: 0}
		// Copy every existing frame slot. vmemcpy works on raw pointers and a
		// byte count, so pass the arrays' data pointers and scale the OLD
		// length by the element size; the tail of new_stack stays zeroed.
		unsafe { vmemcpy(new_stack.data, s.stack.data, old_len * int(sizeof(u32))) }
		unsafe { s.stack.free() }
		s.stack = new_stack
	}
	if s.frame > s.input.len {
		return error('Buffer too small')
	}
	s.stack[s.frame] = s.stack[s.frame - 1]
	return s.cur_pos()
}
// pop_frame drops the current frame and returns to the one below it.
// Calling it on the bottom frame (index 0) does nothing.
pub fn (mut s Scanner) pop_frame() {
	if s.frame < 1 {
		return
	}
	s.frame -= 1
}
// commit_frame closes the current frame and copies its position into the
// frame below, so the progress made inside the frame is kept.
//
// Calling it on the bottom frame (index 0) does nothing, matching pop_frame;
// without the guard the unsigned frame index would wrap around and the stack
// access would go out of range.
pub fn (mut s Scanner) commit_frame() {
	if s.frame == 0 {
		return
	}
	s.frame -= 1
	s.stack[s.frame] = s.stack[s.frame + 1]
}
// free releases the memory held by the frame stack.
// The scanner's `stack` must be reassigned (see `reset`) before further use.
@[unsafe]
pub fn (mut s Scanner) free() {
	unsafe {
		s.stack.free()
	}
}
// reset returns the scanner to its initial state: line counter 1, column
// counter 0, bottom frame, no furthest position, and a fresh 256-slot frame
// stack. The previous stack is freed manually. `input` is left unchanged.
@[manualfree]
pub fn (mut s Scanner) reset() {
	// Plain counters first; none of them depend on the stack.
	s.max_pos = 0
	s.frame = 0
	s.rc = 0
	s.lc = 1
	// Swap the frame stack for a zeroed one of the default size.
	unsafe { s.stack.free() }
	s.stack = []u32{len: 256, cap: 256, init: 0}
}
// SyntaxError is an error value that carries a message together with the
// line and row at which it was raised.
pub struct SyntaxError {
// Embedded base error type.
Error
// Line number shown in the message.
line int
// Row (column) number shown in the message.
row int
// Human-readable description of the problem.
m string
}
// msg renders the error as text: the description followed by the line and
// row, separated by a dash.
pub fn (err SyntaxError) msg() string {
	text := "SyntaxError: ${err.m} -- at line ${err.line}-${err.row}"
	return text
}
// syntax_error builds a SyntaxError with message `m`, stamped with the
// scanner's current line and column counters.
pub fn (mut s Scanner) syntax_error(m string) SyntaxError {
	err := SyntaxError{
		m:    m
		line: s.lc
		row:  s.rc
	}
	return err
}
// rollback rewinds the current frame to where it started: the position of the
// frame below it, or 0 when the current frame is the bottom one.
// The frame itself stays open; only its stored position changes.
pub fn (mut s Scanner) rollback() {
// do not touch
s.stack[s.frame] = if s.frame > 0 { s.stack[s.frame - 1] } else { 0 }
}
// ===== Char matching =====
// end_of_input reports whether the current position is at or past the end of
// `input`.
pub fn (mut s Scanner) end_of_input() bool {
	pos := s.cur_pos()
	return pos >= s.input.len
}
// term consumes one byte when it equals `c`.
// Returns true when the byte was consumed, false at end of input or on a
// mismatch (the position is left unchanged in that case).
pub fn (mut s Scanner) term(c u8) bool {
	if s.end_of_input() {
		return false
	}
	if s.input[s.cur_pos()] == c {
		s.advance(1)
		return true
	}
	return false
}
// one_of consumes one byte when it is one of the bytes of `str`.
// Returns true when the byte was consumed, false at end of input or when the
// byte does not occur in `str` (the position is left unchanged in that case).
pub fn (mut s Scanner) one_of(str string) bool {
	if s.end_of_input() {
		return false
	}
	// contains_u8 searches the string in place; converting it with bytes()
	// would allocate a new array on every call.
	if !str.contains_u8(s.input[s.cur_pos()]) {
		return false
	}
	s.advance(1)
	return true
}
// none_of consumes one byte when it is NOT one of the bytes of `str`.
// Returns true when the byte was consumed, false at end of input or when the
// byte occurs in `str` (the position is left unchanged in that case).
pub fn (mut s Scanner) none_of(str string) bool {
	if s.end_of_input() {
		return false
	}
	// contains_u8 searches the string in place; converting it with bytes()
	// would allocate a new array on every call.
	if str.contains_u8(s.input[s.cur_pos()]) {
		return false
	}
	s.advance(1)
	return true
}
// range consumes one byte when it lies in the inclusive interval
// [`low`, `high`].
// Returns true when the byte was consumed, false at end of input or when the
// byte is outside the interval (the position is left unchanged in that case).
pub fn (mut s Scanner) range(low u8, high u8) bool {
	if s.end_of_input() {
		return false
	}
	ch := s.input[s.cur_pos()]
	if ch < low || ch > high {
		return false
	}
	s.advance(1)
	return true
}
// ===== Token extraction =====
// save_stash returns the part of `input` that starts at `from` and ends just
// before the current position.
pub fn (mut s Scanner) save_stash(from u32) string {
	until := s.cur_pos()
	return s.input[from..until]
}
// skip_ws consumes spaces, tabs, carriage returns and line feeds until a
// different byte or the end of input is reached, keeping the line and column
// counters up to date.
//
// A line feed always starts a new line. On Windows builds a carriage return
// starts a new line too, and a CR immediately followed by LF is treated as a
// single line break instead of being counted twice.
pub fn (mut s Scanner) skip_ws() {
	for !s.end_of_input() {
		c := s.input[s.cur_pos()]
		$if windows {
			if c == u8(13) {
				s.lc += 1
				s.advance(1) // skip CR
				if !s.end_of_input() && s.input[s.cur_pos()] == u8(10) {
					s.advance(1) // LF of the same CRLF pair: do not count a second line
				}
				// Same column value the LF path below leaves behind
				// (reset to 0, then advanced by one).
				s.rc = 1
				continue
			}
		}
		if c == u8(10) {
			s.lc += 1
			s.rc = 0 // the advance below moves it to 1
		}
		// space, tab, CR, LF - compared directly so no byte array is
		// allocated on each iteration
		if c == u8(32) || c == u8(9) || c == u8(13) || c == u8(10) {
			s.advance(1)
			continue
		}
		break
	}
}
// last returns the byte just before the current position without moving.
//
// Returns the error 'No previous character' when the current position is 0
// (subtracting one from the unsigned position would wrap around and index out
// of range), and 'Buffer too small' when the position lies beyond the end of
// `input`.
pub fn (mut s Scanner) last() !u8 {
	pos := s.cur_pos()
	if pos == 0 {
		return error('No previous character')
	}
	if pos > s.input.len {
		return error('Buffer too small')
	}
	return s.input[pos - 1]
}
// pred consumes one byte when `func` returns true for it.
// Returns true when the byte was consumed, false at end of input or when
// `func` rejects the byte (the position is left unchanged in that case).
// `func` is not called at end of input.
pub fn (mut s Scanner) pred(func fn (u8) bool) bool {
	if s.end_of_input() {
		return false
	}
	if func(s.input[s.cur_pos()]) {
		s.advance(1)
		return true
	}
	return false
}
// many consumes the whole of `str` when the input at the current position
// starts with it.
// Returns true when `str` was consumed, false when the remaining input is
// shorter than `str` or differs from it (the position is left unchanged in
// that case).
pub fn (mut s Scanner) many(str string) bool {
	start := s.cur_pos()
	end := start + u32(str.len)
	if s.input.len < end {
		return false
	}
	// Compare against the `str` argument, not the scanner itself.
	if s.input[start..end] != str {
		return false
	}
	s.advance(u32(str.len))
	return true
}
// any consumes and returns the next byte, whatever it is.
// Returns `none` at end of input.
pub fn (mut s Scanner) any() ?u8 {
	if !s.end_of_input() {
		ch := s.input[s.cur_pos()]
		s.advance(1)
		return ch
	}
	return none
}