Duvet 2025-06-24 03:00:44 -05:00
parent 32823fed27
commit 4ef77a52e0
11 changed files with 710 additions and 642 deletions

ast.v | 278

@@ -1,102 +1,230 @@
module ast
import tokenizer { Token, VuaToken }
import constants { Primitives }
import intern { InternPool }
import token as _ { Tokenizer, Token, TokenType }
struct Nil {}
struct String {
contents string
pub struct VuaNil {}
pub struct VuaBool {
value bool
}
pub struct VuaNumber {
value string
}
pub struct VuaString {
value string // string interning
}
pub struct VuaTable {
keys []u32 // string interning
values []u32 // index in all tables
}
pub struct VuaFunction {
name u32 // string interning
args []u32 // string interning
body []Token // slice of tokens representing the function body
}
struct Number {
contents string
pub type VuaValue =
VuaNil
| VuaBool
| VuaNumber
| VuaString
| VuaTable
| VuaFunction
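For orientation, a hedged sketch of how a V sum type like this is typically consumed: `match` narrows the value to one variant per arm. The `describe` helper is illustrative only, not part of the commit.

fn describe(v VuaValue) string {
	return match v {
		VuaNil { 'nil' }
		VuaBool { 'bool(${v.value})' }
		VuaNumber { 'number(${v.value})' }
		VuaString { 'string("${v.value}")' }
		VuaTable { 'table(${v.keys.len} keys)' }
		VuaFunction { 'function(name id ${v.name}, ${v.args.len} args)' } // name is an interned-string id
	}
}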
@[heap]
pub struct Environment {
mut:
bools []VuaBool
ints []VuaNumber
decimals []VuaNumber
strings []VuaString
tables []VuaTable
functions map[string]VuaFunction
str_pool &InternPool = &InternPool{}
pub mut:
types map[string]Type = {
"string" : TypeAlias{"string", .string},
"number" : TypeAlias{"number", .number},
"decimal" : TypeAlias{"decimal", .decimal},
"bool" : TypeAlias{"bool", .bool},
"table" : TypeAlias{"table", .table},
"function": TypeAlias{"function", .function},
}
vars map[string]Var
}
struct Boolean {
value bool
}
/// index for bools, ints, decimals, strings, etc
pub type EnvironmentIndex = int
@[packed]
struct TableFlags {
array bool
object bool
enum bool
class bool
struct bool
}
struct Table {
flag TableFlags
data []Variable
}
struct Class {
properties []VariableDef
methods []Function
}
struct Function {
name string
params []VariableDef
body []Token
}
struct FunctionRef {
index u32
}
struct Lambda {
params []VariableDef
body []Token
scope ?FunctionRef
}
struct VariableDef {
name string
type VuaTaggedType
}
struct Variable {
// maybe this name should be a null-terminated string
pub struct Var {
name string
value Expr
typ string
value EnvironmentIndex
}
struct Expr {
index u32
type VuaTaggedType
pub struct TypeAlias {
name string
alias Primitives
}
enum VuaTaggedType as u8 {
nil
string
number
boolean
table
function
pub fn (t TypeAlias) str() string {
return t.name
}
pub struct StructDefinition {
name string
fields map[string]EnvironmentIndex
}
type TokenIndex = u32
pub struct UnionDefinition {
name string
fields map[string]EnvironmentIndex
}
pub type Type =
TypeAlias
| StructDefinition
| UnionDefinition
@[heap]
struct Nodes {
pub struct Parser {
input string
mut:
stack []Token
frame u32
max_pos u32
pub mut:
env &Environment
}
@[heap]
struct Tokens {
/// advances once
pub fn (mut p Parser) next() ?Token {
panic("TODO")
}
struct AST {
tokens Tokens
nodes Nodes
pos int
/// rollback the parser to the previous token
pub fn (mut p Parser) rollback() {
panic("TODO")
}
union NodeData {
/// extracts the string from the token's starting position; may require re-parsing
pub fn (mut p Parser) save_stash(start u32) string {
panic("TODO")
}
struct Node {
token_type VuaToken
data NodeData
/// expects a string and a token type to match against; if either check fails, the whole call fails
pub fn (mut p Parser) expect(s string, tag TokenType) ?(string, Token) {
panic("TODO")
}
/// any token of type .keyword, returns the extracted keyword
pub fn (mut p Parser) keyword() ?string {
panic("TODO")
}
/// any token of type .identifier, returns the extracted identifier
pub fn (mut p Parser) identifier() ?string {
panic("TODO")
}
pub fn (mut p Parser) expr() !VuaValue {
if token := p.next() {
match token.tag {
.identifier {
id := p.save_stash(token.start)
if var := p.env.vars[id] {
eprintln(var)
} else {
return error("identifier error")
}
panic("invalid code path")
}
.nil, .true, .false, .number, .decimal, .string {
return error("expression error")
}
.keyword {
p.rollback()
return p.keyword_expr()
}
else {
p.rollback()
return error("unsupported type")
}
}
}
return error("impossible")
}
pub fn (mut p Parser) keyword_expr() !VuaValue {
keyword := p.keyword() or {
return error("invalid keyword")
}
match keyword {
"local" {
lhs := p.identifier() or {
return error("invalid identifier")
}
type_name := p.identifier()
if type_name == none {
p.rollback()
}
p.expect("=", .operator) or {
return error("invalid assignment")
}
rhs := p.next() or {
return error("invalid right hand side of assignment")
}
match rhs.tag {
.number, .decimal, .string, .true, .false, .nil {
p.env.vars[p.env.str_pool.intern(lhs)] = Var{
name: lhs,
typ: match rhs.tag {
.true, .false { "bool" }
else { rhs.tag.str() }
},
value: int(rhs.start) // EnvironmentIndex is an int; token start is u32
}
match rhs.tag {
.true {
vbool := p.input[rhs.start..rhs.start + 4]
assert vbool == "true"
return VuaValue(VuaBool{true})
}
.false {
vbool := p.input[rhs.start..rhs.start + 5]
assert vbool == "false"
return VuaValue(VuaBool{false})
}
.number, .decimal {
vnum := p.save_stash(rhs.start)
return VuaValue(VuaNumber{vnum})
}
.string {
// might be impossible with tokens pre-allocated
vstr := p.save_stash(rhs.start)
// dirty trick: strip the surrounding quotes
return VuaValue(VuaString{vstr[1..vstr.len-1]})
}
.nil { return VuaValue(VuaNil{}) }
else { return error("failed rhs inference") }
}
}
else {
return error("invalid rhs type")
}
}
}
else {
return error("unsupported keyword")
}
}
panic('No expression found')
}

ast_test.v (new file) | 19

@@ -0,0 +1,19 @@
module ast
fn test_expr() {
println('Testing expression parsing...')
mut p := &Parser{
Tokenizer: Tokenizer{
input: 'local hi string = "hola"'
}
env: &Environment{}
}
expr := p.expr() or {
assert false, 'Expression parsing failed: $err'
return
}
println('Parsed expression: $expr')
}


@@ -1,5 +1,25 @@
module constants
pub enum Primitives as u8 {
nil
bool
number
decimal
string
table
function
}
pub const primitives = [
'nil',
'bool',
'number',
'decimal',
'string',
'table',
'function'
]
pub const keywords = [
'and',
'assert',

intern.v (new file) | 23

@@ -0,0 +1,23 @@
module intern
@[heap]
pub struct InternPool {
pub mut:
strings map[string]string
}
pub fn (mut ip InternPool) intern(s string) string {
if s in ip.strings {
return ip.strings[s]
}
ip.strings[s] = s
return s
}
pub fn (mut ip InternPool) count() int {
return ip.strings.len
}
pub fn (mut ip InternPool) clear() {
ip.strings.clear()
}
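A minimal usage sketch for the pool (assumed, not part of the commit): duplicate interns return the stored copy, so count() tracks distinct strings only.

fn intern_pool_demo() {
	mut ip := InternPool{}
	a := ip.intern('local') // first occurrence gets stored
	b := ip.intern('local') // duplicate returns the stored copy
	assert a == b
	assert ip.count() == 1 // only one distinct string so far
	ip.clear()
	assert ip.count() == 0
}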

main.v | 6

@@ -5,5 +5,9 @@ fn main() {
repl_instance := repl.Repl{}
println(repl_instance.welcome_msg)
println('--------------')
repl_instance.loop()!
g:
repl_instance.loop() or {
eprintln('Error in REPL loop: $err')
unsafe { goto g } // Retry the loop on error
}
}
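For comparison, the same retry behaviour can be expressed without `goto` (a sketch, not the commit's code; V allows `continue`/`break` inside `or` blocks):

for {
	repl_instance.loop() or {
		eprintln('Error in REPL loop: ${err}')
		continue // retry on error
	}
	break // loop() finished without error
}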


@@ -1,100 +0,0 @@
module multiarray
struct MultiArray[T] {
mut:
len int
cap int
tags map[int]string
fields map[string]MultiArrayField
}
struct MultiArrayField {
data voidptr
}
fn MultiArray.new[T](len int, cap int, init T) MultiArray[T] {
mut result := MultiArray[T]{len: len, cap: cap}
$for field in T.fields {
result.fields[field.name + 's'] = MultiArrayField{
data: unsafe { vcalloc(u64(cap) * sizeof(T)) }
}
}
return result
}
fn (mut ma MultiArray[T]) add[T](table string, item T) {
if ma.len >= ma.cap {
unsafe { ma.grow() }
}
$for field in T.fields {
if field.name == table {
ma.tags[ma.len] = table
ma.len++
}
field_name := field.name + 's'
mut field_data_ptr := unsafe { ma.fields[field_name].data }
unsafe { vmemcpy(&u8(field_data_ptr) + sizeof(T) * u64(ma.len), item, sizeof(T)) }
}
}
@[unsafe]
fn (mut ma MultiArray[T]) grow() {
new_cap := if ma.cap == 0 { 1 } else { ma.cap * 2 }
$for field in T.fields {
field_name := field.name + 's'
old_data := &T(ma.fields[field_name].data)
new_data := vcalloc(u64(new_cap) * sizeof(T))
vmemcpy(new_data, old_data, u64(ma.len) * sizeof(T))
ma.fields[field_name] = MultiArrayField{ data: new_data }
}
ma.cap = new_cap
}
fn (ma MultiArray[T]) get(index int) ?T {
if index < 0 || index >= ma.len {
return none
}
mut result := T{}
$for field in T.fields {
field_name := field.name + 's'
field_data_ptr := unsafe { ma.fields[field_name].data }
unsafe { vmemcpy(&result, &u8(field_data_ptr) + sizeof(T) * u64(index), sizeof(T)) }
}
return result
}
@[unsafe]
fn (mut ma MultiArray[T]) free() {
$for field in T.fields {
field_name := field.name + 's'
if ma.fields[field_name].data != nil {
free( ma.fields[field_name].data )
ma.fields[field_name] = MultiArrayField{nil}
}
}
ma.len = 0; ma.cap = 0
}
fn (mut ma MultiArray[T]) iter() Iterator[T] {
return Iterator[T]{ma,0}
}
struct Iterator[T] {
mut:
ma MultiArray[T]
i int
}
struct IteratorEntry[T] {
tag string
data T
}
fn (mut it Iterator[T]) next() ?IteratorEntry[T] {
defer { it.i++ }
if it.i >= it.ma.len {
return none
}
val := it.ma.get(it.i) or {
return none
}
return IteratorEntry[T]{it.ma.tags[it.i],val}
}


@@ -1,31 +0,0 @@
module multiarray
union SoA {
nil nil = unsafe { nil }
int int
bool bool
}
fn test_basics() {
mut arr := MultiArray.new(0, 10, SoA{})
// test: add an int
arr.add('int' , SoA{ int: 42 })
arr.add('int' , SoA{ int: 43 })
arr.add('int' , SoA{ int: 44 })
arr.add('bool', SoA{ bool: true })
arr.add('nil' , SoA{})
arr.add('nil' , SoA{})
arr.add('nil' , SoA{})
arr.add('nil' , SoA{})
it := arr.iter()
for item in it {
println('Iterating over MultiArray[${item.tag}]:')
if item.tag == 'int' {
println('${item.tag}s > ${unsafe{item.data.int}}')
}
}
println('Created MultiArray with len: $arr.len, cap: $arr.cap')
}

repl.v | 4

@@ -1,6 +1,6 @@
module repl
import tokenizer as _ { Token, Tokenizer, VuaToken }
import token as _ { Token, Tokenizer, TokenType }
import readline { read_line }
pub struct Repl {
@@ -61,7 +61,7 @@ pub fn (r Repl) eval(ast []Token, input string) string {
.dot { s << '.' }
.angle_bracket_left { s << '[' }
.angle_bracket_right { s << ']' }
.reserved_keyword { s << 'reserved_keyword' }
.keyword { s << 'keyword' }
.operator, .operator_len2, .operator_len3{ s << 'operator' }
}
}

scanner.v (new file) | 206

@@ -0,0 +1,206 @@
module scanner
// ===== Scanner =====
pub struct Scanner {
pub:
input string
pub mut:
max_pos u32
stack []u32 = []u32{len: 256, cap: 256, init: 0}
frame u32
mut:
lc int = 1
rc int
}
pub fn (mut s Scanner) cur_pos() u32 {
return s.stack[s.frame]
}
pub fn (mut s Scanner) advance(delta u32) {
s.rc += int(delta)
s.stack[s.frame] += delta
if s.max_pos < s.stack[s.frame] { s.max_pos = s.stack[s.frame] }
}
@[manualfree]
pub fn (mut s Scanner) push_frame() !u32 {
s.frame += 1
if s.frame == s.stack.len {
new_size := s.stack.len + (s.stack.len >> 1)
new_stack := []u32{len: new_size, cap: new_size, init:0}
unsafe { vmemcpy(new_stack.data, s.stack.data, u64(s.stack.len) * sizeof(u32)) } // copy all existing frames (bytes, not elements)
unsafe { s.stack.free() }
s.stack = new_stack
}
if s.frame > s.input.len {
return error('Buffer too small')
}
s.stack[s.frame] = s.stack[s.frame - 1]
return s.cur_pos()
}
pub fn (mut s Scanner) pop_frame() {
if s.frame >= 1 { s.frame -= 1 }
}
pub fn (mut s Scanner) commit_frame() {
s.frame -= 1
s.stack[s.frame] = s.stack[s.frame + 1]
}
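How the frame stack is meant to be used (a sketch under the assumption that callers pair every push with a pop or commit; `try_keyword` is hypothetical and relies on the `many` matcher defined below):

fn try_keyword(mut s Scanner, kw string) bool {
	s.push_frame() or { return false } // snapshot the current position
	if s.many(kw) {
		s.commit_frame() // keep the consumed input in the parent frame
		return true
	}
	s.pop_frame() // backtrack: discard the speculative advance
	return false
}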
@[unsafe]
pub fn (mut s Scanner) free() {
s.stack.free()
}
@[manualfree]
pub fn (mut s Scanner) reset() {
unsafe { s.stack.free() }
s.lc = 1
s.rc = 0
s.frame = 0
s.max_pos = 0
s.stack = []u32{len: 256, cap: 256, init: 0}
}
pub struct SyntaxError {
Error
line int
row int
m string
}
pub fn (err SyntaxError) msg() string {
return "SyntaxError: ${err.m} -- at line ${err.line}-${err.row}"
}
pub fn (mut s Scanner) syntax_error(m string) SyntaxError {
return SyntaxError{line: s.lc, row: s.rc, m: m}
}
pub fn (mut s Scanner) rollback() {
// do not touch
s.stack[s.frame] = if s.frame > 0 { s.stack[s.frame - 1] } else { 0 }
}
// ===== Char matching =====
pub fn (mut s Scanner) end_of_input() bool {
return s.cur_pos() >= s.input.len
}
pub fn (mut s Scanner) term(c u8) bool {
if s.end_of_input() || s.input[s.cur_pos()] != c {
return false
}
s.advance(1) // 1 char
return true
}
pub fn (mut s Scanner) one_of(str string) bool {
if s.end_of_input() || s.input[s.cur_pos()] !in str.bytes() {
return false
}
s.advance(1) // 1 char
return true
}
pub fn (mut s Scanner) none_of(str string) bool {
if s.end_of_input() || s.input[s.cur_pos()] in str.bytes() {
return false
}
s.advance(1) // 1 char
return true
}
pub fn (mut s Scanner) range(low u8, high u8) bool {
if s.end_of_input() {
return false
}
c := s.input[s.cur_pos()]
if !(low <= c && c <= high) {
return false
}
s.advance(1) // 1 char
return true
}
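These matchers compose into small hand-written recognizers; for instance, a hexadecimal scanner might look like this (assumed example, not in the commit):

fn scan_hex(mut s Scanner) bool {
	// expects "0x" or "0X" followed by at least one hex digit;
	// a real caller would wrap this in push_frame/pop_frame to backtrack on failure
	if !s.term(`0`) || !s.one_of('xX') {
		return false
	}
	mut digits := 0
	for s.range(`0`, `9`) || s.range(`a`, `f`) || s.range(`A`, `F`) {
		digits++
	}
	return digits > 0
}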
// ===== Token extraction =====
pub fn (mut s Scanner) save_stash(from u32) string {
return s.input[from..s.cur_pos()]
}
pub fn (mut s Scanner) skip_ws() {
for !s.end_of_input() {
$if windows {
if s.input[s.cur_pos()] == u8(13) { // eg: WINDOWS ONLY
s.lc += 1
s.rc = 0 // reset rows
s.advance(1) // skip CR
if s.end_of_input() || s.input[s.cur_pos()] != u8(10) { // lone CR; a following LF falls through and is consumed below
continue
}
}
}
if s.input[s.cur_pos()] == u8(10) { // eg: LINUX ONLY
s.lc += 1
s.rc = 0 // reset rows
}
if s.input[s.cur_pos()] in ' \r\n\t'.bytes() {
s.advance(1)
continue
}
break
}
}
pub fn (mut s Scanner) last() !u8 {
if s.stack[s.frame] > s.input.len {
return error('Buffer too small')
}
return s.input[s.cur_pos() - 1]
}
pub fn (mut s Scanner) pred(func fn (u8) bool) bool {
if s.end_of_input() || !func(s.input[s.cur_pos()]) {
return false
}
s.advance(1) // 1 char
return true
}
pub fn (mut s Scanner) many(str string) bool {
if s.input.len < s.cur_pos() + u32(str.len) {
return false
}
if str != s.input[s.cur_pos() .. s.cur_pos() + u32(str.len)] {
return false
}
s.advance(u32(str.len))
return true
}
pub fn (mut s Scanner) any() ?u8 {
if s.end_of_input() {
return none
}
c := s.input[s.cur_pos()]
s.advance(1) // 1 char
return c
}

token.v (new file) | 232

@@ -0,0 +1,232 @@
module token
import scanner { Scanner }
import constants
// ===== Tokenizer =====
@[packed]
pub struct Token {
pub:
start u32
tag TokenType
}
pub enum TokenType as u8 {
eof
nil
string
number
decimal
true
false
table_start
table_end
identifier
comma
newline
equal
dot
angle_bracket_left
angle_bracket_right
keyword
operator
operator_len2
operator_len3
}
pub struct Tokenizer {
Scanner
}
fn (mut t Tokenizer) next() ?u8 {
pos := t.cur_pos()
if t.end_of_input() || pos > t.input.len {
return none
}
c := t.input[pos]
t.advance(1) // 1 char
return c
}
pub fn (mut t Tokenizer) parse_all() !Token {
t.skip_ws()
start_index := t.push_frame()!
c := t.any() or {
return Token{ start_index, .eof }
}
match true {
c == `"` {
t.rollback()
t.commit_frame()
return t.parse_str()
}
c in "0123456789".bytes() {
t.rollback()
t.commit_frame()
return t.parse_num()
}
c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".bytes() {
t.rollback()
t.commit_frame()
return t.parse_id()
}
c == `{` {
return Token{ start_index, .table_start }
}
c == `}` {
return Token{ start_index, .table_end }
}
c == `[` {
return Token{ start_index, .angle_bracket_left }
}
c == `]` {
return Token{ start_index, .angle_bracket_right }
}
c == 10 { // LF
return Token{ start_index, .newline }
}
else {
$if windows {
if c == 13 && t.term(10) { // CRLF
return Token{ start_index, .newline }
}
}
t.rollback()
t.commit_frame()
return t.parse_op()
}
}
}
fn (mut t Tokenizer) parse_op() !Token {
start_index := t.push_frame()!
mut s := []u8{len: 0, cap: 32, init: 0}
for t.one_of("+-*/%^<>=~#.$,?") {
s << t.last()!
}
match s.bytestr() {
"+", "-", "*", "/", "%", "^", "#", "=" {
return Token{ start_index, .operator }
}
"==", "~=", "<=", ">=", "<", ">", ".." {
return Token{ start_index, .operator_len2 }
}
"..." {
return Token{ start_index, .operator_len3 }
}
"." {
return Token{ start_index, .dot }
}
"," {
return Token{ start_index, .comma }
}
else {
return t.syntax_error("invalid operator '${s.bytestr()}'")
}
}
}
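The operator buckets can be checked end to end with a tiny driver (hedged sketch; `op_demo` is not part of the commit):

fn op_demo() ! {
	mut t := Tokenizer{
		Scanner: Scanner{
			input: '== ... ,'
		}
	}
	assert t.parse_all()!.tag == .operator_len2 // "=="
	assert t.parse_all()!.tag == .operator_len3 // "..."
	assert t.parse_all()!.tag == .comma
}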
fn (mut t Tokenizer) parse_id() !Token {
start_index := t.push_frame()!
mut s := []u8{len: 0, cap: 32, init: 0}
if t.range(`a`, `z`) || t.range(`A`, `Z`) || t.term(`_`) {
s << t.last() or { return t.syntax_error("invalid identifier") }
} else {
return t.syntax_error("invalid identifier")
}
for t.range(`a`, `z`) || t.range(`A`, `Z`) || t.range(`0`, `9`) || t.term(`_`) {
s << t.last() or { break }
}
return match s.bytestr() {
"nil" { Token{ start_index, .nil } }
"true" { Token{ start_index, .true } }
"false" { Token{ start_index, .false } }
else {
if s.bytestr() in constants.keywords {
return Token{ start_index, .keyword }
}
return Token{ start_index, .identifier }
}
}
}
fn (mut t Tokenizer) parse_num() !Token {
start_index := t.push_frame()!
mut s := []u8{len: 0, cap: 32, init: 0}
if t.one_of("0123456789") {
s << t.last()!
} else {
return t.syntax_error("invalid number")
}
mut is_decimal := false
loop:
for {
if t.one_of("0123456789") {
s << t.last() or { break loop }
continue
}
if t.term(`.`) {
if is_decimal {
return t.syntax_error("invalid number")
}
s << `.`
if !t.one_of("0123456789") {
return t.syntax_error("invalid number")
}
is_decimal = true
s << t.last() or { break loop }
continue
}
break
}
if s.len == 0 {
return t.syntax_error("invalid number")
}
return Token{
start: start_index,
tag: if is_decimal { .decimal } else { .number },
}
}
fn (mut t Tokenizer) parse_str() !Token {
start_index := t.push_frame()!
if !t.term(`"`) {
return t.syntax_error("invalid string")
}
for !t.end_of_input() {
if t.term(`"`) {
return Token{ start_index, .string }
}
if t.term(`\\`) {
// skip the escaped character (TODO: decode escape sequences properly)
t.advance(1)
continue
}
t.advance(1) // 1 char
}
return t.syntax_error("uncompleted string literal")
}
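Putting the pieces together, a hedged driver that tokenizes a whole input (an assumed helper; the commit itself wires the tokenizer through Repl and Parser instead):

pub fn tokenize_all(input string) ![]Token {
	mut t := Tokenizer{
		Scanner: Scanner{
			input: input
		}
	}
	mut toks := []Token{}
	for {
		// note: parse_all pushes a frame per token, so very dense inputs
		// can exhaust the frame stack before reaching eof
		tok := t.parse_all()!
		if tok.tag == .eof {
			break
		}
		toks << tok
	}
	return toks
}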


@@ -1,433 +0,0 @@
module tokenizer
import constants
@[packed]
pub struct Token {
pub:
start u32
tag VuaToken
}
pub enum VuaToken {
eof
nil
string
number
decimal
true
false
table_start
table_end
identifier
comma
newline
equal
dot
angle_bracket_left
angle_bracket_right
reserved_keyword
operator
operator_len2
operator_len3
}
pub struct Parser {
pub:
input string
pub mut:
max_pos u32
stack []u32 = []u32{len: 256, cap: 256, init: 0}
frame u32
mut:
lc int = 1
rc int
}
fn (mut p Parser) cur_pos() u32 {
return p.stack[p.frame]
}
fn (mut p Parser) advance(delta u32) {
p.rc += int(delta)
p.stack[p.frame] += delta;
if p.max_pos < p.stack[p.frame] { p.max_pos = p.stack[p.frame] }
}
@[manualfree]
fn (mut p Parser) push_frame() !u32 {
p.frame += 1
if p.frame == p.stack.len {
new_size := p.stack.len + (p.stack.len >> 1)
new_stack := []u32{len: new_size, cap: new_size, init:0}
unsafe { vmemcpy(new_stack[0..p.max_pos], p.stack[0..p.max_pos], new_stack.len) }
unsafe { p.stack.free() }
p.stack = new_stack
}
if p.frame > p.input.len {
return error('Buffer too small')
}
p.stack[p.frame] = p.stack[p.frame - 1]
return p.cur_pos()
}
fn (mut p Parser) pop_frame() {
if p.frame >= 1 { p.frame -= 1 }
}
fn (mut p Parser) commit_frame() {
p.frame -= 1
p.stack[p.frame] = p.stack[p.frame + 1];
}
@[unsafe]
fn (mut p Parser) free() {
p.stack.free()
}
struct SyntaxError {
Error
line int
row int
m string
}
fn (err SyntaxError) msg() string {
return "SyntaxError: ${err.m} -- at line ${err.line}-${err.row}"
}
fn (mut p Parser) syntax_error(m string) SyntaxError {
return SyntaxError{
line: p.lc,
row: p.rc,
m: m,
}
}
fn (mut p Parser) rollback() {
p.stack[p.frame] = if p.frame == 0 { u32(0) } else { p.stack[p.frame - 1] }
}
// ===== Char matching =====
pub fn (mut p Parser) end_of_input() bool {
return p.cur_pos() >= p.input.len
}
fn (mut p Parser) term(c u8) bool {
if p.end_of_input() || p.input[p.cur_pos()] != c {
return false
}
p.advance(1) // 1 char
return true
}
fn (mut p Parser) one_of(s string) bool {
if p.end_of_input() || p.input[p.cur_pos()] !in s.bytes() {
return false
}
p.advance(1) // 1 char
return true
}
fn (mut p Parser) none_of(s string) bool {
if p.end_of_input() || p.input[p.cur_pos()] in s.bytes() {
return false
}
p.advance(1) // 1 char
return true
}
fn (mut p Parser) range(low u8, high u8) bool {
if p.end_of_input() {
return false
}
c := p.input[p.cur_pos()]
if !(low <= c && c <= high) {
return false
}
p.advance(1) // 1 char
return true
}
// ===== Token extraction =====
fn (mut p Parser) save_stash(from u32) string {
return p.input[from..p.cur_pos()]
}
fn (mut p Parser) skip_ws() {
for !p.end_of_input() {
$if windows {
if p.input[p.cur_pos()] == u8(13) { // eg: WINDOWS ONLY
p.lc += 1
p.rc = 0 // reset rows
p.advance(1) // skip CR
if p.end_of_input() || p.input[p.cur_pos()] != u8(10) { // skip LF if present
continue
}
}
}
if p.input[p.cur_pos()] == u8(10) { // eg: LINUX ONLY
p.lc += 1
p.rc = 0 // reset rows
}
if p.input[p.cur_pos()] in ' \r\n\t'.bytes() {
p.advance(1)
continue
}
break
}
}
fn (mut p Parser) last() !u8 {
if p.stack[p.frame] > p.input.len {
return error('Buffer too small')
}
return p.input[p.cur_pos() - 1]
}
fn (mut p Parser) pred(func fn (u8) bool) bool {
if p.end_of_input() || !func(p.input[p.cur_pos()]) {
return false
}
p.advance(1) // 1 char
return true
}
fn (mut p Parser) many(s string) bool {
if p.input.len < p.cur_pos() + u32(s.len) {
return false
}
if s != p.input[p.cur_pos() .. p.cur_pos() + u32(s.len)] {
return false
}
p.advance(u32(s.len))
return true
}
fn (mut p Parser) any() ?u8 {
if p.end_of_input() {
return none
}
c := p.input[p.cur_pos()]
p.advance(1) // 1 char
return c
}
// ===== Tokenizer =====
pub struct Tokenizer {
Parser
}
fn (mut t Tokenizer) next() ?u8 {
pos := t.cur_pos()
if t.end_of_input() || pos > t.input.len {
return none
}
c := t.input[pos]
t.advance(1) // 1 char
return c
}
pub fn (mut t Tokenizer) parse_all() !Token {
t.skip_ws()
start_index := t.push_frame()!
c := t.any() or { return Token{ start_index, .eof } }
print(c.ascii_str())
match true {
c == `"` {
t.rollback()
t.commit_frame()
return t.parse_str()
}
c in "0123456789".bytes() {
t.rollback()
t.commit_frame()
return t.parse_num()
}
c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".bytes() {
t.rollback()
t.commit_frame()
return t.parse_id()
}
c == `{` {
return Token{ start_index, .table_start }
}
c == `}` {
return Token{ start_index, .table_end }
}
c == `[` {
return Token{ start_index, .angle_bracket_left }
}
c == `]` {
return Token{ start_index, .angle_bracket_right }
}
c == 10 { // LF
return Token{ start_index, .newline }
}
else {
$if windows {
if c == 13 && t.term(10) { // CRLF
return Token{ start_index, .newline }
}
}
t.rollback()
t.commit_frame()
return t.parse_op()
}
}
}
fn (mut t Tokenizer) parse_op() !Token {
start_index := t.push_frame()!
mut s := []u8{len: 0, cap: 32, init: 0}
for t.one_of("+-*/%^<>=~#") {
s << t.last()!
}
match s.bytestr() {
"+", "-", "*", "/", "%", "^", "#", "=" {
return Token{ start_index, .operator }
}
"==", "~=", "<=", ">=", "<", ">", ".." {
return Token{ start_index, .operator_len2 }
}
"..." {
return Token{ start_index, .operator_len3 }
}
"." {
return Token{ start_index, .dot }
}
"," {
return Token{ start_index, .comma }
}
else {
return t.syntax_error("invalid operator '${s.bytestr()}'")
}
}
}
fn (mut t Tokenizer) parse_id() !Token {
start_index := t.push_frame()!
mut s := []u8{len: 0, cap: 32, init: 0}
if t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.term('_'[0]) {
s << t.last() or { return t.syntax_error("invalid identifier") }
} else {
return t.syntax_error("invalid identifier")
}
for t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.range('0'[0], '9'[0]) || t.term('_'[0]) {
s << t.last() or { break }
}
return match s.bytestr() {
"true" { Token{ start_index, .true } }
"false" { Token{ start_index, .false } }
"nil" { Token{ start_index, .nil } }
else {
if s.bytestr() in constants.keywords {
return Token{start_index,.reserved_keyword}
}
return Token{start_index,.identifier}
}
}
}
fn (mut t Tokenizer) parse_num() !Token {
start_index := t.push_frame()!
mut s := []u8{len: 0, cap: 32, init: 0}
if t.one_of("0123456789") {
s << t.last()!
} else {
return t.syntax_error("invalid number")
}
mut is_decimal := false
loop:
for {
if t.one_of("0123456789") {
s << t.last() or { break loop }
continue
}
if t.term('.'[0]) {
if is_decimal {
return t.syntax_error("invalid number")
}
s << '.'[0]
if !t.one_of("0123456789") {
return t.syntax_error("invalid number")
}
is_decimal = true
s << t.last() or { break loop }
continue
}
break
}
if s.len == 0 {
return t.syntax_error("invalid number")
}
return Token{
start: start_index,
tag: if is_decimal { .decimal } else { .number },
}
}
fn (mut t Tokenizer) parse_str() !Token {
start_index := t.push_frame()!
mut s := []u8{len: 0, cap: 4096, init: 0}
ok := t.term('"'[0])
if !ok {
return t.syntax_error("invalid string")
}
for c in t {
s << c
}
c := t.last() or {
return t.syntax_error("unclosed string")
}
match c {
`"` {
return Token{ start_index, .string }
}
else {
s << c
}
}
return Token{ start_index, .eof }
}