diff --git a/ast.v b/ast.v
index 8666781..fddc19e 100644
--- a/ast.v
+++ b/ast.v
@@ -1,102 +1,228 @@
 module ast
-import tokenizer { Token, VuaToken }
+import constants { Primitives }
+import intern { InternPool }
+import token as _ { Tokenizer, Token, TokenType }
 
-struct Nil {}
-
-struct String {
-	contents string
+pub struct VuaNil {}
+pub struct VuaBool {
+	value bool
+}
+pub struct VuaNumber {
+	value string
+}
+pub struct VuaString {
+	value string // interned string contents
+}
+pub struct VuaTable {
+	keys   []u32 // string interning
+	values []u32 // index in all tables
+}
+pub struct VuaFunction {
+	name u32     // string interning
+	args []u32   // string interning
+	body []Token // slice of tokens representing the function body
 }
 
-struct Number {
-	contents string
+pub type VuaValue =
+	VuaNil
+	| VuaBool
+	| VuaNumber
+	| VuaString
+	| VuaTable
+	| VuaFunction
+
+@[heap]
+pub struct Environment {
+mut:
+	bools     []VuaBool
+	ints      []VuaNumber
+	decimals  []VuaNumber
+	strings   []VuaString
+	tables    []VuaTable
+	functions map[string]VuaFunction
+	str_pool  &InternPool = &InternPool{}
+pub mut:
+	types map[string]Type = {
+		"string":   TypeAlias{"string", .string},
+		"number":   TypeAlias{"number", .number},
+		"decimal":  TypeAlias{"decimal", .decimal},
+		"bool":     TypeAlias{"bool", .bool},
+		"table":    TypeAlias{"table", .table},
+		"function": TypeAlias{"function", .function},
+	}
+	vars map[string]Var
 }
 
-struct Boolean {
-	value bool
-}
+/// index for bools, ints, decimals, strings, etc
+pub type EnvironmentIndex = int
 
-@[packed]
-struct TableFlags {
-	array bool
-	object bool
-	enum bool
-	class bool
-	struct bool
-}
-
-struct Table {
-	flag TableFlags
-	data []Variable
-}
-
-struct Class {
-	properties []VariableDef
-	methods []Function
-}
-
-struct Function {
-	name string
-	params []VariableDef
-	body []Token
-}
-
-struct FunctionRef {
-	index u32
-}
-
-struct Lambda {
-	params []VariableDef
-	body []Token
-	scope ?FunctionRef
-}
-
-struct VariableDef {
-	name string
-	type VuaTaggedType
-}
-
-struct Variable {
-	// maybe this name should be a null terminated string
+pub struct Var {
 	name string
-	value Expr
+	typ   string
+	value EnvironmentIndex
 }
 
-struct Expr {
-	index u32
-	type VuaTaggedType
+pub struct TypeAlias {
+	name  string
+	alias Primitives
 }
 
-enum VuaTaggedType as u8 {
-	nil
-	string
-	number
-	boolean
-	table
-	function
+pub fn (t TypeAlias) str() string {
+	return t.name
 }
 
+pub struct StructDefinition {
+	name   string
+	fields map[string]EnvironmentIndex
+}
 
-type TokenIndex = u32
+pub struct UnionDefinition {
+	name   string
+	fields map[string]EnvironmentIndex
+}
+
+pub type Type =
+	TypeAlias
+	| StructDefinition
+	| UnionDefinition
 
 @[heap]
-struct Nodes {
+pub struct Parser {
+	Tokenizer
+mut:
+	tokens []Token // parsed tokens, kept for rollback
+pub mut:
+	env &Environment
 }
 
-@[heap]
-struct Tokens {
+/// advances once
+pub fn (mut p Parser) next() ?Token {
+	panic("TODO")
 }
 
-struct AST {
-	tokens Tokens
-	nodes Nodes
-	pos int
+/// rollback the parser to the previous token
+pub fn (mut p Parser) rollback() {
+	panic("TODO")
 }
 
-union NodeData {
+/// extracts the string from the token's start position; may need re-parsing
+pub fn (mut p Parser) save_stash(start u32) string {
+	panic("TODO")
 }
 
-struct Node {
-	token_type VuaToken
-	data NodeData
+/// expect a string and a token type to match against; fails if either does not match
+pub fn (mut p Parser) expect(s string, tag TokenType) ?(string, Token) {
+	panic("TODO")
 }
 
+/// any token of type .keyword, returns the extracted keyword
+pub fn (mut p Parser) keyword() ?string {
+	panic("TODO")
+}
+
+/// any token of type .identifier, returns the extracted identifier
+pub fn (mut p Parser) identifier() ?string {
+	panic("TODO")
+}
+
+pub fn (mut p Parser) expr() !VuaValue {
+	if token := p.next() {
+		match token.tag {
+			.identifier {
+				id := p.save_stash(token.start)
+				if var := p.env.vars[id] {
+					eprintln(var)
+				} else {
+					return error("identifier error")
+				}
+				panic("invalid code path")
+			}
+			.nil, .true, .false, .number, .decimal, .string {
+				return error("expression error")
+			}
+			.keyword {
+				p.rollback()
+				return p.keyword_expr()
+			}
+			else {
+				p.rollback()
+				return error("unsupported type")
+			}
+		}
+	}
+
+	return error("impossible")
+}
+
+pub fn (mut p Parser) keyword_expr() !VuaValue {
+	keyword := p.keyword() or {
+		return error("invalid keyword")
+	}
+
+	match keyword {
+		"local" {
+			lhs := p.identifier() or {
+				return error("invalid identifier")
+			}
+
+			type_name := p.identifier()
+
+			if type_name == none {
+				p.rollback()
+			}
+
+			p.expect("=", .operator) or {
+				return error("invalid assignment")
+			}
+
+			rhs := p.next() or {
+				return error("invalid right hand side of assignment")
+			}
+
+			match rhs.tag {
+				.number, .decimal, .string, .true, .false, .nil {
+					p.env.vars[p.env.str_pool.intern(lhs)] = Var{
+						name: lhs,
+						typ: match rhs.tag {
+							.true, .false { "bool" }
+							else { rhs.tag.str() }
+						},
+						value: int(rhs.start)
+					}
+
+					match rhs.tag {
+						.true {
+							vbool := p.input[rhs.start..rhs.start + 4]
+							assert vbool == "true"
+							return VuaValue(VuaBool{true})
+						}
+						.false {
+							vbool := p.input[rhs.start..rhs.start + 5]
+							assert vbool == "false"
+							return VuaValue(VuaBool{false})
+						}
+						.number, .decimal {
+							vnum := p.save_stash(rhs.start)
+							return VuaValue(VuaNumber{vnum})
+						}
+						.string {
+							// may re-scan: tokens only store their start position
+							vstr := p.save_stash(rhs.start)
+							// strip the surrounding quotes
+							return VuaValue(VuaString{vstr[1..vstr.len - 1]})
+						}
+						.nil { return VuaValue(VuaNil{}) }
+						else { return error("failed rhs inference") }
+					}
+				}
+				else {
+					return error("invalid rhs type")
+				}
+			}
+		}
+		else {
+			return error("unsupported keyword")
+		}
+	}
+	panic('No expression found')
+}
diff --git a/ast_test.v b/ast_test.v
new file mode 100644
index 0000000..fc77c28
--- /dev/null
+++ b/ast_test.v
@@ -0,0 +1,19 @@
+module ast
+
+fn test_expr() {
+	println('Testing expression parsing...')
+
+	mut p := &Parser{
+		Tokenizer: Tokenizer{
+			input: 'local hi string = "hola"'
+		}
+		env: &Environment{}
+	}
+
+	expr := p.expr() or {
+		assert false, 'Expression parsing failed: $err'
+		return
+	}
+
+	println('Parsed expression: $expr')
+}
diff --git a/constants.v b/constants.v
index cf413c4..c6e2a24 100644
--- a/constants.v
+++ b/constants.v
@@ -1,5 +1,25 @@
 module constants
 
+pub enum Primitives as u8 {
+	nil
+	bool
+	number
+	decimal
+	string
+	table
+	function
+}
+
+pub const primitives = [
+	'nil',
+	'bool',
+	'number',
+	'decimal',
+	'string',
+	'table',
+	'function'
+]
+
 pub const keywords = [
 	'and',
 	'assert',
diff --git a/intern.v b/intern.v
new file mode 100644
index 0000000..73a8739
--- /dev/null
+++ b/intern.v
@@ -0,0 +1,23 @@
+module intern
+
+@[heap]
+pub struct InternPool {
+pub mut:
+	strings map[string]string
+}
+
+pub fn (mut ip InternPool) intern(s string) string {
+	if s in ip.strings {
+		return ip.strings[s]
+	}
+	ip.strings[s] = s
+	return s
+}
+
+pub fn (ip InternPool) count() int {
+	return ip.strings.len
+}
+
+pub fn (mut ip InternPool) clear() {
+	ip.strings.clear()
+}
diff --git a/main.v b/main.v
index cc1da6d..e7e5b91 100644
--- a/main.v
+++ b/main.v
@@ -5,5 +5,11 @@ fn main() {
 	repl_instance := repl.Repl{}
 	println(repl_instance.welcome_msg)
 	println('--------------')
-	repl_instance.loop()!
+	for {
+		repl_instance.loop() or {
+			eprintln('Error in REPL loop: $err')
+			continue // retry the loop on error
+		}
+		break
+	}
 }
diff --git a/multiarray.v b/multiarray.v
deleted file mode 100644
index 374bf0c..0000000
--- a/multiarray.v
+++ /dev/null
@@ -1,100 +0,0 @@
-module multiarray
-
-struct MultiArray[T] {
-mut:
-	len int
-	cap int
-	tags map[int]string
-	fields map[string]MultiArrayField
-}
-struct MultiArrayField {
-	data voidptr
-}
-
-fn MultiArray.new[T](len int, cap int, init T) MultiArray[T] {
-	mut result := MultiArray[T]{len: len, cap: cap}
-	$for field in T.fields {
-		result.fields[field.name + 's'] = MultiArrayField{
-			data: unsafe { vcalloc(u64(cap) * sizeof(T)) }
-		}
-	}
-	return result
-}
-
-fn (mut ma MultiArray[T]) add[T](table string, item T) {
-	if ma.len >= ma.cap {
-		unsafe { ma.grow() }
-	}
-	$for field in T.fields {
-		if field.name == table {
-			ma.tags[ma.len] = table
-			ma.len++
-		}
-		field_name := field.name + 's'
-		mut field_data_ptr := unsafe { ma.fields[field_name].data }
-		unsafe { vmemcpy(&u8(field_data_ptr) + sizeof(T) * u64(ma.len), item, sizeof(T)) }
-	}
-}
-
-@[unsafe]
-fn (mut ma MultiArray[T]) grow() {
-	new_cap := if ma.cap == 0 { 1 } else { ma.cap * 2 }
-	$for field in T.fields {
-		field_name := field.name + 's'
-		old_data := &T(ma.fields[field_name].data)
-		new_data := vcalloc(u64(new_cap) * sizeof(T))
-		vmemcpy(new_data, old_data, u64(ma.len) * sizeof(T))
-		ma.fields[field_name] = MultiArrayField{ data: new_data }
-	}
-	ma.cap = new_cap
-}
-
-fn (ma MultiArray[T]) get(index int) ?T {
-	if index < 0 || index >= ma.len {
-		return none
-	}
-	mut result := T{}
-	$for field in T.fields {
-		field_name := field.name + 's'
-		field_data_ptr := unsafe { ma.fields[field_name].data }
-		unsafe { vmemcpy(&result, &u8(field_data_ptr) + sizeof(T) * u64(index), sizeof(T)) }
-	}
-	return result
-}
-
-@[unsafe]
-fn (mut ma MultiArray[T]) free() {
-	$for field in T.fields {
-		field_name := field.name + 's'
-		if ma.fields[field_name].data != nil {
-			free( ma.fields[field_name].data )
-			ma.fields[field_name] = MultiArrayField{nil}
-		}
-	}
-	ma.len = 0; ma.cap = 0
-}
-
-fn (mut ma MultiArray[T]) iter() Iterator[T] {
-	return Iterator[T]{ma,0}
-}
-
-struct Iterator[T] {
-mut:
-	ma MultiArray[T]
-	i int
-}
-struct IteratorEntry[T] {
-	tag string
-	data T
-}
-
-fn (mut it Iterator[T]) next() ?IteratorEntry[T] {
-	defer { it.i++ }
-	if it.i >= it.ma.len {
-		return none
-	}
-	val := it.ma.get(it.i) or {
-		return none
-	}
-	return IteratorEntry[T]{it.ma.tags[it.i],val}
-}
diff --git a/multiarray_test.v b/multiarray_test.v
deleted file mode 100644
index ba7553a..0000000
--- a/multiarray_test.v
+++ /dev/null
@@ -1,31 +0,0 @@
-module multiarray
-
-union SoA {
-	nil nil = unsafe { nil }
-	int int
-	bool bool
-}
-
-fn test_basics() {
-	mut arr := MultiArray.new(0, 10, SoA{})
-
-	// test: add an int
-	arr.add('int' , SoA{ int: 42 })
-	arr.add('int' , SoA{ int: 43 })
-	arr.add('int' , SoA{ int: 44 })
-	arr.add('bool', SoA{ bool: true })
-	arr.add('nil' , SoA{})
-	arr.add('nil' , SoA{})
-	arr.add('nil' , SoA{})
-	arr.add('nil' , SoA{})
-
-	it := arr.iter()
-	for item in it {
-		println('Iterating over MultiArray[${item.tag}]:')
-		if item.tag == 'int' {
-			println('${item.tag}s > ${unsafe{item.data.int}}')
-		}
-	}
-
-	println('Created MultiArray with len: $arr.len, cap: $arr.cap')
-}
diff --git a/repl.v b/repl.v
index 4adb1e7..45411dd 100644
--- a/repl.v
+++ b/repl.v
@@ -1,6 +1,6 @@
 module repl
 
-import tokenizer as _ { Token, Tokenizer, VuaToken }
+import token as _ { Token, Tokenizer, TokenType }
 import readline { read_line }
 
 pub struct Repl {
@@ -61,7 +61,7 @@ pub fn (r Repl) eval(ast []Token, input string) string {
 			.dot { s << '.' }
 			.angle_bracket_left { s << '[' }
 			.angle_bracket_right { s << ']' }
-			.reserved_keyword { s << 'reserved_keyword' }
+			.keyword { s << 'keyword' }
 			.operator, .operator_len2, .operator_len3{ s << 'operator' }
 		}
 	}
diff --git a/scanner.v b/scanner.v
new file mode 100644
index 0000000..25cd252
--- /dev/null
+++ b/scanner.v
@@ -0,0 +1,206 @@
+module scanner
+
+// ===== Scanner =====
+
+pub struct Scanner {
+pub:
+	input string
+pub mut:
+	max_pos u32
+	stack []u32 = []u32{len: 256, cap: 256, init: 0}
+	frame u32
+mut:
+	lc int = 1
+	rc int
+}
+
+pub fn (mut s Scanner) cur_pos() u32 {
+	return s.stack[s.frame]
+}
+
+pub fn (mut s Scanner) advance(delta u32) {
+	s.rc += int(delta)
+	s.stack[s.frame] += delta
+	if s.max_pos < s.stack[s.frame] { s.max_pos = s.stack[s.frame] }
+}
+
+@[manualfree]
+pub fn (mut s Scanner) push_frame() !u32 {
+	s.frame += 1
+	if s.frame == s.stack.len {
+		new_size := s.stack.len + (s.stack.len >> 1)
+		new_stack := []u32{len: new_size, cap: new_size, init: 0}
+
+		unsafe { vmemcpy(new_stack.data, s.stack.data, s.stack.len * int(sizeof(u32))) } // copy the whole old stack; size is in bytes
+		unsafe { s.stack.free() }
+
+		s.stack = new_stack
+	}
+	if s.frame > s.input.len {
+		return error('Buffer too small')
+	}
+
+	s.stack[s.frame] = s.stack[s.frame - 1]
+	return s.cur_pos()
+}
+
+pub fn (mut s Scanner) pop_frame() {
+	if s.frame >= 1 { s.frame -= 1 }
+}
+
+pub fn (mut s Scanner) commit_frame() {
+	s.frame -= 1
+	s.stack[s.frame] = s.stack[s.frame + 1]
+}
+
+@[unsafe]
+pub fn (mut s Scanner) free() {
+	s.stack.free()
+}
+
+@[manualfree]
+pub fn (mut s Scanner) reset() {
+	unsafe { s.stack.free() }
+	s.lc = 1
+	s.rc = 0
+	s.frame = 0
+	s.max_pos = 0
+	s.stack = []u32{len: 256, cap: 256, init: 0}
+}
+
+pub struct SyntaxError {
+	Error
+	line int
+	row int
+	m string
+}
+
+pub fn (err SyntaxError) msg() string {
+	return "SyntaxError: ${err.m} -- at line ${err.line}:${err.row}"
+}
+
+pub fn (mut s Scanner) syntax_error(m string) SyntaxError {
+	return SyntaxError{line: s.lc, row: s.rc, m: m}
+}
+
+pub fn (mut s Scanner) rollback() {
+	// do not touch
+	s.stack[s.frame] = if s.frame > 0 { s.stack[s.frame - 1] } else { 0 }
+}
+
+// ===== Char matching =====
+
+pub fn (mut s Scanner) end_of_input() bool {
+	return s.cur_pos() >= s.input.len
+}
+
+pub fn (mut s Scanner) term(c u8) bool {
+	if s.end_of_input() || s.input[s.cur_pos()] != c {
+		return false
+	}
+
+	s.advance(1) // 1 char
+	return true
+}
+
+pub fn (mut s Scanner) one_of(str string) bool {
+	if s.end_of_input() || s.input[s.cur_pos()] !in str.bytes() {
+		return false
+	}
+	s.advance(1) // 1 char
+	return true
+}
+
+pub fn (mut s Scanner) none_of(str string) bool {
+	if s.end_of_input() || s.input[s.cur_pos()] in str.bytes() {
+		return false
+	}
+	s.advance(1) // 1 char
+	return true
+}
+
+pub fn (mut s Scanner) range(low u8, high u8) bool {
+	if s.end_of_input() {
+		return false
+	}
+	c := s.input[s.cur_pos()]
+
+	if !(low <= c && c <= high) {
+		return false
+	}
+
+	s.advance(1) // 1 char
+	return true
+}
+
+// ===== Token extraction =====
+
+pub fn (mut s Scanner) save_stash(from u32) string {
+	return s.input[from..s.cur_pos()]
+}
+
+pub fn (mut s Scanner) skip_ws() {
+	for !s.end_of_input() {
+		$if windows {
+			if s.input[s.cur_pos()] == u8(13) { // CR, Windows only
+				s.lc += 1
+				s.rc = 0 // reset rows
+				s.advance(1) // skip CR
+				if s.end_of_input() || s.input[s.cur_pos()] != u8(10) { // no LF after the CR
+					continue
+				}
+			}
+		}
+
+		if s.input[s.cur_pos()] == u8(10) { // LF, Unix
+			s.lc += 1
+			s.rc = 0 // reset rows
+		}
+
+		if s.input[s.cur_pos()] in ' \r\n\t'.bytes() {
+			s.advance(1)
+			continue
+		}
+
+		break
+	}
+}
+
+pub fn (mut s Scanner) last() !u8 {
+	if s.stack[s.frame] > s.input.len {
+		return error('Buffer too small')
+	}
+	return s.input[s.cur_pos() - 1]
+}
+
+pub fn (mut s Scanner) pred(func fn (u8) bool) bool {
+	if s.end_of_input() || !func(s.input[s.cur_pos()]) {
+		return false
+	}
+
+	s.advance(1) // 1 char
+	return true
+}
+
+pub fn (mut s Scanner) many(str string) bool {
+	if s.input.len < s.cur_pos() + u32(str.len) {
+		return false
+	}
+
+	if str != s.input[s.cur_pos() .. s.cur_pos() + u32(str.len)] {
+		return false
+	}
+
+	s.advance(u32(str.len))
+	return true
+}
+
+pub fn (mut s Scanner) any() ?u8 {
+	if s.end_of_input() {
+		return none
+	}
+	c := s.input[s.cur_pos()]
+	s.advance(1) // 1 char
+	return c
+}
+
diff --git a/token.v b/token.v
new file mode 100644
index 0000000..96370bd
--- /dev/null
+++ b/token.v
@@ -0,0 +1,232 @@
+module token
+import scanner { Scanner }
+import constants
+
+// ===== Tokenizer =====
+
+@[packed]
+pub struct Token {
+pub:
+	start u32
+	tag TokenType
+}
+
+pub enum TokenType as u8 {
+	eof
+	nil
+	string
+	number
+	decimal
+	true
+	false
+	table_start
+	table_end
+	identifier
+	comma
+	newline
+	equal
+	dot
+	angle_bracket_left
+	angle_bracket_right
+	keyword
+	operator
+	operator_len2
+	operator_len3
+}
+
+pub struct Tokenizer {
+	Scanner
+}
+
+fn (mut t Tokenizer) next() ?u8 {
+	pos := t.cur_pos()
+	if t.end_of_input() || pos > t.input.len {
+		return none
+	}
+	c := t.input[pos]
+	t.advance(1) // 1 char
+	return c
+}
+
+pub fn (mut t Tokenizer) parse_all() !Token {
+	t.skip_ws()
+
+	start_index := t.push_frame()!
+
+	c := t.any() or {
+		return Token{ start_index, .eof }
+	}
+
+	match true {
+		c == `"` {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_str()
+		}
+		c in "0123456789".bytes() {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_num()
+		}
+		c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".bytes() {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_id()
+		}
+		c == `{` {
+			return Token{ start_index, .table_start }
+		}
+		c == `}` {
+			return Token{ start_index, .table_end }
+		}
+		c == `[` {
+			return Token{ start_index, .angle_bracket_left }
+		}
+		c == `]` {
+			return Token{ start_index, .angle_bracket_right }
+		}
+		c == 10 { // LF
+			return Token{ start_index, .newline }
+		}
+		else {
+			$if windows {
+				if c == 13 && t.term(10) { // CRLF
+					return Token{ start_index, .newline }
+				}
+			}
+
+			t.rollback()
+			t.commit_frame()
+			return t.parse_op()
+		}
+	}
+}
+
+fn (mut t Tokenizer) parse_op() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+	for t.one_of("+-*/%^<>=~#.$,?") {
+		s << t.last()!
+	}
+
+	match s.bytestr() {
+		"+", "-", "*", "/", "%", "^", "#", "=" {
+			return Token{ start_index, .operator }
+		}
+		"==", "~=", "<=", ">=", "<", ">", ".." {
+			return Token{ start_index, .operator_len2 }
+		}
+		"..." {
+			return Token{ start_index, .operator_len3 }
+		}
+		"." {
+			return Token{ start_index, .dot }
+		}
+		"," {
+			return Token{ start_index, .comma }
+		}
+		else {
+			return t.syntax_error("invalid operator '${s.bytestr()}'")
+		}
+	}
+}
+
+fn (mut t Tokenizer) parse_id() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+
+	if t.range(`a`, `z`) || t.range(`A`, `Z`) || t.term(`_`) {
+		s << t.last() or { return t.syntax_error("invalid identifier") }
+	} else {
+		return t.syntax_error("invalid identifier")
+	}
+
+	for t.range(`a`, `z`) || t.range(`A`, `Z`) || t.range(`0`, `9`) || t.term(`_`) {
+		s << t.last() or { break }
+	}
+
+	return match s.bytestr() {
+		"nil" { Token{ start_index, .nil } }
+		"true" { Token{ start_index, .true } }
+		"false" { Token{ start_index, .false } }
+		else {
+			if s.bytestr() in constants.keywords {
+				return Token{ start_index, .keyword }
+			}
+			return Token{ start_index, .identifier }
+		}
+	}
+}
+
+fn (mut t Tokenizer) parse_num() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+
+	if t.one_of("0123456789") {
+		s << t.last()!
+	} else {
+		return t.syntax_error("invalid number")
+	}
+
+	mut is_decimal := false
+
+	loop:
+	for {
+		if t.one_of("0123456789") {
+			s << t.last() or { break loop }
+			continue
+		}
+
+		if t.term(`.`) {
+			if is_decimal {
+				return t.syntax_error("invalid number")
+			}
+			s << `.`
+
+			if !t.one_of("0123456789") {
+				return t.syntax_error("invalid number")
+			}
+
+			is_decimal = true
+			s << t.last() or { break loop }
+
+			continue
+		}
+		break
+	}
+
+	if s.len == 0 {
+		return t.syntax_error("invalid number")
+	}
+
+	return Token{
+		start: start_index,
+		tag: if is_decimal { .decimal } else { .number },
+	}
+}
+
+fn (mut t Tokenizer) parse_str() !Token {
+	start_index := t.push_frame()!
+
+	if !t.term(`"`) {
+		return t.syntax_error("invalid string")
+	}
+
+	for !t.end_of_input() {
+		if t.term(`"`) {
+			return Token{ start_index, .string }
+		}
+		if t.term(`\\`) {
+			// skip the escaped character
+			// TODO: handle escape sequences properly
+			t.advance(1)
+			continue
+		}
+		t.advance(1) // 1 char
+	}
+
+	return t.syntax_error("unterminated string literal")
+}
diff --git a/tokenizer.v b/tokenizer.v
deleted file mode 100644
index d3f7336..0000000
--- a/tokenizer.v
+++ /dev/null
@@ -1,433 +0,0 @@
-module tokenizer
-import constants
-
-@[packed]
-pub struct Token {
-pub:
-	start u32
-	tag VuaToken
-}
-
-pub enum VuaToken {
-	eof
-	nil
-	string
-	number
-	decimal
-	true
-	false
-	table_start
-	table_end
-	identifier
-	comma
-	newline
-	equal
-	dot
-	angle_bracket_left
-	angle_bracket_right
-	reserved_keyword
-	operator
-	operator_len2
-	operator_len3
-}
-
-pub struct Parser {
-pub:
-	input string
-pub mut:
-	max_pos u32
-	stack []u32 = []u32{len: 256, cap: 256, init: 0}
-	frame u32
-mut:
-	lc int = 1
-	rc int
-}
-
-fn (mut p Parser) cur_pos() u32 {
-	return p.stack[p.frame]
-}
-
-fn (mut p Parser) advance(delta u32) {
-	p.rc += int(delta)
-	p.stack[p.frame] += delta;
-	if p.max_pos < p.stack[p.frame] { p.max_pos = p.stack[p.frame] }
-}
-
-@[manualfree]
-fn (mut p Parser) push_frame() !u32 {
-	p.frame += 1
-	if p.frame == p.stack.len {
-		new_size := p.stack.len + (p.stack.len >> 1)
-		new_stack := []u32{len: new_size, cap: new_size, init:0}
-
-		unsafe { vmemcpy(new_stack[0..p.max_pos], p.stack[0..p.max_pos], new_stack.len) }
-		unsafe { p.stack.free() }
-
-		p.stack = new_stack
-	}
-	if p.frame > p.input.len {
-		return error('Buffer too small')
-	}
-
-	p.stack[p.frame] = p.stack[p.frame - 1]
-	return p.cur_pos()
-}
-
-fn (mut p Parser) pop_frame() {
-	if p.frame >= 1 { p.frame -= 1 }
-}
-
-fn (mut p Parser) commit_frame() {
-	p.frame -= 1
-	p.stack[p.frame] = p.stack[p.frame + 1];
-}
-
-@[unsafe]
-fn (mut p Parser) free() {
-	p.stack.free()
-}
-
-struct SyntaxError {
-	Error
-	line int
-	row int
-	m string
-}
-
-fn (err SyntaxError) msg() string {
-	return "SyntaxError: ${err.m} -- at line ${err.line}-${err.row}"
-}
-
-fn (mut p Parser) syntax_error(m string) SyntaxError {
-	return SyntaxError{
-		line: p.lc,
-		row: p.rc,
-		m: m,
-	}
-}
-
-fn (mut p Parser) rollback() {
-	p.stack[p.frame] = if p.frame == 0 { u32(0) } else { p.stack[p.frame - 1] }
-}
-
-// ===== Char matching =====
-
-pub fn (mut p Parser) end_of_input() bool {
-	return p.cur_pos() >= p.input.len
-}
-
-fn (mut p Parser) term(c u8) bool {
-	if p.end_of_input() || p.input[p.cur_pos()] != c {
-		return false
-	}
-
-	p.advance(1) // 1 char
-	return true
-}
-
-fn (mut p Parser) one_of(s string) bool {
-	if p.end_of_input() || p.input[p.cur_pos()] !in s.bytes() {
-		return false
-	}
-	p.advance(1) // 1 char
-	return true
-}
-
-fn (mut p Parser) none_of(s string) bool {
-	if p.end_of_input() || p.input[p.cur_pos()] in s.bytes() {
-		return false
-	}
-	p.advance(1) // 1 char
-	return true
-}
-
-fn (mut p Parser) range(low u8, high u8) bool {
-	if p.end_of_input() {
-		return false
-	}
-	c := p.input[p.cur_pos()]
-
-	if !(low <= c && c <= high) {
-		return false
-	}
-
-	p.advance(1) // 1 char
-	return true
-}
-
-// ===== Token extraction =====
-
-fn (mut p Parser) save_stash(from u32) string {
-	return p.input[from..p.cur_pos()]
-}
-
-fn (mut p Parser) skip_ws() {
-	for !p.end_of_input() {
-		$if windows {
-			if p.input[p.cur_pos()] == u8(13) { // eg: WINDOWS ONLY
-				p.lc += 1
-				p.rc = 0 // reset rows
-				p.advance(1) // skip CR
-				if p.end_of_input() || p.input[p.cur_pos()] != u8(10) { // skip LF if present
-					continue
-				}
-			}
-		}
-
-		if p.input[p.cur_pos()] == u8(10) { // eg: LINUX ONLY
-			p.lc += 1
-			p.rc = 0 // reset rows
-		}
-
-		if p.input[p.cur_pos()] in ' \r\n\t'.bytes() {
-			p.advance(1)
-			continue
-		}
-
-		break
-	}
-}
-
-fn (mut p Parser) last() !u8 {
-	if p.stack[p.frame] > p.input.len {
-		return error('Buffer too small')
-	}
-	return p.input[p.cur_pos() - 1]
-}
-
-fn (mut p Parser) pred(func fn (u8) bool) bool {
-	if p.end_of_input() || !func(p.input[p.cur_pos()]) {
-		return false
-	}
-
-	p.advance(1) // 1 char
-	return true
-}
-
-fn (mut p Parser) many(s string) bool {
-	if p.input.len < p.cur_pos() + u32(s.len) {
-		return false
-	}
-
-	if s != p.input[p.cur_pos() .. p.cur_pos() + u32(s.len)] {
-		return false
-	}
-
-	p.advance(u32(s.len))
-	return true
-}
-
-fn (mut p Parser) any() ?u8 {
-	if p.end_of_input() {
-		return none
-	}
-	c := p.input[p.cur_pos()]
-	p.advance(1) // 1 char
-	return c
-}
-
-// ===== Tokenizer =====
-
-pub struct Tokenizer {
-	Parser
-}
-
-fn (mut t Tokenizer) next() ?u8 {
-	pos := t.cur_pos()
-	if t.end_of_input() || pos > t.input.len {
-		return none
-	}
-	c := t.input[pos]
-	t.advance(1) // 1 char
-	return c
-}
-
-pub fn (mut t Tokenizer) parse_all() !Token {
-	t.skip_ws()
-
-	start_index := t.push_frame()!
-
-	c := t.any() or { return Token{ start_index, .eof } }
-	print(c.ascii_str())
-
-	match true {
-		c == `"` {
-			t.rollback()
-			t.commit_frame()
-			return t.parse_str()
-		}
-		c in "0123456789".bytes() {
-			t.rollback()
-			t.commit_frame()
-			return t.parse_num()
-		}
-		c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".bytes() {
-			t.rollback()
-			t.commit_frame()
-			return t.parse_id()
-		}
-		c == `{` {
-			return Token{ start_index, .table_start }
-		}
-		c == `}` {
-			return Token{ start_index, .table_end }
-		}
-		c == `[` {
-			return Token{ start_index, .angle_bracket_left }
-		}
-		c == `]` {
-			return Token{ start_index, .angle_bracket_right }
-		}
-		c == 10 { // LF
-			return Token{ start_index, .newline }
-		}
-		else {
-			$if windows {
-				if c == 13 && t.term(10) { // CRLF
-					return Token{ start_index, .newline }
-				}
-			}
-
-			t.rollback()
-			t.commit_frame()
-			return t.parse_op()
-		}
-	}
-}
-
-fn (mut t Tokenizer) parse_op() !Token {
-	start_index := t.push_frame()!
-
-	mut s := []u8{len: 0, cap: 32, init: 0}
-	for t.one_of("+-*/%^<>=~#") {
-		s << t.last()!
-	}
-
-	match s.bytestr() {
-		"+", "-", "*", "/", "%", "^", "#", "=" {
-			return Token{ start_index, .operator }
-		}
-		"==", "~=", "<=", ">=", "<", ">", ".." {
-			return Token{ start_index, .operator_len2 }
-		}
-		"..." {
-			return Token{ start_index, .operator_len3 }
-		}
-		"." {
-			return Token{ start_index, .dot }
-		}
-		"," {
-			return Token{ start_index, .comma }
-		}
-		else {
-			return t.syntax_error("invalid operator '${s.bytestr()}'")
-		}
-	}
-}
-
-fn (mut t Tokenizer) parse_id() !Token {
-	start_index := t.push_frame()!
-
-	mut s := []u8{len: 0, cap: 32, init: 0}
-
-	if t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.term('_'[0]) {
-		s << t.last() or { return t.syntax_error("invalid identifier") }
-	} else {
-		return t.syntax_error("invalid identifier")
-	}
-
-	for t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.range('0'[0], '9'[0]) || t.term('_'[0]) {
-		s << t.last() or { break }
-	}
-
-	return match s.bytestr() {
-		"true" { Token{ start_index, .true } }
-		"false" { Token{ start_index, .false } }
-		"nil" { Token{ start_index, .nil } }
-		else {
-			if s.bytestr() in constants.keywords {
-				return Token{start_index,.reserved_keyword}
-			}
-			return Token{start_index,.identifier}
-		}
-	}
-}
-
-fn (mut t Tokenizer) parse_num() !Token {
-	start_index := t.push_frame()!
-
-	mut s := []u8{len: 0, cap: 32, init: 0}
-
-	if t.one_of("0123456789") {
-		s << t.last()!
-	} else {
-		return t.syntax_error("invalid number")
-	}
-
-	mut is_decimal := false
-
-	loop:
-	for {
-		if t.one_of("0123456789") {
-			s << t.last() or { break loop }
-			continue
-		}
-
-		if t.term('.'[0]) {
-			if is_decimal {
-				return t.syntax_error("invalid number")
-			}
-			s << '.'[0]
-
-			if !t.one_of("0123456789") {
-				return t.syntax_error("invalid number")
-			}
-
-			is_decimal = true
-			s << t.last() or { break loop }
-
-			continue
-		}
-		break
-	}
-
-	if s.len == 0 {
-		return t.syntax_error("invalid number")
-	}
-
-	return Token{
-		start: start_index,
-		tag: if is_decimal { .decimal } else { .number },
-	}
-}
-
-fn (mut t Tokenizer) parse_str() !Token {
-	start_index := t.push_frame()!
-
-	mut s := []u8{len: 0, cap: 4096, init: 0}
-
-	ok := t.term('"'[0])
-
-	if !ok {
-		return t.syntax_error("invalid string")
-	}
-
-	for c in t {
-		s << c
-	}
-
-	c := t.last() or {
-		return t.syntax_error("unclosed string")
-	}
-
-	match c {
-		`"` {
-			return Token{ start_index, .string }
-		}
-		else {
-			s << c
-		}
-	}
-
-	return Token{ start_index, .eof }
-}
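Reviewer note, not part of the patch: the frame stack in the new scanner module is the core backtracking primitive here, so a minimal usage sketch may help future readers. It only uses the API added in scanner.v; the standalone main wrapper and the 'abc' input are hypothetical.

```v
import scanner { Scanner }

fn main() {
	mut s := Scanner{
		input: 'abc'
	}
	start := s.push_frame() or { return } // speculative frame; cursor copied from the parent
	if s.term(`a`) && s.term(`b`) {
		s.commit_frame() // success: fold the advanced cursor back into the parent frame
		println(s.save_stash(start)) // prints 'ab'
	} else {
		s.pop_frame() // failure: discard the frame, parent cursor is untouched
	}
}
```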
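Similarly hypothetical, a sketch of driving token.Tokenizer end to end. Since Token stores only a start offset and a tag, parse_all() is called in a loop until .eof comes back; the embedded-Scanner field init mirrors what ast_test.v does.

```v
import token { Tokenizer }

fn main() {
	mut t := Tokenizer{
		input: 'local hi = 42'
	}
	for {
		tok := t.parse_all() or {
			eprintln(err) // syntax errors carry line/row info
			break
		}
		if tok.tag == .eof {
			break
		}
		println('${tok.tag} at offset ${tok.start}')
	}
}
```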
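One last sketch, same caveat: the property of the new intern module that Environment relies on (when it keys vars by interned name) is that interning an equal string twice yields a single stored entry.

```v
import intern { InternPool }

fn main() {
	mut pool := InternPool{}
	a := pool.intern('local')
	b := pool.intern('local')
	assert a == b // same backing entry
	assert pool.count() == 1 // duplicates are collapsed
	pool.clear()
	assert pool.count() == 0
}
```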