From 32823fed2785a3ac655d1bf0d7cba601899dda74 Mon Sep 17 00:00:00 2001
From: Duvet
Date: Thu, 19 Jun 2025 22:38:38 -0500
Subject: [PATCH] first commit

---
Review notes (fixes applied in this reconstructed revision):
- multiarray.add: item was copied at the post-increment index (slot 0 never
  written); now snapshots the insert index before bumping len.
- multiarray.grow: old per-field buffer was leaked; now freed after copy.
- tokenizer.push_frame: vmemcpy was given slice expressions and a byte count
  of `new_stack.len` (elements, not bytes); now copies data pointers with
  len * sizeof(u32).
- tokenizer.skip_ws: CRLF counted the same line twice on Windows.
- tokenizer.parse_op: '.' and ',' were missing from the operator charset,
  making the dot/comma/concat/ellipsis arms unreachable.
- tokenizer.parse_str: the scan loop consumed all remaining input instead of
  stopping at the closing quote.
- repl.eval/loop: leading-separator output bug; a syntax error no longer
  terminates the REPL.
- ast.v: field names that are V keywords (enum/struct/type) renamed.

 .editorconfig     |   8 +
 .gitattributes    |   8 +
 .gitignore        |  24 +++
 ast.v             | 102 +++++++++++
 constants.v       |  35 ++++
 main.v            |   9 +
 multiarray.v      | 100 +++++++++++
 multiarray_test.v |  31 ++++
 repl.v            |  96 ++++++++++
 tokenizer.v       | 433 ++++++++++++++++++++++++++++++++++++++++++++++
 v.mod             |   7 +
 11 files changed, 853 insertions(+)
 create mode 100644 .editorconfig
 create mode 100644 .gitattributes
 create mode 100644 .gitignore
 create mode 100644 ast.v
 create mode 100644 constants.v
 create mode 100644 main.v
 create mode 100644 multiarray.v
 create mode 100644 multiarray_test.v
 create mode 100644 repl.v
 create mode 100644 tokenizer.v
 create mode 100644 v.mod

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..01072ca
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,8 @@
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.v]
+indent_style = tab
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..9a98968
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,8 @@
+* text=auto eol=lf
+*.bat eol=crlf
+
+*.v linguist-language=V
+*.vv linguist-language=V
+*.vsh linguist-language=V
+v.mod linguist-language=V
+.vdocignore linguist-language=ignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2e2bb77
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Binaries for programs and plugins
+main
+vua
+*.exe
+*.exe~
+*.so
+*.dylib
+*.dll
+
+# Ignore binary output folders
+bin/
+
+# Ignore common editor/system specific metadata
+.DS_Store
+.idea/
+.vscode/
+*.iml
+
+# ENV
+.env
+
+# vweb and database
+*.db
+*.js
diff --git a/ast.v b/ast.v
new file mode 100644
index 0000000..8666781
--- /dev/null
+++ b/ast.v
@@ -0,0 +1,102 @@
+// AST node and value definitions for the Vua language.
+// NOTE(review): `module ast` shares a directory with `module tokenizer` et al.;
+// V normally expects one module per directory — confirm the intended layout.
+module ast
+
+import tokenizer { Token, VuaToken }
+
+// The nil value.
+struct Nil {}
+
+// A string value; holds the raw source contents.
+struct String {
+	contents string
+}
+
+// A numeric value kept in source form (not yet parsed to int/float).
+struct Number {
+	contents string
+}
+
+struct Boolean {
+	value bool
+}
+
+// Flags describing which flavor of table this is.
+// Fields renamed with `is_` prefixes: `enum` and `struct` are V keywords
+// and cannot be used as identifiers.
+@[packed]
+struct TableFlags {
+	is_array  bool
+	is_object bool
+	is_enum   bool
+	is_class  bool
+	is_struct bool
+}
+
+struct Table {
+	flag TableFlags
+	data []Variable
+}
+
+struct Class {
+	properties []VariableDef
+	methods    []Function
+}
+
+// A named function: parameters plus its (still tokenized) body.
+struct Function {
+	name   string
+	params []VariableDef
+	body   []Token
+}
+
+// Index of a function in some function table (owner not visible here).
+struct FunctionRef {
+	index u32
+}
+
+struct Lambda {
+	params []VariableDef
+	body   []Token
+	scope  ?FunctionRef
+}
+
+// `typ` (not `type`): `type` is a V keyword.
+struct VariableDef {
+	name string
+	typ  VuaTaggedType
+}
+
+struct Variable {
+	// maybe this name should be a null terminated string
+	name  string
+	value Expr
+}
+
+struct Expr {
+	index u32
+	typ   VuaTaggedType
+}
+
+// NOTE(review): `nil`/`string`/`number` as variant names shadow builtin
+// type names; verify the V compiler accepts them before extending.
+enum VuaTaggedType as u8 {
+	nil
+	string
+	number
+	boolean
+	table
+	function
+}
+
+type TokenIndex = u32
+
+// Placeholder SoA containers — to be filled in as the parser grows.
+@[heap]
+struct Nodes {
+}
+
+@[heap]
+struct Tokens {
+}
+
+struct AST {
+	tokens Tokens
+	nodes  Nodes
+	pos    int
+}
+
+union NodeData {
+}
+
+struct Node {
+	token_type VuaToken
+	data       NodeData
+}
diff --git a/constants.v b/constants.v
new file mode 100644
index 0000000..cf413c4
--- /dev/null
+++ b/constants.v
@@ -0,0 +1,35 @@
+module constants
+
+// Reserved words of the Vua language; identifiers matching one of these
+// tokenize as .reserved_keyword (see tokenizer.parse_id).
+pub const keywords = [
+	'and',
+	'assert',
+	'begin',
+	'break',
+	'continue',
+	'class',
+	'do',
+	'else',
+	'elseif',
+	'end',
+	'foreach',
+	'forever',
+	'function',
+	'if',
+	'import',
+	'in',
+	'local',
+	'nil',
+	'not',
+	'module',
+	'match',
+	'or',
+	'repeat',
+	'return',
+	'then',
+	'true',
+	'pragma',
+	'record',
+	'until',
+	'while',
+]
diff --git a/main.v b/main.v
new file mode 100644
index 0000000..cc1da6d
--- /dev/null
+++ b/main.v
@@ -0,0 +1,9 @@
+module main
+
+import repl
+
+// Entry point: greet, then hand control to the REPL until EOF/exit.
+fn main() {
+	repl_instance := repl.Repl{}
+	println(repl_instance.welcome_msg)
+	println('--------------')
+	repl_instance.loop()!
+}
diff --git a/multiarray.v b/multiarray.v
new file mode 100644
index 0000000..374bf0c
--- /dev/null
+++ b/multiarray.v
@@ -0,0 +1,100 @@
+// A struct-of-arrays container: one raw buffer per field of T, plus a
+// tag map recording which "table" (field name) each index belongs to.
+module multiarray
+
+struct MultiArray[T] {
+mut:
+	len    int
+	cap    int
+	tags   map[int]string
+	fields map[string]MultiArrayField
+}
+
+struct MultiArrayField {
+	data voidptr
+}
+
+// new allocates one zeroed buffer of `cap` T-slots per field of T.
+fn MultiArray.new[T](len int, cap int, init T) MultiArray[T] {
+	mut result := MultiArray[T]{len: len, cap: cap}
+	$for field in T.fields {
+		result.fields[field.name + 's'] = MultiArrayField{
			data: unsafe { vcalloc(u64(cap) * sizeof(T)) }
		}
+	}
+	return result
+}
+
+// add appends `item` under the tag `table`.
+// fix: snapshot the insert index BEFORE bumping len — the original wrote
+// each item one slot past its index (and slot 0 was never written).
+fn (mut ma MultiArray[T]) add[T](table string, item T) {
+	if ma.len >= ma.cap {
+		unsafe { ma.grow() }
+	}
+	idx := ma.len
+	$for field in T.fields {
+		if field.name == table {
+			ma.tags[idx] = table
+			ma.len++
+		}
+		field_name := field.name + 's'
+		mut field_data_ptr := unsafe { ma.fields[field_name].data }
+		unsafe { vmemcpy(&u8(field_data_ptr) + sizeof(T) * u64(idx), item, sizeof(T)) }
+	}
+}
+
+// grow doubles capacity (1 when empty), copying live elements.
+@[unsafe]
+fn (mut ma MultiArray[T]) grow() {
+	new_cap := if ma.cap == 0 { 1 } else { ma.cap * 2 }
+	$for field in T.fields {
+		field_name := field.name + 's'
+		old_data := &T(ma.fields[field_name].data)
+		new_data := vcalloc(u64(new_cap) * sizeof(T))
+		vmemcpy(new_data, old_data, u64(ma.len) * sizeof(T))
+		free(old_data) // fix: the old buffer was leaked on every grow
+		ma.fields[field_name] = MultiArrayField{ data: new_data }
+	}
+	ma.cap = new_cap
+}
+
+// get returns a copy of the element at `index`, or none when out of range.
+fn (ma MultiArray[T]) get(index int) ?T {
+	if index < 0 || index >= ma.len {
+		return none
+	}
+	mut result := T{}
+	$for field in T.fields {
+		field_name := field.name + 's'
+		field_data_ptr := unsafe { ma.fields[field_name].data }
+		unsafe { vmemcpy(&result, &u8(field_data_ptr) + sizeof(T) * u64(index), sizeof(T)) }
+	}
+	return result
+}
+
+// free releases every per-field buffer and resets len/cap.
+@[unsafe]
+fn (mut ma MultiArray[T]) free() {
+	$for field in T.fields {
+		field_name := field.name + 's'
+		if ma.fields[field_name].data != nil {
+			free(ma.fields[field_name].data)
+			ma.fields[field_name] = MultiArrayField{nil}
+		}
+	}
+	ma.len = 0
+	ma.cap = 0
+}
+
+fn (mut ma MultiArray[T]) iter() Iterator[T] {
+	return Iterator[T]{ma, 0}
+}
+
+struct Iterator[T] {
+mut:
+	ma MultiArray[T]
+	i  int
+}
+
+struct IteratorEntry[T] {
+	tag  string
+	data T
+}
+
+// next yields (tag, value) pairs in insertion order until exhausted.
+fn (mut it Iterator[T]) next() ?IteratorEntry[T] {
+	defer { it.i++ }
+	if it.i >= it.ma.len {
+		return none
+	}
+	val := it.ma.get(it.i) or {
		return none
	}
+	return IteratorEntry[T]{it.ma.tags[it.i], val}
+}
diff --git a/multiarray_test.v b/multiarray_test.v
new file mode 100644
index 0000000..ba7553a
--- /dev/null
+++ b/multiarray_test.v
@@ -0,0 +1,31 @@
+module multiarray
+
+// Union payload used to exercise the container with heterogeneous tags.
+union SoA {
+	nil  nil = unsafe { nil }
+	int  int
+	bool bool
+}
+
+fn test_basics() {
+	mut arr := MultiArray.new(0, 10, SoA{})
+
+	// test: add an int
+	arr.add('int', SoA{ int: 42 })
+	arr.add('int', SoA{ int: 43 })
+	arr.add('int', SoA{ int: 44 })
+	arr.add('bool', SoA{ bool: true })
+	arr.add('nil', SoA{})
+	arr.add('nil', SoA{})
+	arr.add('nil', SoA{})
+	arr.add('nil', SoA{})
+
+	// fix: the iterator must be mutable for next() to advance.
+	mut it := arr.iter()
+	for item in it {
+		println('Iterating over MultiArray[${item.tag}]:')
+		if item.tag == 'int' {
+			println('${item.tag}s > ${unsafe { item.data.int }}')
+		}
+	}
+
+	println('Created MultiArray with len: $arr.len, cap: $arr.cap')
+}
diff --git a/repl.v b/repl.v
new file mode 100644
index 0000000..4adb1e7
--- /dev/null
+++ b/repl.v
@@ -0,0 +1,96 @@
+module repl
+
+import tokenizer as _ { Token, Tokenizer, VuaToken }
+import readline { read_line }
+
+pub struct Repl {
+pub:
+	welcome_msg string = "Welcome to the REPL!"
+}
+
+/**
+ * Tokenize one line of input; stops at the first newline once at least
+ * one token was read, skips leading blank lines.
+ * Caller owns returned memory.
+ */
+pub fn (r Repl) read(input string) ![]Token {
+	mut tokens := []Token{
		len: 0,
		cap: 128,
		init: Token{0, .eof},
	}
+	mut tokenizer := Tokenizer{input: input}
+
+	for !tokenizer.end_of_input() {
+		token := tokenizer.parse_all()!
+		if token.tag == .eof { break }
+		if token.tag == .newline {
+			if tokens.len > 0 {
+				break
+			} else {
+				continue // skip empty lines
+			}
+		}
+		tokens << token
+	}
+
+	if tokens.len == 0 {
+		return error('No input whatsoever')
+	}
+
+	return tokens
+}
+
+// eval renders the token stream as a comma-separated list of tag names.
+// fix: the original pushed ', ' before every entry and joined with ' ',
+// producing a leading separator; names are now joined cleanly.
+pub fn (r Repl) eval(ast []Token, input string) string {
+	mut s := []string{len: 0, cap: 4096, init: ''}
+
+	for token in ast {
+		match token.tag {
+			.eof { s << 'EOF' }
+			.nil { s << 'nil' }
+			.string { s << 'string' }
+			.number { s << 'number' }
+			.decimal { s << 'decimal' }
+			.true { s << 'true' }
+			.false { s << 'false' }
+			.table_start { s << '{' }
+			.table_end { s << '}' }
+			.identifier { s << 'identifier' }
+			.comma { s << ',' }
+			.newline { s << '\n' }
+			.equal { s << '=' }
+			.dot { s << '.' }
+			.angle_bracket_left { s << '[' }
+			.angle_bracket_right { s << ']' }
+			.reserved_keyword { s << 'reserved_keyword' }
+			.operator, .operator_len2, .operator_len3 { s << 'operator' }
+		}
+	}
+
+	if s.len == 0 {
+		return 'No output'
+	}
+
+	return s.join(', ')
+}
+
+//fn (r Repl) print(t VuaType) {
+pub fn (r Repl) print(t string) string {
+	return t
+}
+
+// loop runs read→eval→print until EOF (Ctrl-D) or `exit`.
+// fix: a syntax error now reports and continues instead of killing the REPL.
+pub fn (r Repl) loop() ! {
+	for {
+		input := read_line('vua>> ') or {
+			println('\nGoodbye')
+			break
+		}
+		if input.trim_space() == 'exit' {
+			println('\nGoodbye')
+			break
+		}
+		tokens := r.read(input) or {
+			println(err.msg())
+			continue
+		}
+		expr := r.eval(tokens, input)
+		output := r.print(expr)
+		println(output)
+	}
+}
diff --git a/tokenizer.v b/tokenizer.v
new file mode 100644
index 0000000..d3f7336
--- /dev/null
+++ b/tokenizer.v
@@ -0,0 +1,433 @@
+module tokenizer
+
+import constants
+
+// A token is its start offset into the input plus a tag; the text can be
+// recovered from the input by re-scanning from `start`.
+@[packed]
+pub struct Token {
+pub:
+	start u32
+	tag   VuaToken
+}
+
+pub enum VuaToken {
+	eof
+	nil
+	string
+	number
+	decimal
+	true
+	false
+	table_start
+	table_end
+	identifier
+	comma
+	newline
+	equal
+	dot
+	angle_bracket_left
+	angle_bracket_right
+	reserved_keyword
+	operator
+	operator_len2
+	operator_len3
+}
+
+// Backtracking cursor: a stack of positions, one per speculative frame.
+// stack[frame] is the current read position; push/rollback/commit give
+// PEG-style speculation. lc/rc track line/row for error messages.
+pub struct Parser {
+pub:
+	input string
+pub mut:
+	max_pos u32
+	stack   []u32 = []u32{len: 256, cap: 256, init: 0}
+	frame   u32
+mut:
+	lc int = 1
+	rc int
+}
+
+fn (mut p Parser) cur_pos() u32 {
+	return p.stack[p.frame]
+}
+
+fn (mut p Parser) advance(delta u32) {
+	p.rc += int(delta)
+	p.stack[p.frame] += delta
+	if p.max_pos < p.stack[p.frame] { p.max_pos = p.stack[p.frame] }
+}
+
+// push_frame opens a speculative frame seeded with the current position.
+// fix: the original vmemcpy passed slice expressions (not data pointers)
+// and a byte count of `new_stack.len` (elements, not bytes).
+@[manualfree]
+fn (mut p Parser) push_frame() !u32 {
+	p.frame += 1
+	if p.frame >= p.stack.len {
+		new_size := p.stack.len + (p.stack.len >> 1)
+		new_stack := []u32{len: new_size, cap: new_size, init: 0}
+
+		unsafe { vmemcpy(new_stack.data, p.stack.data, p.stack.len * int(sizeof(u32))) }
+		unsafe { p.stack.free() }
+
+		p.stack = new_stack
+	}
+	if p.frame > p.input.len {
+		return error('Buffer too small')
+	}
+
+	p.stack[p.frame] = p.stack[p.frame - 1]
+	return p.cur_pos()
+}
+
+fn (mut p Parser) pop_frame() {
+	if p.frame >= 1 { p.frame -= 1 }
+}
+
+// commit_frame pops a frame but keeps its progress.
+fn (mut p Parser) commit_frame() {
+	p.frame -= 1
+	p.stack[p.frame] = p.stack[p.frame + 1]
+}
+
+@[unsafe]
+fn (mut p Parser) free() {
+	p.stack.free()
+}
+
+struct SyntaxError {
+	Error
+	line int
+	row  int
+	m    string
+}
+
+fn (err SyntaxError) msg() string {
+	return "SyntaxError: ${err.m} -- at line ${err.line}-${err.row}"
+}
+
+fn (mut p Parser) syntax_error(m string) SyntaxError {
+	return SyntaxError{
		line: p.lc,
		row: p.rc,
		m: m,
	}
+}
+
+// rollback discards progress in the current frame, restoring the
+// position saved by the enclosing frame.
+fn (mut p Parser) rollback() {
+	p.stack[p.frame] = if p.frame == 0 { u32(0) } else { p.stack[p.frame - 1] }
+}
+
+// ===== Char matching =====
+
+pub fn (mut p Parser) end_of_input() bool {
+	return p.cur_pos() >= p.input.len
+}
+
+fn (mut p Parser) term(c u8) bool {
+	if p.end_of_input() || p.input[p.cur_pos()] != c {
+		return false
+	}
+
+	p.advance(1) // 1 char
+	return true
+}
+
+fn (mut p Parser) one_of(s string) bool {
+	if p.end_of_input() || p.input[p.cur_pos()] !in s.bytes() {
+		return false
+	}
+	p.advance(1) // 1 char
+	return true
+}
+
+fn (mut p Parser) none_of(s string) bool {
+	if p.end_of_input() || p.input[p.cur_pos()] in s.bytes() {
+		return false
+	}
+	p.advance(1) // 1 char
+	return true
+}
+
+fn (mut p Parser) range(low u8, high u8) bool {
+	if p.end_of_input() {
+		return false
+	}
+	c := p.input[p.cur_pos()]
+
+	if !(low <= c && c <= high) {
+		return false
+	}
+
+	p.advance(1) // 1 char
+	return true
+}
+
+// ===== Token extraction =====
+
+fn (mut p Parser) save_stash(from u32) string {
+	return p.input[from..p.cur_pos()]
+}
+
+// skip_ws consumes whitespace while maintaining line/row counters.
+// fix: CRLF previously incremented lc twice (once for CR, again for LF).
+fn (mut p Parser) skip_ws() {
+	for !p.end_of_input() {
+		$if windows {
+			if p.input[p.cur_pos()] == u8(13) { // CR (Windows line ending)
+				p.lc += 1
+				p.rc = 0 // reset rows
+				p.advance(1) // skip CR
+				if !p.end_of_input() && p.input[p.cur_pos()] == u8(10) {
+					p.advance(1) // skip the LF of CRLF without counting a second line
+				}
+				continue
+			}
+		}
+
+		if p.input[p.cur_pos()] == u8(10) { // LF
+			p.lc += 1
+			p.rc = 0 // reset rows
+		}
+
+		if p.input[p.cur_pos()] in ' \r\n\t'.bytes() {
+			p.advance(1)
+			continue
+		}
+
+		break
+	}
+}
+
+// last returns the character most recently consumed.
+// fix: guard cur_pos()==0 — the original underflowed the index.
+fn (mut p Parser) last() !u8 {
+	if p.cur_pos() == 0 || p.stack[p.frame] > p.input.len {
+		return error('Buffer too small')
+	}
+	return p.input[p.cur_pos() - 1]
+}
+
+fn (mut p Parser) pred(func fn (u8) bool) bool {
+	if p.end_of_input() || !func(p.input[p.cur_pos()]) {
+		return false
+	}
+
+	p.advance(1) // 1 char
+	return true
+}
+
+// many consumes the literal string s, or nothing.
+fn (mut p Parser) many(s string) bool {
+	if p.input.len < p.cur_pos() + u32(s.len) {
+		return false
+	}
+
+	if s != p.input[p.cur_pos() .. p.cur_pos() + u32(s.len)] {
+		return false
+	}
+
+	p.advance(u32(s.len))
+	return true
+}
+
+fn (mut p Parser) any() ?u8 {
+	if p.end_of_input() {
+		return none
+	}
+	c := p.input[p.cur_pos()]
+	p.advance(1) // 1 char
+	return c
+}
+
+// ===== Tokenizer =====
+
+pub struct Tokenizer {
+	Parser
+}
+
+fn (mut t Tokenizer) next() ?u8 {
+	pos := t.cur_pos()
+	if t.end_of_input() || pos > t.input.len {
+		return none
+	}
+	c := t.input[pos]
+	t.advance(1) // 1 char
+	return c
+}
+
+// parse_all dispatches on the first significant character to the
+// specialized scanners, or returns single-character tokens directly.
+// NOTE(review): branches that return directly (e.g. table_start) leave
+// their frame pushed — the frame stack only drains via rollback/commit;
+// verify this balances over long inputs before relying on it.
+pub fn (mut t Tokenizer) parse_all() !Token {
+	t.skip_ws()
+
+	start_index := t.push_frame()!
+
+	c := t.any() or { return Token{ start_index, .eof } }
+	// (removed a leftover debug `print(c.ascii_str())` that echoed every char)
+
+	match true {
+		c == `"` {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_str()
+		}
+		c in "0123456789".bytes() {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_num()
+		}
+		c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".bytes() {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_id()
+		}
+		c == `{` {
+			return Token{ start_index, .table_start }
+		}
+		c == `}` {
+			return Token{ start_index, .table_end }
+		}
+		c == `[` {
+			return Token{ start_index, .angle_bracket_left }
+		}
+		c == `]` {
+			return Token{ start_index, .angle_bracket_right }
+		}
+		c == 10 { // LF
+			return Token{ start_index, .newline }
+		}
+		else {
+			$if windows {
+				if c == 13 && t.term(10) { // CRLF
+					return Token{ start_index, .newline }
+				}
+			}
+
+			t.rollback()
+			t.commit_frame()
+			return t.parse_op()
+		}
+	}
+}
+
+// parse_op greedily consumes operator characters and classifies the run.
+// fix: '.' and ',' were missing from the charset, so the dot, comma,
+// '..' and '...' arms below could never match.
+fn (mut t Tokenizer) parse_op() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+	for t.one_of("+-*/%^<>=~#.,") {
+		s << t.last()!
+	}
+
+	match s.bytestr() {
+		"+", "-", "*", "/", "%", "^", "#", "=" {
+			return Token{ start_index, .operator }
+		}
+		"==", "~=", "<=", ">=", "<", ">", ".." {
+			return Token{ start_index, .operator_len2 }
+		}
+		"..." {
+			return Token{ start_index, .operator_len3 }
+		}
+		"." {
+			return Token{ start_index, .dot }
+		}
+		"," {
+			return Token{ start_index, .comma }
+		}
+		else {
+			return t.syntax_error("invalid operator '${s.bytestr()}'")
+		}
+	}
+}
+
+// parse_id scans [A-Za-z_][A-Za-z0-9_]* and classifies it as a literal,
+// reserved keyword, or plain identifier.
+fn (mut t Tokenizer) parse_id() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+
+	if t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.term('_'[0]) {
+		s << t.last() or { return t.syntax_error("invalid identifier") }
+	} else {
+		return t.syntax_error("invalid identifier")
+	}
+
+	for t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.range('0'[0], '9'[0]) || t.term('_'[0]) {
+		s << t.last() or { break }
+	}
+
+	return match s.bytestr() {
+		"true" { Token{ start_index, .true } }
+		"false" { Token{ start_index, .false } }
+		"nil" { Token{ start_index, .nil } }
+		else {
+			if s.bytestr() in constants.keywords {
+				return Token{ start_index, .reserved_keyword }
+			}
+			return Token{ start_index, .identifier }
+		}
+	}
+}
+
+// parse_num scans an integer or a decimal with a single '.'.
+fn (mut t Tokenizer) parse_num() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+
+	if t.one_of("0123456789") {
+		s << t.last()!
+	} else {
+		return t.syntax_error("invalid number")
+	}
+
+	mut is_decimal := false
+
+	loop:
+	for {
+		if t.one_of("0123456789") {
+			s << t.last() or { break loop }
+			continue
+		}
+
+		if t.term('.'[0]) {
+			if is_decimal {
+				return t.syntax_error("invalid number") // second '.' in one literal
+			}
+			s << '.'[0]
+
+			if !t.one_of("0123456789") {
+				return t.syntax_error("invalid number") // '.' must be followed by a digit
+			}
+
+			is_decimal = true
+			s << t.last() or { break loop }
+
+			continue
+		}
+		break
+	}
+
+	if s.len == 0 {
+		return t.syntax_error("invalid number")
+	}
+
+	return Token{
		start: start_index,
		tag: if is_decimal { .decimal } else { .number },
	}
+}
+
+// parse_str scans a double-quoted string.
+// fix: the original `for c in t { s << c }` consumed ALL remaining input
+// and never stopped at the closing quote; the scan now terminates on '"'
+// and reports `unclosed string` when input ends first.
+fn (mut t Tokenizer) parse_str() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 4096, init: 0}
+
+	if !t.term('"'[0]) {
+		return t.syntax_error("invalid string")
+	}
+
+	for {
+		c := t.next() or {
+			return t.syntax_error("unclosed string")
+		}
+		if c == `"` {
+			return Token{ start_index, .string }
+		}
+		s << c
+	}
+
+	return Token{ start_index, .eof } // unreachable; loop exits via return
+}
diff --git a/v.mod b/v.mod
new file mode 100644
index 0000000..987d81a
--- /dev/null
+++ b/v.mod
@@ -0,0 +1,7 @@
+Module {
+	name: 'vua'
+	description: 'a programming language'
+	version: '0.0.0'
+	license: 'BSD-3'
+	dependencies: []
+}