commit 32823fed2785a3ac655d1bf0d7cba601899dda74
Author: Duvet
Date:   Thu Jun 19 22:38:38 2025 -0500

    first commit

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..01072ca
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,8 @@
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.v]
+indent_style = tab
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..9a98968
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,8 @@
+* text=auto eol=lf
+*.bat eol=crlf
+
+*.v linguist-language=V
+*.vv linguist-language=V
+*.vsh linguist-language=V
+v.mod linguist-language=V
+.vdocignore linguist-language=ignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2e2bb77
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Binaries for programs and plugins
+main
+vua
+*.exe
+*.exe~
+*.so
+*.dylib
+*.dll
+
+# Ignore binary output folders
+bin/
+
+# Ignore common editor/system specific metadata
+.DS_Store
+.idea/
+.vscode/
+*.iml
+
+# ENV
+.env
+
+# vweb and database
+*.db
+*.js
diff --git a/ast.v b/ast.v
new file mode 100644
index 0000000..8666781
--- /dev/null
+++ b/ast.v
@@ -0,0 +1,102 @@
+module ast
+import tokenizer { Token, VuaToken }
+
+struct Nil {}
+
+struct String {
+	contents string
+}
+
+struct Number {
+	contents string
+}
+
+struct Boolean {
+	value bool
+}
+
+@[packed]
+struct TableFlags {
+	array bool
+	object bool
+	enum bool
+	class bool
+	struct bool
+}
+
+struct Table {
+	flag TableFlags
+	data []Variable
+}
+
+struct Class {
+	properties []VariableDef
+	methods []Function
+}
+
+struct Function {
+	name string
+	params []VariableDef
+	body []Token
+}
+
+struct FunctionRef {
+	index u32
+}
+
+struct Lambda {
+	params []VariableDef
+	body []Token
+	scope ?FunctionRef
+}
+
+struct VariableDef {
+	name string
+	type VuaTaggedType
+}
+
+struct Variable {
+	// maybe this name should be a null terminated string
+	name string
+	value Expr
+}
+
+struct Expr {
+	index u32
+	type VuaTaggedType
+}
+
+enum VuaTaggedType as u8 {
+	nil
+	string
+	number
+	boolean
+	table
+	function
+}
+
+
+type TokenIndex = u32
+
+@[heap]
+struct Nodes {
+}
+
+@[heap]
+struct Tokens {
+}
+
+struct AST {
+	tokens Tokens
+	nodes Nodes
+	pos int
+}
+
+union NodeData {
+}
+
+struct Node {
+	token_type VuaToken
+	data NodeData
+}
+
diff --git a/constants.v b/constants.v
new file mode 100644
index 0000000..cf413c4
--- /dev/null
+++ b/constants.v
@@ -0,0 +1,35 @@
+module constants
+
+pub const keywords = [
+	'and',
+	'assert',
+	'begin',
+	'break',
+	'continue',
+	'class',
+	'do',
+	'else',
+	'elseif',
+	'end',
+	'foreach',
+	'forever',
+	'function',
+	'if',
+	'import',
+	'in',
+	'local',
+	'nil',
+	'not',
+	'module',
+	'match',
+	'or',
+	'repeat',
+	'return',
+	'then',
+	'true',
+	'pragma',
+	'record',
+	'until',
+	'while',
+]
+
diff --git a/main.v b/main.v
new file mode 100644
index 0000000..cc1da6d
--- /dev/null
+++ b/main.v
@@ -0,0 +1,9 @@
+module main
+import repl
+
+fn main() {
+	repl_instance := repl.Repl{}
+	println(repl_instance.welcome_msg)
+	println('--------------')
+	repl_instance.loop()!
+}
diff --git a/multiarray.v b/multiarray.v
new file mode 100644
index 0000000..374bf0c
--- /dev/null
+++ b/multiarray.v
@@ -0,0 +1,130 @@
+module multiarray
+
+// MultiArray keeps one raw buffer per field of `T`, keyed by the field name
+// plus 's', and remembers for every index the tag it was added under.
+struct MultiArray[T] {
+mut:
+	len int
+	cap int
+	tags map[int]string
+	fields map[string]MultiArrayField
+}
+
+// MultiArrayField holds the untyped backing buffer of one field.
+struct MultiArrayField {
+	data voidptr
+}
+
+// MultiArray.new allocates one zeroed buffer of `cap` elements per field of
+// `T`. `init` is not read; it only lets the compiler infer `T`.
+fn MultiArray.new[T](len int, cap int, init T) MultiArray[T] {
+	mut result := MultiArray[T]{len: len, cap: cap}
+	$for field in T.fields {
+		result.fields[field.name + 's'] = MultiArrayField{
+			data: unsafe { vcalloc(u64(cap) * sizeof(T)) }
+		}
+	}
+	return result
+}
+
+// add appends `item` under the tag `table`; a tag that names no field of `T`
+// leaves `len` and `tags` untouched.
+//
+// The slot is chosen once, before any copy, so every buffer receives the
+// element at the same index and `get(i)` returns what the i-th `add` stored.
+// Bumping `len` inside the field loop shifted the copies by one element and
+// wrote past the end of the buffers when the array was one short of `cap`.
+fn (mut ma MultiArray[T]) add[T](table string, item T) {
+	if ma.len >= ma.cap {
+		unsafe { ma.grow() }
+	}
+	slot := ma.len
+	mut known := false
+	$for field in T.fields {
+		if field.name == table {
+			known = true
+		}
+		field_name := field.name + 's'
+		field_data_ptr := unsafe { ma.fields[field_name].data }
+		// copy from the address of `item`; the value itself is not a pointer
+		unsafe { vmemcpy(&u8(field_data_ptr) + sizeof(T) * u64(slot), &item, sizeof(T)) }
+	}
+	if known {
+		ma.tags[slot] = table
+		ma.len++
+	}
+}
+
+// grow doubles the capacity (or sets it to 1 when it was 0) and moves the
+// first `len` elements of every field buffer into a fresh zeroed allocation.
+// The previous buffers are not released here.
+@[unsafe]
+fn (mut ma MultiArray[T]) grow() {
+	new_cap := if ma.cap == 0 { 1 } else { ma.cap * 2 }
+	$for field in T.fields {
+		field_name := field.name + 's'
+		old_data := &T(ma.fields[field_name].data)
+		new_data := vcalloc(u64(new_cap) * sizeof(T))
+		vmemcpy(new_data, old_data, u64(ma.len) * sizeof(T))
+		ma.fields[field_name] = MultiArrayField{ data: new_data }
+	}
+	ma.cap = new_cap
+}
+
+// get returns the element stored at `index`, or `none` when the index is
+// outside `0 .. len`.
+fn (ma MultiArray[T]) get(index int) ?T {
+	if index < 0 || index >= ma.len {
+		return none
+	}
+	mut result := T{}
+	$for field in T.fields {
+		field_name := field.name + 's'
+		field_data_ptr := unsafe { ma.fields[field_name].data }
+		unsafe { vmemcpy(&result, &u8(field_data_ptr) + sizeof(T) * u64(index), sizeof(T)) }
+	}
+	return result
+}
+
+// free releases every field buffer and resets `len` and `cap` to 0.
+@[unsafe]
+fn (mut ma MultiArray[T]) free() {
+	$for field in T.fields {
+		field_name := field.name + 's'
+		if ma.fields[field_name].data != nil {
+			free( ma.fields[field_name].data )
+			ma.fields[field_name] = MultiArrayField{nil}
+		}
+	}
+	ma.len = 0; ma.cap = 0
+}
+
+// iter returns an iterator that holds a copy of this array and starts at 0.
+fn (mut ma MultiArray[T]) iter() Iterator[T] {
+	return Iterator[T]{ma,0}
+}
+
+// Iterator walks a MultiArray from index `i` upwards.
+struct Iterator[T] {
+mut:
+	ma MultiArray[T]
+	i int
+}
+
+// IteratorEntry pairs an element with the tag it was added under.
+struct IteratorEntry[T] {
+	tag string
+	data T
+}
+
+// next yields the entry at the cursor and advances it, or `none` at the end.
+fn (mut it Iterator[T]) next() ?IteratorEntry[T] {
+	defer { it.i++ }
+	if it.i >= it.ma.len {
+		return none
+	}
+	val := it.ma.get(it.i) or {
+		return none
+	}
+	return IteratorEntry[T]{it.ma.tags[it.i],val}
+}
diff --git a/multiarray_test.v b/multiarray_test.v
new file mode 100644
index 0000000..ba7553a
--- /dev/null
+++ b/multiarray_test.v
@@ -0,0 +1,31 @@
+module multiarray
+
+union SoA {
+	nil nil = unsafe { nil }
+	int int
+	bool bool
+}
+
+fn test_basics() {
+	mut arr := MultiArray.new(0, 10, SoA{})
+
+	// test: add an int
+	arr.add('int' , SoA{ int: 42 })
+	arr.add('int' , SoA{ int: 43 })
+	arr.add('int' , SoA{ int: 44 })
+	arr.add('bool', SoA{ bool: true })
+	arr.add('nil' , SoA{})
+	arr.add('nil' , SoA{})
+	arr.add('nil' , SoA{})
+	arr.add('nil' , SoA{})
+
+	it := arr.iter()
+	for item in it {
+		println('Iterating over MultiArray[${item.tag}]:')
+		if item.tag == 'int' {
+			println('${item.tag}s > ${unsafe{item.data.int}}')
+		}
+	}
+
+	println('Created MultiArray with len: $arr.len, cap: $arr.cap')
+}
diff --git a/repl.v b/repl.v
new file mode 100644
index 0000000..4adb1e7
--- /dev/null
+++ b/repl.v
@@ -0,0 +1,113 @@
+module repl
+
+import tokenizer as _ { Token, Tokenizer, VuaToken }
+import readline { read_line }
+
+// Repl is the interactive read-eval-print loop of vua.
+pub struct Repl {
+pub:
+	welcome_msg string = "Welcome to the REPL!"
+}
+
+/**
+ * Tokenizes `input`, stopping at the end of the input or at the first
+ * newline token that follows at least one other token.
+ * Fails when no token was produced or when the tokenizer reports an error.
+ * Caller owns returned memory.
+ */
+pub fn (r Repl) read(input string) ![]Token {
+	mut tokens := []Token{
+		len: 0,
+		cap: 128,
+		init: Token{0, .eof},
+	}
+	mut tokenizer := Tokenizer{input: input}
+
+	for !tokenizer.end_of_input() {
+		token := tokenizer.parse_all()!
+		if token.tag == .eof { break }
+		if token.tag == .newline {
+			if tokens.len > 0 {
+				break
+			} else {
+				continue // skip empty lines
+			}
+		}
+		tokens << token
+	}
+
+	if tokens.len == 0 {
+		return error('No input whatsoever')
+	}
+
+	return tokens
+}
+
+// eval renders the tag of every token in `ast` as text. Each rendering is
+// preceded by a ', ' entry and all entries are joined with single spaces.
+// `input` is not used.
+pub fn (r Repl) eval(ast []Token, input string) string {
+	mut s := []string{len: 0, cap: 4096, init: ''}
+
+	for token in ast {
+		s << ', '
+		match token.tag {
+			.eof { s << 'EOF' }
+			.nil { s << 'nil' }
+			.string { s << 'string' }
+			.number { s << 'number' }
+			.decimal { s << 'decimal' }
+			.true { s << 'true' }
+			.false { s << 'false' }
+			.table_start { s << '{' }
+			.table_end { s << '}' }
+			.identifier { s << 'identifier' }
+			.comma { s << ',' }
+			.newline { s << '\n' }
+			.equal { s << '=' }
+			.dot { s << '.' }
+			.angle_bracket_left { s << '[' }
+			.angle_bracket_right { s << ']' }
+			.reserved_keyword { s << 'reserved_keyword' }
+			.operator, .operator_len2, .operator_len3{ s << 'operator' }
+		}
+	}
+
+	if s.len == 0 {
+		return 'No output'
+	}
+
+	return s.join(' ')
+}
+
+//fn (r Repl) print(t VuaType) {
+// print returns `t` unchanged.
+pub fn (r Repl) print(t string) string {
+	return t
+}
+
+// loop prompts with 'vua>> ' and reads, evaluates and prints one line at a
+// time until `read_line` fails or the user enters `exit`.
+//
+// A line that `read` rejects (a syntax error, or a line without tokens) is
+// reported and the prompt is shown again; propagating that error used to
+// end the whole session on the first typo or empty line.
+pub fn (r Repl) loop() ! {
+	for {
+		input := read_line('vua>> ') or {
+			println('\nGoodbye')
+			break
+		}
+		if input.trim_space() == 'exit' {
+			println('\nGoodbye')
+			break
+		}
+		tokens := r.read(input.str()) or {
+			println(err.msg())
+			continue
+		}
+		expr := r.eval(tokens, input.str())
+		output := r.print(expr)
+		println(output)
+	}
+}
diff --git a/tokenizer.v b/tokenizer.v
new file mode 100644
index 0000000..d3f7336
--- /dev/null
+++ b/tokenizer.v
@@ -0,0 +1,514 @@
+module tokenizer
+import constants
+
+// Token is a tagged offset into the tokenizer input: `start` is the byte
+// index at which the token begins and `tag` is its kind.
+@[packed]
+pub struct Token {
+pub:
+	start u32
+	tag VuaToken
+}
+
+// VuaToken lists every token kind known to the tokenizer and the REPL.
+pub enum VuaToken {
+	eof
+	nil
+	string
+	number
+	decimal
+	true
+	false
+	table_start
+	table_end
+	identifier
+	comma
+	newline
+	equal
+	dot
+	angle_bracket_left
+	angle_bracket_right
+	reserved_keyword
+	operator
+	operator_len2
+	operator_len3
+}
+
+// Parser is a backtracking cursor over `input`. `stack[frame]` is the current
+// byte position; `push_frame` opens a nested attempt that is later merged into
+// its parent (`commit_frame`) or discarded (`pop_frame`). `lc` and `rc` are
+// the line and column reported by syntax errors.
+pub struct Parser {
+pub:
+	input string
+pub mut:
+	max_pos u32
+	stack []u32 = []u32{len: 256, cap: 256, init: 0}
+	frame u32
+mut:
+	lc int = 1
+	rc int
+}
+
+// cur_pos returns the byte position of the active frame.
+fn (mut p Parser) cur_pos() u32 {
+	return p.stack[p.frame]
+}
+
+// advance moves the active frame forward by `delta` bytes, adds `delta` to the
+// column counter and keeps `max_pos` at the furthest position reached.
+fn (mut p Parser) advance(delta u32) {
+	p.rc += int(delta)
+	p.stack[p.frame] += delta;
+	if p.max_pos < p.stack[p.frame] { p.max_pos = p.stack[p.frame] }
+}
+
+// push_frame opens a frame that starts at the current position and returns
+// that position. It fails, leaving the frame depth unchanged, when the new
+// depth would exceed the input length.
+//
+// A full stack grows by half its length through the array append operator,
+// which keeps the existing frames. The former hand-written `vmemcpy` sliced
+// both stacks by `max_pos` (an input offset, not a stack length) and passed an
+// element count where a byte count is required.
+fn (mut p Parser) push_frame() !u32 {
+	p.frame += 1
+	if p.frame >= p.stack.len {
+		grow_by := if p.stack.len < 2 { 1 } else { p.stack.len >> 1 }
+		for _ in 0 .. grow_by {
+			p.stack << u32(0)
+		}
+	}
+	if p.frame > p.input.len {
+		p.frame -= 1 // undo the push so the caller keeps its frame
+		return error('Buffer too small')
+	}
+
+	p.stack[p.frame] = p.stack[p.frame - 1]
+	return p.cur_pos()
+}
+
+// pop_frame discards the active frame together with everything it consumed.
+fn (mut p Parser) pop_frame() {
+	if p.frame >= 1 { p.frame -= 1 }
+}
+
+// commit_frame merges the active frame into its parent, which then continues
+// at the position the frame reached. On the root frame it does nothing,
+// because decrementing the unsigned `frame` there would wrap around.
+fn (mut p Parser) commit_frame() {
+	if p.frame == 0 { return }
+	p.frame -= 1
+	p.stack[p.frame] = p.stack[p.frame + 1];
+}
+
+// free releases the frame stack.
+@[unsafe]
+fn (mut p Parser) free() {
+	p.stack.free()
+}
+
+// SyntaxError is the error produced for malformed input.
+struct SyntaxError {
+	Error
+	line int
+	row int
+	m string
+}
+
+// msg renders the message together with the line and column of the error.
+fn (err SyntaxError) msg() string {
+	return "SyntaxError: ${err.m} -- at line ${err.line}-${err.row}"
+}
+
+// syntax_error builds a SyntaxError at the parser's current line and column.
+fn (mut p Parser) syntax_error(m string) SyntaxError {
+	return SyntaxError{
+		line: p.lc,
+		row: p.rc,
+		m: m,
+	}
+}
+
+// rollback resets the active frame to the position of its parent (0 on the
+// root frame), un-reading whatever the frame had consumed.
+fn (mut p Parser) rollback() {
+	p.stack[p.frame] = if p.frame == 0 { u32(0) } else { p.stack[p.frame - 1] }
+}
+
+// ===== Char matching =====
+
+// end_of_input reports whether the active frame reached the end of `input`.
+pub fn (mut p Parser) end_of_input() bool {
+	return p.cur_pos() >= p.input.len
+}
+
+// term consumes the next byte when it equals `c`.
+fn (mut p Parser) term(c u8) bool {
+	if p.end_of_input() || p.input[p.cur_pos()] != c {
+		return false
+	}
+
+	p.advance(1) // 1 char
+	return true
+}
+
+// one_of consumes the next byte when it occurs in `s`.
+fn (mut p Parser) one_of(s string) bool {
+	if p.end_of_input() || p.input[p.cur_pos()] !in s.bytes() {
+		return false
+	}
+	p.advance(1) // 1 char
+	return true
+}
+
+// none_of consumes the next byte when it does not occur in `s`.
+fn (mut p Parser) none_of(s string) bool {
+	if p.end_of_input() || p.input[p.cur_pos()] in s.bytes() {
+		return false
+	}
+	p.advance(1) // 1 char
+	return true
+}
+
+// range consumes the next byte when it lies between `low` and `high`, both
+// inclusive.
+fn (mut p Parser) range(low u8, high u8) bool {
+	if p.end_of_input() {
+		return false
+	}
+	c := p.input[p.cur_pos()]
+
+	if !(low <= c && c <= high) {
+		return false
+	}
+
+	p.advance(1) // 1 char
+	return true
+}
+
+// ===== Token extraction =====
+
+// save_stash returns the input between `from` and the current position.
+fn (mut p Parser) save_stash(from u32) string {
+	return p.input[from..p.cur_pos()]
+}
+
+// skip_ws consumes spaces, tabs, CR and LF. A line is counted for every LF
+// and, on Windows, for a CR that is not followed by LF, so CRLF counts once.
+fn (mut p Parser) skip_ws() {
+	for !p.end_of_input() {
+		$if windows {
+			if p.input[p.cur_pos()] == u8(13) { // eg: WINDOWS ONLY
+				p.advance(1) // skip CR
+				if p.end_of_input() || p.input[p.cur_pos()] != u8(10) {
+					// lone CR: it is the whole line break
+					p.lc += 1
+					p.rc = 0 // reset rows
+					continue
+				}
+				// CRLF: the LF branch below counts the line
+			}
+		}
+
+		if p.input[p.cur_pos()] == u8(10) { // eg: LINUX ONLY
+			p.lc += 1
+			p.rc = 0 // reset rows
+		}
+
+		if p.input[p.cur_pos()] in ' \r\n\t'.bytes() {
+			p.advance(1)
+			continue
+		}
+
+		break
+	}
+}
+
+// last returns the byte just before the current position. It fails when
+// nothing has been consumed yet (position 0, where `pos - 1` would wrap) or
+// when the position lies beyond the input.
+fn (mut p Parser) last() !u8 {
+	pos := p.cur_pos()
+	if pos == 0 {
+		return error('Nothing consumed yet')
+	}
+	if pos > p.input.len {
+		return error('Buffer too small')
+	}
+	return p.input[pos - 1]
+}
+
+// pred consumes the next byte when `func` accepts it.
+fn (mut p Parser) pred(func fn (u8) bool) bool {
+	if p.end_of_input() || !func(p.input[p.cur_pos()]) {
+		return false
+	}
+
+	p.advance(1) // 1 char
+	return true
+}
+
+// many consumes `s` when the input continues with exactly that text.
+fn (mut p Parser) many(s string) bool {
+	if p.input.len < p.cur_pos() + u32(s.len) {
+		return false
+	}
+
+	if s != p.input[p.cur_pos() .. p.cur_pos() + u32(s.len)] {
+		return false
+	}
+
+	p.advance(u32(s.len))
+	return true
+}
+
+// any consumes and returns the next byte, or `none` at the end of the input.
+fn (mut p Parser) any() ?u8 {
+	if p.end_of_input() {
+		return none
+	}
+	c := p.input[p.cur_pos()]
+	p.advance(1) // 1 char
+	return c
+}
+
+// ===== Tokenizer =====
+
+// Tokenizer turns the input of the embedded Parser into Tokens.
+pub struct Tokenizer {
+	Parser
+}
+
+// next consumes and returns the next byte, or `none` at the end of the input.
+fn (mut t Tokenizer) next() ?u8 {
+	pos := t.cur_pos()
+	if t.end_of_input() || pos > t.input.len {
+		return none
+	}
+	c := t.input[pos]
+	t.advance(1) // 1 char
+	return c
+}
+
+// accept commits the active frame and returns a token of kind `tag` that
+// starts at `start`.
+fn (mut t Tokenizer) accept(start u32, tag VuaToken) Token {
+	t.commit_frame()
+	return Token{ start, tag }
+}
+
+// fail builds a syntax error at the current line and column, then discards
+// the active frame so that the failed attempt consumes no input.
+fn (mut t Tokenizer) fail(m string) SyntaxError {
+	e := t.syntax_error(m)
+	t.pop_frame()
+	return e
+}
+
+// parse_all skips leading whitespace and returns the next token; at the end
+// of the input it returns an `.eof` token.
+//
+// Every frame opened here or in the parse_* helpers is committed on success
+// and discarded on failure, so the frame depth is the same before and after
+// the call instead of growing by one per token.
+pub fn (mut t Tokenizer) parse_all() !Token {
+	t.skip_ws()
+
+	start_index := t.push_frame()!
+
+	c := t.any() or {
+		t.pop_frame()
+		return Token{ start_index, .eof }
+	}
+
+	match true {
+		c == `"` {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_str()
+		}
+		c in "0123456789".bytes() {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_num()
+		}
+		c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".bytes() {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_id()
+		}
+		c == `{` {
+			return t.accept(start_index, .table_start)
+		}
+		c == `}` {
+			return t.accept(start_index, .table_end)
+		}
+		c == `[` {
+			return t.accept(start_index, .angle_bracket_left)
+		}
+		c == `]` {
+			return t.accept(start_index, .angle_bracket_right)
+		}
+		c == 10 { // LF
+			return t.accept(start_index, .newline)
+		}
+		else {
+			$if windows {
+				if c == 13 && t.term(10) { // CRLF
+					return t.accept(start_index, .newline)
+				}
+			}
+
+			t.rollback()
+			t.commit_frame()
+			return t.parse_op()
+		}
+	}
+}
+
+// parse_op reads `,`, a run of one to three `.`, or a symbolic operator.
+//
+// `,` and `.` are matched before the operator scan: they are not part of the
+// scanned alphabet, so the `.`, `..`, `...` and `,` cases could never match
+// and such input always failed as "invalid operator ''". `<` and `>` are one
+// byte long and are therefore tagged `.operator`, not `.operator_len2`.
+fn (mut t Tokenizer) parse_op() !Token {
+	start_index := t.push_frame()!
+
+	if t.term(','[0]) {
+		return t.accept(start_index, .comma)
+	}
+	if t.term('.'[0]) {
+		mut dots := 1
+		for dots < 3 && t.term('.'[0]) {
+			dots++
+		}
+		tag := match dots {
+			1 { VuaToken.dot }
+			2 { VuaToken.operator_len2 }
+			else { VuaToken.operator_len3 }
+		}
+		return t.accept(start_index, tag)
+	}
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+	for t.one_of("+-*/%^<>=~#") {
+		s << t.last()!
+	}
+
+	match s.bytestr() {
+		"+", "-", "*", "/", "%", "^", "#", "=", "<", ">" {
+			return t.accept(start_index, .operator)
+		}
+		"==", "~=", "<=", ">=" {
+			return t.accept(start_index, .operator_len2)
+		}
+		else {
+			return t.fail("invalid operator '${s.bytestr()}'")
+		}
+	}
+}
+
+// parse_id reads an ASCII letter or `_` followed by letters, digits or `_`,
+// and tags it as `true`, `false`, `nil`, a reserved keyword or an identifier.
+fn (mut t Tokenizer) parse_id() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+
+	if t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.term('_'[0]) {
+		s << t.last() or { return t.fail("invalid identifier") }
+	} else {
+		return t.fail("invalid identifier")
+	}
+
+	for t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.range('0'[0], '9'[0]) || t.term('_'[0]) {
+		s << t.last() or { break }
+	}
+
+	// the identifier is complete: hand the consumed bytes to the parent frame
+	t.commit_frame()
+	return match s.bytestr() {
+		"true" { Token{ start_index, .true } }
+		"false" { Token{ start_index, .false } }
+		"nil" { Token{ start_index, .nil } }
+		else {
+			if s.bytestr() in constants.keywords {
+				return Token{start_index,.reserved_keyword}
+			}
+			return Token{start_index,.identifier}
+		}
+	}
+}
+
+// parse_num reads digits (`.number`) or digits with one fractional part
+// (`.decimal`). A `.` needs a digit after it, and a second `.` is an error.
+fn (mut t Tokenizer) parse_num() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+
+	if t.one_of("0123456789") {
+		s << t.last()!
+	} else {
+		return t.fail("invalid number")
+	}
+
+	mut is_decimal := false
+
+	loop:
+	for {
+		if t.one_of("0123456789") {
+			s << t.last() or { break loop }
+			continue
+		}
+
+		if t.term('.'[0]) {
+			if is_decimal {
+				return t.fail("invalid number")
+			}
+			s << '.'[0]
+
+			if !t.one_of("0123456789") {
+				return t.fail("invalid number")
+			}
+
+			is_decimal = true
+			s << t.last() or { break loop }
+
+			continue
+		}
+		break
+	}
+
+	if s.len == 0 {
+		return t.fail("invalid number")
+	}
+
+	tag := if is_decimal { VuaToken.decimal } else { VuaToken.number }
+	return t.accept(start_index, tag)
+}
+
+// parse_str reads a double-quoted string, including both quotes.
+//
+// Scanning stops at the first closing quote; input that ends before one is
+// found is an "unclosed string" error. The former loop consumed the rest of
+// the input and only inspected its last byte, so text after a string was
+// swallowed, a lone `"` was accepted, and other unclosed strings came back
+// as an `.eof` token.
+fn (mut t Tokenizer) parse_str() !Token {
+	start_index := t.push_frame()!
+
+	if !t.term('"'[0]) {
+		return t.fail("invalid string")
+	}
+
+	for !t.end_of_input() {
+		if t.term('"'[0]) {
+			return t.accept(start_index, .string)
+		}
+		t.advance(1) // part of the string body
+	}
+
+	return t.fail("unclosed string")
+}
diff --git a/v.mod b/v.mod
new file mode 100644
index 0000000..987d81a
--- /dev/null
+++ b/v.mod
@@ -0,0 +1,7 @@
+Module {
+	name: 'vua'
+	description: 'a programming language'
+	version: '0.0.0'
+	license: 'BSD-3'
+	dependencies: []
+}