first commit

2025-06-19 22:38:38 -05:00 · 2025-06-19 22:38:38 -05:00 · 32823fed27
commit 32823fed27
11 changed files with 853 additions and 0 deletions
--- a/.editorconfig
+++ b/.editorconfig
@ -0,0 +1,8 @@
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.v]
+indent_style = tab
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1,8 @@
+* text=auto eol=lf
+*.bat eol=crlf
+
+*.v linguist-language=V
+*.vv linguist-language=V
+*.vsh linguist-language=V
+v.mod linguist-language=V
+.vdocignore linguist-language=ignore
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,24 @@
+# Binaries for programs and plugins
+main
+vua
+*.exe
+*.exe~
+*.so
+*.dylib
+*.dll
+
+# Ignore binary output folders
+bin/
+
+# Ignore common editor/system specific metadata
+.DS_Store
+.idea/
+.vscode/
+*.iml
+
+# ENV
+.env
+
+# vweb and database
+*.db
+*.js
--- a/ast.v
+++ b/ast.v
@ -0,0 +1,102 @@
+module ast
+import tokenizer { Token, VuaToken }
+
+// Nil is an empty marker struct; it carries no data.
+struct Nil {}
+
+// String wraps a V string in `contents`.
+struct String {
+	contents string
+}
+
+// Number keeps its payload as a string in `contents`.
+// NOTE(review): presumably the literal's source text rather than a parsed value — confirm.
+struct Number {
+	contents string
+}
+
+// Boolean wraps a single bool in `value`.
+struct Boolean {
+  value bool
+}
+
+// TableFlags is a packed group of five booleans attached to a Table.
+// NOTE(review): the names suggest the role a table plays (array, object,
+// enum, class, struct); whether several may be set at once is not shown here.
+@[packed]
+struct TableFlags {
+	array  bool
+	object bool
+	enum   bool
+	class  bool
+	struct bool
+}
+
+// Table pairs a set of TableFlags with a list of named variables.
+struct Table {
+	flag   TableFlags
+	data []Variable
+}
+
+// Class groups property definitions with method definitions.
+struct Class {
+	properties []VariableDef
+	methods    []Function
+}
+
+// Function is a named function: its parameter definitions plus a body kept
+// as a flat list of tokens.
+struct Function {
+	name   string
+	params []VariableDef
+	body   []Token
+}
+
+// FunctionRef refers to a function by numeric index.
+// NOTE(review): the collection this index points into is not visible here.
+struct FunctionRef {
+	index u32
+}
+
+// Lambda is an unnamed function: parameters, a token body and an optional
+// FunctionRef in `scope`.
+// NOTE(review): `scope` is presumably the enclosing function — confirm.
+struct Lambda {
+	params []VariableDef
+	body   []Token
+	scope  ?FunctionRef
+}
+
+// VariableDef declares a variable: a name and a type tag.
+struct VariableDef {
+	name string
+	type VuaTaggedType
+}
+
+// Variable binds a name to an Expr value.
+struct Variable {
+	// maybe this name should be a null terminated string
+	name  string
+	value Expr
+}
+
+// Expr pairs a numeric index with a type tag.
+// NOTE(review): the storage that `index` addresses is not visible here — confirm.
+struct Expr {
+  index u32
+	type  VuaTaggedType
+}
+
+// VuaTaggedType is the one-byte type tag used by VariableDef and Expr.
+enum VuaTaggedType as u8 {
+	nil
+	string
+	number
+	boolean
+	table
+	function
+}
+
+
+// TokenIndex is an alias of u32.
+// NOTE(review): presumably an index into a token list — confirm.
+type TokenIndex = u32
+
+// Nodes is a heap-allocated placeholder; no fields are defined yet.
+@[heap]
+struct Nodes {
+}
+
+// Tokens is a heap-allocated placeholder; no fields are defined yet.
+@[heap]
+struct Tokens {
+}
+
+// AST bundles the token store, the node store and an integer position.
+struct AST {
+	tokens Tokens
+	nodes  Nodes
+	pos    int
+}
+
+// NodeData is the (currently empty) union reserved for per-node payloads.
+union NodeData {
+}
+
+// Node couples a token kind with its NodeData payload.
+struct Node {
+	token_type VuaToken
+	data 		   NodeData
+}
+
--- a/constants.v
+++ b/constants.v
@ -0,0 +1,35 @@
+module constants
+
+// keywords lists the reserved words of the language.
+// The tokenizer's identifier scanner checks membership in this list to decide
+// between `.reserved_keyword` and `.identifier`; it matches `true`, `false`
+// and `nil` before consulting the list, so the `true` and `nil` entries here
+// are never reached from that path.
+pub const keywords = [
+	'and',
+	'assert',
+	'begin',
+	'break',
+	'continue',
+	'class',
+	'do',
+	'else',
+	'elseif',
+	'end',
+	'foreach',
+	'forever',
+	'function',
+	'if',
+	'import',
+	'in',
+	'local',
+	'nil',
+	'not',
+	'module',
+	'match',
+	'or',
+	'repeat',
+	'return',
+	'then',
+	'true',
+	'pragma',
+	'record',
+	'until',
+	'while',
+]
+
--- a/main.v
+++ b/main.v
@ -0,0 +1,9 @@
+module main
+import repl
+
+// main prints the REPL banner and a separator line, then hands control to the
+// REPL loop. An error returned by the loop is propagated out of main.
+fn main() {
+	shell := repl.Repl{}
+	println(shell.welcome_msg)
+	println('--------------')
+	shell.loop()!
+}
--- a/multiarray.v
+++ b/multiarray.v
@ -0,0 +1,100 @@
+module multiarray
+
+// MultiArray keeps one raw buffer per field of T.
+//   len    - number of stored entries
+//   cap    - number of entries each buffer has room for
+//   tags   - entry index -> the tag string passed to `add`
+//   fields - `<field name>s` -> buffer for that field
+struct MultiArray[T] {
+mut:
+	len    int
+	cap    int
+	tags   map[int]string
+	fields map[string]MultiArrayField
+}
+// MultiArrayField wraps the untyped pointer to one field buffer.
+struct MultiArrayField {
+	data voidptr
+}
+
+// MultiArray.new builds a MultiArray with the given `len` and `cap`. For every
+// field of T it registers a zero-filled buffer of `cap * sizeof(T)` bytes under
+// the key `<field name>s`.
+// `init` is accepted but never read.
+// NOTE(review): it presumably exists so that T can be inferred at the call site.
+fn MultiArray.new[T](len int, cap int, init T) MultiArray[T] {
+	mut ma := MultiArray[T]{
+		len: len
+		cap: cap
+	}
+	$for field in T.fields {
+		key := field.name + 's'
+		buffer := unsafe { vcalloc(u64(cap) * sizeof(T)) }
+		ma.fields[key] = MultiArrayField{
+			data: buffer
+		}
+	}
+	return ma
+}
+
+// add appends `item` under the tag `table`.
+//
+// The item is copied into the same slot (the current `len`) of every field
+// buffer, and only afterwards is the tag recorded and `len` advanced. `len`
+// is bumped at most once per call, so no buffer is written at a different
+// index than the others or past the slot that was just made available.
+// When `table` does not name a field of T the entry is not registered
+// (`len` and `tags` stay unchanged).
+fn (mut ma MultiArray[T]) add[T](table string, item T) {
+	if ma.len >= ma.cap {
+		unsafe { ma.grow() }
+	}
+	slot := ma.len
+	mut known_tag := false
+	$for field in T.fields {
+		if field.name == table {
+			known_tag = true
+		}
+		field_name := field.name + 's'
+		field_data_ptr := unsafe { ma.fields[field_name].data }
+		// Copy from the address of `item`; the source must be a pointer.
+		unsafe { vmemcpy(&u8(field_data_ptr) + sizeof(T) * u64(slot), &item, sizeof(T)) }
+	}
+	if known_tag {
+		ma.tags[slot] = table
+		ma.len++
+	}
+}
+
+// grow doubles the capacity (or sets it to 1 when it was 0). For each field
+// of T it allocates a zero-filled buffer of the new size, copies the first
+// `len` entries across and stores the new buffer in `fields`.
+// The previous buffer is not released here.
+@[unsafe]
+fn (mut ma MultiArray[T]) grow() {
+	mut next_cap := ma.cap * 2
+	if ma.cap == 0 {
+		next_cap = 1
+	}
+	$for field in T.fields {
+		key := field.name + 's'
+		previous := &T(ma.fields[key].data)
+		replacement := vcalloc(u64(next_cap) * sizeof(T))
+		vmemcpy(replacement, previous, u64(ma.len) * sizeof(T))
+		ma.fields[key] = MultiArrayField{
+			data: replacement
+		}
+	}
+	ma.cap = next_cap
+}
+
+// get returns the entry at `index`, or `none` when `index` is outside
+// `0 .. len`. The result is filled by copying `sizeof(T)` bytes from slot
+// `index` of each field buffer in turn, so the last field's buffer decides
+// the final contents.
+fn (ma MultiArray[T]) get(index int) ?T {
+	in_bounds := index >= 0 && index < ma.len
+	if !in_bounds {
+		return none
+	}
+	mut item := T{}
+	$for field in T.fields {
+		key := field.name + 's'
+		base := unsafe { ma.fields[key].data }
+		unsafe { vmemcpy(&item, &u8(base) + sizeof(T) * u64(index), sizeof(T)) }
+	}
+	return item
+}
+
+// free releases every non-nil field buffer, replaces it with a nil entry and
+// resets `len` and `cap` to 0. The `tags` map is left untouched.
+@[unsafe]
+fn (mut ma MultiArray[T]) free() {
+	$for field in T.fields {
+		key := field.name + 's'
+		buffer := ma.fields[key].data
+		if buffer != nil {
+			free(buffer)
+			ma.fields[key] = MultiArrayField{nil}
+		}
+	}
+	ma.len = 0
+	ma.cap = 0
+}
+
+// iter returns an Iterator positioned at index 0. The iterator stores the
+// MultiArray by value (see the `ma` field of Iterator).
+fn (mut ma MultiArray[T]) iter() Iterator[T] {
+	return Iterator[T]{ma,0}
+}
+
+// Iterator walks a MultiArray from index `i` upwards.
+struct Iterator[T] {
+mut:
+	ma MultiArray[T]
+	i  int
+}
+// IteratorEntry is what the iterator yields: the tag recorded for the entry
+// and the entry's data.
+struct IteratorEntry[T] {
+	tag  string
+	data T
+}
+
+// next yields the entry at the current index together with its tag, or `none`
+// once the index reaches `len` (or `get` fails). The index is advanced on
+// every call, including the calls that return `none`.
+fn (mut it Iterator[T]) next() ?IteratorEntry[T] {
+	defer {
+		it.i++
+	}
+	exhausted := it.i >= it.ma.len
+	if exhausted {
+		return none
+	}
+	entry := it.ma.get(it.i) or { return none }
+	return IteratorEntry[T]{
+		tag: it.ma.tags[it.i]
+		data: entry
+	}
+}
--- a/multiarray_test.v
+++ b/multiarray_test.v
@ -0,0 +1,31 @@
+module multiarray
+
+// SoA is the element type used by the test: a union with one member per tag
+// name passed to `add` below ('nil', 'int', 'bool').
+union SoA {
+	nil  nil = unsafe { nil }
+	int  int
+	bool bool
+}
+
+// test_basics is a smoke test: it fills a MultiArray with eight entries under
+// three tags, iterates over it and prints what it finds. It contains no
+// assertions, so it only fails if something panics.
+fn test_basics() {
+	mut arr := MultiArray.new(0, 10, SoA{})
+
+  // add three ints, one bool and four nils
+	arr.add('int' , SoA{ int: 42 })
+	arr.add('int' , SoA{ int: 43 })
+	arr.add('int' , SoA{ int: 44 })
+	arr.add('bool', SoA{ bool: true })
+	arr.add('nil' , SoA{})
+	arr.add('nil' , SoA{})
+	arr.add('nil' , SoA{})
+	arr.add('nil' , SoA{})
+
+	// walk every entry; only the 'int' entries have their payload printed
+	it := arr.iter()
+	for item in it {
+		println('Iterating over MultiArray[${item.tag}]:')
+		if item.tag == 'int' {
+			println('${item.tag}s > ${unsafe{item.data.int}}')
+		}
+	}
+
+	println('Created MultiArray with len: $arr.len, cap: $arr.cap')
+}
--- a/repl.v
+++ b/repl.v
@ -0,0 +1,96 @@
+module repl
+
+import tokenizer as _ { Token, Tokenizer, VuaToken }
+import readline { read_line }
+
+// Repl is the read-eval-print loop. `welcome_msg` is the banner text; it
+// defaults to "Welcome to the REPL!".
+pub struct Repl {
+pub:
+	welcome_msg string = "Welcome to the REPL!"
+}
+
+// read tokenizes `input` and returns the collected tokens.
+//
+// Scanning stops at end of input, at an `.eof` token, or at the first
+// `.newline` token seen after at least one token was collected; newline
+// tokens before that are skipped. Tokenizer errors are propagated, and an
+// error is returned when no token was collected at all.
+// Caller owns returned memory.
+pub fn (r Repl) read(input string) ![]Token {
+	mut lexer := Tokenizer{input: input}
+	mut collected := []Token{
+		len: 0,
+		cap: 128,
+		init: Token{0, .eof},
+	}
+
+	for !lexer.end_of_input() {
+		tok := lexer.parse_all()!
+		if tok.tag == .eof {
+			break
+		}
+		if tok.tag != .newline {
+			collected << tok
+			continue
+		}
+		// a newline ends the statement only once something was read
+		if collected.len > 0 {
+			break
+		}
+	}
+
+	if collected.len == 0 {
+		return error('No input whatsoever')
+	}
+	return collected
+}
+
+// eval renders a token list as a human-readable, comma-separated summary.
+//
+// Each token contributes one label (for example `number`, `identifier`, `{`)
+// and the labels are joined with ", ". The separator only appears between
+// labels: there is no leading ", " and no doubled spacing.
+// Returns 'No output' when `ast` is empty. `input` is currently unused.
+pub fn (r Repl) eval(ast []Token, input string) string {
+	if ast.len == 0 {
+		return 'No output'
+	}
+
+	mut labels := []string{len: 0, cap: ast.len}
+	for token in ast {
+		label := match token.tag {
+			.eof { 'EOF' }
+			.nil { 'nil' }
+			.string { 'string' }
+			.number { 'number' }
+			.decimal { 'decimal' }
+			.true { 'true' }
+			.false { 'false' }
+			.table_start { '{' }
+			.table_end { '}' }
+			.identifier { 'identifier' }
+			.comma { ',' }
+			.newline { '\n' }
+			.equal { '=' }
+			.dot { '.' }
+			.angle_bracket_left { '[' }
+			.angle_bracket_right { ']' }
+			.reserved_keyword { 'reserved_keyword' }
+			.operator, .operator_len2, .operator_len3 { 'operator' }
+		}
+		labels << label
+	}
+
+	return labels.join(', ')
+}
+
+// print is the "print" step of the REPL. For now it returns its argument
+// unchanged. The commented-out signature below is an alternative that takes
+// a VuaType instead of a string.
+//fn (r Repl) print(t VuaType) {
+pub fn (r Repl) print(t string) string {
+	return t
+}
+
+// loop runs the interactive session: prompt, read, eval, print, repeat.
+//
+// The session ends when the line cannot be read (e.g. end of input) or when
+// the user types `exit`. A line that fails to tokenize, or that contains no
+// tokens, is reported on stderr and the prompt is shown again, so one bad
+// line does not terminate the whole REPL.
+pub fn (r Repl) loop() ! {
+	for {
+		input := read_line('vua>> ') or {
+			println('\nGoodbye')
+			break
+		}
+		if input.trim_space() == 'exit' {
+			println('\nGoodbye')
+			break
+		}
+		tokens := r.read(input) or {
+			// keep the session alive; just tell the user what went wrong
+			eprintln(err.msg())
+			continue
+		}
+		expr := r.eval(tokens, input)
+		output := r.print(expr)
+		println(output)
+	}
+}
--- a/tokenizer.v
+++ b/tokenizer.v
@ -0,0 +1,433 @@
+module tokenizer
+import constants
+
+// Token is one lexical token: the input offset where it starts and its kind.
+// The token text itself is not stored.
+@[packed]
+pub struct Token {
+pub:
+	start u32
+	tag   VuaToken
+}
+
+// VuaToken enumerates the token kinds the tokenizer can produce.
+// Note: `angle_bracket_left` / `angle_bracket_right` are emitted for the
+// characters `[` and `]` (see Tokenizer.parse_all).
+pub enum VuaToken {
+	eof
+	nil
+	string
+	number
+	decimal
+	true
+	false
+	table_start
+	table_end
+	identifier
+	comma
+	newline
+	equal
+	dot
+	angle_bracket_left
+	angle_bracket_right
+	reserved_keyword
+	operator
+	operator_len2
+	operator_len3
+}
+
+// Parser is a backtracking cursor over `input`.
+//   input   - the text being scanned
+//   max_pos - furthest input offset reached so far
+//   stack   - saved input offsets, one per frame (256 slots initially)
+//   frame   - index of the active slot in `stack`
+//   lc      - line counter, starts at 1
+//   rc      - column counter, reset to 0 at each line break
+pub struct Parser {
+pub:
+	input string
+pub mut:
+	max_pos u32
+	stack []u32 = []u32{len: 256, cap: 256, init: 0}
+	frame u32
+mut:
+	lc int = 1
+	rc int
+}
+
+// cur_pos returns the input offset stored in the active frame.
+fn (mut p Parser) cur_pos() u32 {
+	return p.stack[p.frame]
+}
+
+// advance moves the active frame's offset forward by `delta`, adds `delta`
+// to the column counter and raises `max_pos` when the new offset exceeds it.
+fn (mut p Parser) advance(delta u32) {
+	p.rc += int(delta)
+	moved_to := p.stack[p.frame] + delta
+	p.stack[p.frame] = moved_to
+	if moved_to > p.max_pos {
+		p.max_pos = moved_to
+	}
+}
+
+// push_frame opens a new frame that starts at the current offset and returns
+// that offset.
+//
+// When the frame index runs past the end of `stack`, the stack is extended in
+// place by half its length (at least one slot). Appending keeps every saved
+// offset and zero-fills the new slots, so no manual copy is needed.
+// Returns the error 'Buffer too small' when the frame index exceeds the
+// input length.
+@[manualfree]
+fn (mut p Parser) push_frame() !u32 {
+	p.frame += 1
+	for p.frame >= u32(p.stack.len) {
+		extra := if p.stack.len > 1 { p.stack.len >> 1 } else { 1 }
+		p.stack << []u32{len: extra, init: 0}
+	}
+	if p.frame > p.input.len {
+		return error('Buffer too small')
+	}
+
+	p.stack[p.frame] = p.stack[p.frame - 1]
+	return p.cur_pos()
+}
+
+// pop_frame drops the active frame and returns to the previous one.
+// It does nothing when the parser is already at frame 0.
+fn (mut p Parser) pop_frame() {
+	if p.frame < 1 {
+		return
+	}
+	p.frame -= 1
+}
+
+// commit_frame closes the active frame and copies its offset into the
+// previous frame, so the progress made inside the frame is kept.
+// At frame 0 there is nothing to commit and the call is a no-op; this matches
+// pop_frame and avoids wrapping the unsigned frame index below zero.
+fn (mut p Parser) commit_frame() {
+	if p.frame == 0 {
+		return
+	}
+	p.frame -= 1
+	p.stack[p.frame] = p.stack[p.frame + 1]
+}
+
+// free releases the frame stack. The parser must not be used afterwards.
+@[unsafe]
+fn (mut p Parser) free() {
+	p.stack.free()
+}
+
+// SyntaxError is the error produced by the tokenizer.
+//   line - value of the parser's line counter when the error was created
+//   row  - value of the parser's column counter (`rc`) at that moment
+//   m    - description of the problem
+struct SyntaxError {
+	Error
+	line int
+	row int
+	m string
+}
+
+// msg formats the error as "SyntaxError: <m> -- at line <line>-<row>".
+fn (err SyntaxError) msg() string {
+	return "SyntaxError: ${err.m} -- at line ${err.line}-${err.row}"
+}
+
+// syntax_error builds a SyntaxError carrying `m` and the parser's current
+// line and column counters.
+fn (mut p Parser) syntax_error(m string) SyntaxError {
+	at_line := p.lc
+	at_column := p.rc
+	return SyntaxError{
+		line: at_line
+		row: at_column
+		m: m
+	}
+}
+
+// rollback resets the active frame's offset to the offset of the previous
+// frame, or to 0 when the active frame is frame 0.
+fn (mut p Parser) rollback() {
+	mut restored := u32(0)
+	if p.frame != 0 {
+		restored = p.stack[p.frame - 1]
+	}
+	p.stack[p.frame] = restored
+}
+
+// ===== Char matching =====
+
+// end_of_input reports whether the current offset is at or past the end of
+// `input`.
+pub fn (mut p Parser) end_of_input() bool {
+	return p.cur_pos() >= p.input.len
+}
+
+// term consumes the next byte when it equals `c` and returns true; otherwise
+// (including at end of input) it consumes nothing and returns false.
+fn (mut p Parser) term(c u8) bool {
+	if p.end_of_input() {
+		return false
+	}
+	if p.input[p.cur_pos()] != c {
+		return false
+	}
+	p.advance(1)
+	return true
+}
+
+// one_of consumes the next byte when it is one of the bytes of `s` and
+// returns true; otherwise (including at end of input) it consumes nothing
+// and returns false.
+fn (mut p Parser) one_of(s string) bool {
+	if p.end_of_input() {
+		return false
+	}
+	if !s.contains_u8(p.input[p.cur_pos()]) {
+		return false
+	}
+	p.advance(1)
+	return true
+}
+
+// none_of consumes the next byte when it is NOT one of the bytes of `s` and
+// returns true; otherwise (including at end of input) it consumes nothing
+// and returns false.
+fn (mut p Parser) none_of(s string) bool {
+	if p.end_of_input() {
+		return false
+	}
+	if s.contains_u8(p.input[p.cur_pos()]) {
+		return false
+	}
+	p.advance(1)
+	return true
+}
+
+// range consumes the next byte when it lies in the inclusive interval
+// `low ..= high` and returns true; otherwise (including at end of input) it
+// consumes nothing and returns false.
+fn (mut p Parser) range(low u8, high u8) bool {
+	if p.end_of_input() {
+		return false
+	}
+	current := p.input[p.cur_pos()]
+	if current < low || current > high {
+		return false
+	}
+	p.advance(1)
+	return true
+}
+
+// ===== Token extraction =====
+
+// save_stash returns the slice of `input` from offset `from` up to (not
+// including) the current offset.
+fn (mut p Parser) save_stash(from u32) string {
+	return p.input[from..p.cur_pos()]
+}
+
+// skip_ws consumes spaces, tabs, carriage returns and line feeds, keeping the
+// line and column counters up to date.
+//
+// Every LF starts a new line. On Windows a lone CR also starts a new line,
+// while the CR of a CRLF pair is skipped without counting: the LF that
+// follows does the counting, so a CRLF pair advances the line counter once.
+fn (mut p Parser) skip_ws() {
+	for !p.end_of_input() {
+		c := p.input[p.cur_pos()]
+
+		$if windows {
+			if c == u8(13) { // CR
+				next_is_lf := p.cur_pos() + 1 < u32(p.input.len)
+					&& p.input[p.cur_pos() + 1] == u8(10)
+				if !next_is_lf {
+					p.lc += 1
+					p.rc = 0 // reset rows
+				}
+				p.advance(1) // skip CR
+				continue
+			}
+		}
+
+		if c == u8(10) { // LF
+			p.lc += 1
+			p.rc = 0 // reset rows
+		}
+
+		if c in ' \r\n\t'.bytes() {
+			p.advance(1)
+			continue
+		}
+
+		break
+	}
+}
+
+// last returns the byte just before the current offset, i.e. the byte that
+// was consumed most recently in the active frame.
+//
+// Errors:
+//   'Buffer too small'          - the current offset is past the end of input
+//   'No character consumed yet' - the current offset is 0, so there is no
+//                                 previous byte to return
+fn (mut p Parser) last() !u8 {
+	pos := p.cur_pos()
+	if pos > p.input.len {
+		return error('Buffer too small')
+	}
+	if pos == 0 {
+		// `pos - 1` would wrap around on an unsigned offset
+		return error('No character consumed yet')
+	}
+	return p.input[pos - 1]
+}
+
+// pred consumes the next byte when `func` returns true for it and returns
+// true; otherwise (including at end of input, where `func` is not called) it
+// consumes nothing and returns false.
+fn (mut p Parser) pred(func fn (u8) bool) bool {
+	if p.end_of_input() {
+		return false
+	}
+	accepted := func(p.input[p.cur_pos()])
+	if !accepted {
+		return false
+	}
+	p.advance(1)
+	return true
+}
+
+// many consumes the literal `s` when the input continues with exactly that
+// text and returns true; otherwise (including when fewer than `s.len` bytes
+// remain) it consumes nothing and returns false.
+fn (mut p Parser) many(s string) bool {
+	width := u32(s.len)
+	from := p.cur_pos()
+	to := from + width
+	if p.input.len < to {
+		return false
+	}
+	if p.input[from..to] != s {
+		return false
+	}
+	p.advance(width)
+	return true
+}
+
+// any consumes and returns the next byte, or returns `none` at end of input.
+fn (mut p Parser) any() ?u8 {
+	if !p.end_of_input() {
+		ch := p.input[p.cur_pos()]
+		p.advance(1)
+		return ch
+	}
+	return none
+}
+
+// ===== Tokenizer =====
+
+// Tokenizer embeds Parser and adds the token-level scanning methods.
+pub struct Tokenizer {
+	Parser
+}
+
+// next consumes and returns the next input byte, or `none` at end of input.
+// It is what lets a Tokenizer be used in a `for c in t` loop.
+fn (mut t Tokenizer) next() ?u8 {
+	at := t.cur_pos()
+	if t.end_of_input() {
+		return none
+	}
+	if at > t.input.len {
+		return none
+	}
+	ch := t.input[at]
+	t.advance(1)
+	return ch
+}
+
+// parse_all scans and returns the next token.
+//
+// Leading whitespace is skipped first, then the first byte selects the
+// scanner: `"` -> string, digit -> number, letter or `_` -> identifier,
+// brackets and braces -> single-character tokens, anything else -> operator.
+// For the delegated cases the peeked byte is un-read (rollback + commit)
+// so the specialised scanner sees the token from its first byte.
+// Returns an `.eof` token at end of input and propagates scanner errors.
+// Nothing is written to stdout while scanning.
+// Note: skip_ws already consumes line feeds, so the newline branches below
+// are only reached if that changes.
+pub fn (mut t Tokenizer) parse_all() !Token {
+	t.skip_ws()
+
+	start_index := t.push_frame()!
+
+	c := t.any() or { return Token{ start_index, .eof } }
+
+	match true {
+		c == `"` {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_str()
+		}
+		c in "0123456789".bytes() {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_num()
+		}
+		c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".bytes() {
+			t.rollback()
+			t.commit_frame()
+			return t.parse_id()
+		}
+		c == `{` {
+			return Token{ start_index, .table_start }
+		}
+		c == `}` {
+			return Token{ start_index, .table_end }
+		}
+		c == `[` {
+			return Token{ start_index, .angle_bracket_left }
+		}
+		c == `]` {
+			return Token{ start_index, .angle_bracket_right }
+		}
+		c == 10 { // LF
+			return Token{ start_index, .newline }
+		}
+		else {
+			$if windows {
+				if c == 13 && t.term(10) { // CRLF
+					return Token{ start_index, .newline }
+				}
+			}
+
+			t.rollback()
+			t.commit_frame()
+			return t.parse_op()
+		}
+	}
+}
+
+// parse_op scans an operator or punctuation token.
+//
+// A comma is matched on its own. Otherwise the longest run of operator
+// characters (including `.`) is collected and classified:
+//   one character   -> `.operator`      (+ - * / % ^ # = < >)
+//   two characters  -> `.operator_len2` (== ~= <= >= ..)
+//   three characters-> `.operator_len3` (...)
+//   a single `.`    -> `.dot`
+// Any other run, including an empty one, is reported as a syntax error.
+fn (mut t Tokenizer) parse_op() !Token {
+	start_index := t.push_frame()!
+
+	// A comma is always a token by itself, so handle it before the greedy scan.
+	if t.term(','[0]) {
+		return Token{ start_index, .comma }
+	}
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+	for t.one_of("+-*/%^<>=~#.") {
+		s << t.last()!
+	}
+
+	match s.bytestr() {
+		"+", "-", "*", "/", "%", "^", "#", "=", "<", ">" {
+			return Token{ start_index, .operator }
+		}
+		"==", "~=", "<=", ">=", ".." {
+			return Token{ start_index, .operator_len2 }
+		}
+		"..." {
+			return Token{ start_index, .operator_len3 }
+		}
+		"." {
+			return Token{ start_index, .dot }
+		}
+		else {
+			return t.syntax_error("invalid operator '${s.bytestr()}'")
+		}
+	}
+}
+
+// parse_id scans an identifier: a letter or `_` followed by any number of
+// letters, digits or `_`.
+//
+// The words `true`, `false` and `nil` yield their own token kinds; any other
+// word found in `constants.keywords` yields `.reserved_keyword`; everything
+// else is an `.identifier`. A first byte that cannot start an identifier is a
+// syntax error.
+fn (mut t Tokenizer) parse_id() !Token {
+	start_index := t.push_frame()!
+
+	mut name := []u8{len: 0, cap: 32, init: 0}
+
+	starts_ok := t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.term('_'[0])
+	if !starts_ok {
+		return t.syntax_error("invalid identifier")
+	}
+	name << t.last() or { return t.syntax_error("invalid identifier") }
+
+	for t.range('a'[0], 'z'[0]) || t.range('A'[0], 'Z'[0]) || t.range('0'[0], '9'[0]) || t.term('_'[0]) {
+		name << t.last() or { break }
+	}
+
+	word := name.bytestr()
+	if word == "true" {
+		return Token{ start_index, .true }
+	}
+	if word == "false" {
+		return Token{ start_index, .false }
+	}
+	if word == "nil" {
+		return Token{ start_index, .nil }
+	}
+	if word in constants.keywords {
+		return Token{start_index,.reserved_keyword}
+	}
+	return Token{start_index,.identifier}
+}
+
+// parse_num scans a number: one or more digits, optionally followed by a
+// single `.` and at least one more digit.
+// Returns a `.decimal` token when a fractional part was seen and a `.number`
+// token otherwise. A second `.` or a `.` without a digit after it is a
+// syntax error.
+fn (mut t Tokenizer) parse_num() !Token {
+	start_index := t.push_frame()!
+
+	mut s := []u8{len: 0, cap: 32, init: 0}
+
+	// the first byte must be a digit
+	if t.one_of("0123456789") {
+		s << t.last()!
+	} else {
+		return t.syntax_error("invalid number")
+	}
+
+	mut is_decimal := false
+
+	loop:
+	for {
+		// further integer or fraction digits
+		if t.one_of("0123456789") {
+			s << t.last() or { break loop }
+			continue
+		}
+
+		if t.term('.'[0]) {
+			// only one decimal point is allowed
+			if is_decimal {
+				return t.syntax_error("invalid number")
+			}
+			s << '.'[0]
+
+			// the decimal point must be followed by a digit
+			if !t.one_of("0123456789") {
+				return t.syntax_error("invalid number")
+			}
+
+			is_decimal = true
+			s << t.last() or { break loop }
+
+			continue
+		}
+		// anything else ends the number
+		break
+	}
+
+	// not reachable in practice: the first digit was appended above
+	if s.len == 0 {
+		return t.syntax_error("invalid number")
+	}
+
+	return Token{
+		start: start_index,
+		tag: if is_decimal { .decimal } else { .number },
+	}
+}
+
+// parse_str scans a double-quoted string literal.
+//
+// The opening quote must be the next byte. Bytes are then consumed up to and
+// including the first unescaped closing quote, and a `.string` token is
+// returned; the rest of the input is left for the next token. A backslash
+// escapes the byte that follows it, so `\"` does not end the string.
+// Errors: "invalid string" when the opening quote is missing, "unclosed
+// string" when the input ends before the closing quote.
+fn (mut t Tokenizer) parse_str() !Token {
+	start_index := t.push_frame()!
+
+	if !t.term('"'[0]) {
+		return t.syntax_error("invalid string")
+	}
+
+	for !t.end_of_input() {
+		c := t.any() or { break }
+		if c == `"` {
+			return Token{ start_index, .string }
+		}
+		if c == `\\` && !t.end_of_input() {
+			// skip the escaped byte so an escaped quote does not end the string
+			t.advance(1)
+		}
+	}
+
+	return t.syntax_error("unclosed string")
+}
--- a/v.mod
+++ b/v.mod
@ -0,0 +1,7 @@
+Module {
+	name: 'vua'
+	description: 'a programming language'
+	version: '0.0.0'
+	license: 'BSD-3'
+	dependencies: []
+}