aether/language.zig
2025-05-23 23:33:21 -05:00

440 lines
14 KiB
Zig

const std = @import("std");
const Tokenizer = @import("tokenizer.zig");
const TokenType = Tokenizer.TokenType;
const StringPool = @import("strings.zig");
const StringIndex = StringPool.StringIndex;
const assert = std.debug.assert;
const Self = @This();
/// Placeholder error type. It is declared as an enum and currently has no members.
pub const Error = enum {};
/// The six kinds of JSON value.
/// Used as the tag type of both `JsonValue` and `JsonInput`.
pub const JsonType = enum {
null,
bool,
number,
string,
array,
object,
};
/// Stored form of a JSON value, as kept in the `index` list.
/// Scalars are stored inline; strings, arrays and objects are stored as
/// handles/descriptors that point elsewhere.
pub const JsonValue = union(JsonType) {
null: void,
bool: bool,
number: f64,
/// Handle into `string_index`.
string: StringIndex,
/// Descriptor of a run of entries in `index`.
array: ArraySlice,
/// Descriptor of an object's keys and values.
object: ObjectEntry,
};
/// Self-contained form of a JSON value: strings are byte slices, arrays are
/// slices of `JsonInput` and objects are maps from key text to `JsonInput`.
/// This is what `addValue` accepts and what `getValue` produces.
pub const JsonInput = union(JsonType) {
null: void,
bool: bool,
number: f64,
string: []const u8,
array: []JsonInput,
// NOTE(review): the auto hashing behind `AutoArrayHashMapUnmanaged` is
// documented as rejecting slice keys; `std.StringArrayHashMapUnmanaged(JsonInput)`
// may be what is intended here — confirm before inserting into this map.
object: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput),
};
/// Descriptor of an array stored in `index`.
/// Like `ObjectEntry`, but without keys: the elements are the `len`
/// consecutive entries of `index` beginning at `start`.
pub const ArraySlice = struct {
/// Position in `index` of the first element.
start: usize,
/// Number of elements.
len: usize,
};
/// Descriptor of an object.
/// `property_idx` and `value_idx` are the offsets of the first key and the
/// first value; advance both to reach the next member. The readers in this
/// file step `value_idx` by one entry and `property_idx` by the key's length
/// plus one, so keys and values are expected to be stored in order and back
/// to back.
pub const ObjectEntry = struct {
/// Number of members.
len: usize,
/// Offset of the first key in the property pool.
property_idx: usize,
/// Position in `index` of the first value.
value_idx: usize,
};
/// Boolean switches that relax the parser.
pub const Flags = packed struct {
/// When false, `parse` fails with `error.TrailingComma` if a comma is
/// directly followed by the end of an object.
allow_trailing_comma: bool = false,
};
/// Settings for an instance. Every field is a `comptime` field, so the values
/// below are fixed at compile time.
pub const Options = struct {
// NOTE(review): not referenced by the code visible here; presumably used
// when printing — confirm.
comptime indent_len: usize = 4,
/// Capacity of the stack of open objects kept by `parse`.
comptime max_depth: usize = 256,
/// See `Flags`.
comptime flags: Flags = .{},
};
/// Flat list of every stored value; values are referred to by their position
/// in this list.
index: std.MultiArrayList(JsonValue) = .{},
/// Pool holding the text of string values (`JsonValue.string` handles).
string_index: StringPool = .empty,
/// Pool holding object property names.
property_index: StringPool = .empty,
/// Parser settings; see `Options`.
options: Options = .{},
/// An instance with an empty index, empty pools and default options.
pub const init: Self = .{};
/// Releases everything this instance owns: the value index and both string
/// pools.
///
/// `allocator` must be the allocator the instance was populated with.
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    self.index.deinit(allocator);
    // The value-string pool is filled by `addString`, so it has to be
    // released alongside the property pool or every string value leaks.
    self.string_index.deinit(allocator);
    self.property_index.deinit(allocator);
}
/// Appends `number` to the value index as a `.number` entry.
///
/// Returns the position of the new entry. Fails only when the index cannot
/// grow.
fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, .{ .number = number });
    return slot;
}
/// Stores `bytes` in the property pool and returns the integer value of the
/// handle the pool hands back.
///
/// No entry is added to the value index, but room for one more entry is
/// reserved there (presumably so the value that follows the key can be
/// appended without reallocating — TODO confirm).
fn addProperty(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const handle = try self.property_index.add(allocator, bytes);
    try self.index.ensureUnusedCapacity(allocator, 1);
    const offset: usize = @intFromEnum(handle);
    return offset;
}
/// Stores `bytes` in the value-string pool and appends a `.string` entry
/// holding the resulting handle to the value index.
///
/// Returns the position of the new entry.
fn addString(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const handle = try self.string_index.add(allocator, bytes);
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, .{ .string = handle });
    return slot;
}
/// Stores `object` and returns the position of its `.object` entry.
///
/// Layout produced (this is what `getObject`/`getValue` walk):
///   * all keys are added to the property pool first, in map order, so keys
///     belonging to nested objects cannot end up between them;
///   * one value slot per member is reserved back to back in the index;
///   * each member is built with `addValue` and then moved into its
///     reserved slot, so anything a nested array/object needed stays behind
///     the reserved run;
///   * the object's own entry is appended last.
/// An empty object gets an all-zero descriptor.
///
/// The error set is spelled out because this function is part of the
/// `addValue` -> `addObject` -> `addValue` recursion, which inferred error
/// sets cannot express. This assumes `StringPool.add` can only fail with
/// `error.OutOfMemory` — TODO confirm.
///
/// NOTE(review): readers advance through keys by "length + 1"; that only
/// holds if the property pool appends every key it is given — verify against
/// `StringPool.add`.
/// NOTE(review): if a failure happens half-way, slots already reserved stay
/// in the index.
fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput)) std.mem.Allocator.Error!usize {
    const count = object.count();

    if (count == 0) {
        const header = try self.index.addOne(allocator);
        self.index.set(header, .{ .object = .{
            .len = 0,
            .property_idx = 0,
            .value_idx = 0,
        } });
        return header;
    }

    // Keys first, so they sit next to each other in the property pool.
    var first_property: usize = 0;
    for (object.keys(), 0..) |key, i| {
        const handle = try self.property_index.add(allocator, key);
        if (i == 0) first_property = @intFromEnum(handle);
    }

    // Reserve one slot per value; they are overwritten below.
    const first_value = self.index.len;
    try self.index.ensureUnusedCapacity(allocator, count);
    for (0..count) |_| self.index.appendAssumeCapacity(.{ .null = {} });

    for (object.values(), first_value..) |value, slot| {
        try self.addValue(allocator, value);
        // `addValue` leaves the value it built in the last slot; move it
        // into the reserved slot and drop the temporary copy.
        const last = self.index.len - 1;
        self.index.set(slot, self.index.get(last));
        self.index.shrinkRetainingCapacity(last);
    }

    const header = try self.index.addOne(allocator);
    self.index.set(header, .{ .object = .{
        .len = count,
        .property_idx = first_property,
        .value_idx = first_value,
    } });
    return header;
}
/// Appends an `.object` entry with zero members to the value index.
///
/// The descriptor points at the current end of the property pool's bytes and
/// at the position right after the new entry, i.e. where the next key and
/// the next value would be stored.
///
/// Returns the position of the new entry.
fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, .{ .object = .{
        .len = 0,
        .property_idx = self.property_index.string_bytes.items.len,
        .value_idx = self.index.len,
    } });
    return slot;
}
/// Stores `array` and returns the position of its `.array` entry.
///
/// One slot per element is reserved back to back in the index. Each element
/// is then built with `addValue` and moved into its reserved slot, so
/// whatever a nested array/object needed stays behind the reserved run and
/// the elements themselves remain consecutive. The array's own entry is
/// appended last. An empty array gets an all-zero descriptor.
///
/// The error set is spelled out because this function is part of the
/// `addValue` -> `addArray` -> `addValue` recursion, which inferred error
/// sets cannot express.
///
/// NOTE(review): if a failure happens half-way, slots already reserved stay
/// in the index.
fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) std.mem.Allocator.Error!usize {
    var descriptor: ArraySlice = .{ .start = 0, .len = 0 };

    if (array.len != 0) {
        const first = self.index.len;
        try self.index.ensureUnusedCapacity(allocator, array.len);
        // Placeholders; every one of them is overwritten below.
        for (0..array.len) |_| self.index.appendAssumeCapacity(.{ .null = {} });

        for (array, first..) |value, slot| {
            try self.addValue(allocator, value);
            // `addValue` leaves the value it built in the last slot; move it
            // into the reserved slot and drop the temporary copy.
            const last = self.index.len - 1;
            self.index.set(slot, self.index.get(last));
            self.index.shrinkRetainingCapacity(last);
        }
        descriptor = .{ .start = first, .len = array.len };
    }

    const header = try self.index.addOne(allocator);
    self.index.set(header, .{ .array = descriptor });
    return header;
}
/// Appends `value` to the value index as a `.bool` entry and returns the
/// position of the new entry.
fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize {
    const slot = self.index.len;
    try self.index.append(allocator, .{ .bool = value });
    return slot;
}
/// Appends a `.null` entry to the value index and returns the position of
/// the new entry.
fn addNull(self: *Self, allocator: std.mem.Allocator) !usize {
    const slot = self.index.len;
    try self.index.append(allocator, .null);
    return slot;
}
/// Stores `value` by dispatching on its kind to the matching `add*` helper.
///
/// The position returned by the helper is deliberately discarded: every
/// helper appends the entry for the value it was given last, so after this
/// call that entry is the final one in `index`.
///
/// The error set is spelled out because arrays and objects call back into
/// this function, and inferred error sets cannot express recursion. This
/// assumes the helpers can only fail with `error.OutOfMemory` — TODO confirm
/// for `StringPool.add`.
fn addValue(self: *Self, allocator: std.mem.Allocator, value: JsonInput) std.mem.Allocator.Error!void {
    _ = switch (value) {
        .null => try self.addNull(allocator),
        .bool => |b| try self.addBool(allocator, b),
        .number => |n| try self.addNumber(allocator, n),
        .string => |s| try self.addString(allocator, s),
        .array => |a| try self.addArray(allocator, a),
        .object => |o| try self.addObject(allocator, o),
    };
}
/// Looks a property name up in the property pool's `string_table`.
///
/// Despite its name, `index` is the text of the property, not a position.
/// Returns whatever the table yields for it, or null when there is none.
fn getProperty(self: *Self, index: []const u8) ?StringIndex {
    const table = &self.property_index.string_table;
    return table.get(index);
}
/// Returns the number stored at position `index` of the value index.
///
/// Returns null when `index` is out of range or the entry there is not a
/// number.
fn getNumber(self: *Self, index: usize) ?f64 {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .number => |n| n,
        else => null,
    };
}
/// Lists the members of the object stored at position `index`.
///
/// Returns two parallel slices: the handle of every key (resolve with
/// `StringIndex.slice` against `property_index`) and the position in the
/// value index of every value. Returns null when `index` is out of range or
/// the entry there is not an object.
///
/// Both slices are allocated with `allocator` and owned by the caller; for an
/// object without members they are empty and nothing is allocated.
///
/// Keys are walked by adding the key's length plus one to the offset, values
/// by adding one to the position.
fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !?struct {
    []StringIndex,
    []usize,
} {
    if (index >= self.index.len) return null;
    const object = switch (self.index.get(index)) {
        .object => |o| o,
        else => return null,
    };
    if (object.len == 0) {
        return .{ &.{}, &.{} };
    }

    const keys = try allocator.alloc(StringIndex, object.len);
    // Do not leak the keys when the second allocation fails.
    errdefer allocator.free(keys);
    const values = try allocator.alloc(usize, object.len);

    var pidx = object.property_idx;
    for (keys, values, object.value_idx..) |*key, *value, vidx| {
        const handle: StringIndex = @enumFromInt(pidx);
        key.* = handle;
        value.* = vidx;
        pidx += handle.slice(&self.property_index).len + 1;
    }
    return .{ keys, values };
}
/// Lists the positions in the value index of the elements of the array
/// stored at position `index`.
///
/// Returns null when `index` is out of range or the entry there is not an
/// array. The slice is allocated with `allocator` and owned by the caller;
/// for an empty array it is empty and nothing is allocated.
///
/// The function allocates, so it returns an error union (like `getObject`)
/// to let an allocation failure propagate instead of being mistaken for
/// "not an array".
fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) std.mem.Allocator.Error!?[]usize {
    if (index >= self.index.len) return null;
    const array = switch (self.index.get(index)) {
        .array => |a| a,
        else => return null,
    };
    if (array.len == 0) {
        return &.{};
    }

    const values = try allocator.alloc(usize, array.len);
    // Elements occupy consecutive positions starting at `array.start`.
    for (values, array.start..) |*value, position| value.* = position;
    return values;
}
/// Returns the boolean stored at position `index` of the value index.
///
/// Returns null when `index` is out of range or the entry there is not a
/// boolean.
fn getBool(self: *Self, index: usize) ?bool {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .bool => |b| b,
        else => null,
    };
}
/// Tells whether the entry at position `index` of the value index is a JSON
/// null.
///
/// Returns `{}` when it is, and null when `index` is out of range or the
/// entry there is of another kind.
fn getNull(self: *Self, index: usize) ?void {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .null => {},
        else => null,
    };
}
/// Rebuilds the value stored at position `index` as a self-contained
/// `JsonInput`, recursing into arrays and objects.
///
/// Strings and object keys are slices into the pools, not copies. Arrays and
/// object maps are allocated with `allocator` and owned by the caller. On
/// failure only the container being built at that level is released;
/// containers already built for its children are not, so an arena allocator
/// is the simplest way to use this.
///
/// The error set is spelled out because the function calls itself, which
/// inferred error sets cannot express. The result is never null in the
/// current implementation.
///
/// NOTE(review): the object map type comes from `JsonInput`; auto hashing is
/// documented as rejecting slice keys, so inserting into it may not compile
/// until that type is changed — confirm.
fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) std.mem.Allocator.Error!?JsonInput {
    switch (self.index.get(index)) {
        .null => return .{ .null = {} },
        .bool => |b| return .{ .bool = b },
        .number => |n| return .{ .number = n },
        .string => |handle| return .{ .string = handle.slice(&self.string_index) },
        .array => |array| {
            const items = try allocator.alloc(JsonInput, array.len);
            errdefer allocator.free(items);
            // Elements occupy consecutive positions starting at `array.start`.
            for (items, array.start..) |*item, position| {
                item.* = (try self.getValue(allocator, position)).?;
            }
            return .{ .array = items };
        },
        .object => |object| {
            var members: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput) = .empty;
            errdefer members.deinit(allocator);
            try members.ensureTotalCapacity(allocator, object.len);

            var kidx = object.property_idx;
            for (object.value_idx..object.value_idx + object.len) |vidx| {
                const key = StringIndex.slice(@enumFromInt(kidx), &self.property_index);
                const val = (try self.getValue(allocator, vidx)).?;
                members.putAssumeCapacityNoClobber(key, val);
                // Step over the whole key, the same way `getObject` does;
                // stepping by one would land in the middle of this key.
                kidx += key.len + 1;
            }
            return .{ .object = members };
        },
    }
}
/// Consumes every token of `tokenizer` and stores what it finds in this
/// instance, allocating with `tokenizer.allocator`.
///
/// Returns the position of the root object in the value index. The root
/// entry is created before the first token is read, so the result is 0 when
/// the instance was empty before the call.
///
/// How tokens are handled:
///   * the first `object_begin` opens the root; later ones append a new
///     empty object and make it the current scope;
///   * `object_end` closes the current scope;
///   * a `property`, or a `string` directly followed by a colon, stores the
///     name in the property pool and counts one more member on the current
///     scope;
///   * strings, numbers, booleans and nulls are appended as values;
///   * token types not listed above are skipped.
///
/// Errors:
///   * `error.TrailingComma` — a comma directly before `object_end` while
///     `allow_trailing_comma` is off;
///   * `error.UnexpectedObjectEnd` — `object_end` without an open object;
///   * `error.PropertyOutsideObject` — a property while no object is open;
///   * `error.Overflow` — objects nested deeper than `max_depth`;
///   * whatever the `add*` helpers fail with.
///
/// NOTE(review): values are appended in token order, so the values of an
/// object that contains a nested object are not consecutive in the index,
/// while `getObject` assumes they are. Input that ends with objects still
/// open is accepted as is.
pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
    const allocator = tokenizer.allocator;
    var it = tokenizer.iterator();
    const root = try self.addEmptyObject(allocator);

    // Stack of the objects that are currently open. It has to start empty:
    // a non-zero initial length would expose undefined entries to `get` and
    // `pop`.
    var scopes: std.BoundedArray(usize, self.options.max_depth) = try .init(0);
    var root_opened = false;

    while (it.next()) |token| {
        defer tokenizer.skipWhitespace();
        flag: switch (token.type) {
            .object_begin => {
                if (!root_opened) {
                    root_opened = true;
                    // Re-anchor the root at the current end of the property
                    // bytes and the next free value slot. The byte length is
                    // used (as in `addEmptyObject`) because readers treat
                    // `property_idx` as a byte offset.
                    self.index.set(root, .{ .object = .{
                        .len = 0,
                        .property_idx = self.property_index.string_bytes.items.len,
                        .value_idx = self.index.len,
                    } });
                    try scopes.append(root);
                    continue;
                }
                // The enclosing object is left alone: the property token in
                // front of this value has already counted the member, and its
                // `property_idx` must keep pointing at its first key.
                const nested = try self.addEmptyObject(allocator);
                try scopes.append(nested);
                continue;
            },
            .object_end => {
                _ = scopes.pop() orelse return error.UnexpectedObjectEnd;
                continue;
            },
            .property => {
                if (scopes.len == 0) return error.PropertyOutsideObject;
                const scope_idx = scopes.get(scopes.len - 1);
                var scope = self.index.get(scope_idx).object;
                _ = try self.addProperty(allocator, token.value.?.string);
                scope.len += 1;
                self.index.set(scope_idx, .{ .object = scope });
                continue;
            },
            .string => {
                // A string directly followed by a colon is a property name.
                if (it.peek()) |next| if (next.type == .colon) {
                    continue :flag TokenType.property;
                };
                _ = try self.addString(allocator, token.value.?.string);
                continue;
            },
            .number => {
                _ = try self.addNumber(allocator, token.value.?.number);
                continue;
            },
            .true, .false => {
                _ = try self.addBool(allocator, token.type == .true);
                continue;
            },
            .null => {
                _ = try self.addNull(allocator);
                continue;
            },
            .comma => if (it.peek()) |next| {
                if (next.type == .object_end and !self.options.flags.allow_trailing_comma) {
                    return error.TrailingComma;
                }
            },
            else => {},
        }
    }
    return root;
}
// Parses a small two-member object and prints its keys together with the
// positions of their values, formatted like a JSON object.
test parse {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    var self = init;
    defer deinit(&self, allocator);
    var tokenizer = try Tokenizer.init(allocator, blk: {
        const json =
            \\ {
            \\ "lazy": true,
            \\ "name": "yuzu"
            \\ }
        ;
        break :blk json;
    });
    const root = try parse(&self, &tokenizer);
    const keys, const values = (try self.getObject(allocator, root)).?;
    const stdout = std.io.getStdOut().writer();
    try stdout.writeAll("{\n");
    for (keys, values, 0..) |k, v, i| {
        const key: []const u8 = k.slice(&self.property_index);
        try stdout.print(
            \\ "{s}": {d}
        , .{ key, v });
        // Entries are separated by a comma; the last one must not get one.
        // (`i < keys.len` holds for every entry, so it cannot be the test.)
        const is_last = i + 1 == keys.len;
        try stdout.writeAll(if (is_last) "\n" else ",\n");
    }
    try stdout.writeAll("}\n");
}