//! aether/language.zig

const std = @import("std");
const Tokenizer = @import("tokenizer.zig");
const TokenType = Tokenizer.TokenType;
const StringPool = @import("strings.zig");
const StringIndex = StringPool.StringIndex;
const assert = std.debug.assert;
const Self = @This();
pub const Error = error{TrailingComma};
pub const JsonType = enum {
    null,
    bool,
    number,
    string,
    array,
    object,
};
pub const JsonValue = union(JsonType) {
    null: void,
    bool: bool,
    number: f64,
    string: StringIndex,
    array: ArraySlice,
    object: ObjectEntry,
};
pub const JsonInput = union(JsonType) {
    null: void,
    bool: bool,
    number: f64,
    string: []const u8,
    array: []JsonInput,
    // string keys need StringArrayHashMapUnmanaged; AutoArrayHashMapUnmanaged
    // cannot hash []const u8 keys
    object: std.StringArrayHashMapUnmanaged(JsonInput),
};
/// Same idea as ObjectEntry, but simpler: `start` is the index of the first
/// element in the flat value index.
pub const ArraySlice = struct {
    start: usize,
    len: usize,
};
/// Properties and values are stored contiguously and in order: advance
/// `property_idx` and `value_idx` to step to the next key/value pair.
pub const ObjectEntry = struct {
    len: usize,
    property_idx: usize,
    value_idx: usize,
};
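// Illustrative sketch (not normative): for `{"a": 1, "b": 2}` the object's
// ObjectEntry would record `len = 2`, `property_idx` as the offset of "a"
// inside the property pool, and `value_idx` as the slot of the first number;
// the second pair is reached by stepping both offsets forward.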
pub const Flags = packed struct {
    allow_trailing_comma: bool = false,
};
pub const Options = struct {
    comptime max_depth: usize = 256,
    comptime flags: Flags = .{},
};
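// Note: because both Options fields are comptime, an instance always carries
// the defaults above (depth limit 256, trailing commas rejected); changing
// them means editing the defaults here.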
index: std.MultiArrayList(JsonValue) = .{},
string_index: StringPool = .empty,
property_index: StringPool = .empty,
options: Options = .{},
pub const init: Self = .{};
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    self.index.deinit(allocator);
    self.string_index.deinit(allocator);
    self.property_index.deinit(allocator);
}
fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .number = number });
    return idx;
}
fn addProperty(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const stridx = try self.property_index.add(allocator, bytes);
    try self.index.ensureUnusedCapacity(allocator, 1);
    return @intFromEnum(stridx);
}
fn addString(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const stridx = try self.string_index.add(allocator, bytes);
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .string = stridx });
    return idx;
}
fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.StringArrayHashMapUnmanaged(JsonInput)) !usize {
    var entry: ?ObjectEntry = null;
    for (object.keys(), object.values(), 0..) |key, value, times| {
        const stridx = try self.property_index.add(allocator, key);
        const vidx = try self.addValue(allocator, value);
        // remember where the first key/value pair lives; the rest follow it
        if (times == 0) {
            entry = ObjectEntry{
                .len = object.entries.len,
                .property_idx = @intFromEnum(stridx),
                .value_idx = vidx,
            };
        }
    }
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .object = entry orelse ObjectEntry{
        .len = 0,
        .property_idx = 0,
        .value_idx = 0,
    } });
    return idx;
}
fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    const object: ObjectEntry = .{
        .property_idx = self.property_index.string_bytes.items.len,
        .value_idx = self.index.len,
        .len = 0,
    };
    self.index.set(idx, .{ .object = object });
    return idx;
}
fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usize {
    var entry: ?ArraySlice = null;
    for (array, 0..) |value, times| {
        const vidx = try self.addValue(allocator, value);
        // remember where the first element landed; the rest follow it
        if (times == 0) {
            entry = ArraySlice{
                .start = vidx,
                .len = array.len,
            };
        }
    }
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .array = entry orelse ArraySlice{
        .start = 0,
        .len = 0,
    } });
    return idx;
}
fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .bool = value });
    return idx;
}
fn addNull(self: *Self, allocator: std.mem.Allocator) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .null = {} });
    return idx;
}
fn addValue(self: *Self, allocator: std.mem.Allocator, value: JsonInput) !usize {
    // returns the index slot the value was written to
    return switch (value) {
        .null => try self.addNull(allocator),
        .bool => |b| try self.addBool(allocator, b),
        .number => |n| try self.addNumber(allocator, n),
        .string => |s| try self.addString(allocator, s),
        .array => |a| try self.addArray(allocator, a),
        .object => |o| try self.addObject(allocator, o),
    };
}
fn getProperty(self: *Self, bytes: []const u8) ?StringIndex {
    return self.property_index.string_table.get(bytes);
}
fn getNumber(self: *Self, index: usize) ?f64 {
    return switch (self.index.get(index)) {
        .number => |n| n,
        else => null,
    };
}
fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !?struct {
    []StringIndex,
    []usize,
} {
    const entry = self.index.get(index);
    if (entry.object.len == 0) {
        return .{ &.{}, &.{} };
    }
    var pidx = entry.object.property_idx;
    var vidx = entry.object.value_idx;
    const keys = try allocator.alloc(StringIndex, entry.object.len);
    const values = try allocator.alloc(usize, entry.object.len);
    for (0..entry.object.len) |i| {
        const slice = StringIndex.slice(@enumFromInt(pidx), &self.property_index);
        keys[i] = @enumFromInt(pidx);
        values[i] = vidx;
        pidx += slice.len + 1;
        vidx += 1;
    }
    return .{ keys, values };
}
fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) !?[]usize {
    const entry = self.index.get(index);
    if (entry.array.len == 0) {
        return &.{};
    }
    var idx = entry.array.start;
    const values = try allocator.alloc(usize, entry.array.len);
    for (0..entry.array.len) |i| {
        values[i] = idx;
        idx += 1;
    }
    return values;
}
fn getBool(self: *Self, index: usize) ?bool {
    return switch (self.index.get(index)) {
        .bool => |b| b,
        else => null,
    };
}
fn getNull(self: *Self, index: usize) ?void {
    return switch (self.index.get(index)) {
        .null => {},
        else => null,
    };
}
fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput {
    const entry = self.index.get(index);
    switch (entry) {
        .null => return .{ .null = {} },
        .bool => return .{ .bool = entry.bool },
        .number => return .{ .number = entry.number },
        .string => {
            const str = entry.string.slice(&self.string_index);
            return .{ .string = str };
        },
        .array => {
            const res = try allocator.alloc(JsonInput, entry.array.len);
            var idx = entry.array.start;
            for (0..entry.array.len) |i| {
                if (try self.getValue(allocator, idx)) |v| {
                    res[i] = v;
                    idx += 1;
                } else unreachable;
            }
            return .{ .array = res };
        },
        .object => {
            var kidx = entry.object.property_idx;
            var vidx = entry.object.value_idx;
            var obj: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
            try obj.ensureTotalCapacity(allocator, entry.object.len);
            for (0..entry.object.len) |_| {
                const key = StringIndex.slice(@enumFromInt(kidx), &self.property_index);
                const val = (try self.getValue(allocator, vidx)).?;
                obj.putAssumeCapacityNoClobber(key, val);
                // step past this key's bytes (same scheme as getObject); values are consecutive
                kidx += key.len + 1;
                vidx += 1;
            }
            return .{ .object = obj };
        },
    }
}
pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
    const allocator = tokenizer.allocator;
    var it = tokenizer.iterator();
    // depth_buf[i] holds the index slot of the container open at nesting depth i
    const depth_buf = try allocator.alloc(usize, self.options.max_depth);
    defer allocator.free(depth_buf);
    var cycles: usize = 0;
    while (it.next()) |token| {
        flag: switch (token.type) {
            .object_begin => {
                std.debug.print("{{", .{});
                const obj_idx = try self.addEmptyObject(allocator);
                depth_buf[cycles] = obj_idx;
                if (tokenizer.prev_token) |t| if (t.type == .object_begin) {
                    // nested object: bump the parent's length and point it at this slot
                    const data = self.index.get(depth_buf[cycles - 1]);
                    switch (data) {
                        .object => |valid_entry| {
                            const new_data = ObjectEntry{
                                .len = valid_entry.len + 1,
                                .property_idx = self.property_index.string_table.size,
                                .value_idx = obj_idx,
                            };
                            self.index.set(depth_buf[cycles - 1], .{ .object = new_data });
                            tokenizer.prev_token = null; // reset
                        },
                        else => unreachable,
                    }
                } else tokenizer.pushBack(token);
                cycles += 1;
                continue;
            },
            .object_end => {
                const keys, const vals = (try self.getObject(allocator, depth_buf[cycles - 1])).?;
                std.debug.print("\nfound {d} keys and {d} values\n", .{ keys.len, vals.len });
                for (keys, vals) |k, v| {
                    const key = k.slice(&self.property_index);
                    const val = self.index.get(v);
                    std.debug.print(
                        \\"{s}": {s},
                    , .{ key, @tagName(val) });
                }
                std.debug.print("}}", .{});
            },
            .property => {
                _ = try self.addProperty(allocator, token.value.?.string);
                if (cycles > 0) {
                    // a new key bumps the enclosing object's length
                    const last_obj = self.index.get(depth_buf[cycles - 1]);
                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
                        .len = last_obj.object.len + 1,
                        .property_idx = last_obj.object.property_idx,
                        .value_idx = last_obj.object.value_idx,
                    } });
                    continue;
                }
            },
            .string => {
                // a string followed by a colon is really a property name
                if (it.peek()) |next| if (next.type == .colon) {
                    continue :flag TokenType.property;
                };
                _ = try self.addString(allocator, token.value.?.string);
                if (cycles > 0) {
                    const last_obj = self.index.get(depth_buf[cycles - 1]);
                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
                        .len = last_obj.object.len,
                        .property_idx = last_obj.object.property_idx,
                        .value_idx = last_obj.object.value_idx,
                    } });
                    continue;
                }
            },
            .number => {
                _ = try self.addNumber(allocator, token.value.?.number);
                if (cycles > 0) {
                    const last_obj = self.index.get(depth_buf[cycles - 1]);
                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
                        .len = last_obj.object.len,
                        .property_idx = last_obj.object.property_idx,
                        .value_idx = last_obj.object.value_idx,
                    } });
                    continue;
                }
            },
            .true, .false => {
                _ = try self.addBool(allocator, token.type == .true);
                if (cycles > 0) {
                    const last_obj = self.index.get(depth_buf[cycles - 1]);
                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
                        .len = last_obj.object.len,
                        .property_idx = last_obj.object.property_idx,
                        .value_idx = last_obj.object.value_idx,
                    } });
                    continue;
                }
            },
            .null => {
                _ = try self.addNull(allocator);
                if (cycles > 0) {
                    const last_obj = self.index.get(depth_buf[cycles - 1]);
                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
                        .len = last_obj.object.len,
                        .property_idx = last_obj.object.property_idx,
                        .value_idx = last_obj.object.value_idx,
                    } });
                    continue;
                }
            },
            .comma => {
                // a comma right before '}' is an error unless trailing commas are allowed
                if (it.peek()) |tc| if (tc.type == .object_end and !self.options.flags.allow_trailing_comma) {
                    return error.TrailingComma;
                };
            },
            else => {},
        }
        tokenizer.skipWhitespace();
    }
}
test parse {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    var self = init;
    defer deinit(&self, allocator);
    var tokenizer = try Tokenizer.init(allocator, blk: {
        const json =
            \\ {
            \\ "key": "hello",
            \\ "key2": "world",
            \\ "key3": true,
            \\ "key4": null,
            \\ "key5": 123
            \\ }
        ;
        break :blk json;
    });
    try parse(&self, &tokenizer);
}
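// A minimal round-trip sketch for the flat index itself (not the tokenizer):
// it only relies on the add*/get* helpers defined above and on the
// MultiArrayList-backed index behaving as documented.
test "index round-trip" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    var self = init;
    defer deinit(&self, allocator);
    // every add* helper returns the slot it wrote, so the matching get*
    // helper can read the value back by that slot
    const nidx = try self.addNumber(allocator, 42.0);
    try std.testing.expect(self.getNumber(nidx).? == 42.0);
    const bidx = try self.addBool(allocator, true);
    try std.testing.expect(self.getBool(bidx).?);
    const xidx = try self.addNull(allocator);
    try std.testing.expect(self.getBool(xidx) == null);
}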