450 lines
14 KiB
Zig
450 lines
14 KiB
Zig
const std = @import("std");
|
|
const Tokenizer = @import("tokenizer.zig");
|
|
const TokenType = Tokenizer.TokenType;
|
|
const StringPool = @import("strings.zig");
|
|
const StringIndex = StringPool.StringIndex;
|
|
const assert = std.debug.assert;
|
|
|
|
const Self = @This();
|
|
|
|
/// Placeholder declaration: an enum with no members (note: an `enum`, not an
/// error set). Nothing in the visible source refers to it.
pub const Error = enum {};
|
|
|
|
/// The six kinds of JSON value.
///
/// Serves as the tag type of both `JsonValue` and `JsonInput`. Declaration
/// order (and therefore each tag's integer value) is significant to anything
/// that converts tags to integers, so it is kept as-is.
pub const JsonType = enum { null, bool, number, string, array, object };
|
|
|
|
/// One stored JSON value, tagged by `JsonType`.
///
/// Scalars carry their payload directly. Strings, arrays and objects carry a
/// reference (pool handle or offsets) instead of owning their contents.
pub const JsonValue = union(JsonType) {
    /// JSON `null`; carries no payload.
    null,
    bool: bool,
    number: f64,
    /// Handle of the string's bytes, as returned by `StringPool.add`.
    string: StringIndex,
    /// Location of the array's elements.
    array: ArraySlice,
    /// Location of the object's keys and values.
    object: ObjectEntry,
};
|
|
|
|
/// A self-contained JSON value tree, tagged by `JsonType`.
///
/// This is the form accepted by the `add*` helpers and produced by
/// `getValue`: strings are plain byte slices, arrays are slices of nested
/// values and objects are ordered maps from key to nested value.
pub const JsonInput = union(JsonType) {
    /// JSON `null`; carries no payload.
    null,
    bool: bool,
    number: f64,
    string: []const u8,
    array: []JsonInput,
    // NOTE(review): the `Auto*` hash maps reject slice keys such as
    // `[]const u8` once a hashing operation is instantiated;
    // `std.StringArrayHashMapUnmanaged(JsonInput)` is probably what is wanted.
    // Left unchanged here because `addObject` and `getValue` spell out this
    // exact type — confirm and migrate all three together.
    object: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput),
};
|
|
|
|
/// Describes where a JSON array's elements live in the value index.
///
/// The simpler sibling of `ObjectEntry`: elements are addressed as `len`
/// consecutive positions beginning at `start`.
pub const ArraySlice = struct {
    /// Offset of the first element.
    start: usize,
    /// Number of elements.
    len: usize,
};
|
|
|
|
/// Describes where a JSON object's members live.
///
/// `property_idx` and `value_idx` are the offsets of the first key and the
/// first value; the following members are reached by advancing both offsets.
/// Members are expected to be stored in order.
pub const ObjectEntry = struct {
    /// Number of key/value pairs.
    len: usize,
    /// Offset of the first key in the property pool.
    property_idx: usize,
    /// Offset of the first value in the value index.
    value_idx: usize,
};
|
|
|
|
/// Boolean parser switches, packed one bit per flag.
pub const Flags = packed struct {
    /// Whether a comma directly before a closing `}` is tolerated.
    /// Off by default.
    allow_trailing_comma: bool = false,
};
|
|
|
|
/// Parser configuration. Both fields are `comptime` fields, so they always
/// hold the defaults written here.
pub const Options = struct {
    /// Capacity of the open-object stack that `parse` allocates.
    comptime max_depth: usize = 256,
    /// Syntax leniency switches; see `Flags`.
    comptime flags: Flags = .{},
};
|
|
|
|
/// Flat store of every value added so far; the `usize` results of the `add*`
/// helpers are positions in this list.
index: std.MultiArrayList(JsonValue) = .empty,
/// Pool holding the bytes of string values (filled by `addString`).
string_index: StringPool = .empty,
/// Pool holding object keys (filled by `addProperty` and `addObject`).
property_index: StringPool = .empty,

/// Parser configuration.
options: Options = .{},
|
|
|
|
/// An empty instance with every field at its default. Copy it into a `var`
/// to start using the type, and release it with `deinit` when done.
pub const init: Self = .{};
|
|
|
|
/// Frees the value index and both string pools.
///
/// `allocator` must be the allocator that was passed to the calls that filled
/// this instance. The instance must not be used afterwards.
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    self.index.deinit(allocator);
    // `addString` stores into this pool, so it has to be released as well;
    // previously only `property_index` was freed and string values leaked.
    self.string_index.deinit(allocator);
    self.property_index.deinit(allocator);
}
|
|
|
|
/// Appends `number` to the value index as a `.number` entry.
///
/// Returns the position of the new entry. Fails only when the index cannot
/// grow.
fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, JsonValue{ .number = number });
    return slot;
}
|
|
|
|
/// Stores an object key in the property pool.
///
/// Returns the integer value of the handle handed out by the pool. Unlike the
/// other `add*` helpers this appends nothing to the value index; it only
/// makes sure the index has room for one more entry.
fn addProperty(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const key_handle = try self.property_index.add(allocator, bytes);
    // Reserve space only; presumably for the value that follows the key.
    try self.index.ensureUnusedCapacity(allocator, 1);
    return @intFromEnum(key_handle);
}
|
|
|
|
/// Stores `bytes` in the string pool and appends a `.string` entry that
/// refers to it.
///
/// Returns the position of the new entry in the value index.
fn addString(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    // Pool insertion comes first, exactly as before: if the index cannot grow
    // afterwards, the bytes are already in the pool.
    const handle = try self.string_index.add(allocator, bytes);
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, JsonValue{ .string = handle });
    return slot;
}
|
|
|
|
/// Stores every member of `object`, then appends an `.object` entry that
/// describes them.
///
/// Returns the position of the object's own entry, which is written after
/// all of its members. An empty object is recorded with all three
/// `ObjectEntry` fields set to zero.
///
/// Fixes relative to the previous version, which could not compile:
/// * the member value is stored by `addValue` itself instead of being fed to
///   `@unionInit` (which needs a comptime-known field name and was handed an
///   unwrapped error union);
/// * the pool handle is converted with `@intFromEnum` before being stored in
///   the `usize` field;
/// * no extra, never-initialised slot is reserved per member.
///
/// NOTE(review): readers step through members by advancing `value_idx` by one,
/// which only matches this layout while the members after the first are
/// scalars; nested containers put their children in between. Confirm the
/// intended layout before relying on nested input.
fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput)) !usize {
    // Descriptor for the empty object; replaced once the first member exists.
    var entry: ObjectEntry = .{ .len = 0, .property_idx = 0, .value_idx = 0 };

    for (object.keys(), object.values(), 0..) |key, value, i| {
        const key_handle = try self.property_index.add(allocator, key);
        try self.addValue(allocator, value);

        if (i == 0) {
            entry = .{
                .len = object.count(),
                .property_idx = @intFromEnum(key_handle),
                // Every `add*` helper appends the value's own entry last, so
                // the member just stored sits at the end of the index.
                .value_idx = self.index.len - 1,
            };
        }
    }

    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .object = entry });
    return idx;
}
|
|
|
|
/// Appends an `.object` entry with no members and returns its position.
///
/// The entry's offsets are primed for members that are added afterwards:
/// `property_idx` is the current byte length of the property pool and
/// `value_idx` is the position right behind the new entry.
fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, .{ .object = .{
        .len = 0,
        .property_idx = self.property_index.string_bytes.items.len,
        // Read after the append above, so this is `slot + 1`.
        .value_idx = self.index.len,
    } });
    return slot;
}
|
|
|
|
/// Stores every element of `array`, then appends an `.array` entry that
/// describes them.
///
/// Returns the position of the array's own entry, which is written after all
/// of its elements. An empty array is recorded as `start = 0, len = 0`.
///
/// The previous version could not compile: it passed a runtime tag and an
/// unwrapped error union to `@unionInit`, and reserved an extra slot per
/// element that was never given a real value. Elements are now stored by
/// `addValue` directly.
///
/// NOTE(review): readers treat the elements as `len` consecutive entries
/// starting at `start`, which only matches this layout while the elements
/// after the first are scalars — confirm before relying on nested input.
fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usize {
    var start: usize = 0;

    for (array, 0..) |value, i| {
        try self.addValue(allocator, value);
        // Every `add*` helper appends the value's own entry last.
        if (i == 0) start = self.index.len - 1;
    }

    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .array = .{ .start = start, .len = array.len } });
    return idx;
}
|
|
|
|
/// Appends `value` to the value index as a `.bool` entry and returns the
/// position of the new entry.
fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, JsonValue{ .bool = value });
    return slot;
}
|
|
|
|
/// Appends a `.null` entry to the value index and returns its position.
fn addNull(self: *Self, allocator: std.mem.Allocator) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, JsonValue{ .null = {} });
    return slot;
}
|
|
|
|
/// Stores `value` by dispatching to the `add*` helper for its kind.
///
/// The position returned by the helper is deliberately dropped; the value's
/// own entry is always the last one in the index when this returns.
/// Previously every switch arm produced a `usize` that was left unused in a
/// `!void` function, which the compiler rejects.
fn addValue(self: *Self, allocator: std.mem.Allocator, value: JsonInput) !void {
    _ = switch (value) {
        .null => try self.addNull(allocator),
        .bool => |flag| try self.addBool(allocator, flag),
        .number => |number| try self.addNumber(allocator, number),
        .string => |bytes| try self.addString(allocator, bytes),
        .array => |items| try self.addArray(allocator, items),
        .object => |members| try self.addObject(allocator, members),
    };
}
|
|
|
|
/// Looks `index` up in the property pool's string table.
///
/// Returns whatever the table's `get` yields for that key, or `null` when it
/// has no match.
fn getProperty(self: *Self, index: []const u8) ?StringIndex {
    const table = &self.property_index.string_table;
    return table.get(index);
}
|
|
|
|
/// Returns the number stored at position `index`.
///
/// Yields `null` when `index` is out of range or the entry is not a number.
/// The previous body tried to optional-unwrap the entry itself, which is not
/// an optional, and would have returned the whole union instead of the `f64`.
fn getNumber(self: *Self, index: usize) ?f64 {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .number => |number| number,
        else => null,
    };
}
|
|
|
|
/// Lists the members of the object stored at position `index`.
///
/// Returns a pair of equally long slices: the key handles and the positions
/// of the matching values. Yields `null` when `index` is out of range or the
/// entry is not an object (previously an unchecked union access).
///
/// Ownership: both slices are allocated with `allocator` and belong to the
/// caller. For an empty object two static zero-length slices are returned;
/// passing those to `allocator.free` is harmless because freeing a
/// zero-length slice is a no-op.
fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !?struct {
    []StringIndex,
    []usize,
} {
    if (index >= self.index.len) return null;
    const object = switch (self.index.get(index)) {
        .object => |found| found,
        else => return null,
    };

    if (object.len == 0) {
        return .{ &.{}, &.{} };
    }

    const keys = try allocator.alloc(StringIndex, object.len);
    // Do not leak `keys` when the second allocation fails.
    errdefer allocator.free(keys);
    const values = try allocator.alloc(usize, object.len);

    var pidx = object.property_idx;
    var vidx = object.value_idx;

    for (keys, values) |*key, *value| {
        key.* = @enumFromInt(pidx);
        value.* = vidx;
        // The next key starts one byte past the end of this one.
        pidx += key.slice(&self.property_index).len + 1;
        vidx += 1;
    }

    return .{ keys, values };
}
|
|
|
|
/// Lists the positions of the elements of the array stored at `index`.
///
/// Yields `null` when `index` is out of range or the entry is not an array.
/// The returned slice is allocated with `allocator` and owned by the caller;
/// an empty array yields a static zero-length slice, which is harmless to
/// pass to `allocator.free`.
///
/// The return type is now an error union, matching `getObject`. The previous
/// signature (`?[]usize`) could not compile because the body uses `try`; the
/// body also applied `orelse` to a non-optional entry and iterated over an
/// integer.
fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) !?[]usize {
    if (index >= self.index.len) return null;
    const array = switch (self.index.get(index)) {
        .array => |found| found,
        else => return null,
    };

    if (array.len == 0) {
        return &.{};
    }

    const values = try allocator.alloc(usize, array.len);
    for (values, 0..) |*value, offset| {
        value.* = array.start + offset;
    }
    return values;
}
|
|
|
|
/// Returns the boolean stored at position `index`.
///
/// Yields `null` when `index` is out of range or the entry is not a boolean.
/// The previous body applied `orelse` to the (non-optional) entry and read
/// the `.bool` payload without checking the tag.
fn getBool(self: *Self, index: usize) ?bool {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .bool => |flag| flag,
        else => null,
    };
}
|
|
|
|
/// Reports whether the entry at position `index` is JSON `null`.
///
/// Returns `{}` when it is, and `null` when `index` is out of range or the
/// entry holds anything else. The previous body applied `orelse` to the
/// (non-optional) entry and read the `.null` payload without checking the tag.
fn getNull(self: *Self, index: usize) ?void {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .null => {},
        else => null,
    };
}
|
|
|
|
/// Rebuilds the value stored at position `index` as a `JsonInput` tree.
///
/// Arrays and objects are rebuilt recursively; their containers are allocated
/// with `allocator` and owned by the caller. String and key slices are the
/// slices handed out by the string pools, not copies. As before, this never
/// actually returns `null`, and `index` must be a valid position.
///
/// Changes:
/// * the key offset now advances by the key's length plus one, the same
///   stride `getObject` uses; it used to advance by 1, so every key after the
///   first was read from the wrong offset;
/// * the container being built is released if a nested lookup fails. Children
///   already rebuilt are not released individually, so use an arena when
///   failures must not leak.
///
/// NOTE(review): inserting into an `Auto*` map keyed by `[]const u8` is
/// expected to be rejected by the standard library's auto-hash; the map type
/// is dictated by `JsonInput.object` and has to be changed there.
fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput {
    switch (self.index.get(index)) {
        .null => return .{ .null = {} },
        .bool => |flag| return .{ .bool = flag },
        .number => |number| return .{ .number = number },
        .string => |handle| return .{ .string = handle.slice(&self.string_index) },
        .array => |array| {
            const items = try allocator.alloc(JsonInput, array.len);
            errdefer allocator.free(items);

            for (items, 0..) |*item, offset| {
                // A nested lookup never yields null (see above).
                item.* = (try self.getValue(allocator, array.start + offset)).?;
            }
            return .{ .array = items };
        },
        .object => |object| {
            var members: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput) = .empty;
            errdefer members.deinit(allocator);
            try members.ensureTotalCapacity(allocator, object.len);

            var kidx = object.property_idx;
            for (0..object.len) |offset| {
                const key = StringIndex.slice(@enumFromInt(kidx), &self.property_index);
                const val = (try self.getValue(allocator, object.value_idx + offset)).?;

                members.putAssumeCapacityNoClobber(key, val);
                // The next key starts one byte past the end of this one.
                kidx += key.len + 1;
            }

            return .{ .object = members };
        },
    }
}
|
|
|
|
/// Consumes every token from `tokenizer` and records the values in `self`.
///
/// Memory is taken from `tokenizer.allocator`. Open objects are tracked on a
/// stack of `options.max_depth` entries; each property token increments the
/// member count of the innermost open object. Debug output is written with
/// `std.debug.print` while parsing.
///
/// Errors:
/// * `error.MaxDepthExceeded` — more objects are open than the stack holds;
/// * `error.UnexpectedObjectEnd` — a `}` arrives while no object is open;
/// * `error.TrailingComma` — a comma directly precedes `}` and
///   `allow_trailing_comma` is off;
/// * any allocation error.
///
/// Fixes relative to the previous version:
/// * the stack slice is `const` (it was a never-reassigned `var`, which the
///   compiler rejects);
/// * the stack top is only read while an object is open; before, it was read
///   ahead of the `cycles > 0` guard and underflowed at top level;
/// * the stack is bounds-checked on push and popped on `}`, so members that
///   follow a nested object are counted on the enclosing object again;
/// * the slices returned by `getObject` are freed;
/// * the trailing-comma condition was inverted (it only fired when trailing
///   commas were allowed);
/// * value tokens no longer rewrite the object entry with unchanged fields.
pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
    const allocator = tokenizer.allocator;

    var it = tokenizer.iterator();

    // Positions (in the value index) of the objects that are currently open.
    const depth_buf = try allocator.alloc(usize, self.options.max_depth);
    defer allocator.free(depth_buf);

    // Number of entries of `depth_buf` in use.
    var cycles: usize = 0;

    while (it.next()) |token| {
        flag: switch (token.type) {
            .object_begin => {
                if (cycles >= depth_buf.len) return error.MaxDepthExceeded;

                std.debug.print("{{", .{});
                const obj_idx = try self.addEmptyObject(allocator);

                depth_buf[cycles] = obj_idx;

                if (tokenizer.prev_token) |t| {
                    if (t.type == .object_begin) {
                        // NOTE(review): with no enclosing object this used to
                        // underflow; it is skipped now. Confirm whether
                        // `prev_token` should also be reset in that case.
                        if (cycles > 0) {
                            // Register the new object on its parent.
                            const parent = depth_buf[cycles - 1];

                            switch (self.index.get(parent)) {
                                .object => |valid_entry| {
                                    self.index.set(parent, .{ .object = .{
                                        .len = valid_entry.len + 1,
                                        // NOTE(review): this is the table's
                                        // entry count, while elsewhere
                                        // `property_idx` is a byte offset —
                                        // confirm which is intended.
                                        .property_idx = self.property_index.string_table.size,
                                        .value_idx = obj_idx,
                                    } });
                                    tokenizer.prev_token = null; // reset
                                },
                                // Only object positions are ever pushed.
                                else => unreachable,
                            }
                        }
                    } else tokenizer.pushBack(token);
                }

                cycles += 1;
                continue;
            },
            .object_end => {
                if (cycles == 0) return error.UnexpectedObjectEnd;

                const keys, const vals = (try self.getObject(allocator, depth_buf[cycles - 1])).?;
                // Freeing the zero-length slices of an empty object is a no-op.
                defer allocator.free(keys);
                defer allocator.free(vals);

                std.debug.print("\nfound {d} keys and {d} values\n", .{ keys.len, vals.len });
                for (keys, vals) |k, v| {
                    const key = k.slice(&self.property_index);
                    const val = self.index.get(v);
                    std.debug.print(
                        \\"{s}": {s},
                    , .{ key, @tagName(val) });
                }
                std.debug.print("}}", .{});

                // This object is closed; its parent becomes the innermost one.
                cycles -= 1;
            },
            .property => {
                _ = try self.addProperty(allocator, token.value.?.string);
                if (cycles > 0) {
                    const top = depth_buf[cycles - 1];
                    const last_obj = self.index.get(top).object;
                    self.index.set(top, .{ .object = .{
                        .len = last_obj.len + 1,
                        .property_idx = last_obj.property_idx,
                        .value_idx = last_obj.value_idx,
                    } });
                    continue;
                }
            },
            .string => {
                // A string directly followed by a colon is an object key.
                if (it.peek()) |next| {
                    if (next.type == .colon) continue :flag TokenType.property;
                }

                _ = try self.addString(allocator, token.value.?.string);
                if (cycles > 0) continue;
            },
            .number => {
                _ = try self.addNumber(allocator, token.value.?.number);
                if (cycles > 0) continue;
            },
            .true, .false => {
                _ = try self.addBool(allocator, token.type == .true);
                if (cycles > 0) continue;
            },
            .null => {
                _ = try self.addNull(allocator);
                if (cycles > 0) continue;
            },
            .comma => {
                if (it.peek()) |tc| {
                    if (tc.type == .object_end and !self.options.flags.allow_trailing_comma) {
                        return error.TrailingComma;
                    }
                }
            },
            else => {},
        }

        // As before, only reached by arms that do not `continue` above.
        tokenizer.skipWhitespace();
    }
}
|
|
|
|
// Smoke test: parses a flat object holding string, boolean, null and number
// members. It checks only that `parse` returns without error; the arena
// releases everything allocated along the way.
test parse {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    const gpa = arena.allocator();

    var parser = Self.init;
    defer parser.deinit(gpa);

    const source =
        \\ {
        \\ "key": "hello",
        \\ "key2": "world",
        \\ "key3": true,
        \\ "key4": null,
        \\ "key5": 123
        \\ }
    ;

    var tokenizer = try Tokenizer.init(gpa, source);

    try parser.parse(&tokenizer);
}
|