619 lines
19 KiB
Zig
619 lines
19 KiB
Zig
const std = @import("std");
|
|
const Tokenizer = @import("tokenizer.zig");
|
|
const TokenType = Tokenizer.TokenType;
|
|
const Token = Tokenizer.Token;
|
|
const StringPool = @import("strings.zig");
|
|
const StringIndex = StringPool.StringIndex;
|
|
const assert = std.debug.assert;
|
|
|
|
const Self = @This();
|
|
|
|
/// Parser diagnostics expressed as plain enum values.
/// NOTE(review): `parse` reports `error.TrailingComma` through its error union,
/// not through this enum — confirm whether this type is still referenced.
pub const Error = enum { TrailingComma };
|
|
|
|
/// Tag type shared by `JsonValue` and `JsonInput`: one member per JSON value kind.
pub const JsonType = enum { null, bool, number, string, array, object };
|
|
|
|
/// One slot of the flat `index`: scalars are stored inline, strings as a handle
/// into a string pool, containers as a descriptor of where their children live.
pub const JsonValue = union(JsonType) {
    /// JSON `null`; carries no payload.
    null: void,
    /// JSON `true` / `false`.
    bool: bool,
    /// JSON number, kept as a 64-bit float.
    number: f64,
    /// Handle of the string's bytes inside a `StringPool`.
    string: StringIndex,
    /// Where an array's elements start and how many there are.
    array: ArraySlice,
    /// Where an object's keys and values start and how many entries it has.
    object: ObjectEntry,
};
|
|
|
|
/// Fully materialised JSON value tree, as opposed to the flat `JsonValue` slots.
///
/// String payloads and object keys are borrowed slices: `deinit` releases the
/// array storage and the hash maps only, never the bytes they point at.
pub const JsonInput = union(JsonType) {
    null: void,
    bool: bool,
    number: f64,
    string: []const u8,
    array: []JsonInput,
    object: std.StringArrayHashMapUnmanaged(JsonInput),

    /// Recursively frees every array slice and object map reachable from `self`
    /// using `allocator`. Scalars and strings own nothing and are left alone.
    pub fn deinit(self: JsonInput, allocator: std.mem.Allocator) void {
        switch (self) {
            .array => |items| {
                for (items) |item| item.deinit(allocator);
                allocator.free(items);
            },
            .object => |object| {
                for (object.values()) |value| value.deinit(allocator);
                // `self` is an immutable by-value parameter; deinitialise a
                // local copy of the map header instead of casting const away.
                var map = object;
                map.deinit(allocator);
            },
            // Keys and string payloads are not owned by this tree.
            .null, .bool, .number, .string => {},
        }
    }

    /// `std.fmt` hook: writes `self` as compact JSON text (no whitespace).
    /// Strings and keys are written verbatim between quotes; they are not escaped.
    pub fn format(
        self: @This(),
        comptime fmt: []const u8,
        opts: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        switch (self) {
            .null => try writer.writeAll("null"),
            .bool => |b| try writer.writeAll(if (b) "true" else "false"),
            .number => |n| try writer.print("{d}", .{n}),
            .string => |s| try writer.print("\"{s}\"", .{s}),
            .array => |items| {
                try writer.writeByte('[');
                for (items, 0..) |item, i| {
                    // Separator goes before every element except the first.
                    if (i != 0) try writer.writeByte(',');
                    try item.format(fmt, opts, writer);
                }
                try writer.writeByte(']');
            },
            .object => |map| {
                try writer.writeByte('{');
                for (map.keys(), map.values(), 0..) |key, value, i| {
                    if (i != 0) try writer.writeByte(',');
                    try writer.print("\"{s}\"", .{key});
                    try writer.writeByte(':');
                    try value.format(fmt, opts, writer);
                }
                try writer.writeByte('}');
            },
        }
    }
};
|
|
|
|
/// Descriptor of a JSON array inside the flat index; the array counterpart of
/// `ObjectEntry`, without the key bookkeeping.
pub const ArraySlice = struct {
    /// Index slot of the first element.
    start: usize,
    /// Number of elements.
    len: usize,
};
|
|
|
|
/// Descriptor of a JSON object inside the flat index.
///
/// Keys and values are stored in insertion order; a reader starts at
/// `property_idx` / `value_idx` and advances both cursors to reach the next entry.
pub const ObjectEntry = struct {
    /// Number of key/value pairs.
    len: usize,
    /// Offset of the first key in the property pool.
    property_idx: usize,
    /// Index slot of the first value.
    value_idx: usize,
};
|
|
|
|
/// Bit flags that relax the parser's strictness.
pub const Flags = packed struct {
    /// When false, `parse` fails with `error.TrailingComma` on a comma that is
    /// directly followed by a closing token.
    allow_trailing_comma: bool = false,
};
|
|
|
|
/// Compile-time configuration; every field is `comptime`, so the defaults are
/// the only values an instance can hold.
pub const Options = struct {
    /// Indentation width. NOTE(review): not read anywhere in the visible code.
    comptime indent_len: usize = 4,
    /// Capacity of the scope stack `parse` keeps for open objects.
    comptime max_depth: usize = 256,
    /// Strictness switches, see `Flags`.
    comptime flags: Flags = .{},
};
|
|
|
|
/// Flat storage of every value; containers refer to their children by slot.
index: std.MultiArrayList(JsonValue) = .{},
/// Pool holding the bytes of string values.
string_index: StringPool = .empty,
/// Pool holding the bytes of object keys.
property_index: StringPool = .empty,

/// Compile-time configuration consulted by `parse`.
options: Options = .{},

/// An empty instance built from the field defaults; pair it with `deinit`.
pub const init: Self = .{};
|
|
|
|
/// Releases the value index and both string pools through `allocator`.
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    // The three containers are independent, so teardown order is irrelevant.
    self.string_index.deinit(allocator);
    self.property_index.deinit(allocator);
    self.index.deinit(allocator);
}
|
|
|
|
/// Appends `number` as a new slot and returns the slot's position in `index`.
fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, JsonValue{ .number = number });
    return slot;
}
|
|
|
|
/// Copies `bytes` into the property pool and returns the key's handle as an integer.
/// No index slot is written, but room for one more slot is reserved.
fn addProperty(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const key = try self.property_index.add(allocator, bytes);
    try self.index.ensureUnusedCapacity(allocator, 1);
    const raw: usize = @intFromEnum(key);
    return raw;
}
|
|
|
|
/// Copies `bytes` into the string pool, appends a `.string` slot that refers to
/// the copy, and returns the slot's position in `index`.
fn addString(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const handle = try self.string_index.add(allocator, bytes);
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, JsonValue{ .string = handle });
    return slot;
}
|
|
|
|
/// Stores `object` and everything below it, returning the slot of the object itself.
///
/// Layout matches what `parse` writes and `getValue` reads: the object's slot
/// comes first and its values follow in order (each value followed by its own
/// descendants); keys are appended to the property pool in the same pre-order.
///
/// The parameter type is the map type held by `JsonInput.object`, so the call
/// in `addValue` type-checks.
fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.StringArrayHashMapUnmanaged(JsonInput)) !usize {
    // Reserve the object's own slot before any child so children land after it.
    const idx = try self.index.addOne(allocator);
    self.index.set(idx, .{ .object = .{
        .len = 0,
        .property_idx = 0,
        .value_idx = idx + 1,
    } });

    var first_property: usize = 0;
    for (object.keys(), object.values(), 0..) |key, value, i| {
        // The key goes in before the value so nested keys follow their parent key.
        const stridx = try self.property_index.add(allocator, key);
        if (i == 0) first_property = @intFromEnum(stridx);
        try self.addValue(allocator, value);
    }

    // Publish the final descriptor once every entry has been stored.
    self.index.set(idx, .{ .object = .{
        .len = object.count(),
        .property_idx = first_property,
        .value_idx = idx + 1,
    } });
    return idx;
}
|
|
|
|
/// Appends an object slot with no entries and returns its position in `index`.
///
/// `property_idx` is the current end of the property pool's byte buffer and
/// `value_idx` is the slot right after the object, which is where `parse`
/// places the first child of an object.
fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
    const idx = try self.index.addOne(allocator);
    self.index.set(idx, .{ .object = .{
        .len = 0,
        .property_idx = self.property_index.string_bytes.items.len,
        // First child slot is directly behind the object; reading
        // `self.index.len + 1` after the append pointed one slot too far.
        .value_idx = idx + 1,
    } });
    return idx;
}
|
|
|
|
/// Stores `array` and everything below it, returning the slot of the array itself.
///
/// The array's slot comes first and its elements follow in order (each element
/// followed by its own descendants), which is the layout `getValue` walks and
/// the one `addEmptyArray` uses for `start`.
fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usize {
    // Reserve the array's own slot first; the first element lands right behind it.
    const idx = try self.index.addOne(allocator);
    self.index.set(idx, .{ .array = .{
        .start = idx + 1,
        .len = array.len,
    } });

    for (array) |value| {
        try self.addValue(allocator, value);
    }
    return idx;
}
|
|
|
|
/// Appends an array slot with zero elements and returns its position in `index`.
/// `start` refers to the slot right after the array.
fn addEmptyArray(self: *Self, allocator: std.mem.Allocator) !usize {
    const slot = try self.index.addOne(allocator);
    // After the append the list length equals `slot + 1`.
    const empty: ArraySlice = .{ .start = slot + 1, .len = 0 };
    self.index.set(slot, JsonValue{ .array = empty });
    return slot;
}
|
|
|
|
/// Appends `value` as a `.bool` slot and returns the slot's position in `index`.
fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, JsonValue{ .bool = value });
    return slot;
}
|
|
|
|
/// Appends a `.null` slot and returns the slot's position in `index`.
fn addNull(self: *Self, allocator: std.mem.Allocator) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, JsonValue{ .null = {} });
    return slot;
}
|
|
|
|
/// Stores `value` (and, for containers, everything below it) by dispatching to
/// the `add*` function for its tag.
///
/// The slot returned by that function is discarded on purpose: this function
/// is declared `!void`, and the value always occupies the next free slot.
fn addValue(self: *Self, allocator: std.mem.Allocator, value: JsonInput) !void {
    _ = switch (value) {
        .null => try self.addNull(allocator),
        .bool => |b| try self.addBool(allocator, b),
        .number => |n| try self.addNumber(allocator, n),
        .string => |s| try self.addString(allocator, s),
        .array => |items| try self.addArray(allocator, items),
        .object => |map| try self.addObject(allocator, map),
    };
}
|
|
|
|
/// Looks `index` (a key's text) up in the property pool's string table and
/// returns whatever handle the table has for it, or null.
fn getProperty(self: *Self, index: []const u8) ?StringIndex {
    const found = self.property_index.string_table.get(index);
    return found;
}
|
|
|
|
/// Returns the number stored at slot `index`, or null when the slot does not
/// exist or holds a different kind of value.
fn getNumber(self: *Self, index: usize) ?f64 {
    // `MultiArrayList.get` is not optional and asserts on bounds, so check here.
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .number => |n| n,
        else => null,
    };
}
|
|
|
|
/// Lists the direct entries of the object stored at slot `index`.
///
/// Returns a tuple of (key handles, value slots), both `entry.object.len` long
/// and allocated with `allocator`; the caller frees both. An object without
/// entries yields two empty slices that need no freeing.
///
/// The slot must hold an object; accessing `.object` on another tag is
/// safety-checked illegal behaviour.
fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !struct {
    []StringIndex,
    []usize,
} {
    const entry = self.index.get(index);

    if (entry.object.len == 0) {
        return .{ &.{}, &.{} };
    }

    const keys = try allocator.alloc(StringIndex, entry.object.len);
    // Do not leak `keys` when the second allocation fails.
    errdefer allocator.free(keys);
    const values = try allocator.alloc(usize, entry.object.len);

    var pidx = entry.object.property_idx;
    var vidx = entry.object.value_idx;

    for (keys, values) |*key, *value| {
        const handle: StringIndex = @enumFromInt(pidx);
        key.* = handle;
        value.* = vidx;

        // Step over this key (plus its terminator byte) ...
        pidx += handle.slice(&self.property_index).len + 1;
        // ... over every key that belongs to the value's descendants ...
        self.skipNestedProps(&pidx, vidx);
        // ... and over all slots the value occupies, nested ones included.
        vidx += self.skipSlots(vidx);
    }

    return .{ keys, values };
}
|
|
|
|
/// Lists the slots of the direct elements of the array stored at slot `index`.
///
/// The returned slice is allocated with `allocator` and owned by the caller;
/// an empty array yields an empty slice that needs no freeing.
///
/// The slot must hold an array; accessing `.array` on another tag is
/// safety-checked illegal behaviour.
fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) ![]usize {
    const entry = self.index.get(index);

    if (entry.array.len == 0) {
        return &.{};
    }

    const values = try allocator.alloc(usize, entry.array.len);

    var idx = entry.array.start;
    for (values) |*value| {
        value.* = idx;
        // A nested container occupies more than one slot; skip all of them.
        idx += self.skipSlots(idx);
    }
    return values;
}
|
|
|
|
/// Returns the boolean stored at slot `index`, or null when the slot does not
/// exist or holds a different kind of value.
fn getBool(self: *Self, index: usize) ?bool {
    // `MultiArrayList.get` is not optional and asserts on bounds, so check here.
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .bool => |b| b,
        else => null,
    };
}
|
|
|
|
/// Returns `{}` when slot `index` exists and holds JSON null, otherwise null.
fn getNull(self: *Self, index: usize) ?void {
    // `MultiArrayList.get` is not optional and asserts on bounds, so check here.
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .null => {},
        else => null,
    };
}
|
|
/// Counts the index slots taken up by the value at `slot`: one for the value
/// itself plus, for containers, the slots of all descendants.
fn skipSlots(self: *Self, slot: usize) usize {
    const Span = struct { first: usize, count: usize };

    const children: Span = switch (self.index.get(slot)) {
        .object => |obj| .{ .first = obj.value_idx, .count = obj.len },
        .array => |arr| .{ .first = arr.start, .count = arr.len },
        // Scalars and strings occupy exactly their own slot.
        else => return 1,
    };

    var occupied: usize = 1;
    var cursor = children.first;
    var remaining = children.count;
    while (remaining != 0) : (remaining -= 1) {
        const width = self.skipSlots(cursor);
        occupied += width;
        cursor += width;
    }
    return occupied;
}
|
|
|
|
/// Returns how many bytes of the property pool are covered by `count`
/// consecutive keys starting at offset `pidx`; each key is counted with one
/// extra byte.
fn skipProps(self: *Self, pidx: usize, count: usize) usize {
    var cursor = pidx;
    var remaining = count;
    while (remaining != 0) : (remaining -= 1) {
        const key = StringIndex.slice(@enumFromInt(cursor), &self.property_index);
        cursor += key.len + 1;
    }
    return cursor - pidx;
}
|
|
|
|
/// Advances the property-pool cursor `pptr.*` past every key that belongs to
/// the value at `slot` or to any of its descendants.
///
/// Scalars and strings own no keys and leave the cursor untouched. Arrays own
/// no keys themselves but may contain objects, so their elements are visited.
fn skipNestedProps(self: *Self, pptr: *usize, slot: usize) void {
    switch (self.index.get(slot)) {
        .object => |obj| {
            var v = obj.value_idx;
            for (0..obj.len) |_| {
                // Convert the cursor by value; reinterpreting `*usize` as
                // `*StringIndex` depends on integer width and byte order.
                const key: StringIndex = @enumFromInt(pptr.*);
                pptr.* += key.slice(&self.property_index).len + 1;
                // Keys of this entry's value follow immediately.
                self.skipNestedProps(pptr, v);
                v += self.skipSlots(v);
            }
        },
        .array => |arr| {
            var c = arr.start;
            for (0..arr.len) |_| {
                self.skipNestedProps(pptr, c);
                c += self.skipSlots(c);
            }
        },
        else => {},
    }
}
|
|
|
|
/// Rebuilds the value stored at slot `idx` as a `JsonInput` tree.
///
/// Arrays and object maps are allocated with `allocator`; release the result
/// with `JsonInput.deinit`. String payloads and object keys are slices into
/// the pools of `self`. On failure everything allocated so far is released.
fn getValue(
    self: *Self,
    allocator: std.mem.Allocator,
    idx: usize,
) !JsonInput {
    const entry = self.index.get(idx);

    switch (entry) {
        .null => return .null,
        .bool => |b| return .{ .bool = b },
        .number => |n| return .{ .number = n },
        .string => |string| {
            return .{ .string = string.slice(&self.string_index) };
        },
        .array => |arr| {
            const out = try allocator.alloc(JsonInput, arr.len);
            // Only the first `built` elements are initialised at any time.
            var built: usize = 0;
            errdefer {
                for (out[0..built]) |item| item.deinit(allocator);
                allocator.free(out);
            }

            var c = arr.start;
            for (out) |*dst| {
                dst.* = try self.getValue(allocator, c);
                built += 1;
                c += self.skipSlots(c);
            }
            return .{ .array = out };
        },
        .object => |obj| {
            var map: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
            errdefer {
                for (map.values()) |stored| stored.deinit(allocator);
                map.deinit(allocator);
            }

            var p = obj.property_idx;
            var v = obj.value_idx;
            for (0..obj.len) |_| {
                // Extract key
                const k: StringIndex = @enumFromInt(p);
                const key_slice = k.slice(&self.property_index);

                // Extract value; it is not owned by the map until stored below.
                const val = try self.getValue(allocator, v);
                errdefer val.deinit(allocator);

                const gop = try map.getOrPut(allocator, key_slice);
                // A repeated key replaces the earlier value; free the old one.
                if (gop.found_existing) gop.value_ptr.deinit(allocator);
                gop.value_ptr.* = val;

                // Advance past this key
                p += key_slice.len + 1;
                // Skip nested property names of this value
                self.skipNestedProps(&p, v);
                // Advance past the value slots
                v += self.skipSlots(v);
            }
            return .{ .object = map };
        },
    }
}
|
|
|
|
// Parses a document with several levels of nested objects, rebuilds the root
// through `getValue`, and checks that it comes back as an object. The rebuilt
// tree is also printed for manual inspection.
test getValue {
    const gpa = std.testing.allocator;

    const source =
        \\ {
        \\ "name": "Yuzu",
        \\ "author": true,
        \\ "age": 15,
        \\ "address": {
        \\ "street": 1,
        \\ "deeply_nested": {
        \\ "k": 5,
        \\ "socialist": "expansion",
        \\ "idk": {"a":"b"}
        \\ }
        \\ },
        \\ "offset": "yes"
        \\ }
    ;

    var tokens: Tokenizer = try .init(gpa, source);
    defer tokens.deinit();

    var document = init;
    defer document.deinit(gpa);

    const root_slot: usize = try parse(&document, &tokens);

    var rebuilt = try getValue(&document, gpa, root_slot);
    defer rebuilt.deinit(gpa);

    try std.testing.expect(rebuilt == .object);
    std.debug.print("{}\n", .{rebuilt});
}
|
|
|
|
/// Consumes the tokens of `tokenizer` and stores the document in `self`.
///
/// Returns the slot of the root object; this is 0 when `self` held no values
/// before the call. All allocations go through `tokenizer.allocator`. The
/// string payload of every consumed string token is copied into a pool and
/// then freed with that allocator.
///
/// Errors:
/// - `error.InvalidSyntax` for an unexpected token, input that ends inside a
///   value, a key outside any object, or unbalanced braces.
/// - `error.TrailingComma` for a comma directly before a closing token unless
///   `options.flags.allow_trailing_comma` is set.
/// - Anything the allocator, the pools, or the bounded scope stack report
///   (the stack holds at most `options.max_depth` open objects).
///
/// NOTE(review): token types without an arm of their own fall into the final
/// `else` and end parsing early, returning the partially built root without
/// an error.
pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
    const allocator = tokenizer.allocator;

    var it = tokenizer.iterator();

    const root = try self.addEmptyObject(allocator);

    var token = it.next() orelse
        return root;

    // Stack of the slots of all currently open objects, innermost last.
    var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);

    flag: switch (token.type) {
        .eof => {
            // Input ended while an object was still open.
            if (query.len != 0) return error.InvalidSyntax;
            return root;
        },
        .property => {
            defer tokenizer.skipWhitespace();

            // A key is only meaningful inside an open object.
            if (query.len == 0) return error.InvalidSyntax;

            const scope_idx = query.get(query.len - 1);
            switch (self.index.get(scope_idx)) {
                .object => |scope| {
                    const pidx = try self.addProperty(allocator, token.value.?.string);
                    allocator.free(token.value.?.string);
                    self.index.set(scope_idx, .{ .object = ObjectEntry{
                        .len = scope.len + 1,
                        // The first key fixes where this object's keys start.
                        .property_idx = if (scope.len == 0) pidx else scope.property_idx,
                        .value_idx = scope.value_idx,
                    } });
                },
                else => return error.InvalidSyntax,
            }

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .colon => {
                    token = it.next() orelse return error.InvalidSyntax;
                    continue :flag token.type;
                },
                // Reached via the `.string` arm, which already consumed the colon.
                else => continue :flag next.type,
            }
        },
        .object_begin => {
            defer tokenizer.skipWhitespace();

            if (query.slice().len == 0) {
                // Outermost object: reuse the pre-allocated root slot.
                try query.ensureUnusedCapacity(1);
                const ptr = query.addOneAssumeCapacity();
                ptr.* = root;
                self.index.set(root, .{ .object = ObjectEntry{
                    .len = 0,
                    .property_idx = 0,
                    // Children follow the root directly, wherever the root sits.
                    .value_idx = root + 1,
                } });
            } else {
                const idx_ptr = try query.addOne();
                idx_ptr.* = try self.addEmptyObject(allocator);
                self.index.set(idx_ptr.*, .{
                    .object = ObjectEntry{
                        .len = 0,
                        // Placeholder; replaced when the first key is seen.
                        .property_idx = self.index.len,
                        .value_idx = self.index.len,
                    },
                });
            }

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .string => continue :flag .property,
                // `{}` is a valid, empty object.
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .object_end => {
            defer tokenizer.skipWhitespace();

            // A closing brace without a matching opening one is malformed input.
            _ = query.pop() orelse return error.InvalidSyntax;

            const next = it.next() orelse
                return root;
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => |t| continue :flag t,
                // Let the `.eof` arm decide whether every object was closed.
                .eof => continue :flag .eof,
                else => return error.InvalidSyntax,
            }
        },
        .true, .false => {
            defer tokenizer.skipWhitespace();

            _ = try self.addBool(allocator, token.type == .true);

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .string => {
            defer tokenizer.skipWhitespace();

            const next = it.next() orelse return error.InvalidSyntax;
            switch (next.type) {
                // A string followed by a colon is a key; `token` still holds it.
                .colon => continue :flag .property,
                else => |t| {
                    _ = try self.addString(allocator, token.value.?.string);
                    allocator.free(token.value.?.string);

                    token = next;
                    continue :flag t;
                },
            }
        },
        .number => {
            defer tokenizer.skipWhitespace();

            _ = try self.addNumber(allocator, token.value.?.number);

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .comma => {
            token = it.next() orelse return error.InvalidSyntax;
            switch (token.type) {
                .object_end, .array_end => |t| {
                    if (!self.options.flags.allow_trailing_comma) return error.TrailingComma;
                    // Trailing commas are tolerated: carry on with the closer.
                    continue :flag t;
                },
                else => |t| continue :flag t,
            }
        },
        else => {},
    }

    return root;
}
|