const std = @import("std");
const mem = std.mem;
const Tokenizer = @import("tokenizer.zig");
const TokenType = Tokenizer.TokenType;
const Token = Tokenizer.Token;
const StringPool = @import("strings.zig");
const StringIndex = StringPool.StringIndex;
const assert = std.debug.assert;

const Self = @This();
/// Errors the parser can report. Declared as an error set (not an enum) so the
/// values unify with the `error.Eof` / `error.TrailingComma` / `error.MissingKey`
/// values that `parse` and `getValue` actually return.
pub const Error = error{ Eof, TrailingComma, MissingKey, MissingValue, UnexpectedToken };
/// Tag for every JSON value kind; shared by `JsonValue` and `JsonInput`.
pub const JsonType = enum { null, bool, number, string, array, object };
/// A JSON number, stored either as a wide integer or as a double.
pub const JsonNumber = union(enum) {
    int: i128,
    float: f64,

    /// Converts the stored number to `T`, which must be an integer or float
    /// type; any other `T` is a compile error. Int->int and float->int use
    /// checked casts (`@intCast` / `@intFromFloat`).
    pub fn cast(self: JsonNumber, comptime T: type) T {
        switch (self) {
            .int => |value| return switch (@typeInfo(T)) {
                .int => @as(T, @intCast(value)),
                .float => @as(T, @floatFromInt(value)),
                else => @compileError("not a number type"),
            },
            .float => |value| return switch (@typeInfo(T)) {
                .int => @as(T, @intFromFloat(value)),
                .float => @as(T, @floatCast(value)),
                else => @compileError("not a number type"),
            },
        }
    }
};
/// Compact in-index representation of a JSON value. Strings are interned
/// (`StringIndex` into `strings`); containers store a length plus the slot
/// offset (`tip`) of their first child in `index`.
pub const JsonValue = union(JsonType) {
    null: void,
    bool: bool,
    number: JsonNumber,
    string: StringIndex,
    array: ArraySlice,
    object: ObjectEntry,
};
/// Materialized (caller-owned) representation of a JSON value, produced by
/// `getValue`. Strings are slices into the parser's string pool; arrays and
/// object values are heap-allocated and must be released with `deinit`.
pub const JsonInput = union(JsonType) {
    null: void,
    bool: bool,
    number: JsonNumber,
    string: []const u8,
    array: []JsonInput,
    object: std.StringArrayHashMapUnmanaged(JsonInput),

    /// Recursively frees the array/object allocations owned by this value.
    /// String slices are NOT freed here — they point into the parser's pool.
    pub fn deinit(self: JsonInput, allocator: mem.Allocator) void {
        switch (self) {
            .array => |array| {
                for (array) |json_input|
                    json_input.deinit(allocator);
                allocator.free(array);
            },
            .object => |object| {
                // Work on a mutable copy of the map header instead of
                // @constCast-ing a pointer into the by-value parameter; the
                // backing allocations freed are the same either way.
                var owned = object;
                var it = owned.iterator();
                while (it.next()) |entry|
                    entry.value_ptr.deinit(allocator);
                owned.deinit(allocator);
            },
            else => {},
        }
    }

    /// std.fmt integration: renders the value as compact JSON.
    /// NOTE(review): string contents are printed verbatim — no escaping of
    /// quotes/backslashes/control chars; confirm inputs are pre-escaped.
    pub fn format(
        self: @This(),
        comptime fmt: []const u8,
        opts: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        switch (self) {
            .null => try writer.writeAll("null"),
            .bool => try writer.writeAll(if (self.bool) "true" else "false"),
            .number => switch (self.number) {
                .int => try writer.print("{d}", .{self.number.int}),
                .float => try writer.print("{d:.1}", .{self.number.float}),
            },
            .string => try writer.print("\"{s}\"", .{self.string}),
            .array => {
                try writer.writeByte('[');
                for (self.array, 0..) |val, i| {
                    try val.format(fmt, opts, writer);
                    if (i < self.array.len - 1) try writer.writeByte(',');
                }
                try writer.writeByte(']');
            },
            .object => {
                try writer.writeByte('{');
                for (self.object.keys(), self.object.values(), 0..) |k, v, i| {
                    try writer.print("\"{s}\"", .{k});
                    try writer.writeByte(':');
                    try v.format(fmt, opts, writer);
                    if (i < self.object.entries.len - 1) try writer.writeByte(',');
                }
                try writer.writeByte('}');
            },
        }
    }
};
/// same as ObjectEntry but simpler
/// `.tip` is the slot offset of the first element in `index`;
/// `.len` is the element count.
pub const ArraySlice = struct {
    len: usize,
    tip: usize,
};
/// Object header in the flat index: `.tip` is the slot of the first member
/// value; advance by `skipSlots` (just += the value's slot span) to reach
/// the next member.
pub const ObjectEntry = struct {
    len: usize,
    tip: usize,
};
/// Key record stored in `property_map`: the interned property name
/// (index into the `properties` pool) for a given value slot.
pub const PropertyEntry = struct {
    tip: StringIndex,
};
pub const Flags = packed struct {
|
|
/// Make the tokenizer omit comments, TBD
|
|
allow_comments: bool = false,
|
|
/// Not to error on trailing comma, default is `false` for obvious reasons
|
|
allow_trailing_comma: bool = false,
|
|
};
|
|
|
|
/// Parser configuration. `max_depth` must be comptime-known because it
/// sizes the BoundedArray scope stack in `parse`.
pub const Options = struct {
    comptime indent_len: usize = 4,
    comptime max_depth: usize = 256,
    flags: Flags = .{},
};
/// Flat DOM storage: one `JsonValue` per slot; slot 0 is the root.
index: std.MultiArrayList(JsonValue) = .{},
/// Interned string *values*.
strings: StringPool = .empty,
/// Interned object *keys*, kept separate from string values.
properties: StringPool = .empty,
/// Maps a value's slot index to the property (key) naming it.
property_map: std.AutoArrayHashMapUnmanaged(usize, PropertyEntry) = .empty,

options: Options = .{},

/// Empty parser; use as `var p: Self = .init;`.
pub const init = Self{};
/// Releases all parser-owned storage (index, both string pools, key map).
/// Values returned by `getValue` are owned by the caller and are NOT freed here.
pub fn deinit(self: *Self, allocator: mem.Allocator) void {
    self.index.deinit(allocator);
    self.properties.deinit(allocator);
    self.strings.deinit(allocator);
    self.property_map.deinit(allocator);
}
/// Appends a number value to the index and returns its slot.
fn addNumber(self: *Self, allocator: mem.Allocator, number: JsonNumber) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, .{ .number = number });
    return slot;
}
/// Interns an object key into the `properties` pool and returns its raw
/// pool index.
fn addProperty(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize {
    const stridx = try self.properties.add(allocator, bytes);
    // Reserve one map slot now; parse() consumes it via putAssumeCapacity.
    try self.property_map.ensureUnusedCapacity(allocator, 1);
    return @intFromEnum(stridx);
}
/// Interns `bytes` into the string pool, appends a string value to the
/// index, and returns its slot.
fn addString(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize {
    const stridx = try self.strings.add(allocator, bytes);
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, .{ .string = stridx });
    return slot;
}
/// Reserves one uninitialized slot in the index (filled in later via `set`)
/// and returns it.
fn addEmpty(self: *Self, allocator: mem.Allocator) !usize {
    return try self.index.addOne(allocator);
}
/// Appends a boolean value to the index and returns its slot.
fn addBool(self: *Self, allocator: mem.Allocator, value: bool) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, .{ .bool = value });
    return slot;
}
/// Appends a null value to the index and returns its slot.
fn addNull(self: *Self, allocator: mem.Allocator) !usize {
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, .{ .null = {} });
    return slot;
}
/// Recursively computes how many index slots the node at `slot` occupies,
/// including all nested children. Scalars occupy exactly one slot.
pub fn skipSlots(self: *Self, slot: usize) usize {
    // Both container kinds carry the same (tip, len) pair; normalize them,
    // scalars return early.
    const len: usize, const tip: usize = switch (self.index.get(slot)) {
        .object => |obj| .{ obj.len, obj.tip },
        .array => |arr| .{ arr.len, arr.tip },
        else => return 1,
    };
    var total: usize = 1; // the container's own slot
    var child = tip;
    for (0..len) |_| {
        const span = self.skipSlots(child);
        total += span;
        child += span;
    }
    return total;
}
/// Materializes the value at slot `idx` into a caller-owned `JsonInput`.
/// Caller must release the result with `JsonInput.deinit(allocator)`.
/// Returned string slices point into the parser's pools and stay valid
/// only until `self.deinit`.
pub fn getValue(
    self: *Self,
    allocator: mem.Allocator,
    idx: usize,
) !JsonInput {
    if (self.index.len == 0)
        return error.InvalidSyntax;

    switch (self.index.get(idx)) {
        .null => return .{ .null = {} },
        .bool => |b| return .{ .bool = b },
        .number => |number| return .{ .number = number },
        .string => |string| {
            const sl = string.slice(&self.strings);
            return .{ .string = sl };
        },
        .array => |arr| {
            const out = try allocator.alloc(JsonInput, arr.len);
            // Track how many children were materialized so the error path
            // deinits them too (previously only the outer slice was freed,
            // leaking nested allocations).
            var filled: usize = 0;
            errdefer {
                for (out[0..filled]) |*item| item.deinit(allocator);
                allocator.free(out);
            }
            var c = arr.tip;
            for (0..arr.len) |i| {
                out[i] = try self.getValue(allocator, c);
                filled = i + 1;
                c += self.skipSlots(c);
            }
            return .{ .array = out };
        },
        .object => |obj| {
            var map: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
            // Deep-clean on error: deinit every value already inserted, then
            // the map itself (previously only the map header was freed).
            errdefer {
                var it = map.iterator();
                while (it.next()) |entry| entry.value_ptr.deinit(allocator);
                map.deinit(allocator);
            }
            var tip = obj.tip;
            for (0..obj.len) |_| if (self.property_map.get(tip)) |pen| {
                try map.put(
                    allocator,
                    pen.tip.slice(&self.properties),
                    try self.getValue(allocator, tip),
                );
                tip += self.skipSlots(tip);
            } else return error.MissingKey;
            return .{ .object = map };
        },
    }
}
/// Parses the tokenizer's entire input into the flat index.
/// always returns 0 (root)
///
/// Implemented as a labeled-switch state machine: each token handler
/// dispatches to the next state via `continue :flag`. `query` is the stack
/// of currently-open container slots; the top of `query` is the container
/// that receives the value being parsed.
pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usize {
    tokenizer.skipWhitespace();

    if (tokenizer.endOfInput())
        return error.Eof;

    // Slot 0 — reserved for the root value, patched in below.
    const root = try self.addEmpty(allocator);

    var token = try tokenizer.nextToken(allocator);

    // Scope stack; capacity is comptime-fixed by options.max_depth.
    var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);

    flag: switch (token.type) {
        .eof => {
            if (root != 0) return error.InvalidSyntax;
            if (query.slice().len != 0) return error.InvalidSyntax;
            return root;
        },
        .property => {
            // Current scope must be an object; record the key and bump len.
            const scope_idx = query.get(query.len - 1);
            switch (self.index.get(scope_idx)) {
                .object => |scope| {
                    const pidx = try self.addProperty(allocator, token.value.?.string);
                    // The upcoming value will land at slot self.index.len;
                    // map that slot to this key. Capacity was reserved by
                    // addProperty, hence putAssumeCapacity.
                    const reer = self.index.len;

                    self.property_map.putAssumeCapacity(reer, .{ .tip = @enumFromInt(pidx) });
                    // Key bytes were interned; free the tokenizer's copy.
                    allocator.free(token.value.?.string);

                    self.index.set(scope_idx, .{ .object = ObjectEntry{
                        .len = scope.len + 1,
                        .tip = scope.tip,
                    } });
                },
                else => return error.InvalidSyntax,
            }

            const next = try tokenizer.nextToken(allocator);
            token = next;
            switch (next.type) {
                .colon => {
                    // Skip the colon and dispatch on the value token.
                    token = try tokenizer.nextToken(allocator);
                    continue :flag token.type;
                },
                else => continue :flag next.type,
            }
        },
        .object_begin => {
            if (query.slice().len < 1) {
                // Root object: occupies slot 0, children start at slot 1.
                const ptr = try query.addOne();
                ptr.* = root;
                self.index.set(root, .{ .object = ObjectEntry{
                    .len = 0,
                    .tip = 1,
                } });
            } else {
                //order
                const parent_idx = query.get(query.len - 1);

                const idx_ptr = try query.addOne();
                idx_ptr.* = try self.addEmpty(allocator);
                self.index.set(idx_ptr.*, .{
                    .object = ObjectEntry{
                        .len = 0,
                        .tip = self.index.len,
                    },
                });
                // If the parent is an array, register this object as an element.
                switch (self.index.get(parent_idx)) {
                    .array => |slice| {
                        self.index.set(parent_idx, .{ .array = ArraySlice{
                            .len = slice.len + 1,
                            .tip = if (slice.len == 0) idx_ptr.* else slice.tip,
                        } });
                    },
                    else => {},
                }
            }

            const next = try tokenizer.nextToken(allocator);
            token = next;
            switch (next.type) {
                .string => continue :flag .property,
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .object_end, .array_end => {
            if (query.pop() == null)
                return error.InvalidSyntax; // double close

            // Root scope closed: parsing is complete.
            if (query.slice().len == 0)
                return root;

            const next = try tokenizer.nextToken(allocator);

            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => continue :flag next.type,
                else => return error.InvalidSyntax,
            }
        },
        .array_begin => {
            defer tokenizer.skipWhitespace();

            if (query.slice().len < 1) {
                // Root array: occupies slot 0, children start at slot 1.
                const ptr = try query.addOne();
                ptr.* = root;
                self.index.set(root, .{ .array = ArraySlice{
                    .len = 0,
                    .tip = 1,
                } });
            } else {
                // order matters
                const parent_idx = query.get(query.len - 1);

                const idx_ptr = try query.addOne();

                idx_ptr.* = try self.addEmpty(allocator);
                self.index.set(idx_ptr.*, .{ .array = ArraySlice{
                    .len = 0,
                    .tip = idx_ptr.* + 1,
                } });

                // If the parent is an array, register this array as an element.
                switch (self.index.get(parent_idx)) {
                    .array => |slice| {
                        self.index.set(parent_idx, .{ .array = ArraySlice{
                            .len = slice.len + 1,
                            .tip = if (slice.len == 0) idx_ptr.* else slice.tip,
                        } });
                    },
                    else => {},
                }
            }

            const next = try tokenizer.nextToken(allocator);
            token = next;
            switch (next.type) {
                // A bare key directly inside an array is malformed.
                .property => return error.InvalidSyntax,
                else => continue :flag next.type,
            }
        },
        .true, .false => {
            const idx = try self.addBool(allocator, if (token.type == .true) true else false);

            if (query.len == 0) {
                // root
                self.index.set(root, .{ .bool = if (token.type == .true) true else false });
                return root;
            }

            const parent_idx = query.get(query.len - 1);
            switch (self.index.get(parent_idx)) {
                .array => |slice| {
                    self.index.set(parent_idx, .{ .array = ArraySlice{
                        .len = slice.len + 1,
                        .tip = if (slice.len == 0) idx else slice.tip,
                    } });
                },
                else => {},
            }

            const next = try tokenizer.nextToken(allocator);
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => continue :flag next.type,
                else => return error.InvalidSyntax,
            }
        },
        .string => {
            if (query.len == 0) {
                // root
                _ = try self.addString(allocator, token.value.?.string);
                allocator.free(token.value.?.string);
                // Root string is assumed to be the pool's first entry
                // (index 0) — TODO confirm against StringPool.add.
                self.index.set(root, .{ .string = @enumFromInt(0) });
                return root;
            }

            const parent_idx = query.get(query.len - 1);

            // Lookahead: a colon means this string was actually a key.
            const next = try tokenizer.nextToken(allocator);
            switch (next.type) {
                .colon => {
                    continue :flag .property;
                },
                else => |t| {
                    const idx = try self.addString(allocator, token.value.?.string);
                    allocator.free(token.value.?.string);
                    switch (self.index.get(parent_idx)) {
                        .array => |slice| {
                            self.index.set(parent_idx, .{ .array = ArraySlice{
                                .len = slice.len + 1,
                                .tip = if (slice.len == 0) idx else slice.tip,
                            } });
                        },
                        else => {},
                    }

                    token = next;
                    continue :flag t;
                },
            }
        },
        .int, .float => |number| {
            if (query.len == 0) {
                // root
                _ = switch (number) {
                    .int => try self.addNumber(allocator, .{ .int = token.value.?.int }),
                    .float => try self.addNumber(allocator, .{ .float = token.value.?.float }),
                    else => unreachable,
                };
                self.index.set(root, .{ .number = switch (number) {
                    .int => .{ .int = token.value.?.int },
                    .float => .{ .float = token.value.?.float },
                    else => unreachable,
                } });
                return root;
            }

            const parent_idx = query.get(query.len - 1);
            const idx = switch (number) {
                .int => try self.addNumber(allocator, .{ .int = token.value.?.int }),
                .float => try self.addNumber(allocator, .{ .float = token.value.?.float }),
                else => unreachable,
            };
            switch (self.index.get(parent_idx)) {
                .array => |slice| {
                    self.index.set(parent_idx, .{ .array = ArraySlice{
                        .len = slice.len + 1,
                        .tip = if (slice.len == 0) idx else slice.tip,
                    } });
                },
                else => {},
            }

            const next = try tokenizer.nextToken(allocator);
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => continue :flag next.type,
                else => return error.InvalidSyntax,
            }
        },
        .comma => {
            // NOTE(review): when allow_trailing_comma is set this falls
            // through to the final `return root` without consuming more
            // tokens — confirm that is the intended behavior.
            if (!self.options.flags.allow_trailing_comma) {
                const next = try tokenizer.nextToken(allocator);
                token = next;
                switch (next.type) {
                    .object_end, .array_end => return error.TrailingComma,
                    .comma => return error.InvalidSyntax,
                    else => continue :flag token.type,
                }
            }
        },
        .null => {
            const idx = try self.addNull(allocator);

            if (query.len == 0) {
                // root
                self.index.set(root, .{ .null = {} });
                return root;
            }
            const parent_idx = query.get(query.len - 1);
            switch (self.index.get(parent_idx)) {
                .array => |slice| {
                    self.index.set(parent_idx, .{ .array = ArraySlice{
                        .len = slice.len + 1,
                        .tip = if (slice.len == 0) idx else slice.tip,
                    } });
                },
                else => {},
            }
            // Tokenizer errors other than InvalidSyntax (e.g. end of input)
            // terminate parsing successfully here.
            const next = tokenizer.nextToken(allocator) catch |err| switch (err) {
                error.InvalidSyntax => return err,
                else => return root,
            };
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => continue :flag next.type,
                else => return error.InvalidSyntax,
            }
        },
        else => return error.InvalidSyntax,
    }

    return root;
}
test getValue {
    const allocator = std.testing.allocator;

    const text =
        \\{
        \\  "a":"A",
        \\  "b":"B",
        \\  "c": {
        \\    "d": "D"
        \\  },
        \\  "e": "E",
        \\  "f": [1]
        \\}
    ; // 1: a, 2: b, 3: c, 4: d, 5: e, 6: f

    var tokenizer: Tokenizer = try .init(allocator, text);
    defer tokenizer.deinit(allocator);

    // Stack-allocate the parser; no need for allocator.create/destroy.
    var self: Self = .init;
    defer self.deinit(allocator);

    const idx: usize = try self.parse(allocator, &tokenizer);

    var root = try self.getValue(allocator, idx);
    defer root.deinit(allocator);

    try std.testing.expect(root == .object);
    std.debug.print("{}\n", .{root});
}