523 lines
16 KiB
Zig
523 lines
16 KiB
Zig
const std = @import("std");
|
|
const Tokenizer = @import("tokenizer.zig");
|
|
const TokenType = Tokenizer.TokenType;
|
|
const Token = Tokenizer.Token;
|
|
const StringPool = @import("strings.zig");
|
|
const StringIndex = StringPool.StringIndex;
|
|
const assert = std.debug.assert;
|
|
|
|
const Self = @This();
|
|
|
|
/// Parser diagnostics expressed as a plain enum.
/// NOTE(review): `parse` reports `error.TrailingComma` through its error
/// union, not through this enum — confirm whether an error set was intended.
pub const Error = enum {
    /// A comma was immediately followed by a closing token.
    TrailingComma,
};
|
|
|
|
/// Discriminant shared by `JsonValue` and `JsonInput`: the six kinds of JSON
/// value.
pub const JsonType = enum {
    /// The literal `null`.
    null,
    /// `true` or `false`.
    bool,
    /// A numeric value.
    number,
    /// A string value.
    string,
    /// An ordered list of values.
    array,
    /// A collection of key/value pairs.
    object,
};
|
|
|
|
/// One node as stored in the flat `index` list. Scalars are stored inline;
/// strings, arrays and objects are stored as handles/offsets into other
/// storage.
pub const JsonValue = union(JsonType) {
    /// The literal `null`; carries no payload.
    null: void,
    /// Boolean payload.
    bool: bool,
    /// Numeric payload.
    number: f64,
    /// Handle returned by the string pool when the text was added.
    string: StringIndex,
    /// Start offset and element count of the array.
    array: ArraySlice,
    /// Entry count plus the offsets of the first key and first value.
    object: ObjectEntry,
};
|
|
|
|
/// Tree-shaped JSON value built from ordinary slices and a string-keyed map,
/// as opposed to the flat, index-based `JsonValue`.
pub const JsonInput = union(JsonType) {
    null: void,
    bool: bool,
    number: f64,
    string: []const u8,
    array: []JsonInput,
    object: std.StringArrayHashMapUnmanaged(JsonInput),

    /// Writes the value as compact JSON (no whitespace) to `writer`.
    ///
    /// `fmt` and `opts` are not interpreted; they are only forwarded to the
    /// recursive calls for nested values. Strings and object keys are quoted
    /// and escaped, so the output stays valid JSON even when they contain
    /// quotes, backslashes or control bytes. Any writer error is propagated.
    pub fn format(
        self: @This(),
        comptime fmt: []const u8,
        opts: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        switch (self) {
            .null => try writer.writeAll("null"),
            .bool => |flag| try writer.writeAll(if (flag) "true" else "false"),
            .number => |num| try writer.print("{d}", .{num}),
            .string => |text| try writeQuoted(writer, text),
            .array => |items| {
                try writer.writeByte('[');
                for (items, 0..) |item, i| {
                    // Separator goes before every element except the first.
                    if (i != 0) try writer.writeByte(',');
                    try item.format(fmt, opts, writer);
                }
                try writer.writeByte(']');
            },
            .object => |map| {
                try writer.writeByte('{');
                for (map.keys(), map.values(), 0..) |key, value, i| {
                    if (i != 0) try writer.writeByte(',');
                    try writeQuoted(writer, key);
                    try writer.writeByte(':');
                    try value.format(fmt, opts, writer);
                }
                try writer.writeByte('}');
            },
        }
    }

    /// Writes `bytes` as a double-quoted JSON string, escaping `"`, `\` and
    /// every byte below 0x20. All other bytes are copied through unchanged.
    fn writeQuoted(writer: anytype, bytes: []const u8) !void {
        try writer.writeByte('"');
        for (bytes) |byte| {
            switch (byte) {
                '"' => try writer.writeAll("\\\""),
                '\\' => try writer.writeAll("\\\\"),
                0x08 => try writer.writeAll("\\b"),
                '\t' => try writer.writeAll("\\t"),
                '\n' => try writer.writeAll("\\n"),
                0x0C => try writer.writeAll("\\f"),
                '\r' => try writer.writeAll("\\r"),
                // Remaining control bytes have no short escape.
                0x00...0x07, 0x0B, 0x0E...0x1F => try writer.print("\\u{x:0>4}", .{byte}),
                else => try writer.writeByte(byte),
            }
        }
        try writer.writeByte('"');
    }
};
|
|
|
|
/// Describes an array by where it starts and how many elements it has; the
/// simpler sibling of `ObjectEntry`.
pub const ArraySlice = struct {
    /// Offset of the first element.
    start: usize,
    /// Number of elements.
    len: usize,
};
|
|
|
|
/// Describes an object by the offsets of its first key and first value.
/// Later entries are reached by advancing both offsets, so keys and values
/// are expected to be stored in order.
pub const ObjectEntry = struct {
    /// Number of key/value pairs.
    len: usize,
    /// Offset of the first property name.
    property_idx: usize,
    /// Offset of the first value.
    value_idx: usize,
};
|
|
|
|
/// Bit-packed switches that adjust how strict the parser is.
pub const Flags = packed struct {
    /// When false (the default), `parse` rejects a comma that is directly
    /// followed by a closing token.
    allow_trailing_comma: bool = false,
};
|
|
|
|
/// Compile-time configuration. Every field is `comptime`, so the values are
/// fixed by the type's defaults.
pub const Options = struct {
    /// Indentation width; not referenced by the code visible in this file.
    comptime indent_len: usize = 4,
    /// Capacity of the scope stack used by `parse`.
    comptime max_depth: usize = 256,
    /// Parser strictness switches.
    comptime flags: Flags = .{},
};
|
|
|
|
/// Flat list of every stored node; the `usize` results of the `add*`
/// functions are positions in this list.
index: std.MultiArrayList(JsonValue) = .{},
/// Pool that receives the text of string values.
string_index: StringPool = .empty,
/// Pool that receives object property names.
property_index: StringPool = .empty,

/// Compile-time parser configuration.
options: Options = .{},

/// An instance with all fields at their defaults.
pub const init: Self = .{};
|
|
|
|
/// Releases everything owned by this instance: the node list and both string
/// pools. `allocator` must be the allocator that was used to fill them.
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    self.index.deinit(allocator);
    // The value-string pool was previously never released.
    self.string_index.deinit(allocator);
    self.property_index.deinit(allocator);
}
|
|
|
|
/// Appends a number node and returns its position in `index`.
/// Fails only if growing the node list fails.
fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize {
    // The new node lands at the current end of the list.
    const slot = self.index.len;
    try self.index.append(allocator, .{ .number = number });
    return slot;
}
|
|
|
|
/// Adds a property name to the property pool and returns the pool handle as
/// an integer. No node is appended, but capacity for one more node is
/// reserved in `index`.
fn addProperty(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const handle = try self.property_index.add(allocator, bytes);
    // Reserve room for one node; presumably the property's value — confirm.
    try self.index.ensureUnusedCapacity(allocator, 1);
    const offset: usize = @intFromEnum(handle);
    return offset;
}
|
|
|
|
/// Adds `bytes` to the value-string pool, appends a string node holding the
/// resulting handle, and returns the node's position in `index`.
fn addString(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const handle = try self.string_index.add(allocator, bytes);
    const slot = self.index.len;
    try self.index.append(allocator, .{ .string = handle });
    return slot;
}
|
|
|
|
/// Stores an object built from a key/value map and returns the position of
/// the object node, which is appended after the per-entry slots.
///
/// For every entry the key is added to `property_index` and one slot is
/// appended to `index`. The first iteration records an `ObjectEntry` with the
/// map's entry count, that first key handle and that first slot. An empty map
/// stores the fixed entry `{ len = 0, property_idx = 0, value_idx = 1 }`.
///
/// NOTE(review): this function does not look like it can type-check as
/// written: (1) the parameter is an `AutoArrayHashMapUnmanaged` keyed by
/// `[]const u8`, while `JsonInput.object` (what `addValue` passes in) is a
/// `StringArrayHashMapUnmanaged`; (2) `addValue` returns `!void`, yet its
/// un-tried result is used as the union payload below; (3) `stridx` is
/// assigned to the `usize` field `property_idx` without the `@intFromEnum`
/// conversion that `addProperty` applies. Confirm the intended design before
/// relying on it.
fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput)) !usize {
|
|
// Stays null when the map has no entries.
var entry: ?ObjectEntry = null;
|
|
|
|
for (object.keys(), object.values(), 0..) |key, value, times| {
|
|
const stridx = try self.property_index.add(allocator, key);
|
|
try self.index.ensureUnusedCapacity(allocator, 1);
|
|
// Slot reserved for this entry's value.
const vidx = self.index.addOneAssumeCapacity();
|
|
// NOTE(review): the payload is the raw result of `addValue`, see header.
self.index.set(vidx, @unionInit(JsonValue, std.meta.activeTag(value), self.addValue(allocator, value)));
|
|
// Only the first entry's offsets are recorded.
if (times == 0) {
|
|
entry = ObjectEntry{
|
|
.len = object.entries.len,
|
|
.property_idx = stridx,
|
|
.value_idx = vidx,
|
|
};
|
|
}
|
|
}
|
|
|
|
try self.index.ensureUnusedCapacity(allocator, 1);
|
|
// Slot for the object node itself; this is the returned index.
const idx = self.index.addOneAssumeCapacity();
|
|
if (entry) |e| {
|
|
self.index.set(idx, .{ .object = e });
|
|
return idx;
|
|
} else {
|
|
// Empty map: store a zero-length entry with fixed offsets.
self.index.set(idx, .{ .object = ObjectEntry{
|
|
.len = 0,
|
|
.property_idx = 0,
|
|
.value_idx = 1,
|
|
} });
|
|
return idx;
|
|
}
|
|
}
|
|
|
|
/// Appends an object node with zero entries and returns its position in
/// `index`.
///
/// `property_idx` is set to the current byte length of the property pool and
/// `value_idx` to the slot directly after the new node. The previous code
/// read `self.index.len` after the node had already been added and then added
/// one more, so it pointed two slots ahead, unlike `addEmptyArray` and the
/// entries `parse` writes for new scopes.
fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
    const idx = self.index.len;
    try self.index.append(allocator, .{ .object = ObjectEntry{
        .len = 0,
        .property_idx = self.property_index.string_bytes.items.len,
        .value_idx = idx + 1,
    } });
    return idx;
}
|
|
|
|
/// Stores an array built from a slice of inputs and returns the position of
/// the array node, which is appended after the per-element slots.
///
/// One slot is appended to `index` for every element. The first iteration
/// records an `ArraySlice` starting at that first slot with the slice's
/// length. An empty slice stores `{ start = 0, len = 0 }`.
///
/// NOTE(review): `addValue` returns `!void`, yet its un-tried result is used
/// as the union payload below, so this does not look like it can type-check
/// as written. Confirm the intended design before relying on it.
fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usize {
|
|
// Stays null when the slice is empty.
var entry: ?ArraySlice = null;
|
|
for (array, 0..) |value, times| {
|
|
try self.index.ensureUnusedCapacity(allocator, 1);
|
|
// Slot reserved for this element.
const idx = self.index.addOneAssumeCapacity();
|
|
// NOTE(review): the payload is the raw result of `addValue`, see header.
self.index.set(idx, @unionInit(JsonValue, std.meta.activeTag(value), self.addValue(allocator, value)));
|
|
// Only the first element's slot is recorded.
if (times == 0) {
|
|
entry = ArraySlice{
|
|
.start = idx,
|
|
.len = array.len,
|
|
};
|
|
}
|
|
}
|
|
try self.index.ensureUnusedCapacity(allocator, 1);
|
|
// Slot for the array node itself; this is the returned index.
const idx = self.index.addOneAssumeCapacity();
|
|
if (entry) |e| {
|
|
self.index.set(idx, .{ .array = e });
|
|
return idx;
|
|
} else {
|
|
// Empty slice: store a zero-length descriptor.
self.index.set(idx, .{ .array = ArraySlice{
|
|
.start = 0,
|
|
.len = 0,
|
|
} });
|
|
return idx;
|
|
}
|
|
}
|
|
|
|
/// Appends an array node with zero elements and returns its position in
/// `index`. Its `start` is the slot directly after the new node.
fn addEmptyArray(self: *Self, allocator: std.mem.Allocator) !usize {
    const slot = self.index.len;
    try self.index.append(allocator, .{ .array = ArraySlice{
        .start = slot + 1,
        .len = 0,
    } });
    return slot;
}
|
|
|
|
/// Appends a boolean node and returns its position in `index`.
fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize {
    const slot = self.index.len;
    try self.index.append(allocator, .{ .bool = value });
    return slot;
}
|
|
|
|
/// Appends a `null` node and returns its position in `index`.
fn addNull(self: *Self, allocator: std.mem.Allocator) !usize {
    const slot = self.index.len;
    try self.index.append(allocator, .{ .null = {} });
    return slot;
}
|
|
|
|
/// Stores `value` by dispatching to the `add*` function that matches its
/// active tag. Any error from that function is propagated.
///
/// The node index each `add*` function returns is discarded on purpose: this
/// function is declared to return nothing, and the previous body left those
/// `usize` results unused, which Zig rejects as an ignored value.
/// NOTE(review): callers that need the index should call the specific `add*`
/// function directly, or this signature should be widened to return it.
fn addValue(self: *Self, allocator: std.mem.Allocator, value: JsonInput) !void {
    _ = switch (value) {
        .null => try self.addNull(allocator),
        .bool => |flag| try self.addBool(allocator, flag),
        .number => |num| try self.addNumber(allocator, num),
        .string => |text| try self.addString(allocator, text),
        .array => |items| try self.addArray(allocator, items),
        .object => |map| try self.addObject(allocator, map),
    };
}
|
|
|
|
/// Looks `index` up in the property pool's string table and returns the
/// stored handle, or null when the table has no entry for it.
/// NOTE(review): assumes `string_table` is keyed by the property text —
/// confirm against `strings.zig`.
fn getProperty(self: *Self, index: []const u8) ?StringIndex {
    const table = &self.property_index.string_table;
    return table.get(index);
}
|
|
|
|
/// Returns the number stored at `index`, or null when `index` is out of
/// range or the node there is not a number.
fn getNumber(self: *Self, index: usize) ?f64 {
    // `MultiArrayList.get` returns the element itself, not an optional, so
    // the range has to be checked up front.
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .number => |num| num,
        else => null,
    };
}
|
|
|
|
/// Expands the object stored at `index` into two parallel slices: the handle
/// of every property name and the `index` position of every value.
///
/// Returns null when `index` is out of range or the node there is not an
/// object. For an empty object both slices are empty and nothing is
/// allocated. Otherwise both slices are allocated with `allocator` and owned
/// by the caller. Fails only if allocation fails.
///
/// Keys are found by starting at the object's `property_idx` and skipping
/// each key's length plus one byte; values are taken from consecutive
/// positions starting at `value_idx`.
/// NOTE(review): consecutive value positions only hold if every value takes
/// exactly one slot — verify for objects that contain nested objects.
fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !?struct {
    []StringIndex,
    []usize,
} {
    if (index >= self.index.len) return null;
    const object = switch (self.index.get(index)) {
        .object => |entry| entry,
        else => return null,
    };

    if (object.len == 0) {
        return .{ &.{}, &.{} };
    }

    const keys = try allocator.alloc(StringIndex, object.len);
    // Do not leak the key slice if the value slice cannot be allocated.
    errdefer allocator.free(keys);
    const values = try allocator.alloc(usize, object.len);

    var pidx = object.property_idx;
    for (keys, values, 0..) |*key, *value, i| {
        const handle: StringIndex = @enumFromInt(pidx);
        key.* = handle;
        value.* = object.value_idx + i;
        // Step over this key's bytes plus one separator byte.
        pidx += handle.slice(&self.property_index).len + 1;
    }

    return .{ keys, values };
}
|
|
|
|
/// Expands the array stored at `index` into the `index` positions of its
/// elements, which are `start`, `start + 1`, … for `len` entries.
///
/// Returns null when `index` is out of range or the node there is not an
/// array. An empty array yields an empty slice without allocating. Otherwise
/// the slice is allocated with `allocator` and owned by the caller.
///
/// The return type is now an error union, matching `getObject`: the body
/// allocates with `try`, which the previous plain-optional return type could
/// not express.
fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) !?[]usize {
    if (index >= self.index.len) return null;
    const array = switch (self.index.get(index)) {
        .array => |slice| slice,
        else => return null,
    };

    if (array.len == 0) {
        return @as([]usize, &[_]usize{});
    }

    const values = try allocator.alloc(usize, array.len);
    for (values, array.start..) |*slot, position| {
        slot.* = position;
    }
    return values;
}
|
|
|
|
/// Returns the boolean stored at `index`, or null when `index` is out of
/// range or the node there is not a boolean.
fn getBool(self: *Self, index: usize) ?bool {
    // `MultiArrayList.get` returns the element itself, not an optional, so
    // the range has to be checked up front.
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .bool => |flag| flag,
        else => null,
    };
}
|
|
|
|
/// Returns a non-null `void` when the node at `index` is a JSON `null`, and
/// null when `index` is out of range or the node is of any other kind.
fn getNull(self: *Self, index: usize) ?void {
    // `MultiArrayList.get` returns the element itself, not an optional, so
    // the range has to be checked up front.
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .null => {},
        else => null,
    };
}
|
|
//
|
|
//fn getValue(
|
|
// self: *Self,
|
|
// allocator: std.mem.Allocator,
|
|
// index: usize,
|
|
//) !?JsonInput {
|
|
//
|
|
//}
|
|
|
|
/// Consumes the tokenizer's tokens and stores the document in `self`.
///
/// Returns the position of the root object node, which is 0 when called on a
/// freshly initialised instance. Memory comes from `tokenizer.allocator`.
///
/// Errors:
/// - `error.InvalidSyntax` when a token appears where it is not accepted,
///   when input ends in the middle of a construct, when a closing brace has
///   no matching opener, or when a property appears outside any object.
/// - `error.TrailingComma` when a comma is directly followed by a closing
///   token and `allow_trailing_comma` is off.
/// - allocation errors, and the scope stack's overflow error when nesting
///   exceeds `max_depth`.
///
/// Token types without a dedicated branch are reported on stderr and end
/// parsing with the nodes collected so far.
pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
    const allocator = tokenizer.allocator;

    var it = tokenizer.iterator();

    const root = try self.addEmptyObject(allocator);

    // No tokens at all: the result is just the empty root object.
    var token = it.next() orelse
        return root;

    // Stack of currently open scopes, innermost last. Each item is the
    // position of the scope's node in `index`.
    var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);

    flag: switch (token.type) {
        .eof => {
            // A scope that is still open at end of input is malformed input,
            // not a broken internal invariant.
            if (query.len != 0) return error.InvalidSyntax;
            return root;
        },
        .property => {
            defer tokenizer.skipWhitespace();

            // A property needs an enclosing scope.
            if (query.len == 0) return error.InvalidSyntax;
            const scope_idx = query.get(query.len - 1);
            switch (self.index.get(scope_idx)) {
                .object => |scope| {
                    const pidx = try self.addProperty(allocator, token.value.?.string);
                    // Count the new property; the first one also fixes where
                    // the object's keys start.
                    self.index.set(scope_idx, .{ .object = ObjectEntry{
                        .len = scope.len + 1,
                        .property_idx = if (scope.len == 0) pidx else scope.property_idx,
                        .value_idx = scope.value_idx,
                    } });
                },
                else => return error.InvalidSyntax,
            }

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .colon => {
                    token = it.next() orelse return error.InvalidSyntax;
                    continue :flag token.type;
                },
                // The `.string` branch may already have consumed the colon,
                // in which case `next` is the value itself.
                else => continue :flag next.type,
            }
        },
        .object_begin => {
            defer tokenizer.skipWhitespace();

            if (query.len == 0) {
                // Outermost object: reuse the pre-allocated root node.
                try query.append(root);
                self.index.set(root, .{ .object = ObjectEntry{
                    .len = 0,
                    .property_idx = 0,
                    .value_idx = root + 1,
                } });
            } else {
                const slot = try query.addOne();
                const child = try self.addEmptyObject(allocator);
                slot.* = child;
                // `property_idx` is a placeholder here; it is replaced when
                // the first property of this object is recorded.
                self.index.set(child, .{
                    .object = ObjectEntry{
                        .len = 0,
                        .property_idx = self.index.len,
                        .value_idx = self.index.len,
                    },
                });
            }

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .string => continue :flag .property,
                else => return error.InvalidSyntax,
            }
        },
        .object_end => {
            defer tokenizer.skipWhitespace();

            // A closing brace without an open scope is malformed input.
            _ = query.pop() orelse return error.InvalidSyntax;

            const next = it.next() orelse
                return root;
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => |closer| continue :flag closer,
                else => return error.InvalidSyntax,
            }
        },
        .true, .false => {
            defer tokenizer.skipWhitespace();

            _ = try self.addBool(allocator, token.type == .true);

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .string => {
            defer tokenizer.skipWhitespace();

            const next = it.next() orelse return error.InvalidSyntax;
            switch (next.type) {
                // Followed by a colon: the string is a property name.
                // `token` still refers to the string itself.
                .colon => continue :flag .property,
                else => |following| {
                    _ = try self.addString(allocator, token.value.?.string);

                    token = next;
                    continue :flag following;
                },
            }
        },
        .number => {
            defer tokenizer.skipWhitespace();

            _ = try self.addNumber(allocator, token.value.?.number);

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .comma => {
            // Always move past the comma. Before, enabling
            // `allow_trailing_comma` skipped this and ended parsing here.
            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .object_end, .array_end => |closer| {
                    if (!self.options.flags.allow_trailing_comma) return error.TrailingComma;
                    continue :flag closer;
                },
                else => continue :flag token.type,
            }
        },
        else => {
            // No branch handles this token type; report it and stop.
            std.debug.print("token: {s}\n", .{@tagName(token.type)});
        },
    }

    return root;
}
|
|
|
|
// Smoke test: parses a small document and prints every top-level key with the
// kind of its value. For object values the nested keys are printed first,
// together with the text of nested string values. Nothing is asserted beyond
// the calls succeeding.
test parse {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    const allocator = arena.allocator();

    var self = init;
    defer deinit(&self, allocator);

    const json =
        \\ {
        \\ "bio": "cool",
        \\ "age": 15,
        \\ "name": "yuzu",
        \\ "admin": true,
        \\ "address": {
        \\ "lorem": "ipsum",
        \\ "simple": true
        \\ },
        \\ "xd": true
        \\ }
    ;
    var tokenizer = try Tokenizer.init(allocator, json);

    const root_idx = try parse(&self, &tokenizer);

    const root_keys, const root_values = (try getObject(&self, allocator, root_idx)).?;
    for (root_keys, root_values) |outer_key, outer_slot| {
        const outer_name = outer_key.slice(&self.property_index);
        const outer_value = self.index.get(outer_slot);

        if (outer_value == .object) {
            const inner_keys, const inner_values = (try getObject(&self, allocator, outer_slot)).?;
            for (inner_keys, inner_values) |inner_key, inner_slot| {
                const inner_name = inner_key.slice(&self.property_index);
                const inner_value = self.index.get(inner_slot);
                std.debug.print(
                    \\ - "{s}": {s}
                , .{ inner_name, @tagName(inner_value) });
                if (inner_value == .string) {
                    std.debug.print(" ({s})", .{
                        inner_value.string.slice(&self.string_index),
                    });
                }
                std.debug.print("\n", .{});
            }
        }

        std.debug.print(
            \\"{s}": {s}
        , .{ outer_name, @tagName(outer_value) });
        std.debug.print("\n", .{});
    }
}
|