//! Flat index of JSON values.
//!
//! Every value is one `JsonValue` entry in a `std.MultiArrayList`; string bytes
//! live in a `StringPool`. Containers do not own their children: an array entry
//! records the position of its first element and its length, an object entry
//! records the position of its first value, the pool offset of its first key
//! and its length. Readers (`getArray`, `getObject`, `getValue`) therefore
//! assume that the children of one container are stored contiguously.

const std = @import("std");
const Tokenizer = @import("tokenizer.zig");
const StringPool = @import("strings.zig");
const StringIndex = StringPool.StringIndex;
const assert = std.debug.assert;

const Self = @This();

/// Currently an empty placeholder; nothing in this file produces a value of it.
pub const Error = enum {};

/// Discriminant shared by `JsonValue` and `JsonInput`.
pub const JsonType = enum {
    null,
    bool,
    number,
    string,
    array,
    object,
};

/// Stored (indexed) representation of a JSON value.
pub const JsonValue = union(JsonType) {
    null: void,
    bool: bool,
    number: f64,
    string: StringIndex,
    array: ArrayIndex.Slice,
    object: ObjectIndex.Entry,
};

/// Tree-shaped representation used to feed values in and to read them back out.
pub const JsonInput = union(JsonType) {
    null: void,
    bool: bool,
    number: f64,
    string: []const u8,
    array: []JsonInput,
    // String-keyed map: the auto-hash context refuses slice keys, so the
    // content-hashing string map is required for `[]const u8`.
    object: std.StringArrayHashMapUnmanaged(JsonInput),
};

pub const ArrayIndex = enum(usize) {
    _,

    /// `len` consecutive entries of `index`, beginning at `start`.
    pub const Slice = struct {
        start: usize,
        len: usize,
    };
};

pub const ObjectIndex = enum(usize) {
    _,

    /// `len` properties: keys start at string offset `property_idx`, values
    /// start at position `value_idx` of `index`.
    pub const Entry = struct {
        len: usize,
        property_idx: usize,
        value_idx: usize,
    };
};

pub const Options = struct {
    /// Number of slots allocated for the stack of open objects in `parse`.
    comptime max_depth: usize = 256,
};

index: std.MultiArrayList(JsonValue) = .{},
string_index: StringPool = .empty,
options: Options = .{},

pub const init: Self = .{};

/// Releases the value index and the string pool. `allocator` must be the one
/// that was used for every insertion.
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    self.index.deinit(allocator);
    self.string_index.deinit(allocator);
}

/// Errors produced while inserting values.
/// NOTE(review): assumes `StringPool.add` can only fail with `OutOfMemory` —
/// confirm against strings.zig. The set has to be explicit because
/// `storeValue` is recursive and recursive functions cannot infer theirs.
const AddError = std.mem.Allocator.Error;

/// Errors produced while materialising a value tree with `getValue`.
const GetError = std.mem.Allocator.Error || error{InvalidIndex};

/// Appends one entry and returns its position.
fn appendValue(self: *Self, allocator: std.mem.Allocator, value: JsonValue) std.mem.Allocator.Error!usize {
    const idx = self.index.len;
    try self.index.append(allocator, value);
    return idx;
}

/// Appends `count` consecutive `null` placeholders and returns the position of
/// the first one. Placeholders are initialised so the index never exposes
/// undefined entries, even when a later insertion fails.
fn reserveSlots(self: *Self, allocator: std.mem.Allocator, count: usize) std.mem.Allocator.Error!usize {
    const start = self.index.len;
    try self.index.ensureUnusedCapacity(allocator, count);
    for (0..count) |_| self.index.appendAssumeCapacity(.{ .null = {} });
    return start;
}

/// Writes `value` into the already existing entry `slot`. Children of
/// containers are appended to the end of the index as one contiguous run, so
/// that the `start`/`value_idx` + `len` addressing used by the readers holds
/// for nested containers as well.
fn storeValue(self: *Self, allocator: std.mem.Allocator, slot: usize, value: JsonInput) AddError!void {
    switch (value) {
        .null => self.index.set(slot, .{ .null = {} }),
        .bool => |b| self.index.set(slot, .{ .bool = b }),
        .number => |n| self.index.set(slot, .{ .number = n }),
        .string => |bytes| {
            const interned = try self.string_index.add(allocator, bytes);
            self.index.set(slot, .{ .string = interned });
        },
        .array => |items| {
            if (items.len == 0) {
                self.index.set(slot, .{ .array = .{ .start = 0, .len = 0 } });
                return;
            }
            const start = try self.reserveSlots(allocator, items.len);
            self.index.set(slot, .{ .array = .{ .start = start, .len = items.len } });
            for (items, start..) |item, child| {
                try self.storeValue(allocator, child, item);
            }
        },
        .object => |map| {
            const keys = map.keys();
            if (keys.len == 0) {
                self.index.set(slot, .{ .object = .{ .len = 0, .property_idx = 0, .value_idx = 0 } });
                return;
            }
            // Intern every key before any value so that string values cannot
            // end up between two keys of the same object.
            // NOTE(review): the readers walk keys as consecutive pool entries;
            // if `StringPool.add` deduplicates, a key that already exists in
            // the pool breaks that assumption — confirm against strings.zig.
            var first_key: usize = 0;
            for (keys, 0..) |key, i| {
                const interned = try self.string_index.add(allocator, key);
                if (i == 0) first_key = @intFromEnum(interned);
            }
            const start = try self.reserveSlots(allocator, keys.len);
            self.index.set(slot, .{ .object = .{
                .len = keys.len,
                .property_idx = first_key,
                .value_idx = start,
            } });
            for (map.values(), start..) |child_value, child| {
                try self.storeValue(allocator, child, child_value);
            }
        },
    }
}

/// Appends a `null` entry and returns its position.
fn addNull(self: *Self, allocator: std.mem.Allocator) !usize {
    return self.appendValue(allocator, .{ .null = {} });
}

/// Appends a number entry and returns its position.
fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize {
    return self.appendValue(allocator, .{ .number = number });
}

/// Interns `bytes` in the string pool, appends a string entry referring to it
/// and returns the position of that entry.
fn addString(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const stridx = try self.string_index.add(allocator, bytes);
    return self.appendValue(allocator, .{ .string = stridx });
}

/// Appends `object` (header entry first, then its values) and returns the
/// position of the header entry.
fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.StringArrayHashMapUnmanaged(JsonInput)) !usize {
    return self.addValue(allocator, .{ .object = object });
}

/// Appends an object header with no properties. The header points at the next
/// free string offset and the next free index position, i.e. where the
/// properties of this object will land if they are appended right after it.
fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
    const idx = self.index.len;
    try self.index.append(allocator, .{ .object = .{
        .len = 0,
        .property_idx = self.string_index.string_bytes.items.len,
        .value_idx = idx + 1,
    } });
    return idx;
}

/// Appends `array` (header entry first, then its elements) and returns the
/// position of the header entry.
fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usize {
    return self.addValue(allocator, .{ .array = array });
}

/// Appends a boolean entry and returns its position.
fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize {
    return self.appendValue(allocator, .{ .bool = value });
}

/// Appends an arbitrary value tree and returns the position of its root entry.
fn addValue(self: *Self, allocator: std.mem.Allocator, value: JsonInput) AddError!usize {
    const slot = try self.reserveSlots(allocator, 1);
    try self.storeValue(allocator, slot, value);
    return slot;
}

/// Looks `index` up in the string pool's table.
/// NOTE(review): relies on `string_table.get` accepting a byte slice —
/// confirm against strings.zig.
fn getString(self: *Self, index: []const u8) ?StringIndex {
    return self.string_index.string_table.get(index);
}

/// Returns the number stored at `index`, or null when `index` is out of range
/// or holds a value of another type.
fn getNumber(self: *Self, index: usize) ?f64 {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .number => |n| n,
        else => null,
    };
}

/// Returns the keys and value positions of the object stored at `index`, or
/// null when `index` is out of range or is not an object. The caller owns both
/// slices; freeing the empty result is a no-op.
fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !?struct {
    []StringIndex,
    []usize,
} {
    if (index >= self.index.len) return null;
    const entry = switch (self.index.get(index)) {
        .object => |object| object,
        else => return null,
    };

    if (entry.len == 0) {
        const no_keys: []StringIndex = &.{};
        const no_values: []usize = &.{};
        return .{ no_keys, no_values };
    }

    const keys = try allocator.alloc(StringIndex, entry.len);
    // Do not leak the keys when the second allocation fails.
    errdefer allocator.free(keys);
    const values = try allocator.alloc(usize, entry.len);

    var pidx = entry.property_idx;
    for (keys, values, entry.value_idx..) |*key, *value, vidx| {
        const key_index: StringIndex = @enumFromInt(pidx);
        key.* = key_index;
        value.* = vidx;
        // Skip the key bytes plus one separator byte to reach the next key.
        pidx += key_index.slice(&self.string_index).len + 1;
    }
    return .{ keys, values };
}

/// Returns the positions of the elements of the array stored at `index`, or
/// null when `index` is out of range or is not an array. The caller owns the
/// slice; freeing the empty result is a no-op.
fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) !?[]usize {
    if (index >= self.index.len) return null;
    const slice = switch (self.index.get(index)) {
        .array => |array| array,
        else => return null,
    };

    if (slice.len == 0) {
        const no_values: []usize = &.{};
        return no_values;
    }

    const values = try allocator.alloc(usize, slice.len);
    for (values, slice.start..) |*value, child| value.* = child;
    return values;
}

/// Returns the boolean stored at `index`, or null when `index` is out of range
/// or holds a value of another type.
fn getBool(self: *Self, index: usize) ?bool {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .bool => |b| b,
        else => null,
    };
}

/// Returns `{}` when a JSON null is stored at `index`, otherwise null.
fn getNull(self: *Self, index: usize) ?void {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .null => {},
        else => null,
    };
}

/// Rebuilds the value tree rooted at `index`. Returns null when `index` is out
/// of range and `error.InvalidIndex` when a container refers to a position
/// outside the index.
///
/// Strings and object keys are slices obtained from the string pool and are
/// not copied. Arrays and maps are allocated with `allocator`; on failure only
/// the container being built is released, so an arena is the simplest way to
/// reclaim nested allocations.
fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) GetError!?JsonInput {
    if (index >= self.index.len) return null;

    switch (self.index.get(index)) {
        .null => return .{ .null = {} },
        .bool => |b| return .{ .bool = b },
        .number => |n| return .{ .number = n },
        .string => |stridx| {
            const str = stridx.slice(&self.string_index);
            return .{ .string = str };
        },
        .array => |slice| {
            const res = try allocator.alloc(JsonInput, slice.len);
            errdefer allocator.free(res);
            for (res, slice.start..) |*out, child| {
                out.* = (try self.getValue(allocator, child)) orelse return error.InvalidIndex;
            }
            return .{ .array = res };
        },
        .object => |entry| {
            var obj: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
            errdefer obj.deinit(allocator);
            try obj.ensureTotalCapacity(allocator, entry.len);

            var kidx = entry.property_idx;
            for (0..entry.len) |i| {
                const key = StringIndex.slice(@enumFromInt(kidx), &self.string_index);
                const val = (try self.getValue(allocator, entry.value_idx + i)) orelse
                    return error.InvalidIndex;
                // A repeated key overwrites the earlier value instead of
                // tripping the no-clobber assertion.
                obj.putAssumeCapacity(key, val);
                // Same stride as `getObject`: key bytes plus one separator.
                kidx += key.len + 1;
            }
            return .{ .object = obj };
        },
    }
}

/// Consumes the tokens of `tokenizer` and appends the values they describe,
/// using `tokenizer.allocator` for every allocation. Debug output is written
/// with `std.debug.print`.
///
/// Fails with `error.MaxDepthExceeded` when more than `options.max_depth`
/// objects are open at once and with `error.UnexpectedToken` when an object is
/// closed while none is open.
///
/// NOTE(review): every string token increments the length of the innermost
/// open object, i.e. string values are counted like keys, and token types
/// other than the ones listed below are ignored — confirm the intended
/// handling against the tokenizer.
pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
    const allocator = tokenizer.allocator;

    var it = tokenizer.iterator();

    // Stack of index positions of the objects that are currently open.
    const depth_buf = try allocator.alloc(usize, self.options.max_depth);
    defer allocator.free(depth_buf);

    // Number of open objects, i.e. the number of used slots in `depth_buf`.
    var cycles: usize = 0;

    while (it.next()) |token| {
        switch (token.type) {
            .object_begin => {
                std.debug.print("{{", .{});

                if (cycles >= depth_buf.len) return error.MaxDepthExceeded;

                const obj_idx = try self.addEmptyObject(allocator);
                depth_buf[cycles] = obj_idx;

                if (tokenizer.prev_token) |prev| {
                    if (prev.type == .object_begin) {
                        // add map to itself
                        // Only possible when an enclosing object is open.
                        if (cycles > 0) {
                            const parent = depth_buf[cycles - 1];
                            switch (self.index.get(parent)) {
                                .object => |parent_entry| {
                                    // NOTE(review): `string_table.size` is an
                                    // entry count while `addEmptyObject` uses a
                                    // byte offset for `property_idx` — confirm
                                    // which one is intended.
                                    self.index.set(parent, .{ .object = .{
                                        .len = parent_entry.len + 1,
                                        .property_idx = self.string_index.string_table.size,
                                        .value_idx = obj_idx,
                                    } });
                                    tokenizer.prev_token = null; // reset
                                },
                                // `depth_buf` only ever holds object headers.
                                else => unreachable,
                            }
                        }
                    } else tokenizer.pushBack(token);
                }

                cycles += 1;
                continue;
            },
            .object_end => {
                if (cycles == 0) return error.UnexpectedToken;

                const keys, const vals = (try self.getObject(allocator, depth_buf[cycles - 1])).?;
                defer allocator.free(keys);
                defer allocator.free(vals);

                std.debug.print("\nfound {d} keys and {d} values\n", .{ keys.len, vals.len });
                for (keys, vals) |k, v| {
                    const key = k.slice(&self.string_index);
                    const val = self.index.get(v);
                    std.debug.print(
                        \\"{s}": {s},
                    , .{ key, @tagName(val) });
                }
                std.debug.print("}}", .{});

                // The object is closed; later values belong to its parent.
                cycles -= 1;
            },
            .string => {
                const idx = try self.addString(allocator, token.value.?.string);
                if (cycles > 0) {
                    const parent = depth_buf[cycles - 1];
                    const last_obj = self.index.get(parent);
                    const stridx = self.index.get(idx).string;
                    self.index.set(parent, .{ .object = .{
                        .len = last_obj.object.len + 1,
                        .property_idx = if (cycles > 1) @intFromEnum(stridx) else last_obj.object.property_idx,
                        .value_idx = last_obj.object.value_idx,
                    } });
                    continue;
                }
            },
            .number => {
                _ = try self.addNumber(allocator, token.value.?.number);
                // Inside an object the entry needs no further bookkeeping.
                if (cycles > 0) continue;
            },
            .true, .false => {
                _ = try self.addBool(allocator, token.type == .true);
                if (cycles > 0) continue;
            },
            else => {},
        }

        tokenizer.skipWhitespace();
    }
}

test parse {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var self = init;
    defer deinit(&self, allocator);

    var tokenizer = try Tokenizer.init(allocator, blk: {
        const json =
            \\ {
            \\     "key": 123,
            \\     "key2": false,
            \\     "key3": true,
            \\     "key4": null
            \\ }
        ;
        break :blk json;
    });

    try parse(&self, &tokenizer);
}