//! Flat JSON index.
//!
//! Values are stored in a `std.MultiArrayList(JsonValue)`. Containers are
//! stored header-first: a container occupies one slot and its children follow
//! depth-first in the slots after it. Object keys are stored in
//! `property_index` in the same depth-first order, and string values are
//! stored in `string_index`.
//!
//! NOTE(review): the key walk (`key.len + 1` per key) assumes the string pool
//! appends every key followed by one terminator byte and that a `StringIndex`
//! is a byte offset into the pool - confirm against `strings.zig`.
const std = @import("std");
const Tokenizer = @import("tokenizer.zig");
const TokenType = Tokenizer.TokenType;
const Token = Tokenizer.Token;
const StringPool = @import("strings.zig");
const StringIndex = StringPool.StringIndex;
const assert = std.debug.assert;

const Self = @This();

pub const Error = enum {
    TrailingComma,
};

/// Tag shared by the stored (`JsonValue`) and materialized (`JsonInput`) forms.
pub const JsonType = enum {
    null,
    bool,
    number,
    string,
    array,
    object,
};

/// One slot of the flat index.
pub const JsonValue = union(JsonType) {
    null: void,
    bool: bool,
    number: f64,
    string: StringIndex,
    array: ArraySlice,
    object: ObjectEntry,
};

/// Tree-shaped JSON value, as produced by `getValue`.
pub const JsonInput = union(JsonType) {
    null: void,
    bool: bool,
    number: f64,
    string: []const u8,
    array: []JsonInput,
    object: std.StringArrayHashMapUnmanaged(JsonInput),

    /// Recursively frees the array slices and maps owned by this value.
    /// String payloads and object keys are not freed here.
    pub fn deinit(self: JsonInput, allocator: std.mem.Allocator) void {
        switch (self) {
            .array => |items| {
                for (items) |item| item.deinit(allocator);
                allocator.free(items);
            },
            .object => |object| {
                // Work on a mutable copy of the map header; it shares the same
                // backing storage, so no @constCast is needed to release it.
                var map = object;
                for (map.values()) |child| child.deinit(allocator);
                map.deinit(allocator);
            },
            .null, .bool, .number, .string => {},
        }
    }

    /// Writes the value as compact JSON text. Strings and keys are written
    /// between double quotes exactly as stored (no escaping is applied).
    pub fn format(
        self: @This(),
        comptime fmt: []const u8,
        opts: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        switch (self) {
            .null => try writer.writeAll("null"),
            .bool => |b| try writer.writeAll(if (b) "true" else "false"),
            .number => |n| try writer.print("{d}", .{n}),
            .string => |s| try writer.print("\"{s}\"", .{s}),
            .array => |items| {
                try writer.writeByte('[');
                for (items, 0..) |val, i| {
                    try val.format(fmt, opts, writer);
                    if (i + 1 < items.len) try writer.writeByte(',');
                }
                try writer.writeByte(']');
            },
            .object => |object| {
                const keys = object.keys();
                try writer.writeByte('{');
                for (keys, object.values(), 0..) |k, v, i| {
                    try writer.print("\"{s}\"", .{k});
                    try writer.writeByte(':');
                    try v.format(fmt, opts, writer);
                    if (i + 1 < keys.len) try writer.writeByte(',');
                }
                try writer.writeByte('}');
            },
        }
    }
};

/// same as ObjectEntry but simpler
/// start is the slot of the first element; `len` is the element count
pub const ArraySlice = struct {
    start: usize,
    len: usize,
};

/// Header of an object.
/// `property_idx` is the offset of the first key in `property_index` and
/// `value_idx` is the slot of the first value; both are walked forward, in
/// order, to reach the following entries. `len` is the number of entries.
pub const ObjectEntry = struct {
    len: usize,
    property_idx: usize,
    value_idx: usize,
};

pub const Flags = packed struct {
    allow_trailing_comma: bool = false,
};

pub const Options = struct {
    comptime indent_len: usize = 4,
    comptime max_depth: usize = 256,
    comptime flags: Flags = .{},
};

index: std.MultiArrayList(JsonValue) = .{},
string_index: StringPool = .empty,
property_index: StringPool = .empty,

options: Options = .{},

pub const init = Self{};

/// Releases the index and both string pools.
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    self.index.deinit(allocator);
    self.property_index.deinit(allocator);
    self.string_index.deinit(allocator);
}

/// Appends `value` as a new slot and returns that slot's index.
fn appendSlot(self: *Self, allocator: std.mem.Allocator, value: JsonValue) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, value);
    return idx;
}

/// Appends a number slot and returns its index.
fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize {
    return self.appendSlot(allocator, .{ .number = number });
}

/// Stores `bytes` in the property pool and returns the key's pool index.
/// Also reserves one index slot for the value that follows the key.
fn addProperty(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const stridx = try self.property_index.add(allocator, bytes);
    try self.index.ensureUnusedCapacity(allocator, 1);
    return @intFromEnum(stridx);
}

/// Stores `bytes` in the string pool, appends a string slot and returns the
/// slot index.
fn addString(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const stridx = try self.string_index.add(allocator, bytes);
    return self.appendSlot(allocator, .{ .string = stridx });
}

/// Appends `object` (header first, then each key/value depth-first) and
/// returns the slot index of the header.
fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.StringArrayHashMapUnmanaged(JsonInput)) !usize {
    return self.addValue(allocator, .{ .object = object });
}

/// Appends an object header with no entries and returns its slot index.
/// `value_idx` is the slot right after the header, where a first value would
/// be appended.
fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    const object: ObjectEntry = .{
        .property_idx = self.property_index.string_bytes.items.len,
        .value_idx = idx + 1,
        .len = 0,
    };
    self.index.set(idx, .{ .object = object });
    return idx;
}

/// Appends `array` (header first, then each element depth-first) and returns
/// the slot index of the header.
fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usize {
    return self.addValue(allocator, .{ .array = array });
}

/// Appends an array header with no elements and returns its slot index.
fn addEmptyArray(self: *Self, allocator: std.mem.Allocator) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .array = ArraySlice{
        .start = idx + 1,
        .len = 0,
    } });
    return idx;
}

/// Appends a boolean slot and returns its index.
fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize {
    return self.appendSlot(allocator, .{ .bool = value });
}

/// Appends a null slot and returns its index.
fn addNull(self: *Self, allocator: std.mem.Allocator) !usize {
    return self.appendSlot(allocator, .{ .null = {} });
}

/// Appends `value` and everything nested in it, and returns the slot index of
/// the value itself.
///
/// Containers are written in the layout `getValue` reads back: the header
/// first, then the children depth-first. Object keys are added to the
/// property pool right before their value, so nested keys sit between a key
/// and its next sibling.
fn addValue(self: *Self, allocator: std.mem.Allocator, value: JsonInput) !usize {
    switch (value) {
        .null => return self.addNull(allocator),
        .bool => |b| return self.addBool(allocator, b),
        .number => |n| return self.addNumber(allocator, n),
        .string => |s| return self.addString(allocator, s),
        .array => |items| {
            try self.index.ensureUnusedCapacity(allocator, 1);
            const idx = self.index.addOneAssumeCapacity();
            self.index.set(idx, .{ .array = ArraySlice{
                .start = idx + 1,
                .len = items.len,
            } });
            for (items) |item| {
                _ = try self.addValue(allocator, item);
            }
            return idx;
        },
        .object => |map| {
            const keys = map.keys();
            try self.index.ensureUnusedCapacity(allocator, 1);
            const idx = self.index.addOneAssumeCapacity();
            var header: ObjectEntry = .{
                .len = keys.len,
                .property_idx = 0,
                .value_idx = idx + 1,
            };
            self.index.set(idx, .{ .object = header });
            for (keys, map.values(), 0..) |key, child, i| {
                const pidx = try self.addProperty(allocator, key);
                if (i == 0) {
                    // The header records where the first key lives.
                    header.property_idx = pidx;
                    self.index.set(idx, .{ .object = header });
                }
                _ = try self.addValue(allocator, child);
            }
            return idx;
        },
    }
}

/// Looks `index` up in the property pool's string table.
fn getProperty(self: *Self, index: []const u8) ?StringIndex {
    return self.property_index.string_table.get(index);
}

/// Returns the number stored at slot `index`, or null when the slot is out of
/// range or holds a different type.
fn getNumber(self: *Self, index: usize) ?f64 {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .number => |n| n,
        else => null,
    };
}

/// Returns the key indexes and value slots of the object stored at `index`.
///
/// The slot must hold an object. For a non-empty object both returned slices
/// are allocated with `allocator` and owned by the caller; for an empty object
/// two empty slices are returned.
fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !struct {
    []StringIndex,
    []usize,
} {
    const entry = self.index.get(index);

    if (entry.object.len == 0) {
        return .{ &.{}, &.{} };
    }

    var pidx = entry.object.property_idx;
    var vidx = entry.object.value_idx;

    const keys = try allocator.alloc(StringIndex, entry.object.len);
    errdefer allocator.free(keys);
    const values = try allocator.alloc(usize, entry.object.len);

    for (keys, values) |*key_out, *value_out| {
        const key: StringIndex = @enumFromInt(pidx);
        key_out.* = key;
        value_out.* = vidx;
        // Same walk as getValue: step over this key, over the keys nested in
        // its value, and over every slot the value occupies.
        pidx += key.slice(&self.property_index).len + 1;
        self.skipNestedProps(&pidx, vidx);
        vidx += self.skipSlots(vidx);
    }

    return .{ keys, values };
}

/// Returns the slot of every element of the array stored at `index`.
///
/// The slot must hold an array. For a non-empty array the returned slice is
/// allocated with `allocator` and owned by the caller.
fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) ![]usize {
    const entry = self.index.get(index);

    if (entry.array.len == 0) {
        return &.{};
    }

    var idx = entry.array.start;
    const values = try allocator.alloc(usize, entry.array.len);

    for (values) |*value_out| {
        value_out.* = idx;
        // An element that is itself a container spans several slots.
        idx += self.skipSlots(idx);
    }

    return values;
}

/// Returns the boolean stored at slot `index`, or null when the slot is out of
/// range or holds a different type.
fn getBool(self: *Self, index: usize) ?bool {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .bool => |b| b,
        else => null,
    };
}

/// Returns `{}` when slot `index` holds a JSON null, or null when the slot is
/// out of range or holds a different type.
fn getNull(self: *Self, index: usize) ?void {
    if (index >= self.index.len) return null;
    return switch (self.index.get(index)) {
        .null => {},
        else => null,
    };
}

// Recursively compute how many index slots a node occupies (including nested)
fn skipSlots(self: *Self, slot: usize) usize {
    const e = self.index.get(slot);
    switch (e) {
        .object => |obj| {
            var total: usize = 1;
            var v = obj.value_idx;
            for (0..obj.len) |_| {
                const s = skipSlots(self, v);
                total += s;
                v += s;
            }
            return total;
        },
        .array => |arr| {
            var total: usize = 1;
            var c = arr.start;
            for (0..arr.len) |_| {
                const s = skipSlots(self, c);
                total += s;
                c += s;
            }
            return total;
        },
        else => return 1,
    }
}

// Compute bytes length of properties starting at pidx
fn skipProps(self: *Self, pidx: usize, count: usize) usize {
    var total: usize = 0;
    var p = pidx;
    for (0..count) |_| {
        const key_slice = StringIndex.slice(@enumFromInt(p), &self.property_index);
        const len = key_slice.len + 1;
        total += len;
        p += len;
    }
    return total;
}

/// Advances `pptr.*` past every key that belongs to the value stored at
/// `slot`: all keys of an object (and of the values nested in it), or the keys
/// of every element of an array. Scalars leave `pptr.*` unchanged.
fn skipNestedProps(self: *Self, pptr: *usize, slot: usize) void {
    switch (self.index.get(slot)) {
        .object => |obj| {
            var v = obj.value_idx;
            for (0..obj.len) |_| {
                // Skip this key
                const key: StringIndex = @enumFromInt(pptr.*);
                pptr.* += key.slice(&self.property_index).len + 1;
                // Skip the keys nested in this key's value
                self.skipNestedProps(pptr, v);
                // Skip the slots of the value in the index array
                v += self.skipSlots(v);
            }
        },
        .array => |arr| {
            var c = arr.start;
            for (0..arr.len) |_| {
                self.skipNestedProps(pptr, c);
                c += self.skipSlots(c);
            }
        },
        else => {},
    }
}

/// Materializes the value stored at slot `idx` as a `JsonInput` tree.
///
/// Arrays and maps are allocated with `allocator`; release them with
/// `JsonInput.deinit`. String payloads and object keys are the slices returned
/// by the string pools and are not copied. On error nothing built so far is
/// leaked.
fn getValue(
    self: *Self,
    allocator: std.mem.Allocator,
    idx: usize,
) !JsonInput {
    const entry = self.index.get(idx);
    switch (entry) {
        .null => return .null,
        .bool => |b| return .{ .bool = b },
        .number => |n| return .{ .number = n },
        .string => |string| return .{ .string = string.slice(&self.string_index) },
        .array => |arr| {
            const out = try allocator.alloc(JsonInput, arr.len);
            var built: usize = 0;
            errdefer {
                // Only the elements materialized so far are initialized.
                for (out[0..built]) |child| child.deinit(allocator);
                allocator.free(out);
            }
            var c = arr.start;
            while (built < arr.len) : (built += 1) {
                out[built] = try self.getValue(allocator, c);
                c += self.skipSlots(c);
            }
            return .{ .array = out };
        },
        .object => |obj| {
            var map: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
            errdefer {
                for (map.values()) |child| child.deinit(allocator);
                map.deinit(allocator);
            }
            var p = obj.property_idx;
            var v = obj.value_idx;
            for (0..obj.len) |_| {
                // Extract key
                const k: StringIndex = @enumFromInt(p);
                const key_slice = k.slice(&self.property_index);

                // Extract and assign value
                const val = try self.getValue(allocator, v);
                const previous = map.fetchPut(allocator, key_slice, val) catch |err| {
                    val.deinit(allocator);
                    return err;
                };
                // A repeated key replaces the earlier value; release the old one.
                if (previous) |old| old.value.deinit(allocator);

                // Advance past this key
                p += key_slice.len + 1;
                // Skip nested property names of this value
                self.skipNestedProps(&p, v);
                // Advance past the value slots
                v += self.skipSlots(v);
            }
            return .{ .object = map };
        },
    }
}

test getValue {
    const allocator = std.testing.allocator;
    const json =
        \\ {
        \\   "name": "Yuzu",
        \\   "author": true,
        \\   "age": 15,
        \\   "address": {
        \\     "street": 1,
        \\     "deeply_nested": {
        \\       "k": 5,
        \\       "socialist": "expansion",
        \\       "idk": {"a":"b"}
        \\     }
        \\   },
        \\   "offset": "yes"
        \\ }
    ;

    var tokenizer: Tokenizer = try .init(allocator, json);
    defer tokenizer.deinit();

    var self = init;
    defer self.deinit(allocator);

    const idx: usize = try parse(&self, &tokenizer);

    var root = try getValue(&self, allocator, idx);
    defer root.deinit(allocator);

    try std.testing.expect(root == .object);
    std.debug.print("{}\n", .{root});
}

/// Parses the tokens produced by `tokenizer` into the index and returns the
/// slot of the root object (0 when the index was empty before the call).
///
/// Handles objects, strings, numbers and booleans. Token strings are copied
/// into the string pools and then freed with `tokenizer.allocator`.
///
/// Errors:
/// - `error.InvalidSyntax`: unexpected token, key without a value, unbalanced
///   braces, input ending inside an object, or a token type this parser does
///   not handle.
/// - `error.TrailingComma`: a comma directly before a closing token while
///   `options.flags.allow_trailing_comma` is false.
/// - `error.Overflow`: nesting deeper than `options.max_depth`.
/// - allocation failures.
pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
    const allocator = tokenizer.allocator;

    var it = tokenizer.iterator();

    const root = try self.addEmptyObject(allocator);
    var token = it.next() orelse return root;

    // Stack of the object slots that are currently open.
    var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);

    flag: switch (token.type) {
        .eof => {
            // Input ended while an object was still open.
            if (query.len != 0) return error.InvalidSyntax;
            return root;
        },
        .property => {
            defer tokenizer.skipWhitespace();

            const key = token.value.?.string;
            defer allocator.free(key);

            // A key is only valid inside an open object.
            if (query.len == 0) return error.InvalidSyntax;
            const scope_idx = query.get(query.len - 1);
            const scope = switch (self.index.get(scope_idx)) {
                .object => |obj| obj,
                else => return error.InvalidSyntax,
            };

            const pidx = try self.addProperty(allocator, key);
            self.index.set(scope_idx, .{ .object = ObjectEntry{
                .len = scope.len + 1,
                .property_idx = if (scope.len == 0) pidx else scope.property_idx,
                .value_idx = scope.value_idx,
            } });

            // The colon may already have been consumed by the `.string` arm.
            var value_token = it.next() orelse return error.InvalidSyntax;
            if (value_token.type == .colon) {
                value_token = it.next() orelse return error.InvalidSyntax;
            }
            token = value_token;
            switch (value_token.type) {
                // The key was counted above, so it must be followed by a value.
                .comma, .colon, .object_end, .array_end, .eof => return error.InvalidSyntax,
                else => |t| continue :flag t,
            }
        },
        .object_begin => {
            defer tokenizer.skipWhitespace();

            if (query.len == 0) {
                // The first object of the document is the pre-allocated root.
                try query.append(root);
            } else {
                try query.ensureUnusedCapacity(1);
                const idx = try self.addEmptyObject(allocator);
                query.appendAssumeCapacity(idx);
            }

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .string => continue :flag .property,
                // `{}` is a valid, empty object.
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .object_end => {
            defer tokenizer.skipWhitespace();

            // A closing brace without a matching open object is malformed input.
            _ = query.pop() orelse return error.InvalidSyntax;

            const next = it.next() orelse continue :flag .eof;
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => |t| continue :flag t,
                .eof => continue :flag .eof,
                else => return error.InvalidSyntax,
            }
        },
        .true, .false => {
            defer tokenizer.skipWhitespace();

            _ = try self.addBool(allocator, token.type == .true);

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .string => {
            defer tokenizer.skipWhitespace();

            const text = token.value.?.string;
            // Released here only when this arm fails; otherwise it is released
            // below (value) or by the `.property` arm (key).
            errdefer allocator.free(text);

            const next = it.next() orelse return error.InvalidSyntax;
            switch (next.type) {
                // A string followed by a colon is a key; `token` still holds it.
                .colon => continue :flag .property,
                else => |t| {
                    _ = try self.addString(allocator, text);
                    allocator.free(text);

                    token = next;
                    continue :flag t;
                },
            }
        },
        .number => {
            defer tokenizer.skipWhitespace();

            _ = try self.addNumber(allocator, token.value.?.number);

            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .comma => {
            const next = it.next() orelse return error.InvalidSyntax;
            token = next;
            switch (next.type) {
                .object_end, .array_end => |t| {
                    if (!self.options.flags.allow_trailing_comma) return error.TrailingComma;
                    continue :flag t;
                },
                else => |t| continue :flag t,
            }
        },
        else => {
            // std.debug.print("token: {s}\n", .{@tagName(token.type)});
        },
    }

    // Only reached for token types this parser does not handle. Reporting
    // success here would leave an object entry that refers to a value slot
    // which was never written.
    return error.InvalidSyntax;
}