const std = @import("std");
const mem = std.mem;
const Tokenizer = @import("tokenizer.zig");
const TokenType = Tokenizer.TokenType;
const Token = Tokenizer.Token;
const StringPool = @import("strings.zig");
const StringIndex = StringPool.StringIndex;
const assert = std.debug.assert;

/// A flattened JSON document: values are stored in a single MultiArrayList
/// (`index`), strings and property names in string pools, and object keys in
/// a side map keyed by the value's slot index.
const Self = @This();

/// Parser failure categories. NOTE(review): this is a plain enum, not an
/// error set; the parser itself returns ad-hoc errors such as
/// `error.InvalidSyntax` / `error.TrailingComma` — confirm whether this decl
/// is still needed by callers before removing.
pub const Error = enum { Eof, TrailingComma, MissingKey, MissingValue, UnexpectedToken };

/// The six JSON value kinds; also the shared tag for `JsonValue`/`JsonInput`.
pub const JsonType = enum { null, bool, number, string, array, object };

/// A JSON number, kept as either a big integer or a double.
pub const JsonNumber = union(enum) {
    int: i128,
    float: f64,

    /// Convert to any numeric type `T` (int or float), converting across
    /// int/float representations as needed. Compile error for non-numeric `T`.
    pub fn cast(self: JsonNumber, comptime T: type) T {
        return switch (self) {
            .int => |i| switch (@typeInfo(T)) {
                .float => @as(T, @floatFromInt(i)),
                .int => @as(T, @intCast(i)),
                else => @compileError("not a number type"),
            },
            .float => |f| switch (@typeInfo(T)) {
                .float => @as(T, @floatCast(f)),
                .int => @as(T, @intFromFloat(f)),
                else => @compileError("not a number type"),
            },
        };
    }
};

/// Internal (flattened) representation of one node in the document index.
/// Strings are pool indices; arrays/objects store (len, tip) spans into
/// the index itself.
pub const JsonValue = union(JsonType) {
    null: void,
    bool: bool,
    number: JsonNumber,
    string: StringIndex,
    array: ArraySlice,
    object: ObjectEntry,
};

/// Materialized (caller-facing) representation of a JSON value, produced by
/// `getValue`. Owns its allocations; release with `deinit`.
pub const JsonInput = union(JsonType) {
    null: void,
    bool: bool,
    number: JsonNumber,
    string: []const u8,
    array: []JsonInput,
    object: std.StringArrayHashMapUnmanaged(JsonInput),

    /// Recursively free nested arrays/objects. Strings are pool-backed and
    /// are not freed here.
    pub fn deinit(self: JsonInput, allocator: mem.Allocator) void {
        switch (self) {
            .array => |array| {
                for (array) |json_input| json_input.deinit(allocator);
                allocator.free(array);
            },
            .object => |*object| {
                var it = object.iterator();
                while (it.next()) |entry| entry.value_ptr.deinit(allocator);
                // `self` is passed by value, so the map header is const here;
                // deinit only mutates the (heap) header copy we own.
                @constCast(object).deinit(allocator);
            },
            else => {},
        }
    }

    /// std.fmt integration: prints compact JSON (no whitespace). Floats are
    /// printed with one decimal place so integral floats keep a ".0" suffix.
    pub fn format(
        self: @This(),
        comptime fmt: []const u8,
        opts: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        switch (self) {
            .null => try writer.writeAll("null"),
            .bool => try writer.writeAll(if (self.bool) "true" else "false"),
            .number => switch (self.number) {
                .int => try writer.print("{d}", .{self.number.int}),
                .float => try writer.print("{d:.1}", .{self.number.float}),
            },
            .string => try writer.print("\"{s}\"", .{self.string}),
            .array => {
                try writer.writeByte('[');
                for (self.array, 0..) |val, i| {
                    try val.format(fmt, opts, writer);
                    if (i < self.array.len - 1) try writer.writeByte(',');
                }
                try writer.writeByte(']');
            },
            .object => {
                try writer.writeByte('{');
                for (self.object.keys(), self.object.values(), 0..) |k, v, i| {
                    try writer.print("\"{s}\"", .{k});
                    try writer.writeByte(':');
                    try v.format(fmt, opts, writer);
                    if (i < self.object.entries.len - 1) try writer.writeByte(',');
                }
                try writer.writeByte('}');
            },
        }
    }
};

/// same as ObjectEntry but simpler
/// .tip is the index-slot offset of the first element; elements follow,
/// each occupying `skipSlots` slots.
pub const ArraySlice = struct {
    len: usize,
    tip: usize,
};

/// just += the value indexes to get the next item
/// (.tip is the slot of the first member value; keys live in `property_map`).
pub const ObjectEntry = struct {
    len: usize,
    tip: usize,
};

/// Maps a value's slot index to its key in the `properties` pool.
pub const PropertyEntry = struct {
    tip: StringIndex,
};

pub const Flags = packed struct {
    /// Make the tokenizer omit comments, TBD
    allow_comments: bool = false,
    /// Not to error on trailing comma, default is `false` for obvious reasons
    allow_trailing_comma: bool = false,
};

pub const Options = struct {
    comptime indent_len: usize = 4,
    comptime max_depth: usize = 256,
    flags: Flags = .{},
};

/// Flat value store; slot 0 is always the document root after `parse`.
index: std.MultiArrayList(JsonValue) = .{},
/// Pool for string *values*.
strings: StringPool = .empty,
/// Pool for object *keys* (kept separate from values).
properties: StringPool = .empty,
/// value-slot index -> key, for object members.
property_map: std.AutoArrayHashMapUnmanaged(usize, PropertyEntry) = .empty,
options: Options = .{},

pub const init = Self{};

pub fn deinit(self: *Self, allocator: mem.Allocator) void {
    self.index.deinit(allocator);
    self.properties.deinit(allocator);
    self.strings.deinit(allocator);
    self.property_map.deinit(allocator);
}

/// Append a number node; returns its slot index.
fn addNumber(self: *Self, allocator: mem.Allocator, number: JsonNumber) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .number = number });
    return idx;
}

/// Intern a key into `properties` and pre-reserve one `property_map` slot —
/// `parse` relies on this reservation when it calls `putAssumeCapacity`.
/// Returns the pool index of the key (not a value-slot index).
fn addProperty(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize {
    const stridx = try self.properties.add(allocator, bytes);
    try self.property_map.ensureUnusedCapacity(allocator, 1);
    return @intFromEnum(stridx);
}

/// Intern a string value and append a string node; returns its slot index.
fn addString(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize {
    const stridx = try self.strings.add(allocator, bytes);
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .string = stridx });
    return idx;
}

/// Reserve an uninitialized slot (caller must `set` it); returns its index.
fn addEmpty(self: *Self, allocator: mem.Allocator) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    return idx;
}

/// Append a bool node; returns its slot index.
fn addBool(self: *Self, allocator: mem.Allocator, value: bool) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .bool = value });
    return idx;
}

/// Append a null node; returns its slot index.
fn addNull(self: *Self, allocator: mem.Allocator) !usize {
    try self.index.ensureUnusedCapacity(allocator, 1);
    const idx = self.index.addOneAssumeCapacity();
    self.index.set(idx, .{ .null = {} });
    return idx;
}

// Recursively compute how many index slots a node occupies (including nested)
pub fn skipSlots(self: *Self, slot: usize) usize {
    switch (self.index.get(slot)) {
        .object => |obj| {
            var total: usize = 1;
            var v = obj.tip;
            for (0..obj.len) |_| {
                const s = skipSlots(self, v);
                total += s;
                v += s;
            }
            return total;
        },
        .array => |arr| {
            var total: usize = 1;
            var c = arr.tip;
            for (0..arr.len) |_| {
                const s = skipSlots(self, c);
                total += s;
                c += s;
            }
            return total;
        },
        else => return 1,
    }
}

/// Materialize the node at `idx` into an owned `JsonInput` tree.
/// Caller owns the result and must call `JsonInput.deinit`.
/// Returns `error.MissingKey` if an object member has no mapped key.
pub fn getValue(
    self: *Self,
    allocator: mem.Allocator,
    idx: usize,
) !JsonInput {
    if (self.index.len == 0) return error.InvalidSyntax;
    switch (self.index.get(idx)) {
        .null => return .{ .null = {} },
        .bool => |b| return .{ .bool = b },
        .number => |number| return .{ .number = number },
        .string => |string| {
            const sl = string.slice(&self.strings);
            return .{ .string = sl };
        },
        .array => |arr| {
            const out = try allocator.alloc(JsonInput, arr.len);
            errdefer allocator.free(out);
            var c = arr.tip;
            for (0..arr.len) |i| {
                const v = try self.getValue(allocator, c);
                out[i] = v;
                // advance past the whole nested subtree, not just one slot
                c += skipSlots(self, c);
            }
            return .{ .array = out[0..arr.len] };
        },
        .object => |obj| {
            var map: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
            errdefer map.deinit(allocator);
            var tip = obj.tip;
            for (0..obj.len) |_| if (self.property_map.get(tip)) |pen| {
                try map.put(
                    allocator,
                    pen.tip.slice(&self.properties),
                    try self.getValue(allocator, tip),
                );
                tip += self.skipSlots(tip);
            } else return error.MissingKey;
            return .{ .object = map };
        },
    }
}

/// Parse the tokenizer's input into this document's flat index.
/// always returns 0 (root)
///
/// Implemented as a labeled-switch state machine: each token handler
/// `continue :flag`s to the next state. `query` is the stack of currently
/// open container slots (max depth `options.max_depth`).
pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usize {
    const allow_comments = self.options.flags.allow_comments;

    tokenizer.skipWhitespace();
    if (tokenizer.endOfInput()) return error.Eof;

    const root = try self.addEmpty(allocator);
    var token = try tokenizer.nextToken(allocator, allow_comments);
    var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);

    flag: switch (token.type) {
        .eof => {
            if (root != 0) return error.InvalidSyntax;
            if (query.slice().len != 0) return error.InvalidSyntax;
            return root;
        },
        .property => {
            const scope_idx = query.get(query.len - 1);
            switch (self.index.get(scope_idx)) {
                .object => |scope| {
                    const pidx = try self.addProperty(allocator, token.value.?.string);
                    // The member's value will land in the *next* slot;
                    // addProperty reserved map capacity for this put.
                    const reer = self.index.len;
                    self.property_map.putAssumeCapacity(reer, .{ .tip = @enumFromInt(pidx) });
                    allocator.free(token.value.?.string);
                    self.index.set(scope_idx, .{ .object = ObjectEntry{
                        .len = scope.len + 1,
                        .tip = scope.tip,
                    } });
                },
                else => return error.InvalidSyntax,
            }

            const next = try tokenizer.nextToken(allocator, allow_comments);
            token = next;
            switch (next.type) {
                .colon => {
                    token = try tokenizer.nextToken(allocator, allow_comments);
                    continue :flag token.type;
                },
                else => continue :flag next.type,
            }
        },
        .object_begin => {
            if (query.slice().len < 1) {
                // root object
                const ptr = try query.addOne();
                ptr.* = root;
                self.index.set(root, .{ .object = ObjectEntry{
                    .len = 0,
                    .tip = 1,
                } });
            } else {
                // order matters: reserve our slot before registering with parent
                const parent_idx = query.get(query.len - 1);
                const idx_ptr = try query.addOne();
                idx_ptr.* = try self.addEmpty(allocator);
                self.index.set(idx_ptr.*, .{
                    .object = ObjectEntry{
                        .len = 0,
                        .tip = self.index.len,
                    },
                });
                switch (self.index.get(parent_idx)) {
                    .array => |slice| {
                        self.index.set(parent_idx, .{ .array = ArraySlice{
                            .len = slice.len + 1,
                            .tip = if (slice.len == 0) idx_ptr.* else slice.tip,
                        } });
                    },
                    else => {},
                }
            }

            const next = try tokenizer.nextToken(allocator, allow_comments);
            token = next;
            switch (next.type) {
                .string => continue :flag .property,
                .object_end => continue :flag .object_end,
                else => return error.InvalidSyntax,
            }
        },
        .object_end, .array_end => {
            if (query.pop() == null) return error.InvalidSyntax; // double close
            if (query.slice().len == 0) return root;

            const next = try tokenizer.nextToken(allocator, allow_comments);
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => continue :flag next.type,
                else => return error.InvalidSyntax,
            }
        },
        .array_begin => {
            defer tokenizer.skipWhitespace();

            if (query.slice().len < 1) {
                // root array
                const ptr = try query.addOne();
                ptr.* = root;
                self.index.set(root, .{ .array = ArraySlice{
                    .len = 0,
                    .tip = 1,
                } });
            } else {
                // order matters
                const parent_idx = query.get(query.len - 1);
                const idx_ptr = try query.addOne();
                idx_ptr.* = try self.addEmpty(allocator);
                self.index.set(idx_ptr.*, .{ .array = ArraySlice{
                    .len = 0,
                    .tip = idx_ptr.* + 1,
                } });
                switch (self.index.get(parent_idx)) {
                    .array => |slice| {
                        self.index.set(parent_idx, .{ .array = ArraySlice{
                            .len = slice.len + 1,
                            .tip = if (slice.len == 0) idx_ptr.* else slice.tip,
                        } });
                    },
                    else => {},
                }
            }

            const next = try tokenizer.nextToken(allocator, allow_comments);
            token = next;
            switch (next.type) {
                .property => return error.InvalidSyntax,
                else => continue :flag next.type,
            }
        },
        .true, .false => {
            const idx = try self.addBool(allocator, if (token.type == .true) true else false);

            if (query.len == 0) { // root
                self.index.set(root, .{ .bool = if (token.type == .true) true else false });
                return root;
            }

            const parent_idx = query.get(query.len - 1);
            switch (self.index.get(parent_idx)) {
                .array => |slice| {
                    self.index.set(parent_idx, .{ .array = ArraySlice{
                        .len = slice.len + 1,
                        .tip = if (slice.len == 0) idx else slice.tip,
                    } });
                },
                else => {},
            }

            const next = try tokenizer.nextToken(allocator, allow_comments);
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => continue :flag next.type,
                else => return error.InvalidSyntax,
            }
        },
        .string => {
            if (query.len == 0) { // root
                _ = try self.addString(allocator, token.value.?.string);
                allocator.free(token.value.?.string);
                // A root string is the first interned string, so its pool
                // index is 0; the extra slot created by addString is unused.
                self.index.set(root, .{ .string = @enumFromInt(0) });
                return root;
            }

            const parent_idx = query.get(query.len - 1);
            const next = try tokenizer.nextToken(allocator, allow_comments);
            switch (next.type) {
                .colon => {
                    // the string was actually an object key
                    continue :flag .property;
                },
                else => |t| {
                    const idx = try self.addString(allocator, token.value.?.string);
                    allocator.free(token.value.?.string);
                    switch (self.index.get(parent_idx)) {
                        .array => |slice| {
                            self.index.set(parent_idx, .{ .array = ArraySlice{
                                .len = slice.len + 1,
                                .tip = if (slice.len == 0) idx else slice.tip,
                            } });
                        },
                        else => {},
                    }
                    token = next;
                    continue :flag t;
                },
            }
        },
        .int, .float => |number| {
            if (query.len == 0) { // root
                _ = switch (number) {
                    .int => try self.addNumber(allocator, .{ .int = token.value.?.int }),
                    .float => try self.addNumber(allocator, .{ .float = token.value.?.float }),
                    else => unreachable,
                };
                self.index.set(root, .{ .number = switch (number) {
                    .int => .{ .int = token.value.?.int },
                    .float => .{ .float = token.value.?.float },
                    else => unreachable,
                } });
                return root;
            }

            const parent_idx = query.get(query.len - 1);
            const idx = switch (number) {
                .int => try self.addNumber(allocator, .{ .int = token.value.?.int }),
                .float => try self.addNumber(allocator, .{ .float = token.value.?.float }),
                else => unreachable,
            };
            switch (self.index.get(parent_idx)) {
                .array => |slice| {
                    self.index.set(parent_idx, .{ .array = ArraySlice{
                        .len = slice.len + 1,
                        .tip = if (slice.len == 0) idx else slice.tip,
                    } });
                },
                else => {},
            }

            const next = try tokenizer.nextToken(allocator, allow_comments);
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => continue :flag next.type,
                else => return error.InvalidSyntax,
            }
        },
        .comma => {
            // BUGFIX: previously the next token was consumed only when
            // allow_trailing_comma was false; with the flag enabled the
            // branch fell through and `parse` returned at the first comma,
            // truncating the document. Now the comma always advances.
            const next = try tokenizer.nextToken(allocator, allow_comments);
            token = next;
            switch (next.type) {
                .object_end, .array_end => {
                    if (!self.options.flags.allow_trailing_comma)
                        return error.TrailingComma;
                    continue :flag next.type;
                },
                .comma => return error.InvalidSyntax,
                else => continue :flag token.type,
            }
        },
        .null => {
            const idx = try self.addNull(allocator);

            if (query.len == 0) { // root
                self.index.set(root, .{ .null = {} });
                return root;
            }

            const parent_idx = query.get(query.len - 1);
            switch (self.index.get(parent_idx)) {
                .array => |slice| {
                    self.index.set(parent_idx, .{ .array = ArraySlice{
                        .len = slice.len + 1,
                        .tip = if (slice.len == 0) idx else slice.tip,
                    } });
                },
                else => {},
            }

            const next = tokenizer.nextToken(allocator, allow_comments) catch |err| switch (err) {
                error.InvalidSyntax => return err,
                else => return root,
            };
            token = next;
            switch (next.type) {
                .comma => continue :flag .comma,
                .object_end, .array_end => continue :flag next.type,
                else => return error.InvalidSyntax,
            }
        },
        else => return error.InvalidSyntax,
    }

    return root;
}

test getValue {
    const allocator = std.testing.allocator;

    const text =
        \\{ // epic comment
        \\ "a":"A",
        \\ "b":"B",
        \\ "c": {
        \\ "d": "D"
        \\ },
        \\ "e": "E",
        \\ "f": [1]
        \\}
    ;

    // 1: a, 2: b, 3: c, 4: d, 5: e, 6: f
    var tokenizer: Tokenizer = try .init(allocator, text);
    defer tokenizer.deinit(allocator);

    var self = try allocator.create(Self);
    self.* = Self.init;
    defer allocator.destroy(self);
    defer self.deinit(allocator);

    self.options.flags.allow_comments = true;

    const idx: usize = try self.parse(allocator, &tokenizer);
    var root = try self.getValue(allocator, idx);
    defer root.deinit(allocator);

    try std.testing.expect(root == .object);
    std.debug.print("{}\n", .{root});
}