diff --git a/language.zig b/language.zig index d140bdc..1b29552 100644 --- a/language.zig +++ b/language.zig @@ -7,7 +7,9 @@ const assert = std.debug.assert; const Self = @This(); -pub const Error = enum {}; +pub const Error = enum { + TrailingComma, +}; pub const JsonType = enum { null, @@ -33,7 +35,39 @@ pub const JsonInput = union(JsonType) { number: f64, string: []const u8, array: []JsonInput, - object: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput), + object: std.StringArrayHashMapUnmanaged(JsonInput), + + pub fn format( + self: @This(), + comptime fmt: []const u8, + opts: std.fmt.FormatOptions, + writer: anytype, + ) !void { + switch (self) { + .null => try writer.writeAll("null"), + .bool => try writer.writeAll(if (self.bool) "true" else "false"), + .number => try writer.print("{d}", .{self.number}), + .string => try writer.print("\"{s}\"", .{self.string}), + .array => { + try writer.writeByte('['); + for (self.array, 0..) |val, i| { + try val.format(fmt, opts, writer); + if (i < self.array.len - 1) try writer.writeByte(','); + } + try writer.writeByte(']'); + }, + .object => { + try writer.writeByte('{'); + for (self.object.keys(), self.object.values(), 0..) |k, v, i| { + try writer.print("\"{s}\"", .{k}); + try writer.writeByte(':'); + try v.format(fmt, opts, writer); + if (i < self.object.entries.len - 1) try writer.writeByte(','); + } + try writer.writeByte('}'); + }, + } + } }; /// same as ObjectEntry but simpler @@ -167,6 +201,16 @@ fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usiz } } +fn addEmptyArray(self: *Self, allocator: std.mem.Allocator) !usize { + try self.index.ensureUnusedCapacity(allocator, 1); + const idx = self.index.addOneAssumeCapacity(); + self.index.set(idx, .{ .array = ArraySlice{ + .start = self.index.len, + .len = 0, + } }); + return idx; +} + fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize { try self.index.ensureUnusedCapacity(allocator, 1); const idx = self.index.addOneAssumeCapacity(); @@ -279,7 +323,7 @@ fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput .object => { var kidx = entry.object.property_idx; var vidx = entry.object.value_idx; - var obj: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput) = .empty; + var obj: std.StringArrayHashMapUnmanaged(JsonInput) = .empty; try obj.ensureTotalCapacity(allocator, entry.object.len); for (0..entry.object.len) |_| { @@ -287,7 +331,7 @@ fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput const val = (try self.getValue(allocator, vidx)).?; obj.putAssumeCapacityNoClobber(key, val); - kidx += 1; + kidx += key.len + 1; vidx += 1; } @@ -303,97 +347,80 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize { var it = tokenizer.iterator(); const root = try self.addEmptyObject(allocator); - var work_query: std.BoundedArray(usize, self.options.max_depth) = try .init(64); - var do_once = true; + var work_query = try allocator.alloc(usize, self.options.max_depth); + var cycles: usize = 0; + + //defer assert(cycles == 0); while (it.next()) |token| { defer tokenizer.skipWhitespace(); + + std.debug.print("token: {s}\n", .{@tagName(token.type)}); + flag: switch (token.type) { + .array_end => { + cycles -= 1; + }, + .object_end => { + cycles -= 1; + }, + .array_begin => { + const idx = try self.addEmptyArray(allocator); + work_query[cycles] = idx; + cycles += 1; + }, .object_begin => { - if (do_once) { + if (cycles == 0) { self.index.set(root, .{ .object = .{ .len = 0, .property_idx = self.property_index.string_table.size, .value_idx = self.index.len, } }); - try work_query.append(root); - do_once = false; - continue; + work_query[cycles] = root; + } else { + const obj_idx = try self.addEmptyObject(allocator); + work_query[cycles] = obj_idx; } - const scope_idx = work_query.get(work_query.len - 1); - - const obj_idx = try self.addEmptyObject(allocator); - const scope = self.index.get(scope_idx).object; - - // add map to itself - const new_data = ObjectEntry{ - .len = scope.len + 1, - .property_idx = self.property_index.string_table.size, - .value_idx = scope.value_idx, - }; - self.index.set(scope_idx, .{ .object = new_data }); - try work_query.append(obj_idx); - continue; - }, - .object_end => { - _ = work_query.pop().?; - continue; + cycles += 1; }, .property => { - const scope_idx = work_query.get(work_query.len - 1); - const scope = self.index.get(scope_idx).object; - - _ = try self.addProperty(allocator, token.value.?.string); - self.index.set(scope_idx, .{ .object = ObjectEntry{ - .len = scope.len + 1, - .property_idx = scope.property_idx, - .value_idx = scope.value_idx, - } }); - continue; + const scope_idx = work_query[cycles - 1]; + switch (self.index.get(scope_idx)) { + .object => |scope| { + //std.debug.print("depth: {d}\n", .{cycles}); + _ = try self.addProperty(allocator, token.value.?.string); + self.index.set(scope_idx, .{ .object = ObjectEntry{ + .len = scope.len + 1, + .property_idx = scope.property_idx, + .value_idx = scope.value_idx, + } }); + }, + else => unreachable, + } }, .string => { - // maybe we could dismiss the while loop altogether and just do this - // the whole time if (it.peek()) |next| if (next.type == .colon) { - continue :flag TokenType.property; + continue :flag .property; }; - - //const scope_idx = work_query.get(work_query.len - 1); - //const scope = self.index.get(scope_idx).object; - _ = try self.addString(allocator, token.value.?.string); - //self.index.set(scope_idx, .{ .object = scope }); - continue; }, .number => { - //const scope_idx = work_query.get(work_query.len - 1); - //const scope = self.index.get(scope_idx).object; - _ = try self.addNumber(allocator, token.value.?.number); - //self.index.set(scope_idx, .{ .object = scope }); - continue; }, .true, .false => { - //const scope_idx = work_query.get(work_query.len - 1); - //const scope = self.index.get(scope_idx).object; - _ = try self.addBool(allocator, if (token.type == .true) true else false); - //self.index.set(scope_idx, .{ .object = scope }); - continue; }, .null => { - const scope_idx = work_query.get(work_query.len - 1); - const scope = self.index.get(scope_idx).object; - _ = try self.addNull(allocator); - self.index.set(scope_idx, .{ .object = scope }); - continue; }, - .comma => if (it.peek()) |t| - if (t.type == .object_end and !self.options.flags.allow_trailing_comma) { - return error.TrailingComma; - }, - else => {}, + .comma => if (it.peek()) |t| { + if (t.type == .object_end) { + if (!self.options.flags.allow_trailing_comma) { + return error.TrailingComma; + } + } + }, + else => continue, } } @@ -413,27 +440,19 @@ test parse { const json = \\ { \\ "lazy": true, - \\ "name": "yuzu" + \\ "name": "yuzu", + \\ "dislikes": [["Math", 3], ["Sports", 1]], + \\ "age": 15 \\ } ; break :blk json; }); - const root = try parse(&self, &tokenizer); - const keys, const values = (try self.getObject(allocator, root)).?; + const root = blk: { + const idx = try parse(&self, &tokenizer); + const val = (try getValue(&self, allocator, idx)).?; + break :blk val; + }; - const stdout = std.io.getStdOut().writer(); - try stdout.writeAll("{\n"); - for (keys, values, 0..) |k, v, i| { - const key: []const u8 = k.slice(&self.property_index); - - try stdout.print( - \\ "{s}": {d} - , .{ key, v }); - if (i < keys.len) - try stdout.writeAll(",\n") - else - try stdout.writeAll("\n"); - } - try stdout.writeAll("}\n"); + std.debug.print("root: {any}\n", .{root}); } diff --git a/tokenizer.zig b/tokenizer.zig index 79b2617..b7a6cd1 100644 --- a/tokenizer.zig +++ b/tokenizer.zig @@ -464,9 +464,13 @@ pub const Iterator = struct { pub fn peek(it: *Iterator) ?Token { const frame = it.tokenizer.frame; const pos = it.tokenizer.position; + const prev = it.tokenizer.prev_token; + const max_pos = it.tokenizer.max_position; defer { it.tokenizer.position = pos; it.tokenizer.frame = frame; + it.tokenizer.max_position = max_pos; + it.tokenizer.prev_token = prev; } if (it.tokenizer.endOfInput()) return null; return it.tokenizer.nextToken() catch null;