diff --git a/language.zig b/language.zig index 9fefe1d..e6cd23d 100644 --- a/language.zig +++ b/language.zig @@ -38,6 +38,27 @@ pub const JsonInput = union(JsonType) { array: []JsonInput, object: std.StringArrayHashMapUnmanaged(JsonInput), + pub fn deinit(self: JsonInput, allocator: std.mem.Allocator) void { + switch (self) { + JsonInput.array => |array| { + for (array) |json_input| { + json_input.deinit(allocator); + } + allocator.free(array); + }, + .object => |*object| { + var it = object.iterator(); + while (it.next()) |entry| { + entry.value_ptr.deinit(allocator); + //allocator.free(entry.key_ptr.*); + } + @constCast(object).deinit(allocator); + }, + .string => |_| {}, + else => {}, + } + } + pub fn format( self: @This(), comptime fmt: []const u8, @@ -103,11 +124,12 @@ property_index: StringPool = .empty, options: Options = .{}, -pub const init: Self = .{}; +pub const init = Self{}; pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { self.index.deinit(allocator); self.property_index.deinit(allocator); + self.string_index.deinit(allocator); } fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize { @@ -246,7 +268,7 @@ fn getNumber(self: *Self, index: usize) ?f64 { return null; } -fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !?struct { +fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !struct { []StringIndex, []usize, } { @@ -273,8 +295,8 @@ fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !?struct { return .{ keys, values }; } -fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) ?[]usize { - const entry = self.index.get(index) orelse return null; +fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) ![]usize { + const entry = self.index.get(index); if (entry.array.len == 0) { return &.{}; @@ -283,7 +305,7 @@ fn getArray(self: *Self, allocator: std.mem.Allocator, index: usize) ?[]usize { var idx = entry.array.start; const values = try allocator.alloc(usize, entry.array.len); - for (entry.array.len) |i| { + for (0..entry.array.len) |i| { values[i] = idx; idx += 1; } @@ -299,14 +321,149 @@ fn getNull(self: *Self, index: usize) ?void { const entry = self.index.get(index) orelse return null; return entry.null; } -// -//fn getValue( -// self: *Self, -// allocator: std.mem.Allocator, -// index: usize, -//) !?JsonInput { -// -//} +// Recursively compute how many index slots a node occupies (including nested) +fn skipSlots(self: *Self, slot: usize) usize { + const e = self.index.get(slot); + switch (e) { + .object => |obj| { + var total: usize = 1; + var v = obj.value_idx; + for (0..obj.len) |_| { + const s = skipSlots(self, v); + total += s; + v += s; + } + return total; + }, + .array => |arr| { + var total: usize = 1; + var c = arr.start; + for (0..arr.len) |_| { + const s = skipSlots(self, c); + total += s; + c += s; + } + return total; + }, + else => return 1, + } +} + +// Compute bytes length of properties starting at pidx +fn skipProps(self: *Self, pidx: usize, count: usize) usize { + var total: usize = 0; + var p = pidx; + for (0..count) |_| { + const key_slice = StringIndex.slice(@enumFromInt(p), &self.property_index); + const len = key_slice.len + 1; + total += len; + p += len; + } + return total; +} + +fn skipNestedProps(self: *Self, pptr: *usize, slot: usize) void { + const e = self.index.get(slot); + if (e == .object) { + var v = e.object.value_idx; + // Skip each nested key and its deeper nested props + for (0..e.object.len) |_| { + // Skip this key + const k: *StringIndex = @ptrCast(pptr); + const slice = k.slice(&self.property_index); + pptr.* += slice.len + 1; + // Recurse into this property's value + skipNestedProps(self, pptr, v); + // Skip slots of the value in index array + const s = skipSlots(self, v); + v += s; + } + } +} + +fn getValue( + self: *Self, + allocator: std.mem.Allocator, + idx: usize, +) !JsonInput { + const entry = self.index.get(idx); + + switch (entry) { + .null => return .null, + .bool => return .{ .bool = entry.bool }, + .number => return .{ .number = entry.number }, + .string => |string| { + const sl = string.slice(&self.string_index); + return .{ .string = sl }; + }, + .array => |arr| { + var out = try allocator.alloc(JsonInput, arr.len); + var c = arr.start; + for (0..arr.len) |i| { + const v = try self.getValue(allocator, c); + out[i] = v; + c += skipSlots(self, c); + } + return .{ .array = out[0..arr.len] }; + }, + .object => |obj| { + var map: std.StringArrayHashMapUnmanaged(JsonInput) = .empty; + var p = obj.property_idx; + var v = obj.value_idx; + for (0..obj.len) |_| { + // Extract key + const k: StringIndex = @enumFromInt(p); + const key_slice = k.slice(&self.property_index); + // Extract and assign value + const val = try self.getValue(allocator, v); + try map.put(allocator, key_slice, val); + // Advance past this key + p += key_slice.len + 1; + // Skip nested property names of this value + self.skipNestedProps(&p, v); + // Advance past the value slots + const s = self.skipSlots(v); + v += s; + } + return .{ .object = map }; + }, + } +} + +test getValue { + const allocator = std.testing.allocator; + + const json = + \\ { + \\ "name": "Yuzu", + \\ "author": true, + \\ "age": 15, + \\ "address": { + \\ "street": 1, + \\ "deeply_nested": { + \\ "k": 5, + \\ "socialist": "expansion", + \\ "idk": {"a":"b"} + \\ } + \\ }, + \\ "offset": "yes" + \\ } + ; + + var tokenizer: Tokenizer = try .init(allocator, json); + defer tokenizer.deinit(); + + var self = init; + defer self.deinit(allocator); + + const idx: usize = try parse(&self, &tokenizer); + + var root = try getValue(&self, allocator, idx); + defer root.deinit(allocator); + + try std.testing.expect(root == .object); + std.debug.print("{}\n", .{root}); +} /// always returns 0 (root) pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize { @@ -315,9 +472,6 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize { var it = tokenizer.iterator(); const root = try self.addEmptyObject(allocator); - defer std.debug.print("idx: {s}\n", .{ - @tagName(self.index.get(self.index.get(root).object.value_idx)), - }); var token = it.next() orelse return root; @@ -335,8 +489,9 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize { const scope_idx = query.get(query.len - 1); switch (self.index.get(scope_idx)) { .object => |scope| { - std.debug.print("prop: {s} \n", .{token.value.?.string}); + //std.debug.print("prop: {s} \n", .{token.value.?.string}); const pidx = try self.addProperty(allocator, token.value.?.string); + allocator.free(token.value.?.string); self.index.set(scope_idx, .{ .object = ObjectEntry{ .len = scope.len + 1, .property_idx = if (scope.len == 0) pidx else scope.property_idx, @@ -424,6 +579,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize { }, else => |t| { _ = try self.addString(allocator, token.value.?.string); + allocator.free(token.value.?.string); token = next; continue :flag t; @@ -454,69 +610,9 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize { } }, else => { - std.debug.print("token: {s}\n", .{@tagName(token.type)}); + // std.debug.print("token: {s}\n", .{@tagName(token.type)}); }, } return root; } - -test parse { - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - defer arena.deinit(); - - const allocator = arena.allocator(); - - var self = init; - defer deinit(&self, allocator); - - var tokenizer = try Tokenizer.init(allocator, blk: { - const json = - \\ { - \\ "bio": "cool", - \\ "age": 15, - \\ "name": "yuzu", - \\ "admin": true, - \\ "address": { - \\ "lorem": "ipsum", - \\ "simple": true - \\ }, - \\ "xd": true - \\ } - ; - break :blk json; - }); - - const idx = try parse(&self, &tokenizer); - - const keys, const values = (try getObject(&self, allocator, idx)).?; - for (keys, values, 0..) |k, v, i| { - _ = i; - const key = k.slice(&self.property_index); - const val = self.index.get(v); - switch (val) { - .object => { - const keys2, const values2 = (try getObject(&self, allocator, v)).?; - for (keys2, values2, 0..) |k2, v2, ii2| { - _ = ii2; - const key2 = k2.slice(&self.property_index); - const val2 = self.index.get(v2); - std.debug.print( - \\ - "{s}": {s} - , .{ key2, @tagName(val2) }); - if (val2 == .string) { - std.debug.print(" ({s})", .{ - val2.string.slice(&self.string_index), - }); - } - std.debug.print("\n", .{}); - } - }, - else => {}, - } - std.debug.print( - \\"{s}": {s} - , .{ key, @tagName(val) }); - std.debug.print("\n", .{}); - } -} diff --git a/tokenizer.zig b/tokenizer.zig index a6de90c..22df86b 100644 --- a/tokenizer.zig +++ b/tokenizer.zig @@ -448,11 +448,11 @@ pub const Iterator = struct { pub fn next(it: *Iterator) ?Token { defer it.tokenizer.skipWhitespace(); if (it.tokenizer.endOfInput()) { - std.debug.print("got eof\n", .{}); + // std.debug.print("got eof\n", .{}); return null; } - return it.tokenizer.nextToken() catch |err| { - std.debug.print("got err: {s}\n", .{@errorName(err)}); + return it.tokenizer.nextToken() catch { + // std.debug.print("got err: {s}\n", .{@errorName(err)}); return null; }; }