From 099973955aa9bddb85d864901f3acd93c7b31588 Mon Sep 17 00:00:00 2001
From: yuzu
Date: Fri, 23 May 2025 23:33:21 -0500
Subject: [PATCH] a

---
 language.zig  | 176 ++++++++++++++++++++++++--------------------------
 tokenizer.zig |  13 +++-
 2 files changed, 94 insertions(+), 95 deletions(-)

diff --git a/language.zig b/language.zig
index e872253..d140bdc 100644
--- a/language.zig
+++ b/language.zig
@@ -57,6 +57,7 @@ pub const Flags = packed struct {
 };
 
 pub const Options = struct {
+    comptime indent_len: usize = 4,
     comptime max_depth: usize = 256,
     comptime flags: Flags = .{},
 };
@@ -295,67 +296,60 @@ fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput
     }
 }
 
-pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
+/// always returns 0 (root)
+pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
     const allocator = tokenizer.allocator;
     var it = tokenizer.iterator();
 
-    var depth_buf = try allocator.alloc(usize, self.options.max_depth);
-    defer allocator.free(depth_buf);
-
-    var cycles: usize = 0;
 
+    const root = try self.addEmptyObject(allocator);
+    var work_query: std.BoundedArray(usize, self.options.max_depth) = try .init(64);
+    var do_once = true;
 
     while (it.next()) |token| {
+        defer tokenizer.skipWhitespace();
         flag: switch (token.type) {
             .object_begin => {
-                std.debug.print("{{", .{});
+                if (do_once) {
+                    self.index.set(root, .{ .object = .{
+                        .len = 0,
+                        .property_idx = self.property_index.string_table.size,
+                        .value_idx = self.index.len,
+                    } });
+                    try work_query.append(root);
+                    do_once = false;
+                    continue;
+                }
+                const scope_idx = work_query.get(work_query.len - 1);
                 const obj_idx = try self.addEmptyObject(allocator);
+                const scope = self.index.get(scope_idx).object;
 
-                depth_buf[cycles] = obj_idx;
-
-                if (tokenizer.prev_token) |t| if (t.type == .object_begin) {
-                    // add map to itself
-                    const data = self.index.get(depth_buf[cycles - 1]);
-
-                    switch (data) {
-                        .object => |valid_entry| {
-                            const new_data = ObjectEntry{
-                                .len = valid_entry.len + 1,
-                                .property_idx = self.property_index.string_table.size,
-                                .value_idx = obj_idx,
-                            };
-                            self.index.set(depth_buf[cycles - 1], .{ .object = new_data });
-                            tokenizer.prev_token = null; // reset
-                        },
-                        else => unreachable,
-                    }
-                } else tokenizer.pushBack(token);
-                cycles += 1;
+                // add map to itself
+                const new_data = ObjectEntry{
+                    .len = scope.len + 1,
+                    .property_idx = self.property_index.string_table.size,
+                    .value_idx = scope.value_idx,
+                };
+                self.index.set(scope_idx, .{ .object = new_data });
+                try work_query.append(obj_idx);
 
                 continue;
             },
             .object_end => {
-                const keys, const vals = (try self.getObject(allocator, depth_buf[cycles - 1])).?;
-                std.debug.print("\nfound {d} keys and {d} values\n", .{ keys.len, vals.len });
-                for (keys, vals) |k, v| {
-                    const key = k.slice(&self.property_index);
-                    const val = self.index.get(v);
-                    std.debug.print(
-                        \\"{s}": {s},
-                    , .{ key, @tagName(val) });
-                }
-                std.debug.print("}}", .{});
+                _ = work_query.pop().?;
+                continue;
             },
             .property => {
+                const scope_idx = work_query.get(work_query.len - 1);
+                const scope = self.index.get(scope_idx).object;
+
                 _ = try self.addProperty(allocator, token.value.?.string);
-                const last_obj = self.index.get(depth_buf[cycles - 1]);
-                if (cycles > 0) {
-                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-                        .len = last_obj.object.len + 1,
-                        .property_idx = last_obj.object.property_idx,
-                        .value_idx = last_obj.object.value_idx,
-                    } });
-                    continue;
-                }
+                self.index.set(scope_idx, .{ .object = ObjectEntry{
+                    .len = scope.len + 1,
+                    .property_idx = scope.property_idx,
+                    .value_idx = scope.value_idx,
+                } });
+                continue;
             },
             .string => {
                 // maybe we could dismiss the while loop altogether and just do this
@@ -364,63 +358,46 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
                     continue :flag TokenType.property;
                 };
 
+                //const scope_idx = work_query.get(work_query.len - 1);
+                //const scope = self.index.get(scope_idx).object;
+
                 _ = try self.addString(allocator, token.value.?.string);
-                const last_obj = self.index.get(depth_buf[cycles - 1]);
-                if (cycles > 0) {
-                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-                        .len = last_obj.object.len,
-                        .property_idx = last_obj.object.property_idx,
-                        .value_idx = last_obj.object.value_idx,
-                    } });
-                    continue;
-                }
+                //self.index.set(scope_idx, .{ .object = scope });
+                continue;
             },
             .number => {
+                //const scope_idx = work_query.get(work_query.len - 1);
+                //const scope = self.index.get(scope_idx).object;
+
                 _ = try self.addNumber(allocator, token.value.?.number);
-                const last_obj = self.index.get(depth_buf[cycles - 1]);
-                if (cycles > 0) {
-                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-                        .len = last_obj.object.len,
-                        .property_idx = last_obj.object.property_idx,
-                        .value_idx = last_obj.object.value_idx,
-                    } });
-                    continue;
-                }
+                //self.index.set(scope_idx, .{ .object = scope });
+                continue;
            },
             .true, .false => {
+                //const scope_idx = work_query.get(work_query.len - 1);
+                //const scope = self.index.get(scope_idx).object;
+
                 _ = try self.addBool(allocator, if (token.type == .true) true else false);
-                const last_obj = self.index.get(depth_buf[cycles - 1]);
-                if (cycles > 0) {
-                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-                        .len = last_obj.object.len,
-                        .property_idx = last_obj.object.property_idx,
-                        .value_idx = last_obj.object.value_idx,
-                    } });
-                    continue;
-                }
+                //self.index.set(scope_idx, .{ .object = scope });
+                continue;
             },
             .null => {
+                const scope_idx = work_query.get(work_query.len - 1);
+                const scope = self.index.get(scope_idx).object;
+
                 _ = try self.addNull(allocator);
-                const last_obj = self.index.get(depth_buf[cycles - 1]);
-                if (cycles > 0) {
-                    self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-                        .len = last_obj.object.len,
-                        .property_idx = last_obj.object.property_idx,
-                        .value_idx = last_obj.object.value_idx,
-                    } });
-                    continue;
-                }
+                self.index.set(scope_idx, .{ .object = scope });
+                continue;
             },
-            .comma => {
-                if (it.peek()) |tc| if (tc.type == .object_end and self.options.flags.allow_trailing_comma) {
+            .comma => if (it.peek()) |t|
+                if (t.type == .object_end and !self.options.flags.allow_trailing_comma) {
                     return error.TrailingComma;
-                };
-            },
+                },
             else => {},
         }
-
-        tokenizer.skipWhitespace();
     }
+
+    return root;
 }
 
 test parse {
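
Note (sketch, not part of the patch): the hunk above replaces the depth_buf/cycles
bookkeeping with an explicit scope stack, so the innermost open object is always
work_query.get(work_query.len - 1) and .object_end is a plain pop. A minimal
self-contained illustration of that technique follows; the token and function
names are made up for the example and are not this codebase's API.

    const std = @import("std");

    const Tok = enum { object_begin, object_end, value };

    // Walk a token stream keeping a stack of open scopes; the stack top is
    // always the scope a value would attach to.
    fn maxDepth(stream: []const Tok) !usize {
        var stack = try std.BoundedArray(usize, 256).init(0);
        var deepest: usize = 0;
        for (stream, 0..) |tok, i| switch (tok) {
            .object_begin => {
                try stack.append(i); // push the new scope
                deepest = @max(deepest, stack.len);
            },
            .object_end => _ = stack.pop() orelse return error.Unbalanced,
            // a real parser would attach the value to stack.get(stack.len - 1)
            .value => if (stack.len == 0) return error.ValueOutsideObject,
        };
        return deepest;
    }

    test maxDepth {
        const stream = [_]Tok{ .object_begin, .object_begin, .value, .object_end, .object_end };
        try std.testing.expectEqual(@as(usize, 2), try maxDepth(&stream));
    }
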
@@ -435,15 +412,28 @@
     var tokenizer = try Tokenizer.init(allocator, blk: {
         const json =
             \\ {
-            \\ "key": "hello",
-            \\ "key2": "world",
-            \\ "key3": true,
-            \\ "key4": null,
-            \\ "key5": 123
+            \\ "lazy": true,
+            \\ "name": "yuzu"
             \\ }
         ;
         break :blk json;
     });
 
-    try parse(&self, &tokenizer);
+    const root = try parse(&self, &tokenizer);
+    const keys, const values = (try self.getObject(allocator, root)).?;
+
+    const stdout = std.io.getStdOut().writer();
+    try stdout.writeAll("{\n");
+    for (keys, values, 0..) |k, v, i| {
+        const key: []const u8 = k.slice(&self.property_index);
+
+        try stdout.print(
+            \\  "{s}": {d}
+        , .{ key, v });
+        if (i + 1 < keys.len)
+            try stdout.writeAll(",\n")
+        else
+            try stdout.writeAll("\n");
+    }
+    try stdout.writeAll("}\n");
 }
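
Note (sketch, not part of the patch): the test's loop compares i + 1 < keys.len so
the comma lands between pairs and never after the last one. Emitting the separator
before every element except the first is an alternative that has no last-element
case at all:

    const std = @import("std");

    // Separator-first printing: there is no "last element" case to get wrong.
    fn printKeys(writer: anytype, keys: []const []const u8) !void {
        try writer.writeAll("{\n");
        for (keys, 0..) |key, i| {
            if (i != 0) try writer.writeAll(",\n");
            try writer.print("  \"{s}\": null", .{key});
        }
        try writer.writeAll("\n}\n");
    }

    test printKeys {
        var buf: [128]u8 = undefined;
        var fbs = std.io.fixedBufferStream(&buf);
        try printKeys(fbs.writer(), &.{ "lazy", "name" });
        try std.testing.expectEqualStrings(
            "{\n  \"lazy\": null,\n  \"name\": null\n}\n",
            fbs.getWritten(),
        );
    }
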
diff --git a/tokenizer.zig b/tokenizer.zig
index 4fd450f..79b2617 100644
--- a/tokenizer.zig
+++ b/tokenizer.zig
@@ -27,6 +27,7 @@ pub const Error = error{
 };
 
 pub const TokenType = enum(u8) {
+    zero,
     eof,
     null,
     true,
@@ -445,20 +446,28 @@ pub fn nextString(self: *Self) Error!Token {
 
 pub const Iterator = struct {
     tokenizer: *Self,
+
     pub fn next(it: *Iterator) ?Token {
         defer it.tokenizer.skipWhitespace();
         if (it.tokenizer.endOfInput()) return null;
         return it.tokenizer.nextToken() catch null;
     }
+
     pub fn reset(it: *Iterator) void {
         it.tokenizer.position = 0;
         it.tokenizer.max_position = 0;
         it.tokenizer.frame = 0;
         it.tokenizer.prev_token = null;
     }
+
+    /// peeks at the next token by snapshotting and restoring the tokenizer state
     pub fn peek(it: *Iterator) ?Token {
-        defer it.tokenizer.position -%= 1;
-        defer it.tokenizer.skipWhitespace();
+        const frame = it.tokenizer.frame;
+        const pos = it.tokenizer.position;
+        defer {
+            it.tokenizer.position = pos;
+            it.tokenizer.frame = frame;
+        }
         if (it.tokenizer.endOfInput()) return null;
         return it.tokenizer.nextToken() catch null;
     }
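
Note (sketch, not part of the patch): Iterator.peek now snapshots every piece of
mutable tokenizer state and rolls it back in a defer, so the restore also runs on
the early-return paths; the old position -%= 1 rewind only held when a token
consumed exactly one byte and no whitespace was skipped. The same pattern on a
reduced cursor, with illustrative field names:

    const std = @import("std");

    const Cursor = struct {
        bytes: []const u8,
        position: usize = 0,

        fn next(self: *Cursor) ?u8 {
            if (self.position >= self.bytes.len) return null;
            defer self.position += 1;
            return self.bytes[self.position];
        }

        // Peek = snapshot all mutable state, delegate to next(), restore.
        fn peek(self: *Cursor) ?u8 {
            const saved = self.position;
            defer self.position = saved; // restores even when next() returns null
            return self.next();
        }
    };

    test Cursor {
        var c = Cursor{ .bytes = "ab" };
        try std.testing.expectEqual(@as(?u8, 'a'), c.peek());
        try std.testing.expectEqual(@as(?u8, 'a'), c.next()); // peek consumed nothing
    }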