yuzu 2025-05-23 23:33:21 -05:00
parent d6bbd29a93
commit 099973955a
2 changed files with 94 additions and 95 deletions

View File

@@ -57,6 +57,7 @@ pub const Flags = packed struct {
};
pub const Options = struct {
comptime indent_len: usize = 4,
+comptime max_depth: usize = 256,
comptime flags: Flags = .{},
};
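
Why the new field is comptime: the rewritten parse below sizes a std.BoundedArray with self.options.max_depth, and that capacity must be compile-time known. A minimal sketch of the idea, with Options redeclared locally just for the test (only the field name and default come from the hunk above; std APIs as of Zig 0.14):

const std = @import("std");

const Options = struct {
    comptime max_depth: usize = 256,
};

test "max_depth works as a comptime capacity" {
    const opts: Options = .{};
    // reading a comptime field yields a comptime-known value,
    // so it can size a fixed-capacity container
    var q = try std.BoundedArray(usize, opts.max_depth).init(0);
    try q.append(42);
    try std.testing.expectEqual(@as(usize, 1), q.len);
}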
@@ -295,67 +296,60 @@ fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput
}
}
-pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
+/// always returns 0 (root)
+pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
const allocator = tokenizer.allocator;
var it = tokenizer.iterator();
-var depth_buf = try allocator.alloc(usize, self.options.max_depth);
-defer allocator.free(depth_buf);
-var cycles: usize = 0;
+const root = try self.addEmptyObject(allocator);
+var work_query: std.BoundedArray(usize, self.options.max_depth) = try .init(64);
+var do_once = true;
while (it.next()) |token| {
-defer tokenizer.skipWhitespace();
flag: switch (token.type) {
.object_begin => {
-std.debug.print("{{", .{});
-const obj_idx = try self.addEmptyObject(allocator);
-depth_buf[cycles] = obj_idx;
-if (tokenizer.prev_token) |t| if (t.type == .object_begin) {
-// add map to itself
-const data = self.index.get(depth_buf[cycles - 1]);
-switch (data) {
-.object => |valid_entry| {
-const new_data = ObjectEntry{
-.len = valid_entry.len + 1,
-.value_idx = obj_idx,
-};
-self.index.set(depth_buf[cycles - 1], .{ .object = new_data });
-tokenizer.prev_token = null; // reset
-},
-else => unreachable,
-}
-} else tokenizer.pushBack(token);
-cycles += 1;
+if (do_once) {
+self.index.set(root, .{ .object = .{
+.len = 0,
+.property_idx = self.property_index.string_table.size,
+.value_idx = self.index.len,
+} });
+try work_query.append(root);
+do_once = false;
+continue;
+}
+const scope_idx = work_query.get(work_query.len - 1);
+const obj_idx = try self.addEmptyObject(allocator);
+const scope = self.index.get(scope_idx).object;
+// add map to itself
+const new_data = ObjectEntry{
+.len = scope.len + 1,
+.property_idx = self.property_index.string_table.size,
+.value_idx = scope.value_idx,
+};
+self.index.set(scope_idx, .{ .object = new_data });
+try work_query.append(obj_idx);
+continue;
},
.object_end => {
-const keys, const vals = (try self.getObject(allocator, depth_buf[cycles - 1])).?;
-std.debug.print("\nfound {d} keys and {d} values\n", .{ keys.len, vals.len });
-for (keys, vals) |k, v| {
-const key = k.slice(&self.property_index);
-const val = self.index.get(v);
-std.debug.print(
-\\"{s}": {s},
-, .{ key, @tagName(val) });
-}
-std.debug.print("}}", .{});
+_ = work_query.pop().?;
+continue;
},
.property => {
+const scope_idx = work_query.get(work_query.len - 1);
+const scope = self.index.get(scope_idx).object;
_ = try self.addProperty(allocator, token.value.?.string);
-const last_obj = self.index.get(depth_buf[cycles - 1]);
-if (cycles > 0) {
-self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-.len = last_obj.object.len + 1,
-.property_idx = last_obj.object.property_idx,
-.value_idx = last_obj.object.value_idx,
-} });
-continue;
-}
+self.index.set(scope_idx, .{ .object = ObjectEntry{
+.len = scope.len + 1,
+.property_idx = scope.property_idx,
+.value_idx = scope.value_idx,
+} });
+continue;
},
.string => {
// maybe we could dismiss the while loop altogether and just do this
@@ -364,63 +358,46 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
continue :flag TokenType.property;
};
+//const scope_idx = work_query.get(work_query.len - 1);
+//const scope = self.index.get(scope_idx).object;
_ = try self.addString(allocator, token.value.?.string);
-const last_obj = self.index.get(depth_buf[cycles - 1]);
-if (cycles > 0) {
-self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-.len = last_obj.object.len,
-.property_idx = last_obj.object.property_idx,
-.value_idx = last_obj.object.value_idx,
-} });
-continue;
-}
+//self.index.set(scope_idx, .{ .object = scope });
+continue;
},
.number => {
+//const scope_idx = work_query.get(work_query.len - 1);
+//const scope = self.index.get(scope_idx).object;
_ = try self.addNumber(allocator, token.value.?.number);
-const last_obj = self.index.get(depth_buf[cycles - 1]);
-if (cycles > 0) {
-self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-.len = last_obj.object.len,
-.property_idx = last_obj.object.property_idx,
-.value_idx = last_obj.object.value_idx,
-} });
-continue;
-}
+//self.index.set(scope_idx, .{ .object = scope });
+continue;
},
.true, .false => {
+//const scope_idx = work_query.get(work_query.len - 1);
+//const scope = self.index.get(scope_idx).object;
_ = try self.addBool(allocator, if (token.type == .true) true else false);
-const last_obj = self.index.get(depth_buf[cycles - 1]);
-if (cycles > 0) {
-self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-.len = last_obj.object.len,
-.property_idx = last_obj.object.property_idx,
-.value_idx = last_obj.object.value_idx,
-} });
-continue;
-}
+//self.index.set(scope_idx, .{ .object = scope });
+continue;
},
.null => {
+const scope_idx = work_query.get(work_query.len - 1);
+const scope = self.index.get(scope_idx).object;
_ = try self.addNull(allocator);
-const last_obj = self.index.get(depth_buf[cycles - 1]);
-if (cycles > 0) {
-self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
-.len = last_obj.object.len,
-.property_idx = last_obj.object.property_idx,
-.value_idx = last_obj.object.value_idx,
-} });
-continue;
-}
+self.index.set(scope_idx, .{ .object = scope });
+continue;
},
-.comma => {
-if (it.peek()) |tc| if (tc.type == .object_end and self.options.flags.allow_trailing_comma) {
-return error.TrailingComma;
-};
-},
+.comma => if (it.peek()) |t|
+if (t.type == .object_end and !self.options.flags.allow_trailing_comma) {
+return error.TrailingComma;
+},
else => {},
}
+tokenizer.skipWhitespace();
}
+return root;
}
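
The core of the rewrite: the depth_buf/cycles bookkeeping is replaced by an explicit scope stack. Push an object's index when it opens, read the top to find the current parent, pop on object_end. A minimal self-contained sketch of that pattern (std.BoundedArray as of Zig 0.14; names are illustrative, not the parser's internals):

const std = @import("std");

test "scope stack in place of depth_buf/cycles" {
    var work_query = try std.BoundedArray(usize, 256).init(0);
    try work_query.append(0); // root object opens
    try work_query.append(1); // nested object opens
    // the current parent is always the top of the stack
    try std.testing.expectEqual(@as(usize, 1), work_query.get(work_query.len - 1));
    _ = work_query.pop(); // nested object closes
    try std.testing.expectEqual(@as(usize, 0), work_query.get(work_query.len - 1));
}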
test parse {
@@ -435,15 +412,28 @@ test parse {
var tokenizer = try Tokenizer.init(allocator, blk: {
const json =
\\ {
\\ "key": "hello",
\\ "key2": "world",
\\ "key3": true,
\\ "key4": null,
\\ "key5": 123
\\ "lazy": true,
\\ "name": "yuzu"
\\ }
;
break :blk json;
});
-try parse(&self, &tokenizer);
+const root = try parse(&self, &tokenizer);
+const keys, const values = (try self.getObject(allocator, root)).?;
+const stdout = std.io.getStdOut().writer();
+try stdout.writeAll("{\n");
+for (keys, values, 0..) |k, v, i| {
+const key: []const u8 = k.slice(&self.property_index);
+try stdout.print(
+\\ "{s}": {d}
+, .{ key, v });
+if (i < keys.len)
+try stdout.writeAll(",\n")
+else
+try stdout.writeAll("\n");
+}
+try stdout.writeAll("}\n");
}
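
One detail in the printing loop above: i ranges over 0..keys.len-1, so the check i < keys.len is true for every entry and the no-comma branch never executes. The usual last-entry guard, shown as a small self-contained sketch (not part of the commit; Zig 0.14-era std APIs):

const std = @import("std");

test "comma after every entry except the last" {
    const keys = [_][]const u8{ "lazy", "name" };
    var out = std.ArrayList(u8).init(std.testing.allocator);
    defer out.deinit();
    for (keys, 0..) |key, i| {
        try out.writer().print("\"{s}\"", .{key});
        // i + 1 == keys.len holds exactly on the last entry
        try out.appendSlice(if (i + 1 < keys.len) ",\n" else "\n");
    }
    try std.testing.expectEqualStrings("\"lazy\",\n\"name\"\n", out.items);
}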

View File

@@ -27,6 +27,7 @@ pub const Error = error{
};
pub const TokenType = enum(u8) {
zero,
+eof,
null,
true,
@@ -445,20 +446,28 @@ pub fn nextString(self: *Self) Error!Token {
pub const Iterator = struct {
tokenizer: *Self,
pub fn next(it: *Iterator) ?Token {
+defer it.tokenizer.skipWhitespace();
if (it.tokenizer.endOfInput()) return null;
return it.tokenizer.nextToken() catch null;
}
pub fn reset(it: *Iterator) void {
it.tokenizer.position = 0;
it.tokenizer.max_position = 0;
+it.tokenizer.frame = 0;
it.tokenizer.prev_token = null;
}
-/// nasty trick
pub fn peek(it: *Iterator) ?Token {
-defer it.tokenizer.position -%= 1;
-defer it.tokenizer.skipWhitespace();
+const frame = it.tokenizer.frame;
+const pos = it.tokenizer.position;
+defer {
+it.tokenizer.position = pos;
+it.tokenizer.frame = frame;
+}
if (it.tokenizer.endOfInput()) return null;
return it.tokenizer.nextToken() catch null;
}
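
The old peek rewound the stream with a wrapping decrement of position, which only holds for single-character tokens; the new one snapshots frame and position and restores both after reading. The same save/restore idea over a toy cursor, as a minimal sketch (Cursor and its fields are illustrative, not the real Tokenizer):

const std = @import("std");

const Cursor = struct {
    buf: []const u8,
    pos: usize = 0,

    fn next(c: *Cursor) ?u8 {
        if (c.pos >= c.buf.len) return null;
        defer c.pos += 1;
        return c.buf[c.pos];
    }

    // peek = next() with the cursor snapshotted up front and
    // restored on exit, so peeking never consumes input
    fn peek(c: *Cursor) ?u8 {
        const pos = c.pos;
        defer c.pos = pos;
        return c.next();
    }
};

test Cursor {
    var c = Cursor{ .buf = "ab" };
    try std.testing.expectEqual(@as(?u8, 'a'), c.peek());
    try std.testing.expectEqual(@as(?u8, 'a'), c.peek()); // still 'a'
    try std.testing.expectEqual(@as(?u8, 'a'), c.next()); // now consumed
    try std.testing.expectEqual(@as(?u8, 'b'), c.next());
}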