yuzu 2025-05-24 01:22:18 -05:00
parent 099973955a
commit 7372e0092b
2 changed files with 107 additions and 84 deletions


@@ -7,7 +7,9 @@ const assert = std.debug.assert;
const Self = @This();
-pub const Error = enum {};
+pub const Error = enum {
+TrailingComma,
+};
pub const JsonType = enum {
null,
@@ -33,7 +35,39 @@ pub const JsonInput = union(JsonType) {
number: f64,
string: []const u8,
array: []JsonInput,
-object: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput),
+object: std.StringArrayHashMapUnmanaged(JsonInput),
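// StringArrayHashMapUnmanaged hashes the key bytes themselves; the Auto*
// variants reject []const u8 keys at compile time.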
+pub fn format(
+self: @This(),
+comptime fmt: []const u8,
+opts: std.fmt.FormatOptions,
+writer: anytype,
+) !void {
+switch (self) {
+.null => try writer.writeAll("null"),
+.bool => try writer.writeAll(if (self.bool) "true" else "false"),
+.number => try writer.print("{d}", .{self.number}),
+.string => try writer.print("\"{s}\"", .{self.string}),
+.array => {
+try writer.writeByte('[');
+for (self.array, 0..) |val, i| {
+try val.format(fmt, opts, writer);
+if (i < self.array.len - 1) try writer.writeByte(',');
+}
+try writer.writeByte(']');
+},
+.object => {
+try writer.writeByte('{');
+for (self.object.keys(), self.object.values(), 0..) |k, v, i| {
+try writer.print("\"{s}\"", .{k});
+try writer.writeByte(':');
+try v.format(fmt, opts, writer);
+if (i < self.object.entries.len - 1) try writer.writeByte(',');
+}
+try writer.writeByte('}');
+},
+}
+}
};
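// Usage sketch (hypothetical value): any std.fmt sink routes "{}" through the
// format() method above and emits compact JSON, e.g.
//     std.debug.print("{}\n", .{JsonInput{ .number = 1.5 }}); // prints 1.5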
/// same as ObjectEntry but simpler
@@ -167,6 +201,16 @@ fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usize
}
}
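// Reserves one index slot describing an empty array; start points at the
// current end of the index, where any future elements would land.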
+fn addEmptyArray(self: *Self, allocator: std.mem.Allocator) !usize {
+try self.index.ensureUnusedCapacity(allocator, 1);
+const idx = self.index.addOneAssumeCapacity();
+self.index.set(idx, .{ .array = ArraySlice{
+.start = self.index.len,
+.len = 0,
+} });
+return idx;
+}
fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize {
try self.index.ensureUnusedCapacity(allocator, 1);
const idx = self.index.addOneAssumeCapacity();
@@ -279,7 +323,7 @@ fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput
.object => {
var kidx = entry.object.property_idx;
var vidx = entry.object.value_idx;
-var obj: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput) = .empty;
+var obj: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
try obj.ensureTotalCapacity(allocator, entry.object.len);
for (0..entry.object.len) |_| {
@@ -287,7 +331,7 @@
const val = (try self.getValue(allocator, vidx)).?;
obj.putAssumeCapacityNoClobber(key, val);
-kidx += 1;
+kidx += key.len + 1;
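// The key cursor now advances by the key's byte length plus one (presumably a
// terminator byte in the string table) rather than by one slot per key.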
vidx += 1;
}
@@ -303,97 +347,80 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
var it = tokenizer.iterator();
const root = try self.addEmptyObject(allocator);
-var work_query: std.BoundedArray(usize, self.options.max_depth) = try .init(64);
-var do_once = true;
+var work_query = try allocator.alloc(usize, self.options.max_depth);
+var cycles: usize = 0;
+//defer assert(cycles == 0);
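// cycles doubles as the stack pointer into work_query (the current nesting
// depth). BoundedArray takes a comptime capacity, so a runtime max_depth needs
// this heap allocation; note that work_query is never freed before returning.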
while (it.next()) |token| {
defer tokenizer.skipWhitespace();
std.debug.print("token: {s}\n", .{@tagName(token.type)});
flag: switch (token.type) {
+.array_end => {
+cycles -= 1;
+},
+.object_end => {
+cycles -= 1;
+},
+.array_begin => {
+const idx = try self.addEmptyArray(allocator);
+work_query[cycles] = idx;
+cycles += 1;
+},
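// Container tokens adjust the work stack: *_begin pushes a fresh index slot,
// *_end pops it by decrementing the shared depth counter.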
.object_begin => {
-if (do_once) {
+if (cycles == 0) {
self.index.set(root, .{ .object = .{
.len = 0,
.property_idx = self.property_index.string_table.size,
.value_idx = self.index.len,
} });
-try work_query.append(root);
-do_once = false;
-continue;
+work_query[cycles] = root;
+} else {
+const obj_idx = try self.addEmptyObject(allocator);
+work_query[cycles] = obj_idx;
+}
-const scope_idx = work_query.get(work_query.len - 1);
-const obj_idx = try self.addEmptyObject(allocator);
-const scope = self.index.get(scope_idx).object;
-// add map to itself
-const new_data = ObjectEntry{
-.len = scope.len + 1,
-.property_idx = self.property_index.string_table.size,
-.value_idx = scope.value_idx,
-};
-self.index.set(scope_idx, .{ .object = new_data });
-try work_query.append(obj_idx);
-continue;
-},
-.object_end => {
-_ = work_query.pop().?;
-continue;
+cycles += 1;
},
.property => {
-const scope_idx = work_query.get(work_query.len - 1);
-const scope = self.index.get(scope_idx).object;
-_ = try self.addProperty(allocator, token.value.?.string);
-self.index.set(scope_idx, .{ .object = ObjectEntry{
-.len = scope.len + 1,
-.property_idx = scope.property_idx,
-.value_idx = scope.value_idx,
-} });
-continue;
+const scope_idx = work_query[cycles - 1];
+switch (self.index.get(scope_idx)) {
+.object => |scope| {
+//std.debug.print("depth: {d}\n", .{cycles});
+_ = try self.addProperty(allocator, token.value.?.string);
+self.index.set(scope_idx, .{ .object = ObjectEntry{
+.len = scope.len + 1,
+.property_idx = scope.property_idx,
+.value_idx = scope.value_idx,
+} });
+},
+else => unreachable,
+}
},
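// Every .property token bumps the enclosing object's len and appends the key
// to the shared string table via addProperty(); values are indexed separately.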
.string => {
// maybe we could dismiss the while loop altogether and drive the whole
// parse from this labeled switch
if (it.peek()) |next| if (next.type == .colon) {
-continue :flag TokenType.property;
+continue :flag .property;
};
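// Labeled switch continue (Zig 0.14) re-dispatches this token to the
// .property arm when the lookahead is a colon, i.e. the string is really a key.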
//const scope_idx = work_query.get(work_query.len - 1);
//const scope = self.index.get(scope_idx).object;
_ = try self.addString(allocator, token.value.?.string);
//self.index.set(scope_idx, .{ .object = scope });
continue;
},
.number => {
//const scope_idx = work_query.get(work_query.len - 1);
//const scope = self.index.get(scope_idx).object;
_ = try self.addNumber(allocator, token.value.?.number);
//self.index.set(scope_idx, .{ .object = scope });
continue;
},
.true, .false => {
//const scope_idx = work_query.get(work_query.len - 1);
//const scope = self.index.get(scope_idx).object;
_ = try self.addBool(allocator, if (token.type == .true) true else false);
//self.index.set(scope_idx, .{ .object = scope });
continue;
},
.null => {
-const scope_idx = work_query.get(work_query.len - 1);
-const scope = self.index.get(scope_idx).object;
_ = try self.addNull(allocator);
-self.index.set(scope_idx, .{ .object = scope });
continue;
},
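// Scalar tokens just append to the index; the commented-out lines are
// leftovers from when each handler also patched the enclosing scope entry.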
-.comma => if (it.peek()) |t|
-if (t.type == .object_end and !self.options.flags.allow_trailing_comma) {
-return error.TrailingComma;
-},
-else => {},
+.comma => if (it.peek()) |t| {
+if (t.type == .object_end) {
+if (!self.options.flags.allow_trailing_comma) {
+return error.TrailingComma;
+}
+}
+},
+else => continue,
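// Note: only a comma before '}' is rejected here; a trailing comma before ']'
// still slips through unreported.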
}
}
@@ -413,27 +440,19 @@ test parse {
const json =
\\ {
\\ "lazy": true,
\\ "name": "yuzu"
\\ "name": "yuzu",
\\ "dislikes": [["Math", 3], ["Sports", 1]],
\\ "age": 15
\\ }
;
break :blk json;
});
-const root = try parse(&self, &tokenizer);
-const keys, const values = (try self.getObject(allocator, root)).?;
+const root = blk: {
+const idx = try parse(&self, &tokenizer);
+const val = (try getValue(&self, allocator, idx)).?;
+break :blk val;
+};
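// parse() returns an index into self.index; getValue() walks that flat index
// and materializes the subtree as a JsonInput value.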
-const stdout = std.io.getStdOut().writer();
-try stdout.writeAll("{\n");
-for (keys, values, 0..) |k, v, i| {
-const key: []const u8 = k.slice(&self.property_index);
-try stdout.print(
-\\ "{s}": {d}
-, .{ key, v });
-if (i < keys.len)
-try stdout.writeAll(",\n")
-else
-try stdout.writeAll("\n");
-}
-try stdout.writeAll("}\n");
+std.debug.print("root: {any}\n", .{root});
}


@@ -464,9 +464,13 @@ pub const Iterator = struct {
pub fn peek(it: *Iterator) ?Token {
const frame = it.tokenizer.frame;
const pos = it.tokenizer.position;
+const prev = it.tokenizer.prev_token;
+const max_pos = it.tokenizer.max_position;
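// Snapshot the complete tokenizer state; the defer below restores it so that
// peek() never mutates the stream observed by next().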
defer {
it.tokenizer.position = pos;
it.tokenizer.frame = frame;
+it.tokenizer.max_position = max_pos;
+it.tokenizer.prev_token = prev;
}
if (it.tokenizer.endOfInput()) return null;
return it.tokenizer.nextToken() catch null;