idk
This commit is contained in:
parent
7bc4973bf4
commit
d6bbd29a93
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
.zig-cache
|
@ -1,5 +1,6 @@
|
||||
const std = @import("std");
|
||||
const Tokenizer = @import("tokenizer.zig");
|
||||
const TokenType = Tokenizer.TokenType;
|
||||
const StringPool = @import("strings.zig");
|
||||
const StringIndex = StringPool.StringIndex;
|
||||
const assert = std.debug.assert;
|
||||
@ -22,8 +23,8 @@ pub const JsonValue = union(JsonType) {
|
||||
bool: bool,
|
||||
number: f64,
|
||||
string: StringIndex,
|
||||
array: ArrayIndex.Slice,
|
||||
object: ObjectIndex.Entry,
|
||||
array: ArraySlice,
|
||||
object: ObjectEntry,
|
||||
};
|
||||
|
||||
pub const JsonInput = union(JsonType) {
|
||||
@ -35,31 +36,34 @@ pub const JsonInput = union(JsonType) {
|
||||
object: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput),
|
||||
};
|
||||
|
||||
pub const ArrayIndex = enum(usize) {
|
||||
_,
|
||||
|
||||
pub const Slice = struct {
|
||||
/// Locates the elements of one JSON array inside the value list (`index`).
/// The simpler sibling of `ObjectEntry`: arrays carry no property names.
pub const ArraySlice = struct {
    /// Offset into `index` of the array's first element.
    start: usize,
    /// Number of elements in the array.
    len: usize,
};
|
||||
};
|
||||
|
||||
pub const ObjectIndex = enum(usize) {
|
||||
_,
|
||||
|
||||
pub const Entry = struct {
|
||||
/// Locates the members of one JSON object.
///
/// `property_idx` and `value_idx` are offsets of the first member; later
/// members are reached by advancing both offsets, so members must be stored
/// in order.
pub const ObjectEntry = struct {
    /// Number of key/value members in the object.
    len: usize,
    /// Offset of the first property name in the property string pool.
    property_idx: usize,
    /// Offset of the first member value in the value list (`index`).
    value_idx: usize,
};
|
||||
|
||||
/// Boolean parser switches, stored as a packed struct.
pub const Flags = packed struct {
    /// Consulted by `parse` when a comma is immediately followed by the end
    /// of an object. Off by default.
    allow_trailing_comma: bool = false,
};
|
||||
|
||||
/// Compile-time configuration; both fields are `comptime` and have defaults.
pub const Options = struct {
    /// Nesting limit. NOTE(review): presumably sizes the depth buffer used by
    /// `parse` — confirm against the full file.
    comptime max_depth: usize = 256,
    /// Parser switches; see `Flags`.
    comptime flags: Flags = Flags{},
};
|
||||
|
||||
index: std.MultiArrayList(JsonValue) = .{},
|
||||
string_index: StringPool = .empty,
|
||||
property_index: StringPool = .empty,
|
||||
|
||||
options: Options = .{},
|
||||
|
||||
@ -67,7 +71,7 @@ pub const init: Self = .{};
|
||||
|
||||
/// Releases everything this index owns: the value list and BOTH string
/// pools (string values and property names).
///
/// `allocator` is forwarded to each container's `deinit`; pass the same
/// allocator that was used when adding values or parsing.
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    self.index.deinit(allocator);
    // String values and property names live in separate pools; freeing only
    // one of them leaks the other.
    self.string_index.deinit(allocator);
    self.property_index.deinit(allocator);
}
|
||||
|
||||
fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize {
|
||||
@ -77,6 +81,12 @@ fn addNumber(self: *Self, allocator: std.mem.Allocator, number: f64) !usize {
|
||||
return idx;
|
||||
}
|
||||
|
||||
/// Interns `bytes` in the property-name pool and returns the resulting pool
/// handle converted to a plain integer.
///
/// Also reserves room for one more entry in `index`; nothing is appended to
/// `index` here. Fails if either the pool insertion or the reservation fails.
fn addProperty(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
    const handle = try self.property_index.add(allocator, bytes);
    try self.index.ensureUnusedCapacity(allocator, 1);
    const raw = @intFromEnum(handle);
    return raw;
}
|
||||
|
||||
fn addString(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usize {
|
||||
const stridx = try self.string_index.add(allocator, bytes);
|
||||
try self.index.ensureUnusedCapacity(allocator, 1);
|
||||
@ -86,15 +96,15 @@ fn addString(self: *Self, allocator: std.mem.Allocator, bytes: []const u8) !usiz
|
||||
}
|
||||
|
||||
fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.AutoArrayHashMapUnmanaged([]const u8, JsonInput)) !usize {
|
||||
var entry: ?ObjectIndex.Entry = null;
|
||||
var entry: ?ObjectEntry = null;
|
||||
|
||||
for (object.keys(), object.values(), 0..) |key, value, times| {
|
||||
const stridx = try self.string_index.add(allocator, key);
|
||||
const stridx = try self.property_index.add(allocator, key);
|
||||
try self.index.ensureUnusedCapacity(allocator, 1);
|
||||
const vidx = self.index.addOneAssumeCapacity();
|
||||
self.index.set(vidx, @unionInit(JsonValue, std.meta.activeTag(value), self.addValue(allocator, value)));
|
||||
if (times == 0) {
|
||||
entry = ObjectIndex.Entry{
|
||||
entry = ObjectEntry{
|
||||
.len = object.entries.len,
|
||||
.property_idx = stridx,
|
||||
.value_idx = vidx,
|
||||
@ -108,7 +118,7 @@ fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.AutoArrayHas
|
||||
self.index.set(idx, .{ .object = e });
|
||||
return idx;
|
||||
} else {
|
||||
self.index.set(idx, .{ .object = ObjectIndex.Entry{
|
||||
self.index.set(idx, .{ .object = ObjectEntry{
|
||||
.len = 0,
|
||||
.property_idx = 0,
|
||||
.value_idx = 0,
|
||||
@ -120,8 +130,8 @@ fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.AutoArrayHas
|
||||
fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
|
||||
try self.index.ensureUnusedCapacity(allocator, 1);
|
||||
const idx = self.index.addOneAssumeCapacity();
|
||||
const object: ObjectIndex.Entry = .{
|
||||
.property_idx = self.string_index.string_bytes.items.len,
|
||||
const object: ObjectEntry = .{
|
||||
.property_idx = self.property_index.string_bytes.items.len,
|
||||
.value_idx = self.index.len,
|
||||
.len = 0,
|
||||
};
|
||||
@ -130,13 +140,13 @@ fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
|
||||
}
|
||||
|
||||
fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usize {
|
||||
var entry: ?ArrayIndex.Slice = null;
|
||||
var entry: ?ArraySlice = null;
|
||||
for (array, 0..) |value, times| {
|
||||
try self.index.ensureUnusedCapacity(allocator, 1);
|
||||
const idx = self.index.addOneAssumeCapacity();
|
||||
self.index.set(idx, @unionInit(JsonValue, std.meta.activeTag(value), self.addValue(allocator, value)));
|
||||
if (times == 0) {
|
||||
entry = ArrayIndex.Slice{
|
||||
entry = ArraySlice{
|
||||
.start = idx,
|
||||
.len = array.len,
|
||||
};
|
||||
@ -148,7 +158,7 @@ fn addArray(self: *Self, allocator: std.mem.Allocator, array: []JsonInput) !usiz
|
||||
self.index.set(idx, .{ .array = e });
|
||||
return idx;
|
||||
} else {
|
||||
self.index.set(idx, .{ .array = ArrayIndex.Slice{
|
||||
self.index.set(idx, .{ .array = ArraySlice{
|
||||
.start = 0,
|
||||
.len = 0,
|
||||
} });
|
||||
@ -163,9 +173,16 @@ fn addBool(self: *Self, allocator: std.mem.Allocator, value: bool) !usize {
|
||||
return idx;
|
||||
}
|
||||
|
||||
/// Appends a JSON `null` to the value list and returns the index of the new
/// entry. Fails only if growing the list fails.
fn addNull(self: *Self, allocator: std.mem.Allocator) !usize {
    // `addOne` reserves one slot and hands back its index in a single call.
    const slot = try self.index.addOne(allocator);
    self.index.set(slot, JsonValue{ .null = {} });
    return slot;
}
|
||||
|
||||
fn addValue(self: *Self, allocator: std.mem.Allocator, value: JsonInput) !void {
|
||||
switch (value) {
|
||||
.null => {},
|
||||
.null => try self.addNull(allocator),
|
||||
.bool => try self.addBool(allocator, value.bool),
|
||||
.number => try self.addNumber(allocator, value.number),
|
||||
.string => try self.addString(allocator, value.string),
|
||||
@ -174,8 +191,8 @@ fn addValue(self: *Self, allocator: std.mem.Allocator, value: JsonInput) !void {
|
||||
}
|
||||
}
|
||||
|
||||
fn getString(self: *Self, index: []const u8) ?StringIndex {
|
||||
return self.string_index.string_table.get(index);
|
||||
/// Looks up `index` (the property name bytes) in the property pool's string
/// table and returns the table's answer, or null if the lookup yields none.
fn getProperty(self: *Self, index: []const u8) ?StringIndex {
    const table = &self.property_index.string_table;
    return table.get(index);
}
|
||||
|
||||
fn getNumber(self: *Self, index: usize) ?f64 {
|
||||
@ -200,7 +217,7 @@ fn getObject(self: *Self, allocator: std.mem.Allocator, index: usize) !?struct {
|
||||
const values = try allocator.alloc(usize, entry.object.len);
|
||||
|
||||
for (0..entry.object.len) |i| {
|
||||
const slice = StringIndex.slice(@enumFromInt(pidx), &self.string_index);
|
||||
const slice = StringIndex.slice(@enumFromInt(pidx), &self.property_index);
|
||||
keys[i] = @enumFromInt(pidx);
|
||||
values[i] = vidx;
|
||||
pidx += slice.len + 1;
|
||||
@ -265,7 +282,7 @@ fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput
|
||||
|
||||
try obj.ensureTotalCapacity(allocator, entry.object.len);
|
||||
for (0..entry.object.len) |_| {
|
||||
const key = StringIndex.slice(@enumFromInt(kidx), &self.string_index);
|
||||
const key = StringIndex.slice(@enumFromInt(kidx), &self.property_index);
|
||||
const val = (try self.getValue(allocator, vidx)).?;
|
||||
|
||||
obj.putAssumeCapacityNoClobber(key, val);
|
||||
@ -289,7 +306,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
|
||||
var cycles: usize = 0;
|
||||
|
||||
while (it.next()) |token| {
|
||||
switch (token.type) {
|
||||
flag: switch (token.type) {
|
||||
.object_begin => {
|
||||
std.debug.print("{{", .{});
|
||||
const obj_idx = try self.addEmptyObject(allocator);
|
||||
@ -302,9 +319,9 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
|
||||
|
||||
switch (data) {
|
||||
.object => |valid_entry| {
|
||||
const new_data = ObjectIndex.Entry{
|
||||
const new_data = ObjectEntry{
|
||||
.len = valid_entry.len + 1,
|
||||
.property_idx = self.string_index.string_table.size,
|
||||
.property_idx = self.property_index.string_table.size,
|
||||
.value_idx = obj_idx,
|
||||
};
|
||||
self.index.set(depth_buf[cycles - 1], .{ .object = new_data });
|
||||
@ -320,7 +337,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
|
||||
const keys, const vals = (try self.getObject(allocator, depth_buf[cycles - 1])).?;
|
||||
std.debug.print("\nfound {d} keys and {d} values\n", .{ keys.len, vals.len });
|
||||
for (keys, vals) |k, v| {
|
||||
const key = k.slice(&self.string_index);
|
||||
const key = k.slice(&self.property_index);
|
||||
const val = self.index.get(v);
|
||||
std.debug.print(
|
||||
\\"{s}": {s},
|
||||
@ -328,14 +345,31 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
|
||||
}
|
||||
std.debug.print("}}", .{});
|
||||
},
|
||||
.string => {
|
||||
const idx = try self.addString(allocator, token.value.?.string);
|
||||
.property => {
|
||||
_ = try self.addProperty(allocator, token.value.?.string);
|
||||
const last_obj = self.index.get(depth_buf[cycles - 1]);
|
||||
if (cycles > 0) {
|
||||
const stridx = self.index.get(idx).string;
|
||||
self.index.set(depth_buf[cycles - 1], .{ .object = ObjectIndex.Entry{
|
||||
self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
|
||||
.len = last_obj.object.len + 1,
|
||||
.property_idx = if (cycles > 1) @intFromEnum(stridx) else last_obj.object.property_idx,
|
||||
.property_idx = last_obj.object.property_idx,
|
||||
.value_idx = last_obj.object.value_idx,
|
||||
} });
|
||||
continue;
|
||||
}
|
||||
},
|
||||
.string => {
|
||||
// maybe we could dismiss the while loop altogether and just do this
|
||||
// the whole time
|
||||
if (it.peek()) |next| if (next.type == .colon) {
|
||||
continue :flag TokenType.property;
|
||||
};
|
||||
|
||||
_ = try self.addString(allocator, token.value.?.string);
|
||||
const last_obj = self.index.get(depth_buf[cycles - 1]);
|
||||
if (cycles > 0) {
|
||||
self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
|
||||
.len = last_obj.object.len,
|
||||
.property_idx = last_obj.object.property_idx,
|
||||
.value_idx = last_obj.object.value_idx,
|
||||
} });
|
||||
continue;
|
||||
@ -345,7 +379,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
|
||||
_ = try self.addNumber(allocator, token.value.?.number);
|
||||
const last_obj = self.index.get(depth_buf[cycles - 1]);
|
||||
if (cycles > 0) {
|
||||
self.index.set(depth_buf[cycles - 1], .{ .object = ObjectIndex.Entry{
|
||||
self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
|
||||
.len = last_obj.object.len,
|
||||
.property_idx = last_obj.object.property_idx,
|
||||
.value_idx = last_obj.object.value_idx,
|
||||
@ -357,7 +391,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
|
||||
_ = try self.addBool(allocator, if (token.type == .true) true else false);
|
||||
const last_obj = self.index.get(depth_buf[cycles - 1]);
|
||||
if (cycles > 0) {
|
||||
self.index.set(depth_buf[cycles - 1], .{ .object = ObjectIndex.Entry{
|
||||
self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
|
||||
.len = last_obj.object.len,
|
||||
.property_idx = last_obj.object.property_idx,
|
||||
.value_idx = last_obj.object.value_idx,
|
||||
@ -365,6 +399,23 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !void {
|
||||
continue;
|
||||
}
|
||||
},
|
||||
.null => {
|
||||
_ = try self.addNull(allocator);
|
||||
const last_obj = self.index.get(depth_buf[cycles - 1]);
|
||||
if (cycles > 0) {
|
||||
self.index.set(depth_buf[cycles - 1], .{ .object = ObjectEntry{
|
||||
.len = last_obj.object.len,
|
||||
.property_idx = last_obj.object.property_idx,
|
||||
.value_idx = last_obj.object.value_idx,
|
||||
} });
|
||||
continue;
|
||||
}
|
||||
},
|
||||
.comma => {
|
||||
if (it.peek()) |tc| if (tc.type == .object_end and self.options.flags.allow_trailing_comma) {
|
||||
return error.TrailingComma;
|
||||
};
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
|
||||
@ -384,10 +435,11 @@ test parse {
|
||||
var tokenizer = try Tokenizer.init(allocator, blk: {
|
||||
const json =
|
||||
\\ {
|
||||
\\ "key": 123,
|
||||
\\ "key2": false,
|
||||
\\ "key": "hello",
|
||||
\\ "key2": "world",
|
||||
\\ "key3": true,
|
||||
\\ "key4": null
|
||||
\\ "key4": null,
|
||||
\\ "key5": 123
|
||||
\\ }
|
||||
;
|
||||
break :blk json;
|
@ -232,14 +232,14 @@ pub fn nextNumber(self: *Self) Error!Token {
|
||||
return error.BadNumber; // no floating point
|
||||
};
|
||||
|
||||
return self.commit(Token{
|
||||
return Token{
|
||||
.type = .number,
|
||||
.value = .{
|
||||
.number = float,
|
||||
},
|
||||
.start = start,
|
||||
.end = self.currentPosition(),
|
||||
});
|
||||
};
|
||||
};
|
||||
|
||||
while (self.matchCharRange('0', '9') != null) {}
|
||||
@ -248,14 +248,14 @@ pub fn nextNumber(self: *Self) Error!Token {
|
||||
return error.BadNumber; // floating point
|
||||
};
|
||||
|
||||
return self.commit(Token{
|
||||
return .{
|
||||
.type = .number,
|
||||
.value = .{
|
||||
.number = float,
|
||||
},
|
||||
.start = start,
|
||||
.end = self.currentPosition(),
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
/// Parse an identifier token
|
||||
@ -284,32 +284,32 @@ pub fn nextIdentifier(self: *Self) Error!Token {
|
||||
|
||||
// true
|
||||
if (std.mem.eql(u8, ident, "true")) {
|
||||
return self.commit(Token{
|
||||
return .{
|
||||
.type = .true,
|
||||
.value = null,
|
||||
.start = start,
|
||||
.end = self.currentPosition(),
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
// false
|
||||
if (std.mem.eql(u8, ident, "false")) {
|
||||
return self.commit(Token{
|
||||
return .{
|
||||
.type = .false,
|
||||
.value = null,
|
||||
.start = start,
|
||||
.end = self.currentPosition(),
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
// null
|
||||
if (std.mem.eql(u8, ident, "null")) {
|
||||
return self.commit(Token{
|
||||
return .{
|
||||
.type = .null,
|
||||
.value = null,
|
||||
.start = start,
|
||||
.end = self.currentPosition(),
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
unreachable;
|
||||
@ -350,17 +350,17 @@ pub fn nextToken(self: *Self) Error!Token {
|
||||
':' => .colon,
|
||||
'"' => {
|
||||
self.rollback();
|
||||
return (self.nextString());
|
||||
return self.commit(self.nextString());
|
||||
},
|
||||
else => {
|
||||
self.rollback();
|
||||
// Try different token types in order of precedence
|
||||
if (std.ascii.isDigit(c) or c == '-') {
|
||||
return (self.nextNumber());
|
||||
return self.commit(self.nextNumber());
|
||||
}
|
||||
|
||||
if (std.ascii.isAlphabetic(c)) {
|
||||
return (self.nextIdentifier());
|
||||
return self.commit(self.nextIdentifier());
|
||||
}
|
||||
|
||||
return error.InvalidSyntax;
|
||||
@ -392,12 +392,12 @@ pub fn nextString(self: *Self) Error!Token {
|
||||
|
||||
switch (self.lastChar()) {
|
||||
'"' => {
|
||||
return self.commit(Token{
|
||||
return .{
|
||||
.type = .string,
|
||||
.value = .{ .string = try buffer.toOwnedSlice() },
|
||||
.start = start,
|
||||
.end = self.currentPosition(),
|
||||
});
|
||||
};
|
||||
},
|
||||
'\\' => {
|
||||
self.advance(1);
|
||||
@ -415,12 +415,12 @@ pub fn nextString(self: *Self) Error!Token {
|
||||
var code_points: [4]u8 = undefined;
|
||||
inline for (0..4) |i| {
|
||||
if (self.endOfInput())
|
||||
return self.commit(Token{
|
||||
return .{
|
||||
.type = .eof,
|
||||
.value = null,
|
||||
.start = start,
|
||||
.end = start + 1,
|
||||
});
|
||||
};
|
||||
self.advance(1);
|
||||
code_points[i] = self.lastChar();
|
||||
}
|
||||
@ -446,6 +446,7 @@ pub fn nextString(self: *Self) Error!Token {
|
||||
pub const Iterator = struct {
|
||||
tokenizer: *Self,
|
||||
/// Consumes and returns the next token, or null at end of input.
/// A tokenizer error is also reported as null.
pub fn next(it: *Iterator) ?Token {
    const tokenizer = it.tokenizer;
    // Runs on every exit path, so the cursor always ends up past any
    // whitespace that follows the returned token.
    defer tokenizer.skipWhitespace();

    if (tokenizer.endOfInput()) {
        return null;
    }
    return tokenizer.nextToken() catch null;
}
|
||||
@ -455,6 +456,12 @@ pub const Iterator = struct {
|
||||
it.tokenizer.frame = 0;
|
||||
it.tokenizer.prev_token = null;
|
||||
}
|
||||
/// Returns the token the following `next()` call would produce, without
/// consuming it. Yields null at end of input or when tokenizing fails.
///
/// The tokenizer's `position`, `frame` and `prev_token` are captured before
/// the lookahead and written back afterwards, so the cursor is left exactly
/// where it was. (Stepping `position` back by one byte does not undo a
/// token that is longer than one byte or followed by whitespace.)
///
/// NOTE(review): assumes those three fields are all the cursor state that
/// `nextToken` mutates — confirm against the tokenizer definition. A peeked
/// string token presumably still allocates its value; confirm who frees it.
pub fn peek(it: *Iterator) ?Token {
    const tokenizer = it.tokenizer;

    const saved_position = tokenizer.position;
    const saved_frame = tokenizer.frame;
    const saved_prev_token = tokenizer.prev_token;
    defer {
        tokenizer.position = saved_position;
        tokenizer.frame = saved_frame;
        tokenizer.prev_token = saved_prev_token;
    }

    if (tokenizer.endOfInput()) return null;
    return tokenizer.nextToken() catch null;
}
|
||||
};
|
||||
|
||||
/// iterator
|
||||
|
Loading…
x
Reference in New Issue
Block a user