a
This commit is contained in:
parent
7372e0092b
commit
11b101d3e8
216
language.zig
216
language.zig
@ -1,6 +1,7 @@
|
||||
const std = @import("std");
|
||||
const Tokenizer = @import("tokenizer.zig");
|
||||
const TokenType = Tokenizer.TokenType;
|
||||
const Token = Tokenizer.Token;
|
||||
const StringPool = @import("strings.zig");
|
||||
const StringIndex = StringPool.StringIndex;
|
||||
const assert = std.debug.assert;
|
||||
@ -156,7 +157,7 @@ fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.AutoArrayHas
|
||||
self.index.set(idx, .{ .object = ObjectEntry{
|
||||
.len = 0,
|
||||
.property_idx = 0,
|
||||
.value_idx = 0,
|
||||
.value_idx = 1,
|
||||
} });
|
||||
return idx;
|
||||
}
|
||||
@ -167,7 +168,7 @@ fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
|
||||
const idx = self.index.addOneAssumeCapacity();
|
||||
const object: ObjectEntry = .{
|
||||
.property_idx = self.property_index.string_bytes.items.len,
|
||||
.value_idx = self.index.len,
|
||||
.value_idx = self.index.len + 1,
|
||||
.len = 0,
|
||||
};
|
||||
self.index.set(idx, .{ .object = object });
|
||||
@ -299,43 +300,50 @@ fn getNull(self: *Self, index: usize) ?void {
|
||||
return entry.null;
|
||||
}
|
||||
|
||||
fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput {
|
||||
fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize, offset: usize) !struct { ?JsonInput, usize } {
|
||||
const entry = self.index.get(index);
|
||||
switch (entry) {
|
||||
.null => return .{ .null = {} },
|
||||
.bool => return .{ .bool = entry.bool },
|
||||
.number => return .{ .number = entry.number },
|
||||
.null => return .{ .{ .null = {} }, 1 },
|
||||
.bool => return .{ .{ .bool = entry.bool }, 1 },
|
||||
.number => return .{ .{ .number = entry.number }, 1 },
|
||||
.string => {
|
||||
const str = entry.string.slice(&self.string_index);
|
||||
return .{ .string = str };
|
||||
return .{ .{ .string = str }, 1 };
|
||||
},
|
||||
.array => {
|
||||
const res = try allocator.alloc(JsonInput, entry.array.len);
|
||||
var idx = entry.array.start;
|
||||
var offset_calc: usize = offset;
|
||||
|
||||
for (0..entry.array.len) |i| {
|
||||
if (try self.getValue(allocator, idx)) |v| {
|
||||
res[i] = v;
|
||||
idx += 1;
|
||||
} else unreachable;
|
||||
const val, const step = try self.getValue(allocator, idx, offset_calc);
|
||||
res[i] = val.?;
|
||||
idx += step;
|
||||
}
|
||||
return .{ .array = res };
|
||||
offset_calc += entry.array.len;
|
||||
|
||||
return .{ .{ .array = res }, offset_calc };
|
||||
},
|
||||
.object => {
|
||||
var kidx = entry.object.property_idx;
|
||||
var vidx = entry.object.value_idx;
|
||||
var obj: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
|
||||
var offset_calc: usize = offset;
|
||||
|
||||
try obj.ensureTotalCapacity(allocator, entry.object.len);
|
||||
for (0..entry.object.len) |_| {
|
||||
const key = StringIndex.slice(@enumFromInt(kidx), &self.property_index);
|
||||
const val = (try self.getValue(allocator, vidx)).?;
|
||||
const slice = StringIndex.slice(@enumFromInt(kidx), &self.property_index);
|
||||
const val, const step = try self.getValue(allocator, vidx, offset_calc);
|
||||
kidx += slice.len + 1;
|
||||
vidx += step;
|
||||
|
||||
obj.putAssumeCapacityNoClobber(key, val);
|
||||
kidx += key.len + 1;
|
||||
vidx += 1;
|
||||
std.debug.print("putting {s} -> {d}\n", .{ slice, vidx });
|
||||
obj.putAssumeCapacityNoClobber(slice, val.?);
|
||||
}
|
||||
|
||||
return .{ .object = obj };
|
||||
offset_calc += entry.object.len;
|
||||
|
||||
return .{ .{ .object = obj }, offset_calc };
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -347,81 +355,144 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
||||
var it = tokenizer.iterator();
|
||||
|
||||
const root = try self.addEmptyObject(allocator);
|
||||
var work_query = try allocator.alloc(usize, self.options.max_depth);
|
||||
var cycles: usize = 0;
|
||||
defer std.debug.print("idx: {s}\n", .{
|
||||
@tagName(self.index.get(self.index.get(root).object.value_idx)),
|
||||
});
|
||||
|
||||
//defer assert(cycles == 0);
|
||||
var token = it.next() orelse
|
||||
return root;
|
||||
|
||||
while (it.next()) |token| {
|
||||
defer tokenizer.skipWhitespace();
|
||||
|
||||
std.debug.print("token: {s}\n", .{@tagName(token.type)});
|
||||
var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);
|
||||
|
||||
flag: switch (token.type) {
|
||||
.array_end => {
|
||||
cycles -= 1;
|
||||
},
|
||||
.object_end => {
|
||||
cycles -= 1;
|
||||
},
|
||||
.array_begin => {
|
||||
const idx = try self.addEmptyArray(allocator);
|
||||
work_query[cycles] = idx;
|
||||
cycles += 1;
|
||||
},
|
||||
.object_begin => {
|
||||
if (cycles == 0) {
|
||||
self.index.set(root, .{ .object = .{
|
||||
.len = 0,
|
||||
.property_idx = self.property_index.string_table.size,
|
||||
.value_idx = self.index.len,
|
||||
} });
|
||||
work_query[cycles] = root;
|
||||
} else {
|
||||
const obj_idx = try self.addEmptyObject(allocator);
|
||||
work_query[cycles] = obj_idx;
|
||||
}
|
||||
cycles += 1;
|
||||
.eof => {
|
||||
assert(query.slice().len == 0);
|
||||
return root;
|
||||
},
|
||||
.property => {
|
||||
const scope_idx = work_query[cycles - 1];
|
||||
defer tokenizer.skipWhitespace();
|
||||
|
||||
const scope_idx = query.get(query.len - 1);
|
||||
switch (self.index.get(scope_idx)) {
|
||||
.object => |scope| {
|
||||
//std.debug.print("depth: {d}\n", .{cycles});
|
||||
_ = try self.addProperty(allocator, token.value.?.string);
|
||||
const pidx = try self.addProperty(allocator, token.value.?.string);
|
||||
self.index.set(scope_idx, .{ .object = ObjectEntry{
|
||||
.len = scope.len + 1,
|
||||
.property_idx = scope.property_idx,
|
||||
.property_idx = if (scope.len == 0) pidx else scope.property_idx,
|
||||
.value_idx = scope.value_idx,
|
||||
} });
|
||||
},
|
||||
else => unreachable,
|
||||
else => return error.InvalidSyntax,
|
||||
}
|
||||
|
||||
const next = it.next() orelse return error.InvalidSyntax;
|
||||
token = next;
|
||||
switch (next.type) {
|
||||
.colon => {
|
||||
token = it.next() orelse return error.InvalidSyntax;
|
||||
continue :flag token.type;
|
||||
},
|
||||
else => continue :flag next.type,
|
||||
// else => return error.InvalidSyntax,
|
||||
}
|
||||
},
|
||||
.object_begin => {
|
||||
defer tokenizer.skipWhitespace();
|
||||
|
||||
if (query.slice().len == 0) {
|
||||
try query.ensureUnusedCapacity(1);
|
||||
const ptr = query.addOneAssumeCapacity();
|
||||
ptr.* = root;
|
||||
self.index.set(root, .{ .object = ObjectEntry{
|
||||
.len = 0,
|
||||
.property_idx = self.property_index.string_bytes.items.len,
|
||||
.value_idx = 1,
|
||||
} });
|
||||
} else {
|
||||
const idx_ptr = try query.addOne();
|
||||
idx_ptr.* = try self.addEmptyObject(allocator);
|
||||
self.index.set(idx_ptr.*, .{ .object = ObjectEntry{
|
||||
.len = 0,
|
||||
.property_idx = self.property_index.string_bytes.items.len,
|
||||
.value_idx = self.index.len,
|
||||
} });
|
||||
}
|
||||
|
||||
const next = it.next() orelse return error.InvalidSyntax;
|
||||
token = next;
|
||||
switch (next.type) {
|
||||
.string => continue :flag .property,
|
||||
else => return error.InvalidSyntax,
|
||||
}
|
||||
},
|
||||
.object_end => {
|
||||
defer tokenizer.skipWhitespace();
|
||||
assert(query.pop() != null);
|
||||
|
||||
const next = it.next() orelse
|
||||
return root;
|
||||
token = next;
|
||||
switch (next.type) {
|
||||
.comma => continue :flag .comma,
|
||||
.object_end, .array_end => |t| continue :flag t,
|
||||
else => return error.InvalidSyntax,
|
||||
}
|
||||
},
|
||||
.true, .false => {
|
||||
defer tokenizer.skipWhitespace();
|
||||
|
||||
_ = try self.addBool(allocator, if (token.type == .true) true else false);
|
||||
|
||||
const next = it.next() orelse return error.InvalidSyntax;
|
||||
token = next;
|
||||
switch (next.type) {
|
||||
.comma => continue :flag .comma,
|
||||
.object_end => continue :flag .object_end,
|
||||
else => return error.InvalidSyntax,
|
||||
}
|
||||
},
|
||||
.string => {
|
||||
if (it.peek()) |next| if (next.type == .colon) {
|
||||
defer tokenizer.skipWhitespace();
|
||||
|
||||
const next = it.next() orelse return error.InvalidSyntax;
|
||||
switch (next.type) {
|
||||
.colon => {
|
||||
continue :flag .property;
|
||||
};
|
||||
},
|
||||
else => |t| {
|
||||
_ = try self.addString(allocator, token.value.?.string);
|
||||
|
||||
token = next;
|
||||
continue :flag t;
|
||||
},
|
||||
}
|
||||
},
|
||||
.number => {
|
||||
defer tokenizer.skipWhitespace();
|
||||
|
||||
_ = try self.addNumber(allocator, token.value.?.number);
|
||||
|
||||
const next = it.next() orelse return error.InvalidSyntax;
|
||||
token = next;
|
||||
switch (next.type) {
|
||||
.comma => continue :flag .comma,
|
||||
.object_end => continue :flag .object_end,
|
||||
else => return error.InvalidSyntax,
|
||||
}
|
||||
},
|
||||
.true, .false => {
|
||||
_ = try self.addBool(allocator, if (token.type == .true) true else false);
|
||||
},
|
||||
.null => {
|
||||
_ = try self.addNull(allocator);
|
||||
},
|
||||
.comma => if (it.peek()) |t| {
|
||||
if (t.type == .object_end) {
|
||||
.comma => {
|
||||
if (!self.options.flags.allow_trailing_comma) {
|
||||
return error.TrailingComma;
|
||||
const next = it.next() orelse return error.InvalidSyntax;
|
||||
token = next;
|
||||
switch (next.type) {
|
||||
.object_end, .array_end => return error.TrailingComma,
|
||||
else => continue :flag token.type,
|
||||
}
|
||||
}
|
||||
},
|
||||
else => continue,
|
||||
}
|
||||
else => {
|
||||
std.debug.print("token: {s}\n", .{@tagName(token.type)});
|
||||
},
|
||||
}
|
||||
|
||||
return root;
|
||||
@ -439,10 +510,13 @@ test parse {
|
||||
var tokenizer = try Tokenizer.init(allocator, blk: {
|
||||
const json =
|
||||
\\ {
|
||||
\\ "lazy": true,
|
||||
\\ "bio": "cool",
|
||||
\\ "age": 15,
|
||||
\\ "name": "yuzu",
|
||||
\\ "dislikes": [["Math", 3], ["Sports", 1]],
|
||||
\\ "age": 15
|
||||
\\ "admin": true,
|
||||
\\ "address": {
|
||||
\\ "lorem": "ipsum"
|
||||
\\ }
|
||||
\\ }
|
||||
;
|
||||
break :blk json;
|
||||
@ -450,7 +524,7 @@ test parse {
|
||||
|
||||
const root = blk: {
|
||||
const idx = try parse(&self, &tokenizer);
|
||||
const val = (try getValue(&self, allocator, idx)).?;
|
||||
const val, _ = try getValue(&self, allocator, idx, 0);
|
||||
break :blk val;
|
||||
};
|
||||
|
||||
|
@ -58,18 +58,11 @@ pub const Token = struct {
|
||||
pub const Self = @This();
|
||||
|
||||
text: []const u8,
|
||||
position: usize,
|
||||
max_position: usize,
|
||||
stack: []usize,
|
||||
frame: usize,
|
||||
allocator: std.mem.Allocator,
|
||||
|
||||
prev_token: ?Token = null,
|
||||
|
||||
pub fn pushBack(self: *Self, token: Token) void {
|
||||
self.prev_token = token;
|
||||
}
|
||||
|
||||
/// Initialize a new tokenizer
|
||||
pub fn init(allocator: std.mem.Allocator, text: []const u8) std.mem.Allocator.Error!Self {
|
||||
const stack = try allocator.alloc(usize, 0x100);
|
||||
@ -77,7 +70,6 @@ pub fn init(allocator: std.mem.Allocator, text: []const u8) std.mem.Allocator.Er
|
||||
@memset(stack, 0);
|
||||
return .{
|
||||
.text = text,
|
||||
.position = 0,
|
||||
.max_position = 0,
|
||||
.stack = stack,
|
||||
.frame = 0,
|
||||
@ -201,6 +193,14 @@ pub fn matchCharRange(self: *Self, low: u8, high: u8) ?void {
|
||||
self.advance(1);
|
||||
}
|
||||
|
||||
pub fn anyChar(self: *Self) ?u8 {
|
||||
if (self.endOfInput())
|
||||
return null;
|
||||
const char = self.text[self.currentPosition()];
|
||||
self.advance(1);
|
||||
return char;
|
||||
}
|
||||
|
||||
// ========== Token Extraction ==========
|
||||
|
||||
fn extractSlice(self: *Self, start: usize) []const u8 {
|
||||
@ -319,35 +319,33 @@ pub fn nextIdentifier(self: *Self) Error!Token {
|
||||
/// Get the next token from the input
|
||||
/// WARNING: this function eats whitespaces
|
||||
pub fn nextToken(self: *Self) Error!Token {
|
||||
if (self.prev_token) |tok| {
|
||||
self.prev_token = null;
|
||||
return tok;
|
||||
}
|
||||
|
||||
const start = try self.pushFrame();
|
||||
errdefer self.popFrame();
|
||||
|
||||
self.skipWhitespace();
|
||||
|
||||
if (self.endOfInput()) {
|
||||
return Token{
|
||||
// Fall back to single character symbol
|
||||
const c = self.anyChar() orelse return .{
|
||||
.type = .eof,
|
||||
.value = null,
|
||||
.start = start,
|
||||
.end = start,
|
||||
};
|
||||
}
|
||||
|
||||
self.advance(1);
|
||||
// Fall back to single character symbol
|
||||
const c = self.lastChar();
|
||||
|
||||
const symbol_t: TokenType = switch (c) {
|
||||
'{' => .object_begin,
|
||||
'}' => .object_end,
|
||||
'[' => .array_begin,
|
||||
']' => .array_end,
|
||||
',' => .comma,
|
||||
',' => {
|
||||
self.skipWhitespace();
|
||||
return self.commit(Token{
|
||||
.type = .comma,
|
||||
.value = null,
|
||||
.end = start + 1,
|
||||
.start = start,
|
||||
});
|
||||
},
|
||||
':' => .colon,
|
||||
'"' => {
|
||||
self.rollback();
|
||||
@ -449,31 +447,20 @@ pub const Iterator = struct {
|
||||
|
||||
pub fn next(it: *Iterator) ?Token {
|
||||
defer it.tokenizer.skipWhitespace();
|
||||
if (it.tokenizer.endOfInput()) return null;
|
||||
return it.tokenizer.nextToken() catch null;
|
||||
if (it.tokenizer.endOfInput()) {
|
||||
std.debug.print("got eof\n", .{});
|
||||
return null;
|
||||
}
|
||||
return it.tokenizer.nextToken() catch |err| {
|
||||
std.debug.print("got err: {s}\n", .{@errorName(err)});
|
||||
return null;
|
||||
};
|
||||
}
|
||||
|
||||
pub fn reset(it: *Iterator) void {
|
||||
it.tokenizer.position = 0;
|
||||
it.tokenizer.max_position = 0;
|
||||
it.tokenizer.frame = 0;
|
||||
it.tokenizer.prev_token = null;
|
||||
}
|
||||
|
||||
/// nasty trick
|
||||
pub fn peek(it: *Iterator) ?Token {
|
||||
const frame = it.tokenizer.frame;
|
||||
const pos = it.tokenizer.position;
|
||||
const prev = it.tokenizer.prev_token;
|
||||
const max_pos = it.tokenizer.max_position;
|
||||
defer {
|
||||
it.tokenizer.position = pos;
|
||||
it.tokenizer.frame = frame;
|
||||
it.tokenizer.max_position = max_pos;
|
||||
it.tokenizer.prev_token = prev;
|
||||
}
|
||||
if (it.tokenizer.endOfInput()) return null;
|
||||
return it.tokenizer.nextToken() catch null;
|
||||
}
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user