a
This commit is contained in:
parent
7372e0092b
commit
11b101d3e8
254
language.zig
254
language.zig
@ -1,6 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const Tokenizer = @import("tokenizer.zig");
|
const Tokenizer = @import("tokenizer.zig");
|
||||||
const TokenType = Tokenizer.TokenType;
|
const TokenType = Tokenizer.TokenType;
|
||||||
|
const Token = Tokenizer.Token;
|
||||||
const StringPool = @import("strings.zig");
|
const StringPool = @import("strings.zig");
|
||||||
const StringIndex = StringPool.StringIndex;
|
const StringIndex = StringPool.StringIndex;
|
||||||
const assert = std.debug.assert;
|
const assert = std.debug.assert;
|
||||||
@ -156,7 +157,7 @@ fn addObject(self: *Self, allocator: std.mem.Allocator, object: std.AutoArrayHas
|
|||||||
self.index.set(idx, .{ .object = ObjectEntry{
|
self.index.set(idx, .{ .object = ObjectEntry{
|
||||||
.len = 0,
|
.len = 0,
|
||||||
.property_idx = 0,
|
.property_idx = 0,
|
||||||
.value_idx = 0,
|
.value_idx = 1,
|
||||||
} });
|
} });
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
@ -167,7 +168,7 @@ fn addEmptyObject(self: *Self, allocator: std.mem.Allocator) !usize {
|
|||||||
const idx = self.index.addOneAssumeCapacity();
|
const idx = self.index.addOneAssumeCapacity();
|
||||||
const object: ObjectEntry = .{
|
const object: ObjectEntry = .{
|
||||||
.property_idx = self.property_index.string_bytes.items.len,
|
.property_idx = self.property_index.string_bytes.items.len,
|
||||||
.value_idx = self.index.len,
|
.value_idx = self.index.len + 1,
|
||||||
.len = 0,
|
.len = 0,
|
||||||
};
|
};
|
||||||
self.index.set(idx, .{ .object = object });
|
self.index.set(idx, .{ .object = object });
|
||||||
@ -299,43 +300,50 @@ fn getNull(self: *Self, index: usize) ?void {
|
|||||||
return entry.null;
|
return entry.null;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize) !?JsonInput {
|
fn getValue(self: *Self, allocator: std.mem.Allocator, index: usize, offset: usize) !struct { ?JsonInput, usize } {
|
||||||
const entry = self.index.get(index);
|
const entry = self.index.get(index);
|
||||||
switch (entry) {
|
switch (entry) {
|
||||||
.null => return .{ .null = {} },
|
.null => return .{ .{ .null = {} }, 1 },
|
||||||
.bool => return .{ .bool = entry.bool },
|
.bool => return .{ .{ .bool = entry.bool }, 1 },
|
||||||
.number => return .{ .number = entry.number },
|
.number => return .{ .{ .number = entry.number }, 1 },
|
||||||
.string => {
|
.string => {
|
||||||
const str = entry.string.slice(&self.string_index);
|
const str = entry.string.slice(&self.string_index);
|
||||||
return .{ .string = str };
|
return .{ .{ .string = str }, 1 };
|
||||||
},
|
},
|
||||||
.array => {
|
.array => {
|
||||||
const res = try allocator.alloc(JsonInput, entry.array.len);
|
const res = try allocator.alloc(JsonInput, entry.array.len);
|
||||||
var idx = entry.array.start;
|
var idx = entry.array.start;
|
||||||
|
var offset_calc: usize = offset;
|
||||||
|
|
||||||
for (0..entry.array.len) |i| {
|
for (0..entry.array.len) |i| {
|
||||||
if (try self.getValue(allocator, idx)) |v| {
|
const val, const step = try self.getValue(allocator, idx, offset_calc);
|
||||||
res[i] = v;
|
res[i] = val.?;
|
||||||
idx += 1;
|
idx += step;
|
||||||
} else unreachable;
|
|
||||||
}
|
}
|
||||||
return .{ .array = res };
|
offset_calc += entry.array.len;
|
||||||
|
|
||||||
|
return .{ .{ .array = res }, offset_calc };
|
||||||
},
|
},
|
||||||
.object => {
|
.object => {
|
||||||
var kidx = entry.object.property_idx;
|
var kidx = entry.object.property_idx;
|
||||||
var vidx = entry.object.value_idx;
|
var vidx = entry.object.value_idx;
|
||||||
var obj: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
|
var obj: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
|
||||||
|
var offset_calc: usize = offset;
|
||||||
|
|
||||||
try obj.ensureTotalCapacity(allocator, entry.object.len);
|
try obj.ensureTotalCapacity(allocator, entry.object.len);
|
||||||
for (0..entry.object.len) |_| {
|
for (0..entry.object.len) |_| {
|
||||||
const key = StringIndex.slice(@enumFromInt(kidx), &self.property_index);
|
const slice = StringIndex.slice(@enumFromInt(kidx), &self.property_index);
|
||||||
const val = (try self.getValue(allocator, vidx)).?;
|
const val, const step = try self.getValue(allocator, vidx, offset_calc);
|
||||||
|
kidx += slice.len + 1;
|
||||||
|
vidx += step;
|
||||||
|
|
||||||
obj.putAssumeCapacityNoClobber(key, val);
|
std.debug.print("putting {s} -> {d}\n", .{ slice, vidx });
|
||||||
kidx += key.len + 1;
|
obj.putAssumeCapacityNoClobber(slice, val.?);
|
||||||
vidx += 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return .{ .object = obj };
|
offset_calc += entry.object.len;
|
||||||
|
|
||||||
|
return .{ .{ .object = obj }, offset_calc };
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -347,81 +355,144 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
var it = tokenizer.iterator();
|
var it = tokenizer.iterator();
|
||||||
|
|
||||||
const root = try self.addEmptyObject(allocator);
|
const root = try self.addEmptyObject(allocator);
|
||||||
var work_query = try allocator.alloc(usize, self.options.max_depth);
|
defer std.debug.print("idx: {s}\n", .{
|
||||||
var cycles: usize = 0;
|
@tagName(self.index.get(self.index.get(root).object.value_idx)),
|
||||||
|
});
|
||||||
|
|
||||||
//defer assert(cycles == 0);
|
var token = it.next() orelse
|
||||||
|
return root;
|
||||||
|
|
||||||
while (it.next()) |token| {
|
var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);
|
||||||
defer tokenizer.skipWhitespace();
|
|
||||||
|
|
||||||
std.debug.print("token: {s}\n", .{@tagName(token.type)});
|
flag: switch (token.type) {
|
||||||
|
.eof => {
|
||||||
|
assert(query.slice().len == 0);
|
||||||
|
return root;
|
||||||
|
},
|
||||||
|
.property => {
|
||||||
|
defer tokenizer.skipWhitespace();
|
||||||
|
|
||||||
flag: switch (token.type) {
|
const scope_idx = query.get(query.len - 1);
|
||||||
.array_end => {
|
switch (self.index.get(scope_idx)) {
|
||||||
cycles -= 1;
|
.object => |scope| {
|
||||||
},
|
const pidx = try self.addProperty(allocator, token.value.?.string);
|
||||||
.object_end => {
|
self.index.set(scope_idx, .{ .object = ObjectEntry{
|
||||||
cycles -= 1;
|
.len = scope.len + 1,
|
||||||
},
|
.property_idx = if (scope.len == 0) pidx else scope.property_idx,
|
||||||
.array_begin => {
|
.value_idx = scope.value_idx,
|
||||||
const idx = try self.addEmptyArray(allocator);
|
|
||||||
work_query[cycles] = idx;
|
|
||||||
cycles += 1;
|
|
||||||
},
|
|
||||||
.object_begin => {
|
|
||||||
if (cycles == 0) {
|
|
||||||
self.index.set(root, .{ .object = .{
|
|
||||||
.len = 0,
|
|
||||||
.property_idx = self.property_index.string_table.size,
|
|
||||||
.value_idx = self.index.len,
|
|
||||||
} });
|
} });
|
||||||
work_query[cycles] = root;
|
},
|
||||||
} else {
|
else => return error.InvalidSyntax,
|
||||||
const obj_idx = try self.addEmptyObject(allocator);
|
}
|
||||||
work_query[cycles] = obj_idx;
|
|
||||||
}
|
const next = it.next() orelse return error.InvalidSyntax;
|
||||||
cycles += 1;
|
token = next;
|
||||||
},
|
switch (next.type) {
|
||||||
.property => {
|
.colon => {
|
||||||
const scope_idx = work_query[cycles - 1];
|
token = it.next() orelse return error.InvalidSyntax;
|
||||||
switch (self.index.get(scope_idx)) {
|
continue :flag token.type;
|
||||||
.object => |scope| {
|
},
|
||||||
//std.debug.print("depth: {d}\n", .{cycles});
|
else => continue :flag next.type,
|
||||||
_ = try self.addProperty(allocator, token.value.?.string);
|
// else => return error.InvalidSyntax,
|
||||||
self.index.set(scope_idx, .{ .object = ObjectEntry{
|
}
|
||||||
.len = scope.len + 1,
|
},
|
||||||
.property_idx = scope.property_idx,
|
.object_begin => {
|
||||||
.value_idx = scope.value_idx,
|
defer tokenizer.skipWhitespace();
|
||||||
} });
|
|
||||||
},
|
if (query.slice().len == 0) {
|
||||||
else => unreachable,
|
try query.ensureUnusedCapacity(1);
|
||||||
}
|
const ptr = query.addOneAssumeCapacity();
|
||||||
},
|
ptr.* = root;
|
||||||
.string => {
|
self.index.set(root, .{ .object = ObjectEntry{
|
||||||
if (it.peek()) |next| if (next.type == .colon) {
|
.len = 0,
|
||||||
|
.property_idx = self.property_index.string_bytes.items.len,
|
||||||
|
.value_idx = 1,
|
||||||
|
} });
|
||||||
|
} else {
|
||||||
|
const idx_ptr = try query.addOne();
|
||||||
|
idx_ptr.* = try self.addEmptyObject(allocator);
|
||||||
|
self.index.set(idx_ptr.*, .{ .object = ObjectEntry{
|
||||||
|
.len = 0,
|
||||||
|
.property_idx = self.property_index.string_bytes.items.len,
|
||||||
|
.value_idx = self.index.len,
|
||||||
|
} });
|
||||||
|
}
|
||||||
|
|
||||||
|
const next = it.next() orelse return error.InvalidSyntax;
|
||||||
|
token = next;
|
||||||
|
switch (next.type) {
|
||||||
|
.string => continue :flag .property,
|
||||||
|
else => return error.InvalidSyntax,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
.object_end => {
|
||||||
|
defer tokenizer.skipWhitespace();
|
||||||
|
assert(query.pop() != null);
|
||||||
|
|
||||||
|
const next = it.next() orelse
|
||||||
|
return root;
|
||||||
|
token = next;
|
||||||
|
switch (next.type) {
|
||||||
|
.comma => continue :flag .comma,
|
||||||
|
.object_end, .array_end => |t| continue :flag t,
|
||||||
|
else => return error.InvalidSyntax,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
.true, .false => {
|
||||||
|
defer tokenizer.skipWhitespace();
|
||||||
|
|
||||||
|
_ = try self.addBool(allocator, if (token.type == .true) true else false);
|
||||||
|
|
||||||
|
const next = it.next() orelse return error.InvalidSyntax;
|
||||||
|
token = next;
|
||||||
|
switch (next.type) {
|
||||||
|
.comma => continue :flag .comma,
|
||||||
|
.object_end => continue :flag .object_end,
|
||||||
|
else => return error.InvalidSyntax,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
.string => {
|
||||||
|
defer tokenizer.skipWhitespace();
|
||||||
|
|
||||||
|
const next = it.next() orelse return error.InvalidSyntax;
|
||||||
|
switch (next.type) {
|
||||||
|
.colon => {
|
||||||
continue :flag .property;
|
continue :flag .property;
|
||||||
};
|
},
|
||||||
_ = try self.addString(allocator, token.value.?.string);
|
else => |t| {
|
||||||
},
|
_ = try self.addString(allocator, token.value.?.string);
|
||||||
.number => {
|
|
||||||
_ = try self.addNumber(allocator, token.value.?.number);
|
token = next;
|
||||||
},
|
continue :flag t;
|
||||||
.true, .false => {
|
},
|
||||||
_ = try self.addBool(allocator, if (token.type == .true) true else false);
|
}
|
||||||
},
|
},
|
||||||
.null => {
|
.number => {
|
||||||
_ = try self.addNull(allocator);
|
defer tokenizer.skipWhitespace();
|
||||||
},
|
|
||||||
.comma => if (it.peek()) |t| {
|
_ = try self.addNumber(allocator, token.value.?.number);
|
||||||
if (t.type == .object_end) {
|
|
||||||
if (!self.options.flags.allow_trailing_comma) {
|
const next = it.next() orelse return error.InvalidSyntax;
|
||||||
return error.TrailingComma;
|
token = next;
|
||||||
}
|
switch (next.type) {
|
||||||
|
.comma => continue :flag .comma,
|
||||||
|
.object_end => continue :flag .object_end,
|
||||||
|
else => return error.InvalidSyntax,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
.comma => {
|
||||||
|
if (!self.options.flags.allow_trailing_comma) {
|
||||||
|
const next = it.next() orelse return error.InvalidSyntax;
|
||||||
|
token = next;
|
||||||
|
switch (next.type) {
|
||||||
|
.object_end, .array_end => return error.TrailingComma,
|
||||||
|
else => continue :flag token.type,
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
else => continue,
|
},
|
||||||
}
|
else => {
|
||||||
|
std.debug.print("token: {s}\n", .{@tagName(token.type)});
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
return root;
|
return root;
|
||||||
@ -439,10 +510,13 @@ test parse {
|
|||||||
var tokenizer = try Tokenizer.init(allocator, blk: {
|
var tokenizer = try Tokenizer.init(allocator, blk: {
|
||||||
const json =
|
const json =
|
||||||
\\ {
|
\\ {
|
||||||
\\ "lazy": true,
|
\\ "bio": "cool",
|
||||||
|
\\ "age": 15,
|
||||||
\\ "name": "yuzu",
|
\\ "name": "yuzu",
|
||||||
\\ "dislikes": [["Math", 3], ["Sports", 1]],
|
\\ "admin": true,
|
||||||
\\ "age": 15
|
\\ "address": {
|
||||||
|
\\ "lorem": "ipsum"
|
||||||
|
\\ }
|
||||||
\\ }
|
\\ }
|
||||||
;
|
;
|
||||||
break :blk json;
|
break :blk json;
|
||||||
@ -450,7 +524,7 @@ test parse {
|
|||||||
|
|
||||||
const root = blk: {
|
const root = blk: {
|
||||||
const idx = try parse(&self, &tokenizer);
|
const idx = try parse(&self, &tokenizer);
|
||||||
const val = (try getValue(&self, allocator, idx)).?;
|
const val, _ = try getValue(&self, allocator, idx, 0);
|
||||||
break :blk val;
|
break :blk val;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -58,18 +58,11 @@ pub const Token = struct {
|
|||||||
pub const Self = @This();
|
pub const Self = @This();
|
||||||
|
|
||||||
text: []const u8,
|
text: []const u8,
|
||||||
position: usize,
|
|
||||||
max_position: usize,
|
max_position: usize,
|
||||||
stack: []usize,
|
stack: []usize,
|
||||||
frame: usize,
|
frame: usize,
|
||||||
allocator: std.mem.Allocator,
|
allocator: std.mem.Allocator,
|
||||||
|
|
||||||
prev_token: ?Token = null,
|
|
||||||
|
|
||||||
pub fn pushBack(self: *Self, token: Token) void {
|
|
||||||
self.prev_token = token;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Initialize a new tokenizer
|
/// Initialize a new tokenizer
|
||||||
pub fn init(allocator: std.mem.Allocator, text: []const u8) std.mem.Allocator.Error!Self {
|
pub fn init(allocator: std.mem.Allocator, text: []const u8) std.mem.Allocator.Error!Self {
|
||||||
const stack = try allocator.alloc(usize, 0x100);
|
const stack = try allocator.alloc(usize, 0x100);
|
||||||
@ -77,7 +70,6 @@ pub fn init(allocator: std.mem.Allocator, text: []const u8) std.mem.Allocator.Er
|
|||||||
@memset(stack, 0);
|
@memset(stack, 0);
|
||||||
return .{
|
return .{
|
||||||
.text = text,
|
.text = text,
|
||||||
.position = 0,
|
|
||||||
.max_position = 0,
|
.max_position = 0,
|
||||||
.stack = stack,
|
.stack = stack,
|
||||||
.frame = 0,
|
.frame = 0,
|
||||||
@ -201,6 +193,14 @@ pub fn matchCharRange(self: *Self, low: u8, high: u8) ?void {
|
|||||||
self.advance(1);
|
self.advance(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn anyChar(self: *Self) ?u8 {
|
||||||
|
if (self.endOfInput())
|
||||||
|
return null;
|
||||||
|
const char = self.text[self.currentPosition()];
|
||||||
|
self.advance(1);
|
||||||
|
return char;
|
||||||
|
}
|
||||||
|
|
||||||
// ========== Token Extraction ==========
|
// ========== Token Extraction ==========
|
||||||
|
|
||||||
fn extractSlice(self: *Self, start: usize) []const u8 {
|
fn extractSlice(self: *Self, start: usize) []const u8 {
|
||||||
@ -319,35 +319,33 @@ pub fn nextIdentifier(self: *Self) Error!Token {
|
|||||||
/// Get the next token from the input
|
/// Get the next token from the input
|
||||||
/// WARNING: this function eats whitespaces
|
/// WARNING: this function eats whitespaces
|
||||||
pub fn nextToken(self: *Self) Error!Token {
|
pub fn nextToken(self: *Self) Error!Token {
|
||||||
if (self.prev_token) |tok| {
|
|
||||||
self.prev_token = null;
|
|
||||||
return tok;
|
|
||||||
}
|
|
||||||
|
|
||||||
const start = try self.pushFrame();
|
const start = try self.pushFrame();
|
||||||
errdefer self.popFrame();
|
errdefer self.popFrame();
|
||||||
|
|
||||||
self.skipWhitespace();
|
self.skipWhitespace();
|
||||||
|
|
||||||
if (self.endOfInput()) {
|
|
||||||
return Token{
|
|
||||||
.type = .eof,
|
|
||||||
.value = null,
|
|
||||||
.start = start,
|
|
||||||
.end = start,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
self.advance(1);
|
|
||||||
// Fall back to single character symbol
|
// Fall back to single character symbol
|
||||||
const c = self.lastChar();
|
const c = self.anyChar() orelse return .{
|
||||||
|
.type = .eof,
|
||||||
|
.value = null,
|
||||||
|
.start = start,
|
||||||
|
.end = start,
|
||||||
|
};
|
||||||
|
|
||||||
const symbol_t: TokenType = switch (c) {
|
const symbol_t: TokenType = switch (c) {
|
||||||
'{' => .object_begin,
|
'{' => .object_begin,
|
||||||
'}' => .object_end,
|
'}' => .object_end,
|
||||||
'[' => .array_begin,
|
'[' => .array_begin,
|
||||||
']' => .array_end,
|
']' => .array_end,
|
||||||
',' => .comma,
|
',' => {
|
||||||
|
self.skipWhitespace();
|
||||||
|
return self.commit(Token{
|
||||||
|
.type = .comma,
|
||||||
|
.value = null,
|
||||||
|
.end = start + 1,
|
||||||
|
.start = start,
|
||||||
|
});
|
||||||
|
},
|
||||||
':' => .colon,
|
':' => .colon,
|
||||||
'"' => {
|
'"' => {
|
||||||
self.rollback();
|
self.rollback();
|
||||||
@ -449,31 +447,20 @@ pub const Iterator = struct {
|
|||||||
|
|
||||||
pub fn next(it: *Iterator) ?Token {
|
pub fn next(it: *Iterator) ?Token {
|
||||||
defer it.tokenizer.skipWhitespace();
|
defer it.tokenizer.skipWhitespace();
|
||||||
if (it.tokenizer.endOfInput()) return null;
|
if (it.tokenizer.endOfInput()) {
|
||||||
return it.tokenizer.nextToken() catch null;
|
std.debug.print("got eof\n", .{});
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return it.tokenizer.nextToken() catch |err| {
|
||||||
|
std.debug.print("got err: {s}\n", .{@errorName(err)});
|
||||||
|
return null;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reset(it: *Iterator) void {
|
pub fn reset(it: *Iterator) void {
|
||||||
it.tokenizer.position = 0;
|
it.tokenizer.position = 0;
|
||||||
it.tokenizer.max_position = 0;
|
it.tokenizer.max_position = 0;
|
||||||
it.tokenizer.frame = 0;
|
it.tokenizer.frame = 0;
|
||||||
it.tokenizer.prev_token = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// nasty trick
|
|
||||||
pub fn peek(it: *Iterator) ?Token {
|
|
||||||
const frame = it.tokenizer.frame;
|
|
||||||
const pos = it.tokenizer.position;
|
|
||||||
const prev = it.tokenizer.prev_token;
|
|
||||||
const max_pos = it.tokenizer.max_position;
|
|
||||||
defer {
|
|
||||||
it.tokenizer.position = pos;
|
|
||||||
it.tokenizer.frame = frame;
|
|
||||||
it.tokenizer.max_position = max_pos;
|
|
||||||
it.tokenizer.prev_token = prev;
|
|
||||||
}
|
|
||||||
if (it.tokenizer.endOfInput()) return null;
|
|
||||||
return it.tokenizer.nextToken() catch null;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user