general cleanup
This commit is contained in:
parent
3952f49d66
commit
f84303e83f
146
language.zig
146
language.zig
@ -9,22 +9,9 @@ const assert = std.debug.assert;
|
|||||||
|
|
||||||
const Self = @This();
|
const Self = @This();
|
||||||
|
|
||||||
pub const Error = enum {
|
pub const Error = enum { Eof, TrailingComma, MissingKey, MissingValue, UnexpectedToken };
|
||||||
Eof,
|
|
||||||
TrailingComma,
|
|
||||||
MissingKey,
|
|
||||||
MissingValue,
|
|
||||||
UnexpectedToken,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub const JsonType = enum {
|
pub const JsonType = enum { null, bool, number, string, array, object };
|
||||||
null,
|
|
||||||
bool,
|
|
||||||
number,
|
|
||||||
string,
|
|
||||||
array,
|
|
||||||
object,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub const JsonValue = union(JsonType) {
|
pub const JsonValue = union(JsonType) {
|
||||||
null: void,
|
null: void,
|
||||||
@ -36,16 +23,12 @@ pub const JsonValue = union(JsonType) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
pub const JsonInput = union(JsonType) {
|
pub const JsonInput = union(JsonType) {
|
||||||
|
|
||||||
// data structures
|
|
||||||
const Object = std.StringArrayHashMapUnmanaged(JsonInput);
|
|
||||||
|
|
||||||
null: void,
|
null: void,
|
||||||
bool: bool,
|
bool: bool,
|
||||||
number: f64,
|
number: f64,
|
||||||
string: []const u8,
|
string: []const u8,
|
||||||
array: []JsonInput,
|
array: []JsonInput,
|
||||||
object: Object,
|
object: std.StringArrayHashMapUnmanaged(JsonInput),
|
||||||
|
|
||||||
pub fn deinit(self: JsonInput, allocator: mem.Allocator) void {
|
pub fn deinit(self: JsonInput, allocator: mem.Allocator) void {
|
||||||
switch (self) {
|
switch (self) {
|
||||||
@ -99,15 +82,13 @@ pub const JsonInput = union(JsonType) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/// same as ObjectEntry but simpler
|
/// same as ObjectEntry but simpler
|
||||||
/// start is the offset
|
/// tip is the offset
|
||||||
pub const ArraySlice = struct {
|
pub const ArraySlice = struct {
|
||||||
len: usize,
|
len: usize,
|
||||||
start: usize,
|
tip: usize,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// just += the properties and value indexes to get the next item
|
/// just += the value indexes to get the next item
|
||||||
/// property_idx and value_idx are the offset
|
|
||||||
/// it should be ordered
|
|
||||||
pub const ObjectEntry = struct {
|
pub const ObjectEntry = struct {
|
||||||
len: usize,
|
len: usize,
|
||||||
tip: usize,
|
tip: usize,
|
||||||
@ -120,19 +101,14 @@ pub const PropertyEntry = struct {
|
|||||||
pub const Flags = packed struct {
|
pub const Flags = packed struct {
|
||||||
/// Make the tokenizer omit comments, TBD
|
/// Make the tokenizer omit comments, TBD
|
||||||
allow_comments: bool = false,
|
allow_comments: bool = false,
|
||||||
|
|
||||||
/// Not to error on trailing comma, default is `false` for obvious reasons
|
/// Not to error on trailing comma, default is `false` for obvious reasons
|
||||||
allow_trailing_comma: bool = false,
|
allow_trailing_comma: bool = false,
|
||||||
|
|
||||||
/// Allows parsing `packed struct` as an `int`, size is the backing int
|
/// Allows parsing `packed struct` as an `int`, size is the backing int
|
||||||
bitfields: bool = false,
|
bitfields: bool = false,
|
||||||
|
|
||||||
/// Allows parsing `enum` as an `int`, size is the backing int
|
/// Allows parsing `enum` as an `int`, size is the backing int
|
||||||
real_enums: bool = false,
|
real_enums: bool = false,
|
||||||
|
|
||||||
/// Allows parsing unions, default behaviour is yet to be concluded
|
/// Allows parsing unions, default behaviour is yet to be concluded
|
||||||
unions: bool = false,
|
unions: bool = false,
|
||||||
|
|
||||||
/// To cast numbers always as f64, as the name says
|
/// To cast numbers always as f64, as the name says
|
||||||
numbersf64: bool = false,
|
numbersf64: bool = false,
|
||||||
};
|
};
|
||||||
@ -168,7 +144,6 @@ fn addNumber(self: *Self, allocator: mem.Allocator, number: f64) !usize {
|
|||||||
|
|
||||||
fn addProperty(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize {
|
fn addProperty(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize {
|
||||||
const stridx = try self.properties.add(allocator, bytes);
|
const stridx = try self.properties.add(allocator, bytes);
|
||||||
try self.index.ensureUnusedCapacity(allocator, 1);
|
|
||||||
try self.property_map.ensureUnusedCapacity(allocator, 1);
|
try self.property_map.ensureUnusedCapacity(allocator, 1);
|
||||||
return @intFromEnum(stridx);
|
return @intFromEnum(stridx);
|
||||||
}
|
}
|
||||||
@ -203,8 +178,7 @@ fn addNull(self: *Self, allocator: mem.Allocator) !usize {
|
|||||||
|
|
||||||
// Recursively compute how many index slots a node occupies (including nested)
|
// Recursively compute how many index slots a node occupies (including nested)
|
||||||
fn skipSlots(self: *Self, slot: usize) usize {
|
fn skipSlots(self: *Self, slot: usize) usize {
|
||||||
const e = self.index.get(slot);
|
switch (self.index.get(slot)) {
|
||||||
switch (e) {
|
|
||||||
.object => |obj| {
|
.object => |obj| {
|
||||||
var total: usize = 1;
|
var total: usize = 1;
|
||||||
var v = obj.tip;
|
var v = obj.tip;
|
||||||
@ -217,7 +191,7 @@ fn skipSlots(self: *Self, slot: usize) usize {
|
|||||||
},
|
},
|
||||||
.array => |arr| {
|
.array => |arr| {
|
||||||
var total: usize = 1;
|
var total: usize = 1;
|
||||||
var c = arr.start;
|
var c = arr.tip;
|
||||||
for (0..arr.len) |_| {
|
for (0..arr.len) |_| {
|
||||||
const s = skipSlots(self, c);
|
const s = skipSlots(self, c);
|
||||||
total += s;
|
total += s;
|
||||||
@ -237,9 +211,7 @@ pub fn getValue(
|
|||||||
if (self.index.len == 0)
|
if (self.index.len == 0)
|
||||||
return error.InvalidSyntax;
|
return error.InvalidSyntax;
|
||||||
|
|
||||||
const entry = self.index.get(idx);
|
switch (self.index.get(idx)) {
|
||||||
|
|
||||||
switch (entry) {
|
|
||||||
.null => return .{ .null = {} },
|
.null => return .{ .null = {} },
|
||||||
.bool => |b| return .{ .bool = b },
|
.bool => |b| return .{ .bool = b },
|
||||||
.number => |number| return .{ .number = number },
|
.number => |number| return .{ .number = number },
|
||||||
@ -250,7 +222,7 @@ pub fn getValue(
|
|||||||
.array => |arr| {
|
.array => |arr| {
|
||||||
var out = try allocator.alloc(JsonInput, arr.len);
|
var out = try allocator.alloc(JsonInput, arr.len);
|
||||||
errdefer allocator.free(out);
|
errdefer allocator.free(out);
|
||||||
var c = arr.start;
|
var c = arr.tip;
|
||||||
for (0..arr.len) |i| {
|
for (0..arr.len) |i| {
|
||||||
const v = try self.getValue(allocator, c);
|
const v = try self.getValue(allocator, c);
|
||||||
out[i] = v;
|
out[i] = v;
|
||||||
@ -259,58 +231,42 @@ pub fn getValue(
|
|||||||
return .{ .array = out[0..arr.len] };
|
return .{ .array = out[0..arr.len] };
|
||||||
},
|
},
|
||||||
.object => |obj| {
|
.object => |obj| {
|
||||||
var map: JsonInput.Object = .empty;
|
var map: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
|
||||||
errdefer map.deinit(allocator);
|
errdefer map.deinit(allocator);
|
||||||
var tip = obj.tip;
|
var tip = obj.tip;
|
||||||
|
for (0..obj.len) |_| if (self.property_map.get(tip)) |pen| {
|
||||||
for (0..obj.len) |_|
|
try map.put(
|
||||||
if (self.property_map.get(tip)) |pen| {
|
allocator,
|
||||||
const key_slice = pen.tip.slice(&self.properties);
|
pen.tip.slice(&self.properties),
|
||||||
|
try self.getValue(allocator, tip),
|
||||||
const val = try self.getValue(allocator, tip);
|
);
|
||||||
try map.put(allocator, key_slice, val);
|
tip += self.skipSlots(tip);
|
||||||
const s = self.skipSlots(tip);
|
} else return error.MissingKey;
|
||||||
tip += s;
|
|
||||||
} else {
|
|
||||||
// for (self.property_map.keys(), self.property_map.values()) |k, v| {
|
|
||||||
// std.debug.print("{}: {s}\n", .{ v.tip, @tagName(self.index.get(k)) });
|
|
||||||
// std.debug.print("tip: {d}\n", .{k});
|
|
||||||
// }
|
|
||||||
return error.MissingKey;
|
|
||||||
};
|
|
||||||
return .{ .object = map };
|
return .{ .object = map };
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// always returns 0 (root)
|
/// always returns 0 (root)
|
||||||
pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usize {
|
||||||
tokenizer.skipWhitespace();
|
tokenizer.skipWhitespace();
|
||||||
|
|
||||||
if (tokenizer.endOfInput())
|
if (tokenizer.endOfInput())
|
||||||
return error.Eof;
|
return error.Eof;
|
||||||
|
|
||||||
const allocator = tokenizer.allocator;
|
|
||||||
|
|
||||||
const root = try self.addEmpty(allocator);
|
const root = try self.addEmpty(allocator);
|
||||||
|
|
||||||
var token = try tokenizer.nextToken();
|
var token = try tokenizer.nextToken(allocator);
|
||||||
|
|
||||||
var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);
|
var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);
|
||||||
|
|
||||||
flag: switch (token.type) {
|
flag: switch (token.type) {
|
||||||
.eof => {
|
.eof => {
|
||||||
if (root != 0) {
|
if (root != 0) return error.InvalidSyntax;
|
||||||
return error.InvalidSyntax;
|
if (query.slice().len != 0) return error.InvalidSyntax;
|
||||||
}
|
|
||||||
if (query.slice().len != 0) {
|
|
||||||
return error.InvalidSyntax;
|
|
||||||
}
|
|
||||||
return root;
|
return root;
|
||||||
},
|
},
|
||||||
.property => {
|
.property => {
|
||||||
defer tokenizer.skipWhitespace();
|
|
||||||
|
|
||||||
const scope_idx = query.get(query.len - 1);
|
const scope_idx = query.get(query.len - 1);
|
||||||
switch (self.index.get(scope_idx)) {
|
switch (self.index.get(scope_idx)) {
|
||||||
.object => |scope| {
|
.object => |scope| {
|
||||||
@ -325,26 +281,20 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
.tip = scope.tip,
|
.tip = scope.tip,
|
||||||
} });
|
} });
|
||||||
},
|
},
|
||||||
.array => {
|
|
||||||
return error.InvalidSyntax;
|
|
||||||
},
|
|
||||||
else => return error.InvalidSyntax,
|
else => return error.InvalidSyntax,
|
||||||
}
|
}
|
||||||
|
|
||||||
const next = try tokenizer.nextToken();
|
const next = try tokenizer.nextToken(allocator);
|
||||||
token = next;
|
token = next;
|
||||||
switch (next.type) {
|
switch (next.type) {
|
||||||
.colon => {
|
.colon => {
|
||||||
token = try tokenizer.nextToken();
|
token = try tokenizer.nextToken(allocator);
|
||||||
continue :flag token.type;
|
continue :flag token.type;
|
||||||
},
|
},
|
||||||
else => continue :flag next.type,
|
else => continue :flag next.type,
|
||||||
// else => return error.InvalidSyntax,
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
.object_begin => {
|
.object_begin => {
|
||||||
defer tokenizer.skipWhitespace();
|
|
||||||
|
|
||||||
if (query.slice().len < 1) {
|
if (query.slice().len < 1) {
|
||||||
const ptr = try query.addOne();
|
const ptr = try query.addOne();
|
||||||
ptr.* = root;
|
ptr.* = root;
|
||||||
@ -368,14 +318,14 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
.array => |slice| {
|
.array => |slice| {
|
||||||
self.index.set(parent_idx, .{ .array = ArraySlice{
|
self.index.set(parent_idx, .{ .array = ArraySlice{
|
||||||
.len = slice.len + 1,
|
.len = slice.len + 1,
|
||||||
.start = if (slice.len == 0) idx_ptr.* else slice.start,
|
.tip = if (slice.len == 0) idx_ptr.* else slice.tip,
|
||||||
} });
|
} });
|
||||||
},
|
},
|
||||||
else => {},
|
else => {},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const next = try tokenizer.nextToken();
|
const next = try tokenizer.nextToken(allocator);
|
||||||
token = next;
|
token = next;
|
||||||
switch (next.type) {
|
switch (next.type) {
|
||||||
.string => continue :flag .property,
|
.string => continue :flag .property,
|
||||||
@ -384,14 +334,13 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
.object_end, .array_end => {
|
.object_end, .array_end => {
|
||||||
tokenizer.skipWhitespace();
|
|
||||||
if (query.pop() == null)
|
if (query.pop() == null)
|
||||||
return error.InvalidSyntax; // double close
|
return error.InvalidSyntax; // double close
|
||||||
|
|
||||||
if (query.slice().len == 0)
|
if (query.slice().len == 0)
|
||||||
return root;
|
return root;
|
||||||
|
|
||||||
const next = try tokenizer.nextToken();
|
const next = try tokenizer.nextToken(allocator);
|
||||||
|
|
||||||
token = next;
|
token = next;
|
||||||
switch (next.type) {
|
switch (next.type) {
|
||||||
@ -408,7 +357,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
ptr.* = root;
|
ptr.* = root;
|
||||||
self.index.set(root, .{ .array = ArraySlice{
|
self.index.set(root, .{ .array = ArraySlice{
|
||||||
.len = 0,
|
.len = 0,
|
||||||
.start = 1,
|
.tip = 1,
|
||||||
} });
|
} });
|
||||||
} else {
|
} else {
|
||||||
// order matters
|
// order matters
|
||||||
@ -419,21 +368,21 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
idx_ptr.* = try self.addEmpty(allocator);
|
idx_ptr.* = try self.addEmpty(allocator);
|
||||||
self.index.set(idx_ptr.*, .{ .array = ArraySlice{
|
self.index.set(idx_ptr.*, .{ .array = ArraySlice{
|
||||||
.len = 0,
|
.len = 0,
|
||||||
.start = idx_ptr.* + 1,
|
.tip = idx_ptr.* + 1,
|
||||||
} });
|
} });
|
||||||
|
|
||||||
switch (self.index.get(parent_idx)) {
|
switch (self.index.get(parent_idx)) {
|
||||||
.array => |slice| {
|
.array => |slice| {
|
||||||
self.index.set(parent_idx, .{ .array = ArraySlice{
|
self.index.set(parent_idx, .{ .array = ArraySlice{
|
||||||
.len = slice.len + 1,
|
.len = slice.len + 1,
|
||||||
.start = if (slice.len == 0) idx_ptr.* else slice.start,
|
.tip = if (slice.len == 0) idx_ptr.* else slice.tip,
|
||||||
} });
|
} });
|
||||||
},
|
},
|
||||||
else => {},
|
else => {},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const next = try tokenizer.nextToken();
|
const next = try tokenizer.nextToken(allocator);
|
||||||
token = next;
|
token = next;
|
||||||
switch (next.type) {
|
switch (next.type) {
|
||||||
.property => return error.InvalidSyntax,
|
.property => return error.InvalidSyntax,
|
||||||
@ -455,16 +404,13 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
.array => |slice| {
|
.array => |slice| {
|
||||||
self.index.set(parent_idx, .{ .array = ArraySlice{
|
self.index.set(parent_idx, .{ .array = ArraySlice{
|
||||||
.len = slice.len + 1,
|
.len = slice.len + 1,
|
||||||
.start = if (slice.len == 0) idx else slice.start,
|
.tip = if (slice.len == 0) idx else slice.tip,
|
||||||
} });
|
} });
|
||||||
},
|
},
|
||||||
else => {},
|
else => {},
|
||||||
}
|
}
|
||||||
|
|
||||||
const next = tokenizer.nextToken() catch |err| switch (err) {
|
const next = try tokenizer.nextToken(allocator);
|
||||||
error.InvalidSyntax => return err,
|
|
||||||
else => return root,
|
|
||||||
};
|
|
||||||
token = next;
|
token = next;
|
||||||
switch (next.type) {
|
switch (next.type) {
|
||||||
.comma => continue :flag .comma,
|
.comma => continue :flag .comma,
|
||||||
@ -473,9 +419,6 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
.string => {
|
.string => {
|
||||||
defer tokenizer.skipWhitespace();
|
|
||||||
errdefer allocator.free(token.value.?.string);
|
|
||||||
|
|
||||||
if (query.len == 0) {
|
if (query.len == 0) {
|
||||||
// root
|
// root
|
||||||
_ = try self.addString(allocator, token.value.?.string);
|
_ = try self.addString(allocator, token.value.?.string);
|
||||||
@ -487,7 +430,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
|
|
||||||
const parent_idx = query.get(query.len - 1);
|
const parent_idx = query.get(query.len - 1);
|
||||||
|
|
||||||
const next = try tokenizer.nextToken();
|
const next = try tokenizer.nextToken(allocator);
|
||||||
switch (next.type) {
|
switch (next.type) {
|
||||||
.colon => {
|
.colon => {
|
||||||
continue :flag .property;
|
continue :flag .property;
|
||||||
@ -499,7 +442,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
.array => |slice| {
|
.array => |slice| {
|
||||||
self.index.set(parent_idx, .{ .array = ArraySlice{
|
self.index.set(parent_idx, .{ .array = ArraySlice{
|
||||||
.len = slice.len + 1,
|
.len = slice.len + 1,
|
||||||
.start = if (slice.len == 0) idx else slice.start,
|
.tip = if (slice.len == 0) idx else slice.tip,
|
||||||
} });
|
} });
|
||||||
},
|
},
|
||||||
else => {},
|
else => {},
|
||||||
@ -511,8 +454,6 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
.number => {
|
.number => {
|
||||||
defer tokenizer.skipWhitespace();
|
|
||||||
|
|
||||||
if (query.len == 0) {
|
if (query.len == 0) {
|
||||||
// root
|
// root
|
||||||
_ = try self.addNumber(allocator, token.value.?.number);
|
_ = try self.addNumber(allocator, token.value.?.number);
|
||||||
@ -526,13 +467,13 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
.array => |slice| {
|
.array => |slice| {
|
||||||
self.index.set(parent_idx, .{ .array = ArraySlice{
|
self.index.set(parent_idx, .{ .array = ArraySlice{
|
||||||
.len = slice.len + 1,
|
.len = slice.len + 1,
|
||||||
.start = if (slice.len == 0) idx else slice.start,
|
.tip = if (slice.len == 0) idx else slice.tip,
|
||||||
} });
|
} });
|
||||||
},
|
},
|
||||||
else => {},
|
else => {},
|
||||||
}
|
}
|
||||||
|
|
||||||
const next = try tokenizer.nextToken();
|
const next = try tokenizer.nextToken(allocator);
|
||||||
token = next;
|
token = next;
|
||||||
switch (next.type) {
|
switch (next.type) {
|
||||||
.comma => continue :flag .comma,
|
.comma => continue :flag .comma,
|
||||||
@ -542,7 +483,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
},
|
},
|
||||||
.comma => {
|
.comma => {
|
||||||
if (!self.options.flags.allow_trailing_comma) {
|
if (!self.options.flags.allow_trailing_comma) {
|
||||||
const next = try tokenizer.nextToken();
|
const next = try tokenizer.nextToken(allocator);
|
||||||
token = next;
|
token = next;
|
||||||
switch (next.type) {
|
switch (next.type) {
|
||||||
.object_end, .array_end => return error.TrailingComma,
|
.object_end, .array_end => return error.TrailingComma,
|
||||||
@ -552,7 +493,6 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
.null => {
|
.null => {
|
||||||
defer tokenizer.skipWhitespace();
|
|
||||||
const idx = try self.addNull(allocator);
|
const idx = try self.addNull(allocator);
|
||||||
|
|
||||||
if (query.len == 0) {
|
if (query.len == 0) {
|
||||||
@ -565,12 +505,12 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
|||||||
.array => |slice| {
|
.array => |slice| {
|
||||||
self.index.set(parent_idx, .{ .array = ArraySlice{
|
self.index.set(parent_idx, .{ .array = ArraySlice{
|
||||||
.len = slice.len + 1,
|
.len = slice.len + 1,
|
||||||
.start = if (slice.len == 0) idx else slice.start,
|
.tip = if (slice.len == 0) idx else slice.tip,
|
||||||
} });
|
} });
|
||||||
},
|
},
|
||||||
else => {},
|
else => {},
|
||||||
}
|
}
|
||||||
const next = tokenizer.nextToken() catch |err| switch (err) {
|
const next = tokenizer.nextToken(allocator) catch |err| switch (err) {
|
||||||
error.InvalidSyntax => return err,
|
error.InvalidSyntax => return err,
|
||||||
else => return root,
|
else => return root,
|
||||||
};
|
};
|
||||||
@ -603,14 +543,14 @@ test getValue {
|
|||||||
; // 1: a, 2: b, 3: c, 4: d, 5: e, 6: f
|
; // 1: a, 2: b, 3: c, 4: d, 5: e, 6: f
|
||||||
|
|
||||||
var tokenizer: Tokenizer = try .init(allocator, text);
|
var tokenizer: Tokenizer = try .init(allocator, text);
|
||||||
defer tokenizer.deinit();
|
defer tokenizer.deinit(allocator);
|
||||||
|
|
||||||
var self = try allocator.create(Self);
|
var self = try allocator.create(Self);
|
||||||
self.* = Self.init;
|
self.* = Self.init;
|
||||||
defer allocator.destroy(self);
|
defer allocator.destroy(self);
|
||||||
defer self.deinit(allocator);
|
defer self.deinit(allocator);
|
||||||
|
|
||||||
const idx: usize = try self.parse(&tokenizer);
|
const idx: usize = try self.parse(allocator, &tokenizer);
|
||||||
|
|
||||||
var root = try self.getValue(allocator, idx);
|
var root = try self.getValue(allocator, idx);
|
||||||
defer root.deinit(allocator);
|
defer root.deinit(allocator);
|
||||||
|
43
test.zig
43
test.zig
@ -5,9 +5,9 @@ const testing = std.testing;
|
|||||||
const Language = @import("language.zig");
|
const Language = @import("language.zig");
|
||||||
const Tokenizer = @import("tokenizer.zig");
|
const Tokenizer = @import("tokenizer.zig");
|
||||||
|
|
||||||
test Language {
|
const allocator = std.testing.allocator;
|
||||||
const allocator = std.testing.allocator;
|
|
||||||
|
|
||||||
|
test Language {
|
||||||
const text =
|
const text =
|
||||||
\\ {
|
\\ {
|
||||||
\\ "cute": true,
|
\\ "cute": true,
|
||||||
@ -25,14 +25,14 @@ test Language {
|
|||||||
;
|
;
|
||||||
|
|
||||||
var tokenizer: Tokenizer = try .init(allocator, text);
|
var tokenizer: Tokenizer = try .init(allocator, text);
|
||||||
defer tokenizer.deinit();
|
defer tokenizer.deinit(allocator);
|
||||||
|
|
||||||
var self = try allocator.create(Language);
|
var self = try allocator.create(Language);
|
||||||
defer allocator.destroy(self);
|
defer allocator.destroy(self);
|
||||||
self.* = Language.init;
|
self.* = Language.init;
|
||||||
defer self.deinit(allocator);
|
defer self.deinit(allocator);
|
||||||
|
|
||||||
const idx: usize = try self.parse(&tokenizer);
|
const idx: usize = try self.parse(allocator, &tokenizer);
|
||||||
|
|
||||||
var root = try self.getValue(allocator, idx);
|
var root = try self.getValue(allocator, idx);
|
||||||
defer root.deinit(allocator);
|
defer root.deinit(allocator);
|
||||||
@ -50,18 +50,18 @@ test {
|
|||||||
fn expectPass(comptime path: []const u8) !void {
|
fn expectPass(comptime path: []const u8) !void {
|
||||||
const file = @embedFile("tests" ++ path);
|
const file = @embedFile("tests" ++ path);
|
||||||
|
|
||||||
var tokenizer: Tokenizer = try .init(std.testing.allocator, file);
|
var tokenizer: Tokenizer = try .init(allocator, file);
|
||||||
defer tokenizer.deinit();
|
defer tokenizer.deinit(allocator);
|
||||||
|
|
||||||
var self = try std.testing.allocator.create(Language);
|
var self = try allocator.create(Language);
|
||||||
self.* = Language.init;
|
self.* = Language.init;
|
||||||
defer std.testing.allocator.destroy(self);
|
defer allocator.destroy(self);
|
||||||
defer self.deinit(std.testing.allocator);
|
defer self.deinit(allocator);
|
||||||
|
|
||||||
const idx: usize = try self.parse(&tokenizer);
|
const idx: usize = try self.parse(allocator, &tokenizer);
|
||||||
|
|
||||||
var root = try self.getValue(std.testing.allocator, idx);
|
var root = try self.getValue(allocator, idx);
|
||||||
defer root.deinit(std.testing.allocator);
|
defer root.deinit(allocator);
|
||||||
|
|
||||||
std.debug.print("{}\n", .{root});
|
std.debug.print("{}\n", .{root});
|
||||||
}
|
}
|
||||||
@ -69,22 +69,19 @@ fn expectPass(comptime path: []const u8) !void {
|
|||||||
fn expectFail(comptime path: []const u8) !void {
|
fn expectFail(comptime path: []const u8) !void {
|
||||||
const file = @embedFile("tests" ++ path);
|
const file = @embedFile("tests" ++ path);
|
||||||
|
|
||||||
var tokenizer: Tokenizer = try .init(std.testing.allocator, file);
|
var tokenizer: Tokenizer = try .init(allocator, file);
|
||||||
defer tokenizer.deinit();
|
defer tokenizer.deinit(allocator);
|
||||||
|
|
||||||
var self = try std.testing.allocator.create(Language);
|
var self = try allocator.create(Language);
|
||||||
self.* = Language.init;
|
self.* = Language.init;
|
||||||
defer std.testing.allocator.destroy(self);
|
defer allocator.destroy(self);
|
||||||
defer self.deinit(std.testing.allocator);
|
defer self.deinit(allocator);
|
||||||
|
|
||||||
const idx: usize = self.parse(&tokenizer) catch {
|
const idx: usize = self.parse(allocator, &tokenizer) catch
|
||||||
return;
|
return;
|
||||||
};
|
var root = self.getValue(allocator, idx) catch
|
||||||
|
|
||||||
var root = self.getValue(std.testing.allocator, idx) catch {
|
|
||||||
return;
|
return;
|
||||||
};
|
defer root.deinit(allocator);
|
||||||
defer root.deinit(std.testing.allocator);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// zig fmt: off
|
// zig fmt: off
|
||||||
|
@ -34,11 +34,7 @@ pub const TokenType = enum(u8) {
|
|||||||
|
|
||||||
pub const Token = struct {
|
pub const Token = struct {
|
||||||
type: TokenType,
|
type: TokenType,
|
||||||
value: ?union {
|
value: ?union { number: f64, string: []const u8, symbol: u8 },
|
||||||
number: f64,
|
|
||||||
string: []const u8,
|
|
||||||
symbol: u8,
|
|
||||||
},
|
|
||||||
start: usize,
|
start: usize,
|
||||||
end: usize,
|
end: usize,
|
||||||
};
|
};
|
||||||
@ -49,10 +45,9 @@ text: []const u8,
|
|||||||
max_position: usize,
|
max_position: usize,
|
||||||
stack: []usize,
|
stack: []usize,
|
||||||
frame: usize,
|
frame: usize,
|
||||||
allocator: std.mem.Allocator,
|
|
||||||
|
|
||||||
/// Initialize a new tokenizer
|
/// Initialize a new tokenizer
|
||||||
pub fn init(allocator: std.mem.Allocator, text: []const u8) std.mem.Allocator.Error!Self {
|
pub fn init(allocator: std.mem.Allocator, text: []const u8) mem.Allocator.Error!Self {
|
||||||
const stack = try allocator.alloc(usize, 0x100);
|
const stack = try allocator.alloc(usize, 0x100);
|
||||||
errdefer allocator.free(stack);
|
errdefer allocator.free(stack);
|
||||||
@memset(stack, 0);
|
@memset(stack, 0);
|
||||||
@ -61,13 +56,12 @@ pub fn init(allocator: std.mem.Allocator, text: []const u8) std.mem.Allocator.Er
|
|||||||
.max_position = 0,
|
.max_position = 0,
|
||||||
.stack = stack,
|
.stack = stack,
|
||||||
.frame = 0,
|
.frame = 0,
|
||||||
.allocator = allocator,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clean up resources
|
/// Clean up resources
|
||||||
pub fn deinit(self: *Self) void {
|
pub fn deinit(self: *Self, allocator: mem.Allocator) void {
|
||||||
self.allocator.free(self.stack);
|
allocator.free(self.stack);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ========== Core Parsing Functions ==========
|
// ========== Core Parsing Functions ==========
|
||||||
@ -82,13 +76,13 @@ fn advance(self: *Self, delta: usize) void {
|
|||||||
self.max_position = self.stack[self.frame];
|
self.max_position = self.stack[self.frame];
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pushFrame(self: *Self) Error!usize {
|
fn pushFrame(self: *Self, allocator: mem.Allocator) Error!usize {
|
||||||
self.frame += 1;
|
self.frame += 1;
|
||||||
if (self.frame == self.stack.len) {
|
if (self.frame == self.stack.len) {
|
||||||
const new_stack = try self.allocator.alloc(usize, self.stack.len * 2);
|
const new_stack = try allocator.alloc(usize, self.stack.len * 2);
|
||||||
@memset(new_stack, 0);
|
@memset(new_stack, 0);
|
||||||
@memcpy(new_stack, self.stack);
|
@memcpy(new_stack, self.stack);
|
||||||
self.allocator.free(self.stack);
|
allocator.free(self.stack);
|
||||||
self.stack = new_stack;
|
self.stack = new_stack;
|
||||||
}
|
}
|
||||||
if (self.frame > self.text.len)
|
if (self.frame > self.text.len)
|
||||||
@ -209,10 +203,10 @@ pub fn skipWhitespace(self: *Self) void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a number token
|
/// Parse a number token
|
||||||
pub fn nextNumber(self: *Self) Error!Token {
|
pub fn nextNumber(self: *Self, allocator: mem.Allocator) Error!Token {
|
||||||
self.skipWhitespace();
|
self.skipWhitespace();
|
||||||
|
|
||||||
const start = try self.pushFrame();
|
const start = try self.pushFrame(allocator);
|
||||||
errdefer self.popFrame();
|
errdefer self.popFrame();
|
||||||
|
|
||||||
self.matchChar('-') orelse {}; // this may not fail
|
self.matchChar('-') orelse {}; // this may not fail
|
||||||
@ -262,14 +256,14 @@ pub fn nextNumber(self: *Self) Error!Token {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Parse an identifier token
|
/// Parse an identifier token
|
||||||
pub fn nextIdentifier(self: *Self) Error!Token {
|
pub fn nextIdentifier(self: *Self, allocator: mem.Allocator) Error!Token {
|
||||||
self.skipWhitespace();
|
self.skipWhitespace();
|
||||||
|
|
||||||
const start = try self.pushFrame();
|
const start = try self.pushFrame(allocator);
|
||||||
errdefer self.popFrame();
|
errdefer self.popFrame();
|
||||||
|
|
||||||
var buffer = try self.allocator.alloc(u8, 0x100);
|
var buffer = try allocator.alloc(u8, 0x100);
|
||||||
defer self.allocator.free(buffer);
|
defer allocator.free(buffer);
|
||||||
|
|
||||||
self.matchCharPredicate(std.ascii.isAlphabetic) orelse
|
self.matchCharPredicate(std.ascii.isAlphabetic) orelse
|
||||||
return error.InvalidSyntax;
|
return error.InvalidSyntax;
|
||||||
@ -319,10 +313,10 @@ pub fn nextIdentifier(self: *Self) Error!Token {
|
|||||||
|
|
||||||
/// Get the next token from the input
|
/// Get the next token from the input
|
||||||
/// WARNING: this function eats whitespaces
|
/// WARNING: this function eats whitespaces
|
||||||
pub fn nextToken(self: *Self) Error!Token {
|
pub fn nextToken(self: *Self, allocator: mem.Allocator) Error!Token {
|
||||||
self.skipWhitespace();
|
self.skipWhitespace();
|
||||||
|
|
||||||
const start = try self.pushFrame();
|
const start = try self.pushFrame(allocator);
|
||||||
errdefer self.popFrame();
|
errdefer self.popFrame();
|
||||||
|
|
||||||
// Fall back to single character symbol
|
// Fall back to single character symbol
|
||||||
@ -350,19 +344,19 @@ pub fn nextToken(self: *Self) Error!Token {
|
|||||||
':' => .colon,
|
':' => .colon,
|
||||||
'"' => {
|
'"' => {
|
||||||
self.rollback();
|
self.rollback();
|
||||||
const string = try self.nextString();
|
const string = try self.nextString(allocator);
|
||||||
errdefer self.allocator.free(string);
|
errdefer allocator.free(string);
|
||||||
return self.commit(string);
|
return self.commit(string);
|
||||||
},
|
},
|
||||||
else => {
|
else => {
|
||||||
self.rollback();
|
self.rollback();
|
||||||
// Try different token types in order of precedence
|
// Try different token types in order of precedence
|
||||||
if (std.ascii.isDigit(c) or c == '-') {
|
if (std.ascii.isDigit(c) or c == '-') {
|
||||||
return self.commit(self.nextNumber());
|
return self.commit(self.nextNumber(allocator));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (std.ascii.isAlphabetic(c)) {
|
if (std.ascii.isAlphabetic(c)) {
|
||||||
return self.commit(self.nextIdentifier());
|
return self.commit(self.nextIdentifier(allocator));
|
||||||
}
|
}
|
||||||
|
|
||||||
return error.InvalidSyntax;
|
return error.InvalidSyntax;
|
||||||
@ -377,15 +371,15 @@ pub fn nextToken(self: *Self) Error!Token {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn nextString(self: *Self) Error!Token {
|
pub fn nextString(self: *Self, allocator: mem.Allocator) Error!Token {
|
||||||
self.skipWhitespace();
|
self.skipWhitespace();
|
||||||
|
|
||||||
const start = try self.pushFrame();
|
const start = try self.pushFrame(allocator);
|
||||||
errdefer self.popFrame();
|
errdefer self.popFrame();
|
||||||
|
|
||||||
self.matchChar('"') orelse unreachable;
|
self.matchChar('"') orelse unreachable;
|
||||||
|
|
||||||
var buffer: std.ArrayList(u8) = .init(self.allocator);
|
var buffer: std.ArrayList(u8) = .init(allocator);
|
||||||
defer buffer.deinit();
|
defer buffer.deinit();
|
||||||
|
|
||||||
loop: while (!self.endOfInput()) {
|
loop: while (!self.endOfInput()) {
|
||||||
@ -449,6 +443,7 @@ pub fn nextString(self: *Self) Error!Token {
|
|||||||
|
|
||||||
pub const Iterator = struct {
|
pub const Iterator = struct {
|
||||||
tokenizer: *Self,
|
tokenizer: *Self,
|
||||||
|
allocator: mem.Allocator,
|
||||||
|
|
||||||
pub fn next(it: *Iterator) ?Token {
|
pub fn next(it: *Iterator) ?Token {
|
||||||
defer it.tokenizer.skipWhitespace();
|
defer it.tokenizer.skipWhitespace();
|
||||||
@ -457,7 +452,7 @@ pub const Iterator = struct {
|
|||||||
if (it.tokenizer.endOfInput()) {
|
if (it.tokenizer.endOfInput()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return it.tokenizer.nextToken() catch |err| switch (err) {
|
return it.tokenizer.nextToken(it.allocator) catch |err| switch (err) {
|
||||||
error.InvalidSyntax => unreachable,
|
error.InvalidSyntax => unreachable,
|
||||||
else => {
|
else => {
|
||||||
return null;
|
return null;
|
||||||
@ -473,16 +468,13 @@ pub const Iterator = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/// iterator
|
/// iterator
|
||||||
pub fn iterator(self: *Self) Iterator {
|
pub fn iterator(self: *Self, allocator: mem.Allocator) Iterator {
|
||||||
return Iterator{
|
return .{ .tokenizer = self, .allocator = allocator };
|
||||||
.tokenizer = self,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stringToUtf8(bytes: []u8) ![]u8 {
|
pub fn stringToUtf8(bytes: []u8) ![]u8 {
|
||||||
const code_point = std.fmt.parseInt(u21, bytes, 16) catch {
|
const code_point = std.fmt.parseInt(u21, bytes, 16) catch
|
||||||
return error.BadNumber;
|
return error.BadNumber;
|
||||||
};
|
|
||||||
var buffer: [4]u8 = undefined;
|
var buffer: [4]u8 = undefined;
|
||||||
var index: usize = 0;
|
var index: usize = 0;
|
||||||
|
|
||||||
@ -553,9 +545,7 @@ pub fn skipWhitespaceSimd(text: []const u8) usize {
|
|||||||
|
|
||||||
// Find first non-whitespace
|
// Find first non-whitespace
|
||||||
const mask: std.meta.Int(.unsigned, ChunkSize) = @bitCast(anyws == FalseMask);
|
const mask: std.meta.Int(.unsigned, ChunkSize) = @bitCast(anyws == FalseMask);
|
||||||
if (mask != 0) {
|
if (mask != 0) return j + @ctz(mask);
|
||||||
return j + @ctz(mask);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scalar processing for remaining bytes
|
// Scalar processing for remaining bytes
|
||||||
|
Loading…
x
Reference in New Issue
Block a user