general cleanup

This commit is contained in:
yuzu 2025-05-29 22:59:42 -05:00
parent 3952f49d66
commit f84303e83f
3 changed files with 91 additions and 164 deletions

View File

@ -9,22 +9,9 @@ const assert = std.debug.assert;
const Self = @This(); const Self = @This();
pub const Error = enum { pub const Error = enum { Eof, TrailingComma, MissingKey, MissingValue, UnexpectedToken };
Eof,
TrailingComma,
MissingKey,
MissingValue,
UnexpectedToken,
};
pub const JsonType = enum { pub const JsonType = enum { null, bool, number, string, array, object };
null,
bool,
number,
string,
array,
object,
};
pub const JsonValue = union(JsonType) { pub const JsonValue = union(JsonType) {
null: void, null: void,
@ -36,16 +23,12 @@ pub const JsonValue = union(JsonType) {
}; };
pub const JsonInput = union(JsonType) { pub const JsonInput = union(JsonType) {
// data structures
const Object = std.StringArrayHashMapUnmanaged(JsonInput);
null: void, null: void,
bool: bool, bool: bool,
number: f64, number: f64,
string: []const u8, string: []const u8,
array: []JsonInput, array: []JsonInput,
object: Object, object: std.StringArrayHashMapUnmanaged(JsonInput),
pub fn deinit(self: JsonInput, allocator: mem.Allocator) void { pub fn deinit(self: JsonInput, allocator: mem.Allocator) void {
switch (self) { switch (self) {
@ -99,15 +82,13 @@ pub const JsonInput = union(JsonType) {
}; };
/// same as ObjectEntry but simpler /// same as ObjectEntry but simpler
/// start is the offset ///.tip is the offset
pub const ArraySlice = struct { pub const ArraySlice = struct {
len: usize, len: usize,
start: usize, tip: usize,
}; };
/// just += the properties and value indexes to get the next item /// just += the value indexes to get the next item
/// property_idx and value_idx are the offset
/// it should be ordered
pub const ObjectEntry = struct { pub const ObjectEntry = struct {
len: usize, len: usize,
tip: usize, tip: usize,
@ -120,19 +101,14 @@ pub const PropertyEntry = struct {
pub const Flags = packed struct { pub const Flags = packed struct {
/// Make the tokenizer omit comments, TBD /// Make the tokenizer omit comments, TBD
allow_comments: bool = false, allow_comments: bool = false,
/// Not to error on trailing comma, default is `false` for obvious reasons /// Not to error on trailing comma, default is `false` for obvious reasons
allow_trailing_comma: bool = false, allow_trailing_comma: bool = false,
/// Allows parsing `packed struct` as an `int`, size is the backing int /// Allows parsing `packed struct` as an `int`, size is the backing int
bitfields: bool = false, bitfields: bool = false,
/// Allows parsing `enum` as an `int`, size is the backing int /// Allows parsing `enum` as an `int`, size is the backing int
real_enums: bool = false, real_enums: bool = false,
/// Allows parsing unions, default behaviour is yet to be concluded /// Allows parsing unions, default behaviour is yet to be concluded
unions: bool = false, unions: bool = false,
/// To cast numbers always as f64, as the name says /// To cast numbers always as f64, as the name says
numbersf64: bool = false, numbersf64: bool = false,
}; };
@ -168,7 +144,6 @@ fn addNumber(self: *Self, allocator: mem.Allocator, number: f64) !usize {
fn addProperty(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize { fn addProperty(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize {
const stridx = try self.properties.add(allocator, bytes); const stridx = try self.properties.add(allocator, bytes);
try self.index.ensureUnusedCapacity(allocator, 1);
try self.property_map.ensureUnusedCapacity(allocator, 1); try self.property_map.ensureUnusedCapacity(allocator, 1);
return @intFromEnum(stridx); return @intFromEnum(stridx);
} }
@ -203,8 +178,7 @@ fn addNull(self: *Self, allocator: mem.Allocator) !usize {
// Recursively compute how many index slots a node occupies (including nested) // Recursively compute how many index slots a node occupies (including nested)
fn skipSlots(self: *Self, slot: usize) usize { fn skipSlots(self: *Self, slot: usize) usize {
const e = self.index.get(slot); switch (self.index.get(slot)) {
switch (e) {
.object => |obj| { .object => |obj| {
var total: usize = 1; var total: usize = 1;
var v = obj.tip; var v = obj.tip;
@ -217,7 +191,7 @@ fn skipSlots(self: *Self, slot: usize) usize {
}, },
.array => |arr| { .array => |arr| {
var total: usize = 1; var total: usize = 1;
var c = arr.start; var c = arr.tip;
for (0..arr.len) |_| { for (0..arr.len) |_| {
const s = skipSlots(self, c); const s = skipSlots(self, c);
total += s; total += s;
@ -237,9 +211,7 @@ pub fn getValue(
if (self.index.len == 0) if (self.index.len == 0)
return error.InvalidSyntax; return error.InvalidSyntax;
const entry = self.index.get(idx); switch (self.index.get(idx)) {
switch (entry) {
.null => return .{ .null = {} }, .null => return .{ .null = {} },
.bool => |b| return .{ .bool = b }, .bool => |b| return .{ .bool = b },
.number => |number| return .{ .number = number }, .number => |number| return .{ .number = number },
@ -250,7 +222,7 @@ pub fn getValue(
.array => |arr| { .array => |arr| {
var out = try allocator.alloc(JsonInput, arr.len); var out = try allocator.alloc(JsonInput, arr.len);
errdefer allocator.free(out); errdefer allocator.free(out);
var c = arr.start; var c = arr.tip;
for (0..arr.len) |i| { for (0..arr.len) |i| {
const v = try self.getValue(allocator, c); const v = try self.getValue(allocator, c);
out[i] = v; out[i] = v;
@ -259,58 +231,42 @@ pub fn getValue(
return .{ .array = out[0..arr.len] }; return .{ .array = out[0..arr.len] };
}, },
.object => |obj| { .object => |obj| {
var map: JsonInput.Object = .empty; var map: std.StringArrayHashMapUnmanaged(JsonInput) = .empty;
errdefer map.deinit(allocator); errdefer map.deinit(allocator);
var tip = obj.tip; var tip = obj.tip;
for (0..obj.len) |_| if (self.property_map.get(tip)) |pen| {
for (0..obj.len) |_| try map.put(
if (self.property_map.get(tip)) |pen| { allocator,
const key_slice = pen.tip.slice(&self.properties); pen.tip.slice(&self.properties),
try self.getValue(allocator, tip),
const val = try self.getValue(allocator, tip); );
try map.put(allocator, key_slice, val); tip += self.skipSlots(tip);
const s = self.skipSlots(tip); } else return error.MissingKey;
tip += s;
} else {
// for (self.property_map.keys(), self.property_map.values()) |k, v| {
// std.debug.print("{}: {s}\n", .{ v.tip, @tagName(self.index.get(k)) });
// std.debug.print("tip: {d}\n", .{k});
// }
return error.MissingKey;
};
return .{ .object = map }; return .{ .object = map };
}, },
} }
} }
/// always returns 0 (root) /// always returns 0 (root)
pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize { pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usize {
tokenizer.skipWhitespace(); tokenizer.skipWhitespace();
if (tokenizer.endOfInput()) if (tokenizer.endOfInput())
return error.Eof; return error.Eof;
const allocator = tokenizer.allocator;
const root = try self.addEmpty(allocator); const root = try self.addEmpty(allocator);
var token = try tokenizer.nextToken(); var token = try tokenizer.nextToken(allocator);
var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0); var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);
flag: switch (token.type) { flag: switch (token.type) {
.eof => { .eof => {
if (root != 0) { if (root != 0) return error.InvalidSyntax;
return error.InvalidSyntax; if (query.slice().len != 0) return error.InvalidSyntax;
}
if (query.slice().len != 0) {
return error.InvalidSyntax;
}
return root; return root;
}, },
.property => { .property => {
defer tokenizer.skipWhitespace();
const scope_idx = query.get(query.len - 1); const scope_idx = query.get(query.len - 1);
switch (self.index.get(scope_idx)) { switch (self.index.get(scope_idx)) {
.object => |scope| { .object => |scope| {
@ -325,26 +281,20 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
.tip = scope.tip, .tip = scope.tip,
} }); } });
}, },
.array => {
return error.InvalidSyntax;
},
else => return error.InvalidSyntax, else => return error.InvalidSyntax,
} }
const next = try tokenizer.nextToken(); const next = try tokenizer.nextToken(allocator);
token = next; token = next;
switch (next.type) { switch (next.type) {
.colon => { .colon => {
token = try tokenizer.nextToken(); token = try tokenizer.nextToken(allocator);
continue :flag token.type; continue :flag token.type;
}, },
else => continue :flag next.type, else => continue :flag next.type,
// else => return error.InvalidSyntax,
} }
}, },
.object_begin => { .object_begin => {
defer tokenizer.skipWhitespace();
if (query.slice().len < 1) { if (query.slice().len < 1) {
const ptr = try query.addOne(); const ptr = try query.addOne();
ptr.* = root; ptr.* = root;
@ -368,14 +318,14 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
.array => |slice| { .array => |slice| {
self.index.set(parent_idx, .{ .array = ArraySlice{ self.index.set(parent_idx, .{ .array = ArraySlice{
.len = slice.len + 1, .len = slice.len + 1,
.start = if (slice.len == 0) idx_ptr.* else slice.start, .tip = if (slice.len == 0) idx_ptr.* else slice.tip,
} }); } });
}, },
else => {}, else => {},
} }
} }
const next = try tokenizer.nextToken(); const next = try tokenizer.nextToken(allocator);
token = next; token = next;
switch (next.type) { switch (next.type) {
.string => continue :flag .property, .string => continue :flag .property,
@ -384,14 +334,13 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
} }
}, },
.object_end, .array_end => { .object_end, .array_end => {
tokenizer.skipWhitespace();
if (query.pop() == null) if (query.pop() == null)
return error.InvalidSyntax; // double close return error.InvalidSyntax; // double close
if (query.slice().len == 0) if (query.slice().len == 0)
return root; return root;
const next = try tokenizer.nextToken(); const next = try tokenizer.nextToken(allocator);
token = next; token = next;
switch (next.type) { switch (next.type) {
@ -408,7 +357,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
ptr.* = root; ptr.* = root;
self.index.set(root, .{ .array = ArraySlice{ self.index.set(root, .{ .array = ArraySlice{
.len = 0, .len = 0,
.start = 1, .tip = 1,
} }); } });
} else { } else {
// order matters // order matters
@ -419,21 +368,21 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
idx_ptr.* = try self.addEmpty(allocator); idx_ptr.* = try self.addEmpty(allocator);
self.index.set(idx_ptr.*, .{ .array = ArraySlice{ self.index.set(idx_ptr.*, .{ .array = ArraySlice{
.len = 0, .len = 0,
.start = idx_ptr.* + 1, .tip = idx_ptr.* + 1,
} }); } });
switch (self.index.get(parent_idx)) { switch (self.index.get(parent_idx)) {
.array => |slice| { .array => |slice| {
self.index.set(parent_idx, .{ .array = ArraySlice{ self.index.set(parent_idx, .{ .array = ArraySlice{
.len = slice.len + 1, .len = slice.len + 1,
.start = if (slice.len == 0) idx_ptr.* else slice.start, .tip = if (slice.len == 0) idx_ptr.* else slice.tip,
} }); } });
}, },
else => {}, else => {},
} }
} }
const next = try tokenizer.nextToken(); const next = try tokenizer.nextToken(allocator);
token = next; token = next;
switch (next.type) { switch (next.type) {
.property => return error.InvalidSyntax, .property => return error.InvalidSyntax,
@ -455,16 +404,13 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
.array => |slice| { .array => |slice| {
self.index.set(parent_idx, .{ .array = ArraySlice{ self.index.set(parent_idx, .{ .array = ArraySlice{
.len = slice.len + 1, .len = slice.len + 1,
.start = if (slice.len == 0) idx else slice.start, .tip = if (slice.len == 0) idx else slice.tip,
} }); } });
}, },
else => {}, else => {},
} }
const next = tokenizer.nextToken() catch |err| switch (err) { const next = try tokenizer.nextToken(allocator);
error.InvalidSyntax => return err,
else => return root,
};
token = next; token = next;
switch (next.type) { switch (next.type) {
.comma => continue :flag .comma, .comma => continue :flag .comma,
@ -473,9 +419,6 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
} }
}, },
.string => { .string => {
defer tokenizer.skipWhitespace();
errdefer allocator.free(token.value.?.string);
if (query.len == 0) { if (query.len == 0) {
// root // root
_ = try self.addString(allocator, token.value.?.string); _ = try self.addString(allocator, token.value.?.string);
@ -487,7 +430,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
const parent_idx = query.get(query.len - 1); const parent_idx = query.get(query.len - 1);
const next = try tokenizer.nextToken(); const next = try tokenizer.nextToken(allocator);
switch (next.type) { switch (next.type) {
.colon => { .colon => {
continue :flag .property; continue :flag .property;
@ -499,7 +442,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
.array => |slice| { .array => |slice| {
self.index.set(parent_idx, .{ .array = ArraySlice{ self.index.set(parent_idx, .{ .array = ArraySlice{
.len = slice.len + 1, .len = slice.len + 1,
.start = if (slice.len == 0) idx else slice.start, .tip = if (slice.len == 0) idx else slice.tip,
} }); } });
}, },
else => {}, else => {},
@ -511,8 +454,6 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
} }
}, },
.number => { .number => {
defer tokenizer.skipWhitespace();
if (query.len == 0) { if (query.len == 0) {
// root // root
_ = try self.addNumber(allocator, token.value.?.number); _ = try self.addNumber(allocator, token.value.?.number);
@ -526,13 +467,13 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
.array => |slice| { .array => |slice| {
self.index.set(parent_idx, .{ .array = ArraySlice{ self.index.set(parent_idx, .{ .array = ArraySlice{
.len = slice.len + 1, .len = slice.len + 1,
.start = if (slice.len == 0) idx else slice.start, .tip = if (slice.len == 0) idx else slice.tip,
} }); } });
}, },
else => {}, else => {},
} }
const next = try tokenizer.nextToken(); const next = try tokenizer.nextToken(allocator);
token = next; token = next;
switch (next.type) { switch (next.type) {
.comma => continue :flag .comma, .comma => continue :flag .comma,
@ -542,7 +483,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
}, },
.comma => { .comma => {
if (!self.options.flags.allow_trailing_comma) { if (!self.options.flags.allow_trailing_comma) {
const next = try tokenizer.nextToken(); const next = try tokenizer.nextToken(allocator);
token = next; token = next;
switch (next.type) { switch (next.type) {
.object_end, .array_end => return error.TrailingComma, .object_end, .array_end => return error.TrailingComma,
@ -552,7 +493,6 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
} }
}, },
.null => { .null => {
defer tokenizer.skipWhitespace();
const idx = try self.addNull(allocator); const idx = try self.addNull(allocator);
if (query.len == 0) { if (query.len == 0) {
@ -565,12 +505,12 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
.array => |slice| { .array => |slice| {
self.index.set(parent_idx, .{ .array = ArraySlice{ self.index.set(parent_idx, .{ .array = ArraySlice{
.len = slice.len + 1, .len = slice.len + 1,
.start = if (slice.len == 0) idx else slice.start, .tip = if (slice.len == 0) idx else slice.tip,
} }); } });
}, },
else => {}, else => {},
} }
const next = tokenizer.nextToken() catch |err| switch (err) { const next = tokenizer.nextToken(allocator) catch |err| switch (err) {
error.InvalidSyntax => return err, error.InvalidSyntax => return err,
else => return root, else => return root,
}; };
@ -603,14 +543,14 @@ test getValue {
; // 1: a, 2: b, 3: c, 4: d, 5: e, 6: f ; // 1: a, 2: b, 3: c, 4: d, 5: e, 6: f
var tokenizer: Tokenizer = try .init(allocator, text); var tokenizer: Tokenizer = try .init(allocator, text);
defer tokenizer.deinit(); defer tokenizer.deinit(allocator);
var self = try allocator.create(Self); var self = try allocator.create(Self);
self.* = Self.init; self.* = Self.init;
defer allocator.destroy(self); defer allocator.destroy(self);
defer self.deinit(allocator); defer self.deinit(allocator);
const idx: usize = try self.parse(&tokenizer); const idx: usize = try self.parse(allocator, &tokenizer);
var root = try self.getValue(allocator, idx); var root = try self.getValue(allocator, idx);
defer root.deinit(allocator); defer root.deinit(allocator);

View File

@ -5,9 +5,9 @@ const testing = std.testing;
const Language = @import("language.zig"); const Language = @import("language.zig");
const Tokenizer = @import("tokenizer.zig"); const Tokenizer = @import("tokenizer.zig");
test Language { const allocator = std.testing.allocator;
const allocator = std.testing.allocator;
test Language {
const text = const text =
\\ { \\ {
\\ "cute": true, \\ "cute": true,
@ -25,14 +25,14 @@ test Language {
; ;
var tokenizer: Tokenizer = try .init(allocator, text); var tokenizer: Tokenizer = try .init(allocator, text);
defer tokenizer.deinit(); defer tokenizer.deinit(allocator);
var self = try allocator.create(Language); var self = try allocator.create(Language);
defer allocator.destroy(self); defer allocator.destroy(self);
self.* = Language.init; self.* = Language.init;
defer self.deinit(allocator); defer self.deinit(allocator);
const idx: usize = try self.parse(&tokenizer); const idx: usize = try self.parse(allocator, &tokenizer);
var root = try self.getValue(allocator, idx); var root = try self.getValue(allocator, idx);
defer root.deinit(allocator); defer root.deinit(allocator);
@ -50,18 +50,18 @@ test {
fn expectPass(comptime path: []const u8) !void { fn expectPass(comptime path: []const u8) !void {
const file = @embedFile("tests" ++ path); const file = @embedFile("tests" ++ path);
var tokenizer: Tokenizer = try .init(std.testing.allocator, file); var tokenizer: Tokenizer = try .init(allocator, file);
defer tokenizer.deinit(); defer tokenizer.deinit(allocator);
var self = try std.testing.allocator.create(Language); var self = try allocator.create(Language);
self.* = Language.init; self.* = Language.init;
defer std.testing.allocator.destroy(self); defer allocator.destroy(self);
defer self.deinit(std.testing.allocator); defer self.deinit(allocator);
const idx: usize = try self.parse(&tokenizer); const idx: usize = try self.parse(allocator, &tokenizer);
var root = try self.getValue(std.testing.allocator, idx); var root = try self.getValue(allocator, idx);
defer root.deinit(std.testing.allocator); defer root.deinit(allocator);
std.debug.print("{}\n", .{root}); std.debug.print("{}\n", .{root});
} }
@ -69,22 +69,19 @@ fn expectPass(comptime path: []const u8) !void {
fn expectFail(comptime path: []const u8) !void { fn expectFail(comptime path: []const u8) !void {
const file = @embedFile("tests" ++ path); const file = @embedFile("tests" ++ path);
var tokenizer: Tokenizer = try .init(std.testing.allocator, file); var tokenizer: Tokenizer = try .init(allocator, file);
defer tokenizer.deinit(); defer tokenizer.deinit(allocator);
var self = try std.testing.allocator.create(Language); var self = try allocator.create(Language);
self.* = Language.init; self.* = Language.init;
defer std.testing.allocator.destroy(self); defer allocator.destroy(self);
defer self.deinit(std.testing.allocator); defer self.deinit(allocator);
const idx: usize = self.parse(&tokenizer) catch { const idx: usize = self.parse(allocator, &tokenizer) catch
return; return;
}; var root = self.getValue(allocator, idx) catch
var root = self.getValue(std.testing.allocator, idx) catch {
return; return;
}; defer root.deinit(allocator);
defer root.deinit(std.testing.allocator);
} }
// zig fmt: off // zig fmt: off

View File

@ -34,11 +34,7 @@ pub const TokenType = enum(u8) {
pub const Token = struct { pub const Token = struct {
type: TokenType, type: TokenType,
value: ?union { value: ?union { number: f64, string: []const u8, symbol: u8 },
number: f64,
string: []const u8,
symbol: u8,
},
start: usize, start: usize,
end: usize, end: usize,
}; };
@ -49,10 +45,9 @@ text: []const u8,
max_position: usize, max_position: usize,
stack: []usize, stack: []usize,
frame: usize, frame: usize,
allocator: std.mem.Allocator,
/// Initialize a new tokenizer /// Initialize a new tokenizer
pub fn init(allocator: std.mem.Allocator, text: []const u8) std.mem.Allocator.Error!Self { pub fn init(allocator: std.mem.Allocator, text: []const u8) mem.Allocator.Error!Self {
const stack = try allocator.alloc(usize, 0x100); const stack = try allocator.alloc(usize, 0x100);
errdefer allocator.free(stack); errdefer allocator.free(stack);
@memset(stack, 0); @memset(stack, 0);
@ -61,13 +56,12 @@ pub fn init(allocator: std.mem.Allocator, text: []const u8) std.mem.Allocator.Er
.max_position = 0, .max_position = 0,
.stack = stack, .stack = stack,
.frame = 0, .frame = 0,
.allocator = allocator,
}; };
} }
/// Clean up resources /// Clean up resources
pub fn deinit(self: *Self) void { pub fn deinit(self: *Self, allocator: mem.Allocator) void {
self.allocator.free(self.stack); allocator.free(self.stack);
} }
// ========== Core Parsing Functions ========== // ========== Core Parsing Functions ==========
@ -82,13 +76,13 @@ fn advance(self: *Self, delta: usize) void {
self.max_position = self.stack[self.frame]; self.max_position = self.stack[self.frame];
} }
fn pushFrame(self: *Self) Error!usize { fn pushFrame(self: *Self, allocator: mem.Allocator) Error!usize {
self.frame += 1; self.frame += 1;
if (self.frame == self.stack.len) { if (self.frame == self.stack.len) {
const new_stack = try self.allocator.alloc(usize, self.stack.len * 2); const new_stack = try allocator.alloc(usize, self.stack.len * 2);
@memset(new_stack, 0); @memset(new_stack, 0);
@memcpy(new_stack, self.stack); @memcpy(new_stack, self.stack);
self.allocator.free(self.stack); allocator.free(self.stack);
self.stack = new_stack; self.stack = new_stack;
} }
if (self.frame > self.text.len) if (self.frame > self.text.len)
@ -209,10 +203,10 @@ pub fn skipWhitespace(self: *Self) void {
} }
/// Parse a number token /// Parse a number token
pub fn nextNumber(self: *Self) Error!Token { pub fn nextNumber(self: *Self, allocator: mem.Allocator) Error!Token {
self.skipWhitespace(); self.skipWhitespace();
const start = try self.pushFrame(); const start = try self.pushFrame(allocator);
errdefer self.popFrame(); errdefer self.popFrame();
self.matchChar('-') orelse {}; // this may not fail self.matchChar('-') orelse {}; // this may not fail
@ -262,14 +256,14 @@ pub fn nextNumber(self: *Self) Error!Token {
} }
/// Parse an identifier token /// Parse an identifier token
pub fn nextIdentifier(self: *Self) Error!Token { pub fn nextIdentifier(self: *Self, allocator: mem.Allocator) Error!Token {
self.skipWhitespace(); self.skipWhitespace();
const start = try self.pushFrame(); const start = try self.pushFrame(allocator);
errdefer self.popFrame(); errdefer self.popFrame();
var buffer = try self.allocator.alloc(u8, 0x100); var buffer = try allocator.alloc(u8, 0x100);
defer self.allocator.free(buffer); defer allocator.free(buffer);
self.matchCharPredicate(std.ascii.isAlphabetic) orelse self.matchCharPredicate(std.ascii.isAlphabetic) orelse
return error.InvalidSyntax; return error.InvalidSyntax;
@ -319,10 +313,10 @@ pub fn nextIdentifier(self: *Self) Error!Token {
/// Get the next token from the input /// Get the next token from the input
/// WARNING: this function eats whitespaces /// WARNING: this function eats whitespaces
pub fn nextToken(self: *Self) Error!Token { pub fn nextToken(self: *Self, allocator: mem.Allocator) Error!Token {
self.skipWhitespace(); self.skipWhitespace();
const start = try self.pushFrame(); const start = try self.pushFrame(allocator);
errdefer self.popFrame(); errdefer self.popFrame();
// Fall back to single character symbol // Fall back to single character symbol
@ -350,19 +344,19 @@ pub fn nextToken(self: *Self) Error!Token {
':' => .colon, ':' => .colon,
'"' => { '"' => {
self.rollback(); self.rollback();
const string = try self.nextString(); const string = try self.nextString(allocator);
errdefer self.allocator.free(string); errdefer allocator.free(string);
return self.commit(string); return self.commit(string);
}, },
else => { else => {
self.rollback(); self.rollback();
// Try different token types in order of precedence // Try different token types in order of precedence
if (std.ascii.isDigit(c) or c == '-') { if (std.ascii.isDigit(c) or c == '-') {
return self.commit(self.nextNumber()); return self.commit(self.nextNumber(allocator));
} }
if (std.ascii.isAlphabetic(c)) { if (std.ascii.isAlphabetic(c)) {
return self.commit(self.nextIdentifier()); return self.commit(self.nextIdentifier(allocator));
} }
return error.InvalidSyntax; return error.InvalidSyntax;
@ -377,15 +371,15 @@ pub fn nextToken(self: *Self) Error!Token {
}); });
} }
pub fn nextString(self: *Self) Error!Token { pub fn nextString(self: *Self, allocator: mem.Allocator) Error!Token {
self.skipWhitespace(); self.skipWhitespace();
const start = try self.pushFrame(); const start = try self.pushFrame(allocator);
errdefer self.popFrame(); errdefer self.popFrame();
self.matchChar('"') orelse unreachable; self.matchChar('"') orelse unreachable;
var buffer: std.ArrayList(u8) = .init(self.allocator); var buffer: std.ArrayList(u8) = .init(allocator);
defer buffer.deinit(); defer buffer.deinit();
loop: while (!self.endOfInput()) { loop: while (!self.endOfInput()) {
@ -449,6 +443,7 @@ pub fn nextString(self: *Self) Error!Token {
pub const Iterator = struct { pub const Iterator = struct {
tokenizer: *Self, tokenizer: *Self,
allocator: mem.Allocator,
pub fn next(it: *Iterator) ?Token { pub fn next(it: *Iterator) ?Token {
defer it.tokenizer.skipWhitespace(); defer it.tokenizer.skipWhitespace();
@ -457,7 +452,7 @@ pub const Iterator = struct {
if (it.tokenizer.endOfInput()) { if (it.tokenizer.endOfInput()) {
return null; return null;
} }
return it.tokenizer.nextToken() catch |err| switch (err) { return it.tokenizer.nextToken(it.allocator) catch |err| switch (err) {
error.InvalidSyntax => unreachable, error.InvalidSyntax => unreachable,
else => { else => {
return null; return null;
@ -473,16 +468,13 @@ pub const Iterator = struct {
}; };
/// iterator /// iterator
pub fn iterator(self: *Self) Iterator { pub fn iterator(self: *Self, allocator: mem.Allocator) Iterator {
return Iterator{ return .{ .tokenizer = self, .allocator = allocator };
.tokenizer = self,
};
} }
pub fn stringToUtf8(bytes: []u8) ![]u8 { pub fn stringToUtf8(bytes: []u8) ![]u8 {
const code_point = std.fmt.parseInt(u21, bytes, 16) catch { const code_point = std.fmt.parseInt(u21, bytes, 16) catch
return error.BadNumber; return error.BadNumber;
};
var buffer: [4]u8 = undefined; var buffer: [4]u8 = undefined;
var index: usize = 0; var index: usize = 0;
@ -553,9 +545,7 @@ pub fn skipWhitespaceSimd(text: []const u8) usize {
// Find first non-whitespace // Find first non-whitespace
const mask: std.meta.Int(.unsigned, ChunkSize) = @bitCast(anyws == FalseMask); const mask: std.meta.Int(.unsigned, ChunkSize) = @bitCast(anyws == FalseMask);
if (mask != 0) { if (mask != 0) return j + @ctz(mask);
return j + @ctz(mask);
}
} }
// Scalar processing for remaining bytes // Scalar processing for remaining bytes