From aa890ea209ddc49856eb76fc03e793e9768a54e0 Mon Sep 17 00:00:00 2001 From: yuzu Date: Sun, 25 May 2025 18:48:35 -0500 Subject: [PATCH] idk --- language.zig | 53 ++++++++++++++++++++++++++++++++++++++------------- strings.zig | 35 ++++++++++++++++++++++++++++++---- tokenizer.zig | 21 +++----------------- 3 files changed, 74 insertions(+), 35 deletions(-) diff --git a/language.zig b/language.zig index d07db40..89ba736 100644 --- a/language.zig +++ b/language.zig @@ -7,13 +7,13 @@ const StringPool = @import("strings.zig"); const StringIndex = StringPool.StringIndex; const assert = std.debug.assert; -// data structures -const Object = std.StringArrayHashMapUnmanaged(JsonInput); - const Self = @This(); pub const Error = enum { TrailingComma, + MissingKey, + MissingValue, + UnexpectedToken, }; pub const JsonType = enum { @@ -35,12 +35,16 @@ pub const JsonValue = union(JsonType) { }; pub const JsonInput = union(JsonType) { + + // data structures + const Object = std.StringArrayHashMapUnmanaged(JsonInput); + null: void, bool: bool, number: f64, string: []const u8, array: []JsonInput, - object: std.StringArrayHashMapUnmanaged(JsonInput), + object: Object, pub fn deinit(self: JsonInput, allocator: mem.Allocator) void { switch (self) { @@ -218,12 +222,31 @@ fn getObject(self: *Self, allocator: mem.Allocator, index: usize) !struct { const keys = try allocator.alloc(StringIndex, entry.object.len); const values = try allocator.alloc(usize, entry.object.len); - for (0..entry.object.len) |i| { - const slice = StringIndex.slice(@enumFromInt(pidx), &self.property_index); - keys[i] = @enumFromInt(pidx); - values[i] = vidx; - pidx += slice.len + 1; - vidx += 1; + var i: usize = 0; + + flag: switch (self.index.get(vidx)) { + .array => { + vidx += 1; + continue :flag self.index.get(vidx); + }, + .object => |obj| { + var iter = StringIndex.iterator( + @enumFromInt(obj.property_idx), + self.property_index.string_bytes.items, + ); + const slice = iter.next(); + keys[i] = @enumFromInt(obj.property_idx); + values[i] = vidx; + i += 1; + pidx += slice.len + 1; + vidx += 1; + continue :flag self.index.get(vidx); + }, + else => { + // pidx += slice.len + 1; + vidx += 1; + continue :flag self.index.get(vidx); + }, } return .{ keys, values }; @@ -341,7 +364,7 @@ fn getValue( return .{ .array = out[0..arr.len] }; }, .object => |obj| { - var map: std.StringArrayHashMapUnmanaged(JsonInput) = .empty; + var map: JsonInput.Object = .empty; var p = obj.property_idx; var v = obj.value_idx; for (0..obj.len) |_| { @@ -369,8 +392,12 @@ test getValue { const json = \\ { - \\ "array":[{"cute":true}, - \\ {"funny":false}] + \\ "cute": true, + \\ "metadata": { + \\ "post": [1,2,3], + \\ "a": 2 + \\ }, + \\ "b": 3 \\ } ; diff --git a/strings.zig b/strings.zig index 8fb49ae..5642229 100644 --- a/strings.zig +++ b/strings.zig @@ -1,6 +1,5 @@ /// credits to Andrew Kelley /// strings.zig - const std = @import("std"); const mem = std.mem; const assert = std.debug.assert; @@ -26,9 +25,9 @@ pub fn deinit(self: *Self, allocator: Allocator) void { pub const StringIndex = enum(u32) { _, - const Table = std.HashMapUnmanaged(StringIndex, void, TableContext, max_load_percent); + pub const Table = std.HashMapUnmanaged(StringIndex, void, TableContext, max_load_percent); - const TableContext = struct { + pub const TableContext = struct { bytes: []const u8, pub fn eql(_: @This(), a: StringIndex, b: StringIndex) bool { @@ -40,7 +39,7 @@ pub const StringIndex = enum(u32) { } }; - const TableIndexAdapter = struct { + pub const TableIndexAdapter = struct { bytes: []const u8, pub fn eql(ctx: @This(), a: []const u8, b: StringIndex) bool { @@ -57,6 +56,34 @@ pub const StringIndex = enum(u32) { const start_slice = state.string_bytes.items[@intFromEnum(index)..]; return start_slice[0..mem.indexOfScalar(u8, start_slice, 0).? :0]; } + + pub fn iterator(start: StringIndex, bytes: []const u8) Iterator { + return .{ + .bytes = bytes, + .pos = @intFromEnum(start), + }; + } + + pub const Iterator = struct { + bytes: []const u8, + pos: usize = 0, + + pub fn next(self: *Iterator) ?[:0]const u8 { + if (self.pos >= self.bytes.len) return null; + + // Find the next null terminator starting from current position + const end_pos = mem.indexOfScalarPos(u8, self.bytes, self.pos, 0) orelse { + // No null found: return remaining bytes (invalid, but handle gracefully) + const s = self.bytes[self.pos..]; + self.pos = self.bytes.len; + return s; + }; + + const s = self.bytes[self.pos..end_pos :0]; + self.pos = end_pos + 1; // Skip the null terminator + return s; + } + }; }; pub fn add(state: *Self, allocator: Allocator, bytes: []const u8) !StringIndex { diff --git a/tokenizer.zig b/tokenizer.zig index 22df86b..23f60ec 100644 --- a/tokenizer.zig +++ b/tokenizer.zig @@ -8,22 +8,10 @@ pub const Error = error{ OutOfMemory, /// eg: bad escaping UnexpectedCharacter, - /// eg: got the wrong token type, check TokenType - UnexpectedToken, /// eg: std.fmt.parseFloat failed BadNumber, /// fba error BufferTooSmall, - /// eg: missing comma - CommaExpected, - /// eg: missing colon - ColonExpected, - /// eg: missing object key - KeyExpected, - /// eg: error while writing - PrintError, - /// eg: trailing comma in object - TrailingComma, }; pub const TokenType = enum(u8) { @@ -269,9 +257,8 @@ pub fn nextIdentifier(self: *Self) Error!Token { var buffer = try self.allocator.alloc(u8, 0x100); defer self.allocator.free(buffer); - self.matchCharPredicate(std.ascii.isAlphabetic) orelse { - return error.UnexpectedToken; - }; + self.matchCharPredicate(std.ascii.isAlphabetic) orelse + return error.InvalidSyntax; buffer[0] = self.lastChar(); @@ -380,9 +367,7 @@ pub fn nextString(self: *Self) Error!Token { self.skipWhitespace(); - self.matchChar('"') orelse { - return error.UnexpectedToken; - }; + self.matchChar('"') orelse unreachable; var buffer: std.ArrayList(u8) = .init(self.allocator);