diff --git a/language.zig b/language.zig
index 3826348..a3e3c0d 100644
--- a/language.zig
+++ b/language.zig
@@ -263,6 +263,7 @@ pub fn getValue(
 
 /// always returns 0 (root)
 pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usize {
+    const allow_comments = self.options.flags.allow_comments;
     tokenizer.skipWhitespace();
 
     if (tokenizer.endOfInput())
@@ -270,7 +271,7 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
 
     const root = try self.addEmpty(allocator);
 
-    var token = try tokenizer.nextToken(allocator);
+    var token = try tokenizer.nextToken(allocator, allow_comments);
 
     var query: std.BoundedArray(usize, self.options.max_depth) = try .init(0);
 
@@ -298,11 +299,11 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
                 else => return error.InvalidSyntax,
             }
 
-            const next = try tokenizer.nextToken(allocator);
+            const next = try tokenizer.nextToken(allocator, allow_comments);
             token = next;
             switch (next.type) {
                 .colon => {
-                    token = try tokenizer.nextToken(allocator);
+                    token = try tokenizer.nextToken(allocator, allow_comments);
                     continue :flag token.type;
                 },
                 else => continue :flag next.type,
@@ -339,7 +340,7 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
                 }
             }
 
-            const next = try tokenizer.nextToken(allocator);
+            const next = try tokenizer.nextToken(allocator, allow_comments);
             token = next;
             switch (next.type) {
                 .string => continue :flag .property,
@@ -354,7 +355,7 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
             if (query.slice().len == 0)
                 return root;
 
-            const next = try tokenizer.nextToken(allocator);
+            const next = try tokenizer.nextToken(allocator, allow_comments);
             token = next;
 
             switch (next.type) {
@@ -396,7 +397,7 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
                 }
             }
 
-            const next = try tokenizer.nextToken(allocator);
+            const next = try tokenizer.nextToken(allocator, allow_comments);
             token = next;
             switch (next.type) {
                 .property => return error.InvalidSyntax,
@@ -423,7 +424,7 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
                 else => {},
             }
 
-            const next = try tokenizer.nextToken(allocator);
+            const next = try tokenizer.nextToken(allocator, allow_comments);
             token = next;
             switch (next.type) {
                 .comma => continue :flag .comma,
@@ -443,7 +444,7 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
 
             const parent_idx = query.get(query.len - 1);
 
-            const next = try tokenizer.nextToken(allocator);
+            const next = try tokenizer.nextToken(allocator, allow_comments);
             switch (next.type) {
                 .colon => {
                     continue :flag .property;
@@ -498,7 +499,7 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
                 else => {},
             }
 
-            const next = try tokenizer.nextToken(allocator);
+            const next = try tokenizer.nextToken(allocator, allow_comments);
             token = next;
             switch (next.type) {
                 .comma => continue :flag .comma,
@@ -508,7 +509,7 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
         },
         .comma => {
             if (!self.options.flags.allow_trailing_comma) {
-                const next = try tokenizer.nextToken(allocator);
+                const next = try tokenizer.nextToken(allocator, allow_comments);
                 token = next;
                 switch (next.type) {
                     .object_end, .array_end => return error.TrailingComma,
@@ -535,7 +536,7 @@ pub fn parse(self: *Self, allocator: mem.Allocator, tokenizer: *Tokenizer) !usiz
             },
             else => {},
         }
-        const next = tokenizer.nextToken(allocator) catch |err| switch (err) {
+        const next = tokenizer.nextToken(allocator, allow_comments) catch |err| switch (err) {
             error.InvalidSyntax => return err,
             else => return root,
         };
@@ -556,7 +557,7 @@ test getValue {
     const allocator = std.testing.allocator;
 
     const text =
-        \\{
+        \\{ // epic comment
         \\    "a":"A",
         \\    "b":"B",
         \\    "c": {
@@ -575,6 +576,8 @@ test getValue {
     defer allocator.destroy(self);
     defer self.deinit(allocator);
 
+    self.options.flags.allow_comments = true;
+
     const idx: usize = try self.parse(allocator, &tokenizer);
 
     var root = try self.getValue(allocator, idx);
diff --git a/reflection.zig b/reflection.zig
index 94841d4..f2fb5af 100644
--- a/reflection.zig
+++ b/reflection.zig
@@ -149,7 +149,21 @@ pub fn reflectT(self: *Self, comptime T: type, allocator: mem.Allocator, idx: us
                 return r;
             },
             .pointer => |ptrInfo| switch (ptrInfo.size) {
-                .slice => {},
+                .slice => {
+                    var r: T = try allocator.alloc(ptrInfo.child, slice.len);
+
+                    if (ptrInfo.sentinel_ptr) |some| {
+                        const sentinel = @as(*align(1) const ptrInfo.child, @ptrCast(some)).*;
+                        r[slice.len - 1] = sentinel;
+                    }
+
+                    for (0..slice.len) |i| {
+                        // weird hack to populate the string
+                        const rptr = @constCast(r);
+                        rptr[i] = try self.reflectT(ptrInfo.child, allocator, slice.tip + i);
+                    }
+                    return r;
+                },
                 else => return error.TypeError,
             },
             else => return error.TypeError,
@@ -206,7 +220,8 @@ test reflectT {
         \\    "admin": true,
         \\    "flags": 0,
         \\    "union": ":D",
-        \\    "enum": "world"
+        \\    "enum": "world",
+        \\    "many": [1,2,3]
         \\}
     ;
     var self = try allocator.create(Self);
@@ -229,10 +244,226 @@ test reflectT {
         flags: UserFlags,
         @"union": union { hi: bool, bye: f64, n128: []const u8 },
         @"enum": enum { hello, world },
+        many: []const u8,
     };
 
     const root = try self.reflectT(UserSchema, allocator, idx);
 
     std.debug.print("hello? {s}\n", .{@tagName(root.@"enum")});
     std.debug.print("friend? {s}\n", .{root.@"union".n128});
+    std.debug.print("many: {any}\n", .{root.many});
+    allocator.free(root.many);
+}
+
+/// Compile-time configuration for `stringify` and `prettyStringify`.
+pub const Options = struct {
+    /// Capacity, in bytes, of the buffer that receives the serialized text.
+    max_buffer_size: usize = 4096,
+    /// Feature switches; see `Ruleset`.
+    ruleset: Ruleset = .{},
+    /// Spaces written in front of every object member (needs `ruleset.pretty`).
+    indent: u8 = 0,
+};
+
+/// Feature switches carried by `Options`.
+pub const Ruleset = packed struct {
+    /// print 'packed struct' as a number
+    allow_bitfields: bool = false,
+    /// print 'enum' as a number
+    allow_data_cast: bool = false,
+    /// ignore comments
+    allow_comments: bool = false,
+    /// prettify the output
+    pretty: bool = false,
+
+    /// Strict preset: bitfields, enum casts and comments are all disabled.
+    pub const Pedantic = Ruleset{
+        .allow_bitfields = false,
+        .allow_data_cast = false,
+        .allow_comments = false,
+    };
+
+    /// Lenient preset: bitfields, enum casts and comments are all enabled.
+    pub const Chill = Ruleset{
+        .allow_bitfields = true,
+        .allow_data_cast = true,
+        .allow_comments = true,
+    };
+};
+
+/// Same as `stringify`, but with `ruleset.pretty` forced on.
+pub fn prettyStringify(raw: anytype, comptime options: Options) ![]const u8 {
+    return stringify(raw, comptime blk: {
+        var opts = options;
+        opts.ruleset.pretty = true;
+        break :blk opts;
+    });
+}
+
+/// Serialize `raw` to JSON text.
+///
+/// A type that declares a `stringify` function is serialized by calling that
+/// function; whatever it returns is used verbatim. String contents are written
+/// as-is, without escaping.
+///
+/// The returned slice points into a thread-local buffer of
+/// `options.max_buffer_size` bytes. It is only valid until the next call to
+/// `stringify` on the same thread: copy it to keep it, and do not call
+/// `stringify` from inside a custom `stringify` hook.
+///
+/// Errors: `error.TypeError` for values that cannot be serialized (packed
+/// structs without `allow_bitfields`, untagged unions, non-slice pointers) and
+/// `error.NoSpaceLeft` when the output does not fit into the buffer.
+pub fn stringify(raw: anytype, comptime options: Options) ![]const u8 {
+    if (options.indent > 0 and !options.ruleset.pretty)
+        @compileError("Indentation is only allowed when pretty is enabled");
+
+    if (comptime std.meta.hasFn(@TypeOf(raw), "stringify")) {
+        // If the type has a custom stringify function, use it.
+        return raw.stringify(options);
+    }
+
+    // The result has to outlive this call, so it must not live on the stack.
+    const Storage = struct {
+        threadlocal var buf: [options.max_buffer_size]u8 = undefined;
+    };
+    var stream = std.io.fixedBufferStream(&Storage.buf);
+    try writeValue(stream.writer(), raw, options);
+    return stream.getWritten();
+}
+
+/// Write `items` (a slice or a pointer to an array) to `writer` as a JSON array.
+fn writeElements(writer: anytype, items: anytype, comptime options: Options) !void {
+    try writer.writeByte('[');
+    for (items, 0..) |item, i| {
+        if (i > 0) {
+            try writer.writeByte(',');
+            if (options.ruleset.pretty)
+                try writer.writeByte(' ');
+        }
+        try writeValue(writer, item, options);
+    }
+    try writer.writeByte(']');
+}
+
+/// Write the JSON encoding of `value` to `writer`; recursive worker of `stringify`.
+fn writeValue(writer: anytype, value: anytype, comptime options: Options) !void {
+    const T = @TypeOf(value);
+
+    // A type-provided `stringify` hook overrides the built-in encoding.
+    if (comptime std.meta.hasFn(T, "stringify"))
+        return writer.writeAll(try value.stringify(options));
+
+    switch (@typeInfo(T)) {
+        .null => try writer.writeAll("null"),
+        .bool => try writer.writeAll(if (value) "true" else "false"),
+        .int, .comptime_int => try writer.print("{d}", .{value}),
+        // NOTE(review): `{d:.1}` keeps one decimal digit; confirm the rounding is intended.
+        .float, .comptime_float => try writer.print("{d:.1}", .{value}),
+        .optional => if (value) |inner|
+            try writeValue(writer, inner, options)
+        else
+            try writer.writeAll("null"),
+        .@"enum" => |enum_info| {
+            if (options.ruleset.allow_data_cast) {
+                const number: enum_info.tag_type = @intFromEnum(value);
+                try writer.print("{d}", .{number});
+            } else {
+                // Tag names are text, so they are quoted to stay valid JSON.
+                try writer.print("\"{s}\"", .{@tagName(value)});
+            }
+        },
+        .@"struct" => |struct_info| switch (struct_info.layout) {
+            .@"packed" => {
+                if (options.ruleset.allow_bitfields) {
+                    const bits: struct_info.backing_integer.? = @bitCast(value);
+                    try writer.print("{d}", .{bits});
+                } else {
+                    return error.TypeError;
+                }
+            },
+            .auto, .@"extern" => {
+                try writer.writeByte('{');
+                if (options.ruleset.pretty)
+                    try writer.writeByte('\n');
+                inline for (struct_info.fields, 0..) |field, i| {
+                    inline for (0..options.indent) |_|
+                        try writer.writeByte(' ');
+                    try writer.writeByte('"');
+                    try writer.writeAll(field.name);
+                    try writer.writeByte('"');
+                    try writer.writeAll(if (options.ruleset.pretty) ": " else ":");
+                    try writeValue(writer, @field(value, field.name), options);
+                    if (i < struct_info.fields.len - 1)
+                        try writer.writeByte(',');
+                    if (options.ruleset.pretty)
+                        try writer.writeByte('\n');
+                }
+                try writer.writeByte('}');
+            },
+        },
+        .array => try writeElements(writer, &value, options),
+        .pointer => |ptr_info| switch (ptr_info.size) {
+            .slice => if (ptr_info.child != u8) {
+                try writeElements(writer, value, options);
+            } else if (ptr_info.is_const) {
+                try writer.print("\"{s}\"", .{value});
+            } else {
+                // NOTE(review): mutable `[]u8` is formatted with `{b}`, not `{s}`; confirm.
+                try writer.print("\"{b}\"", .{value});
+            },
+            else => return error.TypeError,
+        },
+        .@"union" => |union_info| if (union_info.tag_type == null) {
+            return error.TypeError;
+        } else switch (value) {
+            // The active payload is serialized; the tag itself is not written.
+            inline else => |payload| try writeValue(writer, payload, options),
+        },
+        else => |t| @compileError("Error on " ++ @tagName(t)),
+    }
+}
+
+test stringify {
+    const UserFlags = packed struct {
+        is_cool: bool = false,
+        is_friendly: bool = false,
+    };
+    const UserSchema = struct {
+        age: f64,
+        name: []const u8,
+        admin: bool,
+        flags: UserFlags,
+        @"union": union {
+            hi: bool,
+            bye: f64,
+            n128: []const u8,
+            pub fn stringify(_: anytype, comptime options: Options) ![]const u8 {
+                _ = options; // unused
+                return "anything here";
+            }
+        },
+        @"enum": enum { hello, world },
+        many: []const u8,
+    };
+    const user = UserSchema{
+        .age = 15.0,
+        .name = "Yuzu",
+        .admin = true,
+        .flags = UserFlags{ .is_cool = true, .is_friendly = false },
+        .@"union" = .{ .hi = true },
+        .@"enum" = .world,
+        .many = "hello",
+    };
+
+    const options = Options{
+        .max_buffer_size = 1024,
+        .ruleset = Ruleset.Chill,
+        .indent = 2,
+    };
+
+    const str = try prettyStringify(user, options);
+    std.debug.print("Stringified user: {s}\n", .{str});
+    try std.testing.expect(std.mem.indexOf(u8, str, "\"name\": \"Yuzu\"") != null);
+    try std.testing.expect(std.mem.indexOf(u8, str, "\"flags\": 1,") != null);
 }
diff --git a/tokenizer.zig b/tokenizer.zig
index 84ad6bd..f4a4d48 100644
--- a/tokenizer.zig
+++ b/tokenizer.zig
@@ -320,7 +320,7 @@ pub fn nextIdentifier(self: *Self, allocator: mem.Allocator) Error!Token {
 
 /// Get the next token from the input
 /// WARNING: this function eats whitespaces
-pub fn nextToken(self: *Self, allocator: mem.Allocator) Error!Token {
+pub fn nextToken(self: *Self, allocator: mem.Allocator, allow_comments: bool) Error!Token {
     self.skipWhitespace();
 
     const start = try self.pushFrame(allocator);
@@ -334,6 +334,36 @@ pub fn nextToken(self: *Self, allocator: mem.Allocator) Error!Token {
     };
 
     const symbol_t: TokenType = switch (c) {
+        '/' => {
+            if (allow_comments and self.matchChar('/') != null) {
+                // Single line comment
+                while (true) {
+                    const next_c = self.anyChar() orelse return .{
+                        .type = .eof,
+                        .value = null,
+                        .start = start,
+                    };
+                    if (next_c == '\n') break;
+                }
+                self.skipWhitespace();
+                return self.nextToken(allocator, allow_comments);
+            } else if (allow_comments and self.matchChar('*') != null) {
+                // Multi-line comment
+                while (true) {
+                    if (self.endOfInput())
+                        return error.InvalidSyntax; // unterminated comment
+                    const next_c = self.anyChar() orelse return .{
+                        .type = .eof,
+                        .value = null,
+                        .start = start,
+                    };
+                    if (next_c == '*' and self.matchChar('/') != null) break;
+                }
+                self.skipWhitespace();
+                return self.nextToken(allocator, allow_comments);
+            }
+            return error.InvalidSyntax; // not a comment
+        },
         '{' => .object_begin,
         '}' => .object_end,
         '[' => .array_begin,
@@ -443,37 +473,6 @@ pub fn nextString(self: *Self, allocator: mem.Allocator) Error!Token {
     return error.InvalidSyntax;
 }
 
-pub const Iterator = struct {
-    tokenizer: *Self,
-    allocator: mem.Allocator,
-
-    pub fn next(it: *Iterator) ?Token {
-        defer it.tokenizer.skipWhitespace();
-        errdefer it.tokenizer.deinit();
-
-        if (it.tokenizer.endOfInput()) {
-            return null;
-        }
-        return it.tokenizer.nextToken(it.allocator) catch |err| switch (err) {
-            error.InvalidSyntax => unreachable,
-            else => {
-                return null;
-            },
-        };
-    }
-
-    pub fn reset(it: *Iterator) void {
-        it.tokenizer.position = 0;
-        it.tokenizer.max_position = 0;
-        it.tokenizer.frame = 0;
-    }
-};
-
-/// iterator
-pub fn iterator(self: *Self, allocator: mem.Allocator) Iterator {
-    return .{ .tokenizer = self, .allocator = allocator };
-}
-
 pub fn stringToUtf8(bytes: []u8) ![]u8 {
     const code_point = std.fmt.parseInt(u21, bytes, 16) catch
         return error.BadNumber;