Compare commits
2 Commits
9d8a5d067e
...
da94cc608a
Author | SHA1 | Date | |
---|---|---|---|
da94cc608a | |||
ad766bc1e8 |
10
README.md
10
README.md
@ -21,6 +21,14 @@
|
||||
- This parser is based on a [string pool](https://en.wikipedia.org/wiki/String_interning) written by Andrew Kelley, so it must be good; [watch the video](https://www.hytradboi.com/2025/05c72e39-c07e-41bc-ac40-85e8308f2917-programming-without-pointers)
|
||||
- Probably not, but less featureful (or simply less bad) software is usually better than an over-engineered mess
|
||||
* (a) Ok but all of your reasons are dumb
|
||||
### Sic respondeo:
|
||||
- I'll answer this later
|
||||
## Behaviour
|
||||
- Null characters (U+0000) are forbidden and will be ignored by the parser
|
||||
- All control characters except DEL are forbidden
|
||||
- It passes most of the JSON test suite: 253 tests as of 2025-05-29, at the time of writing
|
||||
- It has no reflection as of right now; you must implement your own reflection-based `parse` function
|
||||
- It uses null-terminated strings; this is clearly not the best approach, but it's efficient
|
||||
|
||||
## Sic respondeo:
|
||||
The Zig Discord server is plagued with modern scum; of course, modern scum will dismiss all of my claims or label them as "dumb" or "you're using it wrong!". Have none of these individuals considered that Zig is overcomplicated? There is a reason why Andrew Kelley himself detached from these communities [and has spoken on multiple occasions](https://andrewkelley.me/post/goodbye-twitter-reddit.html) about the "shitification" of software communities: it's like turning a good community of like-minded programmers into soydev shills. One good thing that he did was shutting down the r/zig subreddit.
|
||||
On a different note, I defend simplicity and minimalism. I am not saying that *every* Zig developer who thinks differently is scum; I am just saying that if you cannot think beyond your own viewpoint you will get nowhere. Lastly, if your software is not straightforward and simple to use, then why criticise someone for not knowing how to use it? Making software complicated is only reasonable as long as user-friendliness is not traded away for performance or minimalism.
|
||||
|
100
language.zig
100
language.zig
@ -48,14 +48,16 @@ pub const JsonInput = union(JsonType) {
|
||||
|
||||
pub fn deinit(self: JsonInput, allocator: mem.Allocator) void {
|
||||
switch (self) {
|
||||
JsonInput.array => |array| {
|
||||
.array => |array| {
|
||||
for (array) |json_input|
|
||||
json_input.deinit(allocator);
|
||||
allocator.free(array);
|
||||
},
|
||||
.object => |*object| {
|
||||
var it = object.iterator();
|
||||
while (it.next()) |entry| entry.value_ptr.deinit(allocator);
|
||||
while (it.next()) |entry| {
|
||||
entry.value_ptr.deinit(allocator);
|
||||
}
|
||||
@constCast(object).deinit(allocator);
|
||||
},
|
||||
else => {},
|
||||
@ -98,8 +100,8 @@ pub const JsonInput = union(JsonType) {
|
||||
/// same as ObjectEntry but simpler
|
||||
/// start is the offset
|
||||
pub const ArraySlice = struct {
|
||||
start: usize,
|
||||
len: usize,
|
||||
start: usize,
|
||||
};
|
||||
|
||||
/// just += the properties and value indexes to get the next item
|
||||
@ -107,12 +109,19 @@ pub const ArraySlice = struct {
|
||||
/// it should be ordered
|
||||
pub const ObjectEntry = struct {
|
||||
len: usize,
|
||||
property_idx: usize,
|
||||
value_idx: usize,
|
||||
tip: usize,
|
||||
};
|
||||
|
||||
pub const PropertyEntry = struct {
|
||||
tip: StringIndex,
|
||||
};
|
||||
|
||||
pub const Flags = packed struct {
|
||||
allow_comments: bool = false,
|
||||
allow_trailing_comma: bool = false,
|
||||
enums_are_strings: bool = false,
|
||||
unions_are_strings: bool = false,
|
||||
packed_structs_are_ints: bool = false,
|
||||
};
|
||||
|
||||
pub const Options = struct {
|
||||
@ -122,8 +131,9 @@ pub const Options = struct {
|
||||
};
|
||||
|
||||
index: std.MultiArrayList(JsonValue) = .{},
|
||||
string_index: StringPool = .empty,
|
||||
property_index: StringPool = .empty,
|
||||
strings: StringPool = .empty,
|
||||
properties: StringPool = .empty,
|
||||
property_map: std.AutoArrayHashMapUnmanaged(usize, PropertyEntry) = .empty,
|
||||
|
||||
options: Options = .{},
|
||||
|
||||
@ -131,8 +141,9 @@ pub const init = Self{};
|
||||
|
||||
pub fn deinit(self: *Self, allocator: mem.Allocator) void {
|
||||
self.index.deinit(allocator);
|
||||
self.property_index.deinit(allocator);
|
||||
self.string_index.deinit(allocator);
|
||||
self.properties.deinit(allocator);
|
||||
self.strings.deinit(allocator);
|
||||
self.property_map.deinit(allocator);
|
||||
}
|
||||
|
||||
fn addNumber(self: *Self, allocator: mem.Allocator, number: f64) !usize {
|
||||
@ -143,13 +154,14 @@ fn addNumber(self: *Self, allocator: mem.Allocator, number: f64) !usize {
|
||||
}
|
||||
|
||||
fn addProperty(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize {
|
||||
const stridx = try self.property_index.add(allocator, bytes);
|
||||
const stridx = try self.properties.add(allocator, bytes);
|
||||
try self.index.ensureUnusedCapacity(allocator, 1);
|
||||
try self.property_map.ensureUnusedCapacity(allocator, 1);
|
||||
return @intFromEnum(stridx);
|
||||
}
|
||||
|
||||
fn addString(self: *Self, allocator: mem.Allocator, bytes: []const u8) !usize {
|
||||
const stridx = try self.string_index.add(allocator, bytes);
|
||||
const stridx = try self.strings.add(allocator, bytes);
|
||||
try self.index.ensureUnusedCapacity(allocator, 1);
|
||||
const idx = self.index.addOneAssumeCapacity();
|
||||
self.index.set(idx, .{ .string = stridx });
|
||||
@ -182,7 +194,7 @@ fn skipSlots(self: *Self, slot: usize) usize {
|
||||
switch (e) {
|
||||
.object => |obj| {
|
||||
var total: usize = 1;
|
||||
var v = obj.value_idx;
|
||||
var v = obj.tip;
|
||||
for (0..obj.len) |_| {
|
||||
const s = skipSlots(self, v);
|
||||
total += s;
|
||||
@ -204,25 +216,6 @@ fn skipSlots(self: *Self, slot: usize) usize {
|
||||
}
|
||||
}
|
||||
|
||||
fn skipNestedProps(self: *Self, pptr: *usize, slot: usize) void {
|
||||
const e = self.index.get(slot);
|
||||
if (e == .object) {
|
||||
var v = e.object.value_idx;
|
||||
// Skip each nested key and its deeper nested props
|
||||
for (0..e.object.len) |_| {
|
||||
// Skip this key
|
||||
const k: *StringIndex = @ptrCast(pptr);
|
||||
const slice = k.slice(&self.property_index);
|
||||
pptr.* += slice.len + 1;
|
||||
// Recurse into this property's value
|
||||
skipNestedProps(self, pptr, v);
|
||||
// Skip slots of the value in index array
|
||||
const s = skipSlots(self, v);
|
||||
v += s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn getValue(
|
||||
self: *Self,
|
||||
allocator: mem.Allocator,
|
||||
@ -235,7 +228,7 @@ pub fn getValue(
|
||||
.bool => |b| return .{ .bool = b },
|
||||
.number => |number| return .{ .number = number },
|
||||
.string => |string| {
|
||||
const sl = string.slice(&self.string_index);
|
||||
const sl = string.slice(&self.strings);
|
||||
return .{ .string = sl };
|
||||
},
|
||||
.array => |arr| {
|
||||
@ -250,23 +243,20 @@ pub fn getValue(
|
||||
},
|
||||
.object => |obj| {
|
||||
var map: JsonInput.Object = .empty;
|
||||
var p = obj.property_idx;
|
||||
var v = obj.value_idx;
|
||||
for (0..obj.len) |_| {
|
||||
// Extract key
|
||||
const k: StringIndex = @enumFromInt(p);
|
||||
const key_slice = k.slice(&self.property_index);
|
||||
// Extract and assign value
|
||||
const val = try self.getValue(allocator, v);
|
||||
errdefer map.deinit(allocator);
|
||||
var tip = obj.tip;
|
||||
|
||||
for (0..obj.len) |_|
|
||||
if (self.property_map.get(tip)) |pen| {
|
||||
const key_slice = pen.tip.slice(&self.properties);
|
||||
|
||||
const val = try self.getValue(allocator, tip);
|
||||
try map.put(allocator, key_slice, val);
|
||||
// Advance past this key
|
||||
p += key_slice.len + 1;
|
||||
// Skip nested property names of this value
|
||||
self.skipNestedProps(&p, v);
|
||||
// Advance past the value slots
|
||||
const s = self.skipSlots(v);
|
||||
v += s;
|
||||
}
|
||||
const s = self.skipSlots(tip);
|
||||
tip += s;
|
||||
} else {
|
||||
return error.MissingKey;
|
||||
};
|
||||
return .{ .object = map };
|
||||
},
|
||||
}
|
||||
@ -299,11 +289,13 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
||||
.object => |scope| {
|
||||
//std.debug.print("prop: {s} \n", .{token.value.?.string});
|
||||
const pidx = try self.addProperty(allocator, token.value.?.string);
|
||||
|
||||
self.property_map.putAssumeCapacity(scope.len + 1, .{ .tip = @enumFromInt(pidx) });
|
||||
allocator.free(token.value.?.string);
|
||||
|
||||
self.index.set(scope_idx, .{ .object = ObjectEntry{
|
||||
.len = scope.len + 1,
|
||||
.property_idx = if (scope.len == 0) pidx else scope.property_idx,
|
||||
.value_idx = scope.value_idx,
|
||||
.tip = scope.tip,
|
||||
} });
|
||||
},
|
||||
else => return error.InvalidSyntax,
|
||||
@ -328,8 +320,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
||||
ptr.* = root;
|
||||
self.index.set(root, .{ .object = ObjectEntry{
|
||||
.len = 0,
|
||||
.property_idx = 0,
|
||||
.value_idx = 1,
|
||||
.tip = 1,
|
||||
} });
|
||||
} else {
|
||||
//order
|
||||
@ -340,8 +331,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
||||
self.index.set(idx_ptr.*, .{
|
||||
.object = ObjectEntry{
|
||||
.len = 0,
|
||||
.property_idx = self.index.len, //self.property_index.string_bytes.items.len,
|
||||
.value_idx = self.index.len,
|
||||
.tip = self.index.len,
|
||||
},
|
||||
});
|
||||
switch (self.index.get(parent_idx)) {
|
||||
@ -451,6 +441,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
||||
if (query.len == 0) {
|
||||
// root
|
||||
const idx = try self.addString(allocator, token.value.?.string);
|
||||
allocator.free(token.value.?.string);
|
||||
self.index.set(root, .{ .string = @enumFromInt(idx) });
|
||||
return root;
|
||||
}
|
||||
@ -516,6 +507,7 @@ pub fn parse(self: *Self, tokenizer: *Tokenizer) !usize {
|
||||
token = next;
|
||||
switch (next.type) {
|
||||
.object_end, .array_end => return error.TrailingComma,
|
||||
.comma => return error.InvalidSyntax,
|
||||
else => continue :flag token.type,
|
||||
}
|
||||
}
|
||||
|
3
root.zig
Normal file
3
root.zig
Normal file
@ -0,0 +1,3 @@
|
||||
pub const Language = @import("language.zig");
|
||||
pub const Tokenizer = @import("tokenizer.zig");
|
||||
pub const StringPool = @import("strings.zig");
|
97
test.zig
97
test.zig
@ -16,7 +16,7 @@ test Language {
|
||||
\\ "a": 2,
|
||||
\\ "c": {
|
||||
\\ "d": 4,
|
||||
\\ "uwu": [[[[[1], [2]]]]],
|
||||
\\ "uwua": [[[[[1], [2]]]]],
|
||||
\\ "x": true
|
||||
\\ }
|
||||
\\ },
|
||||
@ -119,60 +119,59 @@ test { try expectPass("/y_number_real_pos_exponent.json"); }
|
||||
test { try expectPass("/y_number_simple_int.json"); }
|
||||
test { try expectPass("/y_number_simple_real.json"); }
|
||||
test { try expectPass("/y_object_basic.json"); }
|
||||
// maybe there is a better way to handle these
|
||||
// test { try expectPass("/y_object_duplicated_key_and_value.json"); }
|
||||
// test { try expectPass("/y_object_duplicated_key.json"); }
|
||||
test { try expectPass("/y_object_duplicated_key_and_value.json"); }
|
||||
test { try expectPass("/y_object_duplicated_key.json"); }
|
||||
test { try expectPass("/y_object_empty.json"); }
|
||||
test { try expectPass("/y_object_empty_key.json"); }
|
||||
// BIG ISSUE
|
||||
// test { try expectPass("/y_object_escaped_null_in_key.json"); }
|
||||
test { try expectPass("/y_object_escaped_null_in_key.json"); }
|
||||
test { try expectPass("/y_object_extreme_numbers.json"); }
|
||||
test { try expectPass("/y_object.json"); }
|
||||
//test { try expectPass("/y_object_long_strings.json"); }
|
||||
test { try expectPass("/y_object_long_strings.json"); }
|
||||
test { try expectPass("/y_object_simple.json"); }
|
||||
//test { try expectPass("/y_object_string_unicode.json"); }
|
||||
test { try expectPass("/y_object_string_unicode.json"); }
|
||||
test { try expectPass("/y_object_with_newlines.json"); }
|
||||
//test { try expectPass("/y_string_1_2_3_bytes_UTF-8_sequences.json"); }
|
||||
//test { try expectPass("/y_string_accepted_surrogate_pair.json"); }
|
||||
//test { try expectPass("/y_string_accepted_surrogate_pairs.json"); }
|
||||
//test { try expectPass("/y_string_allowed_escapes.json"); }
|
||||
//test { try expectPass("/y_string_backslash_and_u_escaped_zero.json"); }
|
||||
//test { try expectPass("/y_string_backslash_doublequotes.json"); }
|
||||
//test { try expectPass("/y_string_comments.json"); }
|
||||
//test { try expectPass("/y_string_double_escape_a.json"); }
|
||||
//test { try expectPass("/y_string_double_escape_n.json"); }
|
||||
//test { try expectPass("/y_string_escaped_control_character.json"); }
|
||||
//test { try expectPass("/y_string_escaped_noncharacter.json"); }
|
||||
//test { try expectPass("/y_string_in_array.json"); }
|
||||
//test { try expectPass("/y_string_in_array_with_leading_space.json"); }
|
||||
//test { try expectPass("/y_string_last_surrogates_1_and_2.json"); }
|
||||
//test { try expectPass("/y_string_nbsp_uescaped.json"); }
|
||||
//test { try expectPass("/y_string_nonCharacterInUTF-8_U+10FFFF.json"); }
|
||||
//test { try expectPass("/y_string_nonCharacterInUTF-8_U+FFFF.json"); }
|
||||
//test { try expectPass("/y_string_null_escape.json"); }
|
||||
//test { try expectPass("/y_string_one-byte-utf-8.json"); }
|
||||
//test { try expectPass("/y_string_pi.json"); }
|
||||
//test { try expectPass("/y_string_reservedCharacterInUTF-8_U+1BFFF.json"); }
|
||||
//test { try expectPass("/y_string_simple_ascii.json"); }
|
||||
//test { try expectPass("/y_string_space.json"); }
|
||||
//test { try expectPass("/y_string_surrogates_U+1D11E_MUSICAL_SYMBOL_G_CLEF.json"); }
|
||||
//test { try expectPass("/y_string_three-byte-utf-8.json"); }
|
||||
//test { try expectPass("/y_string_two-byte-utf-8.json"); }
|
||||
//test { try expectPass("/y_string_u+2028_line_sep.json"); }
|
||||
//test { try expectPass("/y_string_u+2029_par_sep.json"); }
|
||||
//test { try expectPass("/y_string_uescaped_newline.json"); }
|
||||
//test { try expectPass("/y_string_uEscape.json"); }
|
||||
//test { try expectPass("/y_string_unescaped_char_delete.json"); }
|
||||
//test { try expectPass("/y_string_unicode_2.json"); }
|
||||
//test { try expectPass("/y_string_unicodeEscapedBackslash.json"); }
|
||||
//test { try expectPass("/y_string_unicode_escaped_double_quote.json"); }
|
||||
//test { try expectPass("/y_string_unicode.json"); }
|
||||
//test { try expectPass("/y_string_unicode_U+10FFFE_nonchar.json"); }
|
||||
//test { try expectPass("/y_string_unicode_U+1FFFE_nonchar.json"); }
|
||||
//test { try expectPass("/y_string_unicode_U+200B_ZERO_WIDTH_SPACE.json"); }
|
||||
//test { try expectPass("/y_string_unicode_U+2064_invisible_plus.json"); }
|
||||
//test { try expectPass("/y_string_unicode_U+FDD0_nonchar.json"); }
|
||||
//test { try expectPass("/y_string_unicode_U+FFFE_nonchar.json"); }
|
||||
test { try expectPass("/y_string_1_2_3_bytes_UTF-8_sequences.json"); }
|
||||
test { try expectPass("/y_string_accepted_surrogate_pair.json"); }
|
||||
test { try expectPass("/y_string_accepted_surrogate_pairs.json"); }
|
||||
test { try expectPass("/y_string_allowed_escapes.json"); }
|
||||
test { try expectPass("/y_string_backslash_and_u_escaped_zero.json"); }
|
||||
test { try expectPass("/y_string_backslash_doublequotes.json"); }
|
||||
test { try expectPass("/y_string_comments.json"); }
|
||||
test { try expectPass("/y_string_double_escape_a.json"); }
|
||||
test { try expectPass("/y_string_double_escape_n.json"); }
|
||||
test { try expectPass("/y_string_escaped_control_character.json"); }
|
||||
test { try expectPass("/y_string_escaped_noncharacter.json"); }
|
||||
test { try expectPass("/y_string_in_array.json"); }
|
||||
test { try expectPass("/y_string_in_array_with_leading_space.json"); }
|
||||
test { try expectPass("/y_string_last_surrogates_1_and_2.json"); }
|
||||
test { try expectPass("/y_string_nbsp_uescaped.json"); }
|
||||
test { try expectPass("/y_string_nonCharacterInUTF-8_U+10FFFF.json"); }
|
||||
test { try expectPass("/y_string_nonCharacterInUTF-8_U+FFFF.json"); }
|
||||
test { try expectPass("/y_string_null_escape.json"); }
|
||||
test { try expectPass("/y_string_one-byte-utf-8.json"); }
|
||||
test { try expectPass("/y_string_pi.json"); }
|
||||
test { try expectPass("/y_string_reservedCharacterInUTF-8_U+1BFFF.json"); }
|
||||
test { try expectPass("/y_string_simple_ascii.json"); }
|
||||
test { try expectPass("/y_string_space.json"); }
|
||||
test { try expectPass("/y_string_surrogates_U+1D11E_MUSICAL_SYMBOL_G_CLEF.json"); }
|
||||
test { try expectPass("/y_string_three-byte-utf-8.json"); }
|
||||
test { try expectPass("/y_string_two-byte-utf-8.json"); }
|
||||
test { try expectPass("/y_string_u+2028_line_sep.json"); }
|
||||
test { try expectPass("/y_string_u+2029_par_sep.json"); }
|
||||
test { try expectPass("/y_string_uescaped_newline.json"); }
|
||||
test { try expectPass("/y_string_uEscape.json"); }
|
||||
test { try expectPass("/y_string_unescaped_char_delete.json"); }
|
||||
test { try expectPass("/y_string_unicode_2.json"); }
|
||||
test { try expectPass("/y_string_unicodeEscapedBackslash.json"); }
|
||||
test { try expectPass("/y_string_unicode_escaped_double_quote.json"); }
|
||||
test { try expectPass("/y_string_unicode.json"); }
|
||||
test { try expectPass("/y_string_unicode_U+10FFFE_nonchar.json"); }
|
||||
test { try expectPass("/y_string_unicode_U+1FFFE_nonchar.json"); }
|
||||
test { try expectPass("/y_string_unicode_U+200B_ZERO_WIDTH_SPACE.json"); }
|
||||
test { try expectPass("/y_string_unicode_U+2064_invisible_plus.json"); }
|
||||
test { try expectPass("/y_string_unicode_U+FDD0_nonchar.json"); }
|
||||
test { try expectPass("/y_string_unicode_U+FFFE_nonchar.json"); }
|
||||
test { try expectPass("/y_string_utf8.json"); }
|
||||
test { try expectPass("/y_string_with_del_character.json"); }
|
||||
test { try expectPass("/y_structure_lonely_false.json"); }
|
||||
@ -297,7 +296,7 @@ test { try expectFail("/n_object_unquoted_key.json"); }
|
||||
test { try expectFail("/n_object_unterminated-value.json"); }
|
||||
test { try expectFail("/n_object_with_single_string.json"); }
|
||||
// !!!
|
||||
// test { try expectFail("/n_object_with_trailing_garbage.json"); }
|
||||
test { try expectFail("/n_object_with_trailing_garbage.json"); }
|
||||
// test { try expectFail("/n_single_space.json"); }
|
||||
test { try expectFail("/n_string_1_surrogate_then_escape.json"); }
|
||||
// test { try expectFail("/n_string_1_surrogate_then_escape_u1.json"); }
|
||||
|
@ -395,6 +395,9 @@ pub fn nextString(self: *Self) Error!Token {
|
||||
|
||||
switch (try self.lastChar()) {
|
||||
'"' => {
|
||||
while (std.mem.indexOfScalar(u8, buffer.items, 0x00)) |idx|
|
||||
_ = buffer.swapRemove(idx);
|
||||
|
||||
return .{
|
||||
.type = .string,
|
||||
.value = .{ .string = try buffer.toOwnedSlice() },
|
||||
@ -435,7 +438,7 @@ pub fn nextString(self: *Self) Error!Token {
|
||||
} // end switch
|
||||
},
|
||||
else => |c| {
|
||||
if (std.ascii.isControl(c)) {
|
||||
if (std.ascii.isControl(c) and c != std.ascii.control_code.del) {
|
||||
return error.UnexpectedCharacter;
|
||||
}
|
||||
try buffer.append(c);
|
||||
|
Loading…
x
Reference in New Issue
Block a user