Refactor path parsing

Remove horrible path segments code, re-write as an abstracted,
alloc-less `jetzig.http.Path`.
This commit is contained in:
Bob Farrell 2024-03-17 14:22:59 +00:00
parent 60d27d9a6c
commit 9b255eb19a
5 changed files with 107 additions and 124 deletions

View File

@ -8,6 +8,7 @@ pub const Session = @import("http/Session.zig");
pub const Cookies = @import("http/Cookies.zig");
pub const Headers = @import("http/Headers.zig");
pub const Query = @import("http/Query.zig");
pub const Path = @import("http/Path.zig");
pub const status_codes = @import("http/status_codes.zig");
pub const middleware = @import("http/middleware.zig");
pub const mime = @import("http/mime.zig");

View File

@ -1,7 +1,17 @@
/// Abstraction of the path component of a URI.
/// Provides access to:
/// * Unaltered original path
/// * Base path (without extension and query string)
/// * Directory (parent path from base path)
/// * File path (original path without query string, extension retained)
/// * Resource ID (final component of base path)
/// * Extension (".json", ".html", etc.)
/// * Query (everything after first "?" character)
const std = @import("std");
/// Unaltered original path, exactly as passed to `init`.
path: []const u8,
/// Path without extension and query string.
base_path: []const u8,
/// Parent path of `base_path` (`"/foo/bar"` for `"/foo/bar/baz"`).
directory: []const u8,
/// Original path with the query string removed; any extension is retained.
file_path: []const u8,
/// Final component of `base_path` (`"baz"` for `"/foo/bar/baz"`).
resource_id: []const u8,
/// Extension such as ".json" or ".html" — presumably `null` when the path has none (TODO confirm).
extension: ?[]const u8,
/// Everything after the first "?" character; `null` when the query is empty.
query: ?[]const u8,
@ -10,10 +20,14 @@ const Self = @This();
/// Initialize a new HTTP Path.
pub fn init(path: []const u8) Self {
const base_path = getBasePath(path);
return .{
.path = path,
.base_path = getBasePath(path),
.resource_id = getResourceId(path),
.base_path = base_path,
.directory = getDirectory(base_path),
.file_path = getFilePath(path),
.resource_id = getResourceId(base_path),
.extension = getExtension(path),
.query = getQuery(path),
};
@ -42,13 +56,37 @@ fn getBasePath(path: []const u8) []const u8 {
}
}
// Extract `"baz"` from:
// Extract `"/foo/bar"` from:
// * `"/foo/bar/baz"`
// Special cases:
// * `"/"` returns `"/"`
// * `"/baz"` returns `"/"` (the parent of a top-level resource is the root, never `""`)
// * a path containing no `/` returns `"/"`
// The returned slice is either a view into `base_path` or the static string `"/"`; nothing is
// allocated.
pub fn getDirectory(base_path: []const u8) []const u8 {
    const index = std.mem.lastIndexOfScalar(u8, base_path, '/') orelse return "/";

    // When the only `/` is the leading one, slicing `[0..0]` would yield an empty string, which
    // can never equal a route's `"/"` URI path. Treat the root as the parent instead.
    if (index == 0) return "/";

    return base_path[0..index];
}
// Strip the query string, keeping everything else (including any extension):
// * `"/foo/bar/baz.html"` => `"/foo/bar/baz.html"`
// * `"/foo/bar/baz.html?qux=quux&corge=grault"` => `"/foo/bar/baz.html"`
// * `"/foo/bar/baz"` => `"/foo/bar/baz"`
// The result is a view into `path`; nothing is allocated.
fn getFilePath(path: []const u8) []const u8 {
    // Without a `?` the file path spans the whole input.
    const end = std.mem.indexOfScalar(u8, path, '?') orelse path.len;
    return path[0..end];
}
// Extract `"baz"` from:
// * `"/foo/bar/baz"`
// * `"/baz"`
fn getResourceId(path: []const u8) []const u8 {
const base_path = getBasePath(path);
fn getResourceId(base_path: []const u8) []const u8 {
var it = std.mem.splitBackwardsScalar(u8, base_path, '/');
while (it.next()) |segment| return segment;
return base_path;
@ -101,6 +139,30 @@ test ".base_path (without extension, without query)" {
try std.testing.expectEqualStrings("/foo/bar/baz", path.base_path);
}
test ".directory (with extension, with query)" {
    // The directory is derived from the base path, so extension and query are both ignored.
    const parsed = Self.init("/foo/bar/baz.html?qux=quux&corge=grault");
    const expected: []const u8 = "/foo/bar";
    try std.testing.expectEqualStrings(expected, parsed.directory);
}
test ".directory (with extension, without query)" {
    // An extension on the final segment does not affect the parent directory.
    const parsed = Self.init("/foo/bar/baz.html");
    const expected: []const u8 = "/foo/bar";
    try std.testing.expectEqualStrings(expected, parsed.directory);
}
test ".directory (without extension, without query)" {
    // Plain path: the directory is everything before the final segment.
    const parsed = Self.init("/foo/bar/baz");
    const expected: []const u8 = "/foo/bar";
    try std.testing.expectEqualStrings(expected, parsed.directory);
}
test ".directory (without extension, without query, root path)" {
    // The root path is its own directory.
    const parsed = Self.init("/");
    const expected: []const u8 = "/";
    try std.testing.expectEqualStrings(expected, parsed.directory);
}
test ".resource_id (with extension, with query)" {
const path = Self.init("/foo/bar/baz.html?qux=quux&corge=grault");
@ -172,3 +234,27 @@ test ".query (with empty query)" {
try std.testing.expect(path.query == null);
}
test ".file_path (with extension, with query)" {
    // Only the query string is removed; the extension stays.
    const parsed = Self.init("/foo/bar/baz.json?qux=quux&corge=grault");
    const expected: []const u8 = "/foo/bar/baz.json";
    try std.testing.expectEqualStrings(expected, parsed.file_path);
}
test ".file_path (with extension, without query)" {
    // Nothing to strip: the file path equals the input.
    const parsed = Self.init("/foo/bar/baz.json");
    const expected: []const u8 = "/foo/bar/baz.json";
    try std.testing.expectEqualStrings(expected, parsed.file_path);
}
test ".file_path (without extension, without query)" {
    // No extension and no query: the file path equals the input.
    const parsed = Self.init("/foo/bar/baz");
    const expected: []const u8 = "/foo/bar/baz";
    try std.testing.expectEqualStrings(expected, parsed.file_path);
}
test ".file_path (without extension, with query)" {
    // The query string is removed even when there is no extension.
    const parsed = Self.init("/foo/bar/baz?qux=quux&corge=grault");
    const expected: []const u8 = "/foo/bar/baz";
    try std.testing.expectEqualStrings(expected, parsed.file_path);
}

View File

@ -10,10 +10,9 @@ pub const Modifier = enum { edit, new };
pub const Format = enum { HTML, JSON, UNKNOWN };
allocator: std.mem.Allocator,
path: []const u8,
path: jetzig.http.Path,
method: Method,
headers: jetzig.http.Headers,
segments: std.ArrayList([]const u8),
server: *jetzig.http.Server,
std_http_request: std.http.Server.Request,
response: *jetzig.http.Response,
@ -50,23 +49,6 @@ pub fn init(
_ => return error.JetzigUnsupportedHttpMethod,
};
// TODO: Replace all this with a `Path` type which exposes all components of the path in a
// sensible way:
// * Array of segments: "/foo/bar/baz" => .{ "foo", "bar", "baz" }
// * Resource ID: "/foo/bar/baz/1" => "1"
// * Extension: "/foo/bar/baz/1.json" => ".json"
// * Query params: "/foo/bar/baz?foo=bar&baz=qux" => .{ .foo = "bar", .baz => "qux" }
// * Anything else ?
var it = std.mem.splitScalar(u8, std_http_request.head.target, '/');
var segments = std.ArrayList([]const u8).init(allocator);
while (it.next()) |segment| {
if (std.mem.indexOfScalar(u8, segment, '?')) |query_index| {
try segments.append(segment[0..query_index]);
} else {
try segments.append(segment);
}
}
const response_data = try allocator.create(jetzig.data.Data);
response_data.* = jetzig.data.Data.init(allocator);
@ -77,11 +59,10 @@ pub fn init(
return .{
.allocator = allocator,
.path = std_http_request.head.target,
.path = jetzig.http.Path.init(std_http_request.head.target),
.method = method,
.headers = jetzig.http.Headers.init(allocator),
.server = server,
.segments = segments,
.response = response,
.response_data = response_data,
.query_data = query_data,
@ -92,7 +73,6 @@ pub fn init(
pub fn deinit(self: *Self) void {
// self.session.deinit();
self.segments.deinit();
self.allocator.destroy(self.cookies);
self.allocator.destroy(self.session);
if (self.processed) self.allocator.free(self.body);
@ -263,13 +243,10 @@ fn queryParams(self: *Self) !*jetzig.data.Value {
}
fn parseQueryString(self: *Self) !bool {
const delimiter_index = std.mem.indexOfScalar(u8, self.path, '?');
if (delimiter_index) |index| {
if (self.path.len - 1 < index + 1) return false;
if (self.path.query) |query| {
self.query.* = jetzig.http.Query.init(
self.allocator,
self.path[index + 1 ..],
query,
self.query_data,
);
try self.query.parse();
@ -280,7 +257,7 @@ fn parseQueryString(self: *Self) !bool {
}
fn extensionFormat(self: *Self) ?jetzig.http.Request.Format {
const extension = std.fs.path.extension(self.path);
const extension = self.path.extension orelse return null;
if (std.mem.eql(u8, extension, ".html")) {
return .HTML;
@ -333,41 +310,6 @@ pub fn fmtMethod(self: *Self) []const u8 {
};
}
/// Identify a resource modifier in the final path segment.
/// Returns `.edit` or `.new` when the segment, with its extension removed, is exactly `"edit"`
/// or `"new"`; returns `null` otherwise.
pub fn resourceModifier(self: *Self) ?Modifier {
    const segments = self.segments.items;
    const last_segment = std.fs.path.basename(segments[segments.len - 1]);

    // Drop the extension (if any) so that e.g. `edit.html` is recognised as `edit`.
    const extension_len = std.fs.path.extension(last_segment).len;
    const stem = last_segment[0 .. last_segment.len - extension_len];

    if (std.mem.eql(u8, stem, "new")) return .new;
    if (std.mem.eql(u8, stem, "edit")) return .edit;
    return null;
}
/// Name of the final path segment.
/// If the segment contains a `?`, everything from the `?` onwards is dropped; otherwise the
/// segment's extension is dropped. Returns `"default"` when there are no segments at all.
pub fn resourceName(self: *Self) []const u8 {
    const segments = self.segments.items;
    if (segments.len == 0) return "default"; // Should never happen ?

    const last_segment = std.fs.path.basename(segments[segments.len - 1]);

    const query_start = std.mem.indexOfScalar(u8, last_segment, '?') orelse {
        // No query string in this segment: strip the extension instead.
        const extension_len = std.fs.path.extension(last_segment).len;
        return last_segment[0 .. last_segment.len - extension_len];
    };
    return last_segment[0..query_start];
}
/// Build the path of the resource's parent: every segment except the last, joined with
/// `std.fs.path.join` and prefixed with `"/"`.
/// The result is allocated with `self.allocator` and returned to the caller.
/// Returns an error if either allocation fails.
pub fn resourcePath(self: *Self) ![]const u8 {
    const segments = self.segments.items;
    const parent_segments = segments[0 .. segments.len - 1];

    // Intermediate buffer only; released once the prefixed copy has been made.
    const joined = try std.fs.path.join(self.allocator, parent_segments);
    defer self.allocator.free(joined);

    const parts = [_][]const u8{ "/", joined };
    return try std.mem.concat(self.allocator, u8, &parts);
}
/// For a path `/foo/bar/baz/123.json`, returns `"123"`.
/// This is an alias for `resourceName`.
pub fn resourceId(self: *Self) []const u8 {
    const id = self.resourceName();
    return id;
}
// Determine if a given route matches the current request.
pub fn match(self: *Self, route: jetzig.views.Route) !bool {
return switch (self.method) {
@ -398,55 +340,9 @@ pub fn match(self: *Self, route: jetzig.views.Route) !bool {
fn isMatch(self: *Self, match_type: enum { exact, resource_id }, route: jetzig.views.Route) bool {
const path = switch (match_type) {
.exact => self.pathWithoutExtension(),
.resource_id => self.pathWithoutExtensionAndResourceId(),
.exact => self.path.base_path,
.resource_id => self.path.directory,
};
return std.mem.eql(u8, path, route.uri_path);
}
// Return the request path with the query string and the extension removed:
// * `"/foo/bar/baz.json?qux=quux"` => `"/foo/bar/baz"`
// * `"/foo/bar/baz?version=1.5"` => `"/foo/bar/baz"`
// * `"/foo.bar/baz"` => `"/foo.bar/baz"`
// The query string is removed first, and only a `.` in the final path segment is treated as
// the start of an extension, so `.` characters elsewhere (in the query string or in a parent
// directory name) never truncate the path. The result is a view into `self.path`.
fn pathWithoutExtension(self: *Self) []const u8 {
    const query_index = std.mem.indexOfScalar(u8, self.path, '?') orelse self.path.len;
    const without_query = self.path[0..query_index];

    // Start of the final segment: one past the last `/`, or the whole string if there is none.
    const segment_start = if (std.mem.lastIndexOfScalar(u8, without_query, '/')) |slash_index|
        slash_index + 1
    else
        0;

    if (std.mem.lastIndexOfScalar(u8, without_query[segment_start..], '.')) |dot_index| {
        return without_query[0 .. segment_start + dot_index];
    }
    return without_query;
}
// Return the parent of the path produced by `pathWithoutExtension`, i.e. that path with its
// final segment (the resource ID) removed:
// * `"/foo/bar/1.json"` => `"/foo/bar"`
// * `"/1"` => `"/"`
// A path containing no `/` is returned unchanged.
fn pathWithoutExtensionAndResourceId(self: *Self) []const u8 {
    const path = self.pathWithoutExtension();

    // Search the stripped path rather than `self.path`: an index found in the longer original
    // (e.g. a `/` inside the query string) can lie beyond the end of `path` and would produce an
    // out-of-bounds slice.
    const index = std.mem.lastIndexOfScalar(u8, path, '/') orelse return path;

    // The only `/` is the leading one, so the parent is the root.
    if (index == 0) return "/";
    return path[0..index];
}
// Build the request's name including the resource name (see `name`).
// The result is allocated by `name` with `self.allocator`.
fn fullName(self: *Self) ![]const u8 {
    const include_resource_id = true;
    return self.name(include_resource_id);
}
// Build the request's name without the resource name (see `name`).
// The result is allocated by `name` with `self.allocator`.
fn fullNameWithStrippedResourceId(self: *Self) ![]const u8 {
    const include_resource_id = false;
    return self.name(include_resource_id);
}
// Build a name from the path segments: every segment except the last joined with `_`,
// optionally followed by `.` and the resource name when `with_resource_id` is true.
// The result is allocated with `self.allocator`; returns an error if allocation fails.
fn name(self: *Self, with_resource_id: bool) ![]const u8 {
    const segments = self.segments.items;

    // Intermediate buffer only; released once the final string has been built.
    const joined = try std.mem.join(self.allocator, "_", segments[0 .. segments.len - 1]);
    defer self.allocator.free(joined);

    const separator: []const u8 = if (with_resource_id) "." else "";
    const resource: []const u8 = if (with_resource_id) self.resourceName() else "";

    const parts = [_][]const u8{ joined, separator, resource };
    return std.mem.concat(self.allocator, u8, &parts);
}

View File

@ -348,7 +348,7 @@ fn requestLogMessage(self: *Self, request: *jetzig.http.Request) ![]const u8 {
formatted_duration,
request.fmtMethod(),
status.format(),
request.path,
request.path.path,
});
}
@ -390,7 +390,7 @@ fn matchStaticResource(self: *Self, request: *jetzig.http.Request) !?StaticResou
}
fn matchPublicContent(self: *Self, request: *jetzig.http.Request) !?StaticResource {
if (request.path.len < 2) return null;
if (request.path.file_path.len <= 1) return null;
if (request.method != .GET) return null;
var iterable_dir = std.fs.cwd().openDir(
@ -410,7 +410,7 @@ fn matchPublicContent(self: *Self, request: *jetzig.http.Request) !?StaticResour
while (try walker.next()) |file| {
if (file.kind != .file) continue;
if (std.mem.eql(u8, file.path, request.path[1..])) {
if (std.mem.eql(u8, file.path, request.path.file_path[1..])) {
const content = try iterable_dir.readFileAlloc(
request.allocator,
file.path,
@ -476,7 +476,7 @@ fn staticPath(request: *jetzig.http.Request, route: jetzig.views.Route) !?[]cons
if (try static_params.getValue("id")) |id| {
switch (id.*) {
.string => |capture| {
if (!std.mem.eql(u8, capture.value, request.resourceId())) continue;
if (!std.mem.eql(u8, capture.value, request.path.resource_id)) continue;
},
// Should be unreachable - this means generated `routes.zig` is incoherent:
inline else => return error.JetzigRouteError,

View File

@ -80,11 +80,11 @@ fn renderFn(self: Self, request: *jetzig.http.Request) anyerror!jetzig.views.Vie
switch (self.view.?.dynamic) {
.index => |view| return try view(request, request.response_data),
.get => |view| return try view(request.resourceId(), request, request.response_data),
.get => |view| return try view(request.path.resource_id, request, request.response_data),
.post => |view| return try view(request, request.response_data),
.patch => |view| return try view(request.resourceId(), request, request.response_data),
.put => |view| return try view(request.resourceId(), request, request.response_data),
.delete => |view| return try view(request.resourceId(), request, request.response_data),
.patch => |view| return try view(request.path.resource_id, request, request.response_data),
.put => |view| return try view(request.path.resource_id, request, request.response_data),
.delete => |view| return try view(request.path.resource_id, request, request.response_data),
}
}