From 854aea65e04d6af5d2ca866f75081b6d75636f13 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Fri, 3 Jan 2025 09:59:51 -0400 Subject: [PATCH] Improve leading slash handling in index URI manipulation (#60) Signed-off-by: Juan Cruz Viotti --- Makefile | 1 + src/index/index.cc | 58 +++++++++++++------ .../common/index/url-base-trailing-slash.sh | 41 +++++++++++++ 3 files changed, 81 insertions(+), 19 deletions(-) create mode 100755 test/cli/common/index/url-base-trailing-slash.sh diff --git a/Makefile b/Makefile index c01c681..f67240e 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,7 @@ lint: .PHONY: test test: ./test/cli/common/index/invalid-configuration.sh $(PREFIX)/bin/sourcemeta-registry-index + ./test/cli/common/index/url-base-trailing-slash.sh $(PREFIX)/bin/sourcemeta-registry-index ifeq ($(ENTERPRISE), ON) ./test/cli/ee/index/no-options.sh $(PREFIX)/bin/sourcemeta-registry-index ./test/cli/ee/index/no-output.sh $(PREFIX)/bin/sourcemeta-registry-index diff --git a/src/index/index.cc b/src/index/index.cc index 9495a68..3713a98 100644 --- a/src/index/index.cc +++ b/src/index/index.cc @@ -7,7 +7,6 @@ #include "configure.h" -#include // std::transform #include // assert #include // std::tolower #include // EXIT_FAILURE, EXIT_SUCCESS @@ -17,6 +16,7 @@ #include // std::ref #include // std::cerr, std::cout #include // std::span +#include // std::ostringstream #include // std::string #include // std::string_view #include // std::vector @@ -25,6 +25,35 @@ #include "enterprise_index.h" #endif +template +static auto write_lower_except_trailing(T &stream, const std::string &input, + const char trailing) -> void { + for (auto iterator = input.cbegin(); iterator != input.cend(); ++iterator) { + if (std::next(iterator) == input.cend() && *iterator == trailing) { + continue; + } + + stream << static_cast(std::tolower(*iterator)); + } +} + +static auto url_join(const std::string &first, const std::string &second, + const std::string &third, const std::string &extension) + -> std::string { + std::ostringstream result; + write_lower_except_trailing(result, first, '/'); + result << '/'; + write_lower_except_trailing(result, second, '/'); + result << '/'; + write_lower_except_trailing(result, third, '.'); + if (!result.str().ends_with(extension)) { + result << '.'; + result << extension; + } + + return result.str(); +} + static auto index(const sourcemeta::jsontoolkit::JSON &configuration, const std::filesystem::path &base, const std::filesystem::path &output) -> int { @@ -68,24 +97,15 @@ static auto index(const sourcemeta::jsontoolkit::JSON &configuration, return EXIT_FAILURE; } - std::ostringstream new_identifier; - new_identifier << server_url.recompose(); - new_identifier << '/'; - new_identifier << schema_entry.first; - new_identifier << '/'; - for (const auto character : identifier_uri.recompose()) { - new_identifier << static_cast(std::tolower(character)); - } - - // We want to guarantee identifiers end with a JSON extension, - // as we want to use the non-extension URI to potentially metadata - // about schemas, etc - if (!new_identifier.str().ends_with(".json")) { - new_identifier << ".json"; - } - - std::cerr << "Rebased identifier: " << new_identifier.str() << "\n"; - resolver.reidentify(current_identifier, new_identifier.str()); + const auto new_identifier{ + url_join(server_url.recompose(), schema_entry.first, + identifier_uri.recompose(), + // We want to guarantee identifiers end with a JSON + // extension, as we want to use the non-extension URI to + // potentially metadata about schemas, etc + "json")}; + std::cerr << "Rebased identifier: " << new_identifier << "\n"; + resolver.reidentify(current_identifier, new_identifier); } } diff --git a/test/cli/common/index/url-base-trailing-slash.sh b/test/cli/common/index/url-base-trailing-slash.sh new file mode 100755 index 0000000..706f84d --- /dev/null +++ b/test/cli/common/index/url-base-trailing-slash.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << EOF > "$TMP/configuration.json" +{ + "url": "https://sourcemeta.com/", + "port": 8000, + "schemas": { + "example/schemas": { + "base": "https://example.com/", + "path": "./schemas" + } + } +} +EOF + +mkdir "$TMP/schemas" + +cat << 'EOF' > "$TMP/schemas/test.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/test.json" +} +EOF + +"$1" "$TMP/configuration.json" "$TMP/output" + +cat << 'EOF' > "$TMP/expected.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://sourcemeta.com/example/schemas/test.json" +} +EOF + +diff "$TMP/output/schemas/example/schemas/test.json" "$TMP/expected.json"