Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revamp schema indexing based on new JSON Toolkit features #55

Merged
merged 1 commit into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DEPENDENCIES
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
vendorpull https://github.com/sourcemeta/vendorpull dea311b5bfb53b6926a4140267959ae334d3ecf4
noa https://github.com/sourcemeta/noa 924f5cc8549af7f12227869dcbab4259029ac650
jsontoolkit https://github.com/sourcemeta/jsontoolkit 1a6da6d690f630010cdc6ef83e23aac1794deadf
jsontoolkit https://github.com/sourcemeta/jsontoolkit 16e66cdd1a72126f626fe508c072c4c5aa69cde1
blaze https://github.com/sourcemeta/blaze bde17029b8fc8daf38a496d4d27354a4d24b3530
hydra https://github.com/sourcemeta/hydra 67472465715103167830bc00655dc015d9c10934
bootstrap https://github.com/twbs/bootstrap v5.3.3
Expand Down
94 changes: 51 additions & 43 deletions src/index/index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,25 @@ static auto index(const sourcemeta::jsontoolkit::JSON &configuration,
const std::filesystem::path &output) -> int {
assert(std::filesystem::exists(base));
assert(std::filesystem::exists(output));
assert(configuration.is_object());
assert(configuration.defines("schemas"));
assert(configuration.at("schemas").is_object());

const auto server_url{
sourcemeta::jsontoolkit::URI{configuration.at("url").to_string()}
.canonicalize()};

// Populate flat file resolver
sourcemeta::jsontoolkit::FlatFileSchemaResolver resolver{
sourcemeta::jsontoolkit::official_resolver};
for (const auto &schema_entry : configuration.at("schemas").as_object()) {
assert(schema_entry.second.is_object());
assert(schema_entry.second.defines("path"));
assert(schema_entry.second.at("path").is_string());
const auto collection_path{std::filesystem::canonical(
base / schema_entry.second.at("path").to_string())};
assert(schema_entry.second.defines("base"));
assert(schema_entry.second.at("base").is_string());
const auto collection_base_uri{
sourcemeta::jsontoolkit::URI{schema_entry.second.at("base").to_string()}
.canonicalize()};

std::cerr << "-- Processing collection: " << schema_entry.first << "\n";
std::cerr << "Base directory: " << collection_path.string() << "\n";
std::cerr << "Base URI: " << collection_base_uri.recompose() << "\n";

for (const auto &entry :
std::filesystem::recursive_directory_iterator{collection_path}) {
if (!entry.is_regular_file() || entry.path().extension() != ".json" ||
Expand All @@ -57,52 +57,60 @@ static auto index(const sourcemeta::jsontoolkit::JSON &configuration,
}

std::cerr << "Found schema: " << entry.path().string() << "\n";

const auto schema{sourcemeta::jsontoolkit::from_file(entry.path())};
const auto identifier{sourcemeta::jsontoolkit::identify(
schema, sourcemeta::jsontoolkit::official_resolver,
sourcemeta::jsontoolkit::IdentificationStrategy::Loose)};
if (!identifier.has_value()) {
std::cout << "Could not determine schema identifier\n";
return EXIT_FAILURE;
}

const auto &current_identifier{resolver.add(entry.path())};
auto identifier_uri{
sourcemeta::jsontoolkit::URI{identifier.value()}.canonicalize()};
std::cerr << "Schema identifier: " << identifier_uri.recompose() << "\n";
sourcemeta::jsontoolkit::URI{current_identifier}.canonicalize()};
std::cerr << "Current identifier: " << identifier_uri.recompose() << "\n";
identifier_uri.relative_to(collection_base_uri);
if (identifier_uri.is_absolute()) {
std::cout << "Cannot resolve the schema identifier against the "
"collection base\n";
return EXIT_FAILURE;
}

auto schema_directory{schema_entry.first};
std::transform(schema_directory.begin(), schema_directory.end(),
schema_directory.begin(), [](const auto character) {
return std::tolower(character);
});

auto schema_basename{identifier_uri.recompose()};
std::transform(schema_basename.begin(), schema_basename.end(),
schema_basename.begin(), [](const auto character) {
return std::tolower(character);
});

const auto schema_output{std::filesystem::weakly_canonical(
output / "schemas" / schema_directory / schema_basename)};
std::cerr << "Schema output: " << schema_output.string() << "\n";

// Note we copy as-is and we rebase IDs at runtime to correctly
// handle meta-schemas that can only be resolved at runtime
std::filesystem::create_directories(schema_output.parent_path());
std::ofstream stream{schema_output};
sourcemeta::jsontoolkit::prettify(
schema, stream, sourcemeta::jsontoolkit::schema_format_compare);
stream << "\n";
std::ostringstream new_identifier;
new_identifier << server_url.recompose();
new_identifier << '/';
new_identifier << schema_entry.first;
new_identifier << '/';
for (const auto character : identifier_uri.recompose()) {
new_identifier << static_cast<char>(std::tolower(character));
}

// We want to guarantee identifiers end with a JSON extension,
// as we want to use the non-extension URI to potentially serve metadata
// about schemas, etc
if (!new_identifier.str().ends_with(".json")) {
new_identifier << ".json";
}

std::cerr << "Rebased identifier: " << new_identifier.str() << "\n";
resolver.reidentify(current_identifier, new_identifier.str());
}
}

for (const auto &schema : resolver) {
std::cerr << "-- Processing schema: " << schema.first << "\n";
sourcemeta::jsontoolkit::URI schema_uri{schema.first};
schema_uri.relative_to(server_url);
assert(schema_uri.is_relative());
const auto schema_output{std::filesystem::weakly_canonical(
output / "schemas" / schema_uri.recompose())};
std::cerr << "Schema output: " << schema_output.string() << "\n";
std::filesystem::create_directories(schema_output.parent_path());
std::ofstream stream{schema_output};
const auto result{resolver(schema.first)};
if (!result.has_value()) {
std::cout << "Cannot resolve the schema with identifier " << schema.first
<< "\n";
return EXIT_FAILURE;
}

sourcemeta::jsontoolkit::prettify(
result.value(), stream, sourcemeta::jsontoolkit::schema_format_compare);
stream << "\n";
}

return EXIT_SUCCESS;
}

Expand Down
9 changes: 1 addition & 8 deletions src/server/resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,7 @@ auto resolver(const sourcemeta::jsontoolkit::URI &server_base_url,
return std::nullopt;
}

auto schema{sourcemeta::jsontoolkit::from_file(schema_path)};
sourcemeta::jsontoolkit::reidentify(
schema, std::string{identifier},
[&server_base_url, &schema_base_directory](const auto &subidentifier) {
return resolver(server_base_url, schema_base_directory, subidentifier);
});

return schema;
return sourcemeta::jsontoolkit::from_file(schema_path);
}

} // namespace sourcemeta::registry
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 16 additions & 12 deletions vendor/jsontoolkit/src/jsonschema/resolver.cc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading