Skip to content

Commit

Permalink
Add support for Elasticsearch tab delimited links file (#2955)
Browse files Browse the repository at this point in the history
The Elasticsearch reference docs links file is formatted in JSON. That
format makes extraction of links complicated, both on the docs side and
in Elasticsearch, which needs to parse the JSON. This commit adds support
for a new tab delimited txt file, to which the Elasticsearch reference
docs links will move.

relates elastic/elasticsearch#105813
  • Loading branch information
rjernst authored Apr 10, 2024
1 parent 30af8af commit 3fccd9b
Showing 1 changed file with 23 additions and 2 deletions.
25 changes: 23 additions & 2 deletions build_docs.pl
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ sub check_elasticsearch_links {
# So we grab all quoted strings that contain `html`. This *should* be fine
# for a while because the keys in the file are all in SHOUTING_SNAKE_CASE
# so even if one contains "html" it'll contain "HTML" which doesn't match.
my $extractor = sub {
my $json_extractor = sub {
my $contents = shift;
return sub {
while ( $contents =~ m!"([^"\#]+)(?:\#([^"]+))?"!g ) {
Expand All @@ -465,6 +465,15 @@ sub check_elasticsearch_links {
return;
};
};
# Builds the link extractor for the tab delimited links file.
#
# Takes the file contents and returns an iterator. Each call to the iterator
# yields the docs path for the next matching line, and an empty return once
# no further line matches.
#
# Entries are matched as `KEY<TAB>value`, one per line, with no quoting.
# NOTE(review): the format is taken from the commit description; confirm
# against the real reference-docs-links.txt. The value is passed through
# verbatim, so any `#fragment` stays attached. TODO confirm that is what
# the link checker expects.
my $tabdelim_extractor = sub {
    my $contents = shift;
    return sub {
        # Unlike the JSON file, plain text entries are not wrapped in double
        # quotes, so the pattern must not require them. Anchoring per line
        # (/m) and excluding newlines from the key keeps a match from
        # spanning lines; excluding \r drops a CRLF line ending from the
        # captured value. /g in scalar context resumes after the previous
        # match on every call.
        if ( $contents =~ m{^[^\t\n]+\t([^\r\n]*)}mg ) {
            # $version is supplied by the enclosing scope.
            return "en/elasticsearch/reference/$version/$1";
        }
        return;
    };
};

my $src_path = 'server/src/main/resources/org/elasticsearch/common/reference-docs-links.json';
my $repo = ES::Repo->get_repo('elasticsearch');
Expand All @@ -486,7 +495,19 @@ sub check_elasticsearch_links {
# https://github.com/elastic/docs/issues/2264
$branch = $version eq "master" ? "main" : $version;
say " Branch: $branch, Version: $version";
my $source = $repo->show_file( $link_check_name, $branch, $src_path );

# Locate the links file for this branch. Older branches carry the JSON file,
# newer ones the tab delimited txt file, so try each location in turn and
# remember which extractor goes with the file that was actually found.
my $links_file;
my $extractor;
my $source;
my @lookup_errors;
for my $candidate (
    [   'server/src/main/resources/org/elasticsearch/common/reference-docs-links.json',
        $json_extractor
    ],
    [   'libs/core/src/main/resources/org/elasticsearch/core/reference-docs-links.txt',
        $tabdelim_extractor
    ],
    )
{
    ( $links_file, $extractor ) = @$candidate;
    $source = eval {
        $repo->show_file( $link_check_name, $branch, $links_file );
    };
    last if $source;

    # Record why this location failed so that the final error reports every
    # attempt, not just the last one ($@ is overwritten by the next eval).
    my $reason = $@ || 'no content returned';
    chomp $reason;
    push @lookup_errors, "$links_file: $reason\n";
}
die "failed to find elasticsearch links file;\n" . join( '', @lookup_errors )
    unless $source;

# Label the check with the file that was really read, not the JSON path.
$link_checker->check_source( $source, $extractor,
    "Elasticsearch [$version]: $links_file" );
Expand Down

0 comments on commit 3fccd9b

Please sign in to comment.