diff --git a/bin/cover.pl b/bin/cover.pl new file mode 100644 index 0000000..aed52da --- /dev/null +++ b/bin/cover.pl @@ -0,0 +1,102 @@ +use strict; +use warnings; +use v5.36; + +use Cpanel::JSON::XS qw< decode_json >; +use Getopt::Long; +use Path::Tiny qw< path >; + +use MetaCPAN::Logger qw< :log :dlog >; + +use MetaCPAN::ES; +use MetaCPAN::Ingest qw< + handle_error + read_url +>; + +# args +my ( $json_file, $test ); +GetOptions( + "json=s" => \$json_file, + "test" => \$test +); +my $cover_url //= 'http://cpancover.com/latest/cpancover.json'; +my $cover_dev_url //= 'http://cpancover.com/latest/cpancover_dev.json'; + +# setup +my %valid_keys + = map { $_ => 1 } qw< branch condition statement subroutine total >; + +my $es = MetaCPAN::ES->new( index => "cover", type => "cover" ); +my $bulk = $es->bulk(); + +my $data = retrieve_cover_data(); + +log_info {'Updating the cover index'}; + +for my $dist ( sort keys %{$data} ) { + for my $version ( keys %{ $data->{$dist} } ) { + my $release = $dist . '-' . $version; + my $rel_check = $es->search( + index => 'cpan', + type => 'release', + size => 0, + body => { + query => { term => { name => $release } }, + }, + ); + if ( $rel_check->{hits}{total} ) { + log_info { "Adding release info for '" . $release . "'" }; + } + else { + log_warn { "Release '" . $release . "' does not exist." }; + next; + } + + my %doc_data = %{ $data->{$dist}{$version}{coverage}{total} }; + + for my $k ( keys %doc_data ) { + delete $doc_data{$k} unless exists $valid_keys{$k}; + } + + $bulk->update( { + id => $release, + doc => { + distribution => $dist, + version => $version, + release => $release, + criteria => \%doc_data, + }, + doc_as_upsert => 1, + } ); + } +} + +$bulk->flush; + +### + +sub retrieve_cover_data { + return decode_json( path($json_file)->slurp ) if $json_file; + + my $url = $test ? $cover_dev_url : $cover_url; + + return decode_json( read_url($url) ); +} + +1; + +__END__ + +=pod + +=head1 SYNOPSIS + + # bin/metacpan cover [--test] [json_file] + +=head1 DESCRIPTION + +Retrieves the CPAN Cover data from its source and +updates our ES information. + +=cut diff --git a/bin/cve.pl b/bin/cve.pl index 85e8fd6..81d631e 100644 --- a/bin/cve.pl +++ b/bin/cve.pl @@ -13,7 +13,7 @@ use MetaCPAN::Ingest qw< handle_error numify_version - ua + read_url >; my %range_ops = qw( < lt <= lte > gt >= gte ); @@ -202,20 +202,7 @@ sub retrieve_cve_data { my $url = $test ? $cve_dev_url : $cve_url; - log_info { 'Fetching data from ', $url }; - my $ua = ua(); - my $resp = $ua->get($url); - - handle_error( $resp->status_line, 1 ) unless $resp->is_success; - - # clean up headers if .json.gz is served as gzip type - # rather than json encoded with gzip - if ( $resp->header('Content-Type') eq 'application/x-gzip' ) { - $resp->header( 'Content-Type' => 'application/json' ); - $resp->header( 'Content-Encoding' => 'gzip' ); - } - - return decode_json( $resp->decoded_content ); + return decode_json( read_url($url) ); } 1; diff --git a/lib/MetaCPAN/Ingest.pm b/lib/MetaCPAN/Ingest.pm index 56147f4..bede979 100644 --- a/lib/MetaCPAN/Ingest.pm +++ b/lib/MetaCPAN/Ingest.pm @@ -34,6 +34,7 @@ use Sub::Exporter -setup => { read_02packages_fh read_06perms_fh read_06perms_iter + read_url strip_pod tmp_dir ua @@ -169,6 +170,22 @@ sub ua ( $proxy = undef ) { return $ua; } +sub read_url ( $url ) { + my $ua = ua(); + my $resp = $ua->get($url); + + handle_error( $resp->status_line, 1 ) unless $resp->is_success; + + # clean up headers if .json.gz is served as gzip type + # rather than json encoded with gzip + if ( $resp->header('Content-Type') eq 'application/x-gzip' ) { + $resp->header( 'Content-Type' => 'application/json' ); + $resp->header( 'Content-Encoding' => 'gzip' ); + } + + return $resp->decoded_content; +} + sub cpan_file_map () { my $cpan = cpan_dir(); my $ls = $cpan->child(qw< indices find-ls.gz >);