-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #49 from metacpan/mickey/external
External script
- Loading branch information
Showing
3 changed files
with
297 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
use strict; | ||
use warnings; | ||
use v5.36; | ||
|
||
use Email::Sender::Simple (); | ||
use Email::Simple (); | ||
use Getopt::Long; | ||
use MetaCPAN::Logger qw< :log :dlog >; | ||
|
||
use MetaCPAN::ES; | ||
use MetaCPAN::External::Cygwin qw< run_cygwin >; | ||
use MetaCPAN::External::Debian qw< run_debian >; | ||
|
||
# with( | ||
# 'MetaCPAN::Script::Role::External::Cygwin', | ||
# 'MetaCPAN::Script::Role::External::Debian', | ||
# ); | ||
|
||
# args | ||
my ( $email_to, $external_source ); | ||
GetOptions( | ||
"email_to=s" => \$email_to, | ||
"external_source=s" => \$external_source, | ||
); | ||
|
||
die "wrong external source: $external\n" | ||
unless $external_source | ||
and grep { $_ eq $external_source } qw< cygwin debian >; | ||
|
||
# setup | ||
my $es = MetaCPAN::ES->new( type => "author" ); | ||
|
||
my $ret; | ||
|
||
$ret = run_cygwin() if $external_source eq 'cygwin'; | ||
$ret = run_debian() if $external_source eq 'debian'; | ||
|
||
my $email_body = $ret->{errors_email_body}; | ||
if ( $email_to and $email_body ) { | ||
my $email = Email::Simple->create( | ||
header => [ | ||
'Content-Type' => 'text/plain; charset=utf-8', | ||
To => $email_to, | ||
From => '[email protected]', | ||
Subject => "Package mapping failures report for $external_source", | ||
'MIME-Version' => '1.0', | ||
], | ||
body => $email_body, | ||
); | ||
Email::Sender::Simple->send($email); | ||
|
||
log_debug { "Sending email to " . $email_to . ":" }; | ||
log_debug {"Email body:"}; | ||
log_debug {$email_body}; | ||
} | ||
|
||
my $scroll = $es->scroll( | ||
type => 'distribution', | ||
scroll => '10m', | ||
body => { | ||
query => { | ||
exists => { field => "external_package." . $external_source } | ||
} | ||
}, | ||
); | ||
|
||
my @to_remove; | ||
|
||
while ( my $s = $scroll->next ) { | ||
my $name = $s->{_source}{name}; | ||
next unless $name; | ||
|
||
if ( exists $dist->{$name} ) { | ||
delete $dist->{$name} | ||
if $dist->{$name} eq | ||
$s->{_source}{external_package}{$external_source}; | ||
} | ||
else { | ||
push @to_remove => $name; | ||
} | ||
} | ||
|
||
my $bulk = $es->bulk( type => 'distribution' ); | ||
|
||
for my $d ( keys %{$dist} ) { | ||
log_debug {"[$external_source] adding $d"}; | ||
$bulk->update( { | ||
id => $d, | ||
doc => +{ | ||
'external_package' => { | ||
$external_source => $dist->{$d} | ||
} | ||
}, | ||
doc_as_upsert => 1, | ||
} ); | ||
} | ||
|
||
for my $d (@to_remove) { | ||
log_debug {"[$external_source] removing $d"}; | ||
$bulk->update( { | ||
id => $d, | ||
doc => +{ | ||
'external_package' => { | ||
$external_source => undef | ||
} | ||
} | ||
} ); | ||
} | ||
|
||
$bulk->flush; | ||
|
||
1; | ||
|
||
=pod | ||
=head1 SYNOPSIS | ||
# bin/external.pl --external_source SOURCE --email_to EMAIL | ||
=cut |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package MetaCPAN::External::Cygwin; | ||
|
||
use List::Util qw< shuffle >; | ||
use MetaCPAN::Logger qw< :log :dlog >; | ||
|
||
use MetaCPAN::Ingest qw< ua >; | ||
|
||
use Sub::Exporter -setup => { | ||
exports => [ qw< | ||
run_cygwin | ||
> ] | ||
}; | ||
|
||
sub run_cygwin () { | ||
my $ret = {}; | ||
|
||
my $ua = ua(); | ||
my $mirrors = get_mirrors($ua); | ||
|
||
my @mirrors = @{ $mirrors }; | ||
my $timeout = $ua->timeout(10); | ||
|
||
MIRROR: { | ||
my $mirror = shift @mirrors or die "Ran out of mirrors"; | ||
log_debug {"Trying mirror: $mirror"}; | ||
my $res = $ua->get( $mirror . 'x86_64/setup.ini' ); | ||
redo MIRROR unless $res->is_success; | ||
|
||
my @packages = split /^\@ /m, $res->decoded_content; | ||
shift @packages; # drop headers | ||
log_debug { sprintf "Got %d cygwin packages", scalar @packages }; | ||
|
||
for my $desc (@packages) { | ||
next if substr( $desc, 0, 5 ) ne 'perl-'; | ||
my ( $pkg, %attr ) = map s/\A"|"\z//gr, map s/ \z//r, | ||
map s/\n+/ /gr, split /^([a-z]+): /m, $desc; | ||
$attr{category} = [ split / /, $attr{category} ]; | ||
next if grep /^(Debug|_obsolete)$/, @{ $attr{category} }; | ||
$ret->{dist}{ $pkg =~ s/^perl-//r } = $pkg; | ||
} | ||
} | ||
$ua->timeout($timeout); | ||
|
||
log_debug { | ||
sprintf "Found %d cygwin-CPAN packages", | ||
scalar keys %{ $ret->{dist} } | ||
}; | ||
|
||
return $ret; | ||
} | ||
|
||
sub _get_mirrors ( $ua ) { | ||
log_debug {"Fetching mirror list"}; | ||
my $res = $ua->get('https://cygwin.com/mirrors.lst'); | ||
die "Failed to fetch mirror list: " . $res->status_line | ||
unless $res->is_success; | ||
my @mirrors = shuffle map +( split /;/ )[0], split /\n/, | ||
$res->decoded_content; | ||
|
||
log_debug { sprintf "Got %d mirrors", scalar @mirrors }; | ||
return \@mirrors; | ||
} | ||
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
package MetaCPAN::External::Debian; | ||
|
||
use strict; | ||
use warnings; | ||
use v5.36; | ||
|
||
use CPAN::DistnameInfo (); | ||
use DBI (); | ||
|
||
use MetaCPAN::ES; | ||
|
||
use Sub::Exporter -setup => { | ||
exports => [ qw< | ||
run_debian | ||
> ] | ||
}; | ||
|
||
sub run_debian () { | ||
my $ret = {}; | ||
|
||
my $host_regex = _get_host_regex(); | ||
|
||
# connect to the database | ||
my $dbh = DBI->connect( "dbi:Pg:host=udd-mirror.debian.net;dbname=udd", | ||
'udd-mirror', 'udd-mirror' ); | ||
|
||
# special cases | ||
my %skip = ( 'libbssolv-perl' => 1 ); | ||
|
||
# multiple queries are needed | ||
my @sql = ( | ||
|
||
# packages with upstream identified as CPAN | ||
q{select u.source, u.upstream_url from upstream_metadata um join upstream u on um.source = u.source where um.key='Archive' and um.value='CPAN'}, | ||
|
||
# packages which upstream URL pointing to CPAN | ||
qq{select source, upstream_url from upstream where upstream_url ~ '${\$host_regex}'}, | ||
); | ||
|
||
my @failures; | ||
|
||
for my $sql (@sql) { | ||
my $sth = $dbh->prepare($sql); | ||
$sth->execute(); | ||
|
||
# map Debian source package to CPAN distro | ||
while ( my ( $source, $url ) = $sth->fetchrow ) { | ||
next if $skip{$source}; | ||
if ( my $dist = dist_for_debian( $source, $url ) ) { | ||
$ret->{dist}{$dist} = $source; | ||
} | ||
else { | ||
push @failures => [ $source, $url ]; | ||
} | ||
} | ||
} | ||
|
||
if (@failures) { | ||
my $ret->{errors_email_body} = join "\n" => | ||
map { sprintf "%s %s", $_->[0], $_->[1] // '<undef>' } @failures; | ||
} | ||
|
||
return $ret; | ||
} | ||
|
||
sub dist_for_debian ( $source, $url ) { | ||
my %alias = ( | ||
'datapager' => 'data-pager', | ||
'html-format' => 'html-formatter', | ||
); | ||
|
||
my $dist = CPAN::DistnameInfo->new($url); | ||
if ( $dist->dist ) { | ||
return $dist->dist; | ||
} | ||
elsif ( $source =~ /^lib(.*)-perl$/ ) { | ||
my $es = MetaCPAN::ES->new( type => 'release' ); | ||
my $res = $es->scroll( | ||
body => { | ||
query => { | ||
term => { 'distribution.lowercase' => $alias{$1} // $1 } | ||
}, | ||
sort => [ { 'date' => 'desc' } ], | ||
} | ||
)->next; | ||
|
||
return $res->{_source}{distribution} | ||
if $res; | ||
} | ||
|
||
return; | ||
} | ||
|
||
sub _get_host_regex () { | ||
my @cpan_hosts = qw< | ||
backpan.cpan.org | ||
backpan.perl.org | ||
cpan.metacpan.org | ||
cpan.noris.de | ||
cpan.org | ||
cpan.perl.org | ||
search.cpan.org | ||
www.cpan.org | ||
www.perl.com | ||
>; | ||
|
||
return | ||
'^(https?|ftp)://(' | ||
. join( '|', map {s/\./\\./r} @cpan_hosts ) . ')/'; | ||
} | ||
|
||
1; |