Skip to content

Commit

Permalink
Merge pull request #49 from metacpan/mickey/external
Browse files Browse the repository at this point in the history
External script
  • Loading branch information
mickeyn authored Oct 28, 2024
2 parents 80cdeec + 66b74c0 commit 363570f
Show file tree
Hide file tree
Showing 3 changed files with 297 additions and 0 deletions.
120 changes: 120 additions & 0 deletions bin/external.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
use strict;
use warnings;
use v5.36;

use Email::Sender::Simple ();
use Email::Simple ();
use Getopt::Long;
use MetaCPAN::Logger qw< :log :dlog >;

use MetaCPAN::ES;
use MetaCPAN::External::Cygwin qw< run_cygwin >;
use MetaCPAN::External::Debian qw< run_debian >;

# with(
# 'MetaCPAN::Script::Role::External::Cygwin',
# 'MetaCPAN::Script::Role::External::Debian',
# );

# args
my ( $email_to, $external_source );
GetOptions(
"email_to=s" => \$email_to,
"external_source=s" => \$external_source,
);

die "wrong external source: $external\n"
unless $external_source
and grep { $_ eq $external_source } qw< cygwin debian >;

# setup
my $es = MetaCPAN::ES->new( type => "author" );

my $ret;

$ret = run_cygwin() if $external_source eq 'cygwin';
$ret = run_debian() if $external_source eq 'debian';

my $email_body = $ret->{errors_email_body};
if ( $email_to and $email_body ) {
my $email = Email::Simple->create(
header => [
'Content-Type' => 'text/plain; charset=utf-8',
To => $email_to,
From => '[email protected]',
Subject => "Package mapping failures report for $external_source",
'MIME-Version' => '1.0',
],
body => $email_body,
);
Email::Sender::Simple->send($email);

log_debug { "Sending email to " . $email_to . ":" };
log_debug {"Email body:"};
log_debug {$email_body};
}

my $scroll = $es->scroll(
type => 'distribution',
scroll => '10m',
body => {
query => {
exists => { field => "external_package." . $external_source }
}
},
);

my @to_remove;

while ( my $s = $scroll->next ) {
my $name = $s->{_source}{name};
next unless $name;

if ( exists $dist->{$name} ) {
delete $dist->{$name}
if $dist->{$name} eq
$s->{_source}{external_package}{$external_source};
}
else {
push @to_remove => $name;
}
}

my $bulk = $es->bulk( type => 'distribution' );

for my $d ( keys %{$dist} ) {
log_debug {"[$external_source] adding $d"};
$bulk->update( {
id => $d,
doc => +{
'external_package' => {
$external_source => $dist->{$d}
}
},
doc_as_upsert => 1,
} );
}

for my $d (@to_remove) {
log_debug {"[$external_source] removing $d"};
$bulk->update( {
id => $d,
doc => +{
'external_package' => {
$external_source => undef
}
}
} );
}

$bulk->flush;

1;

=pod
=head1 SYNOPSIS
# bin/external.pl --external_source SOURCE --email_to EMAIL
=cut
65 changes: 65 additions & 0 deletions lib/MetaCPAN/External/Cygwin.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package MetaCPAN::External::Cygwin;

use List::Util qw< shuffle >;
use MetaCPAN::Logger qw< :log :dlog >;

use MetaCPAN::Ingest qw< ua >;

use Sub::Exporter -setup => {
exports => [ qw<
run_cygwin
> ]
};

sub run_cygwin () {
my $ret = {};

my $ua = ua();
my $mirrors = get_mirrors($ua);

my @mirrors = @{ $mirrors };
my $timeout = $ua->timeout(10);

MIRROR: {
my $mirror = shift @mirrors or die "Ran out of mirrors";
log_debug {"Trying mirror: $mirror"};
my $res = $ua->get( $mirror . 'x86_64/setup.ini' );
redo MIRROR unless $res->is_success;

my @packages = split /^\@ /m, $res->decoded_content;
shift @packages; # drop headers
log_debug { sprintf "Got %d cygwin packages", scalar @packages };

for my $desc (@packages) {
next if substr( $desc, 0, 5 ) ne 'perl-';
my ( $pkg, %attr ) = map s/\A"|"\z//gr, map s/ \z//r,
map s/\n+/ /gr, split /^([a-z]+): /m, $desc;
$attr{category} = [ split / /, $attr{category} ];
next if grep /^(Debug|_obsolete)$/, @{ $attr{category} };
$ret->{dist}{ $pkg =~ s/^perl-//r } = $pkg;
}
}
$ua->timeout($timeout);

log_debug {
sprintf "Found %d cygwin-CPAN packages",
scalar keys %{ $ret->{dist} }
};

return $ret;
}

sub _get_mirrors ( $ua ) {
log_debug {"Fetching mirror list"};
my $res = $ua->get('https://cygwin.com/mirrors.lst');
die "Failed to fetch mirror list: " . $res->status_line
unless $res->is_success;
my @mirrors = shuffle map +( split /;/ )[0], split /\n/,
$res->decoded_content;

log_debug { sprintf "Got %d mirrors", scalar @mirrors };
return \@mirrors;
}

1;
112 changes: 112 additions & 0 deletions lib/MetaCPAN/External/Debian.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package MetaCPAN::External::Debian;

use strict;
use warnings;
use v5.36;

use CPAN::DistnameInfo ();
use DBI ();

use MetaCPAN::ES;

use Sub::Exporter -setup => {
exports => [ qw<
run_debian
> ]
};

sub run_debian () {
my $ret = {};

my $host_regex = _get_host_regex();

# connect to the database
my $dbh = DBI->connect( "dbi:Pg:host=udd-mirror.debian.net;dbname=udd",
'udd-mirror', 'udd-mirror' );

# special cases
my %skip = ( 'libbssolv-perl' => 1 );

# multiple queries are needed
my @sql = (

# packages with upstream identified as CPAN
q{select u.source, u.upstream_url from upstream_metadata um join upstream u on um.source = u.source where um.key='Archive' and um.value='CPAN'},

# packages which upstream URL pointing to CPAN
qq{select source, upstream_url from upstream where upstream_url ~ '${\$host_regex}'},
);

my @failures;

for my $sql (@sql) {
my $sth = $dbh->prepare($sql);
$sth->execute();

# map Debian source package to CPAN distro
while ( my ( $source, $url ) = $sth->fetchrow ) {
next if $skip{$source};
if ( my $dist = dist_for_debian( $source, $url ) ) {
$ret->{dist}{$dist} = $source;
}
else {
push @failures => [ $source, $url ];
}
}
}

if (@failures) {
my $ret->{errors_email_body} = join "\n" =>
map { sprintf "%s %s", $_->[0], $_->[1] // '<undef>' } @failures;
}

return $ret;
}

sub dist_for_debian ( $source, $url ) {
my %alias = (
'datapager' => 'data-pager',
'html-format' => 'html-formatter',
);

my $dist = CPAN::DistnameInfo->new($url);
if ( $dist->dist ) {
return $dist->dist;
}
elsif ( $source =~ /^lib(.*)-perl$/ ) {
my $es = MetaCPAN::ES->new( type => 'release' );
my $res = $es->scroll(
body => {
query => {
term => { 'distribution.lowercase' => $alias{$1} // $1 }
},
sort => [ { 'date' => 'desc' } ],
}
)->next;

return $res->{_source}{distribution}
if $res;
}

return;
}

sub _get_host_regex () {
my @cpan_hosts = qw<
backpan.cpan.org
backpan.perl.org
cpan.metacpan.org
cpan.noris.de
cpan.org
cpan.perl.org
search.cpan.org
www.cpan.org
www.perl.com
>;

return
'^(https?|ftp)://('
. join( '|', map {s/\./\\./r} @cpan_hosts ) . ')/';
}

1;

0 comments on commit 363570f

Please sign in to comment.