forked from hugomallinson/citeusync
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathciteusync.pl
89 lines (76 loc) · 2.15 KB
/
citeusync.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/perl
# citeusync: mirror a CiteULike library into a local folder.
#
# Reads three lines from ~/.citeusync (username, password, save folder),
# logs in to citeulike.org, downloads every linked PDF that is not already
# present in the save folder as <article-id>.pdf, and finally writes
# <username>.bib into the save folder, giving each entry whose PDF is
# available locally a "file" field that points at it.
use strict;
use warnings;
use WWW::Mechanize;

# Show "Logging in ... " / "Fetching ..." immediately instead of waiting for
# the newline that completes the line.
$| = 1;

# --- Configuration ----------------------------------------------------------

my $home        = $ENV{HOME} || ( getpwuid($<) )[7];
my $config_path = "$home/.citeusync";
open( my $config_file, '<', $config_path )
  or die "Cannot open $config_path: $!\n";
my @data = <$config_file>;
close($config_file);
s/\r?\n\z// for @data;    # tolerate CRLF line endings as well as LF

my ( $username, $password, $save_folder ) = @data;
for my $setting ( $username, $password, $save_folder ) {
    die "$config_path must hold username, password and save folder"
      . " on its first three lines\n"
      unless defined $setting && length $setting;
}
die "Save folder '$save_folder' is not a directory\n"
  unless -d $save_folder;

# --- Login ------------------------------------------------------------------

# NOTE(review): the login form is fetched over plain http, so the password
# travels unencrypted. Switch to https if the site supports it.
my $mech = WWW::Mechanize->new();
$mech->get("http://www.citeulike.org/login");
print "Logging in ... ";
$mech->submit_form(
    form_name => 'frm',
    fields    => { username => $username, password => $password },
);

# This only confirms that the HTTP request succeeded; it does not prove that
# the credentials were accepted.
if ( $mech->success() ) {
    print "OK.\n";
}
else {
    print "Error.\n";
    die "Login request failed with status " . $mech->status() . "\n";
}

# --- Collect PDF links from every page of the library -----------------------

$mech->get("http://www.citeulike.org/user/$username");
my $content = $mech->content();

# Default to one page: a library short enough to have no pagination links
# must still be scanned.
my $last_page = 1;
while ( $content =~ m{/page/([0-9]+)">[0-9]}g ) {
    $last_page = $1 if $1 > $last_page;
}

my $listing = "";
for my $page ( 1 .. $last_page ) {
    $mech->get("http://www.citeulike.org/user/$username/page/$page");
    $listing .= $mech->content();
}

# \Q..\E keeps regex metacharacters in the username literal.
my @links;
while ( $listing =~ m{'link': '(/pdf/user/\Q$username\E/article/[^']*)'}g ) {
    push( @links, "http://www.citeulike.org$1" );
}
print "Found " . scalar(@links) . " PDFs.\n";

# --- Download PDFs that are not in the save folder yet ----------------------

# Article ids that already have an <id>.pdf in the save folder. readdir is
# used rather than glob so that a save folder containing spaces works, and
# the exact-name match stops id 123 from being mistaken for 41234.pdf.
opendir( my $save_dir, $save_folder )
  or die "Cannot read $save_folder: $!\n";
my %have_pdf =
  map { /\A([0-9]+)\.pdf\z/ ? ( $1 => 1 ) : () } readdir($save_dir);
closedir($save_dir);

for my $link (@links) {
    my ($article_id) = $link =~ m{/article/([0-9]+)(?:/|\z)};
    if ( !defined $article_id ) {
        print("Skipping $link (no article id).\n");
        next;
    }
    if ( $have_pdf{$article_id} ) {
        print("Already have $article_id.pdf.\n");
        next;
    }

    print("Fetching $link ...");

    # get() may die on an HTTP error; trap it so that one broken link does
    # not abort the rest of the sync.
    my $fetched = eval { $mech->get($link); $mech->success() };
    if ($fetched) {
        $mech->save_content("$save_folder/$article_id.pdf");
        $have_pdf{$article_id} = 1;    # recorded only once the file exists
        print "OK.\n";
    }
    else {
        print "Error.\n";
    }
}

# --- Export BibTeX with links to the local PDFs -----------------------------

$mech->get("http://www.citeulike.org/bibtex/user/$username");
$content = $mech->content();

# Splitting on "\n@" strips the leading "@" from every entry but the first;
# the join below puts it back.
my @bibs = split( /\n\@/, $content );
for my $bib (@bibs) {    # $bib aliases the array element, so edits stick
    my ($article_id) = $bib =~ /citeulike-article-id\s*=\s*\{([0-9]+)\}/i;
    next unless defined $article_id && $have_pdf{$article_id};

    my $url = "citeulikepdf:$article_id.pdf:PDF";
    if ( $bib =~ /^\s*file\s*=/mi ) {

        # Rewrite an existing "file" field. Only a real field at the start
        # of a line counts, not the word "file" inside a title or abstract.
        # A value with nested braces is left untouched rather than risk
        # swallowing the entry's closing brace.
        $bib =~ s/^(\s*)file\s*=\s*\{[^{}]*\}/${1}file = {$url}/mi;
    }
    else {

        # Insert the field just before the entry's closing brace, dropping
        # an optional trailing comma so no empty field is produced, and
        # keeping whatever whitespace followed the entry.
        $bib =~ s/,?\s*\}(\s*)\z/,\n file = {$url}\n}$1/;
    }
}

my $bib_path = "$save_folder/$username.bib";
open( my $bib_file, '>', $bib_path )
  or die "Cannot write $bib_path: $!\n";
print {$bib_file} join( "\n\@", @bibs )
  or die "Cannot write $bib_path: $!\n";
close($bib_file) or die "Cannot close $bib_path: $!\n";