-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrabRatings.pl
92 lines (71 loc) · 2.19 KB
/
grabRatings.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/perl -w
# Example code from Chapter 1 of /Perl and LWP/ by Sean M. Burke
# http://www.oreilly.com/catalog/perllwp/
#require 5;
use strict;
#use warnings;
use LWP::Simple;
# Main script body.
#
# Usage: grabRatings.pl StateFolder
#
# Scans every saved search-result page in <StateFolder>/ratings (any file
# whose name matches /search.*htm/i), extracts each "Click to see ratings"
# link, and downloads the linked doctor-ratings page to
#   <StateFolder>/ratings/<FIRST LETTER OF NAME>/<name><id>.htm
# Pages that already exist on disk are skipped, so an interrupted run can be
# resumed by simply running the script again.
#
# Exit status: 0 on success or usage error (unchanged from the original),
# 3 when a link cannot be parsed; dies on filesystem errors.
if (scalar(@ARGV) != 1) {
    print "Usage: grabRatings.pl StateFolder\n";
    exit;
}
my $stateFolder = $ARGV[0];
my $ratingsDir  = "$stateFolder/ratings";
my $cnt         = 0;

# Collect the saved search pages. Fail loudly if the folder is missing
# instead of silently reporting zero ratings.
opendir(my $dirHandle, $ratingsDir)
    or die "Cannot open directory $ratingsDir: $!";
my @searchPages = map  { "$ratingsDir/$_" }
                  sort
                  grep { m/search.*htm/i } readdir($dirHandle);
closedir($dirHandle);

foreach my $searchPage (@searchPages) {
    open(my $in, '<', $searchPage)
        or die "Cannot open file $searchPage: $!";
    my @lines = <$in>;
    close($in);

    # Each matching line carries one link to a doctor's ratings page.
    foreach my $line (@lines) {
        next unless $line =~ m{HREF="/(doctor-ratings[^" ]*).*Click to see ratings};
        my $originalURL = $1;

        # Numeric doctor id: the path component right after "doctor-ratings/".
        my $id;
        if ($originalURL =~ m{doctor-ratings/(\d+)}) {
            $id = $1;
        } else { print "DISASTER id-processing $originalURL"; exit(3); }

        # Doctor name: the last path component of the link.
        my $name;
        if ($originalURL =~ m{([^/]+)$}) {
            $name = $1;
        } else { print "DISASTER name-processing $originalURL"; exit(3); }

        # Files are bucketed into subfolders by upper-cased first letter.
        my $prefix;
        if ($name =~ m/^([A-Z])/i) {
            $prefix = uc($1);
        } else { print "DISASTER could not find first letter of name $name from $originalURL"; exit(3); }

        # Use a dedicated variable for the output path: the original reused
        # the foreach loop variable, which aliases (and so overwrote) the
        # current element of @searchPages.
        my $prefixDir = "$ratingsDir/$prefix";
        my $outFile   = "$prefixDir/$name$id.htm";

        if (-e $outFile) {
            print "Skipped $outFile\n";
        } else {
            my $myUrl   = "http://www.ratemds.com/$originalURL";
            my $content = get($myUrl);

            # get() returns undef on failure. Fetch BEFORE creating the file
            # so a failed download does not leave an empty file behind that
            # would be treated as "already downloaded" on the next run.
            if (!defined $content) {
                warn "Failed to fetch $myUrl; $outFile not written\n";
                sleep 10;    # stay polite to the server even after a failure
                next;
            }

            # Create the per-letter folder on demand.
            if (!-d $prefixDir) {
                mkdir($prefixDir)
                    or die "Cannot create directory $prefixDir: $!";
            }

            open(my $out, '>', $outFile)
                or die "Cannot open file $outFile: $!";
            print {$out} $content;
            # Buffered write errors (e.g. disk full) only surface at close.
            close($out) or die "Cannot close file $outFile: $!";
            print "Wrote $myUrl to $outFile\n";

            # Throttle requests to the remote site.
            sleep 10;
        }
        $cnt++;
    }
}
print "Got or skipped $cnt ratings\n";
print "done! Ta Da \n";
__END__