Skip to content

Commit

Permalink
fix: deprecate failing US/CA scrapers (covidatlas#830)
Browse files Browse the repository at this point in the history
  • Loading branch information
qgolsteyn authored Apr 16, 2020
1 parent 32b94c3 commit 3563a4b
Show file tree
Hide file tree
Showing 19 changed files with 168 additions and 75 deletions.
17 changes: 12 additions & 5 deletions src/shared/scrapers/US/CA/alameda-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand All @@ -19,11 +20,17 @@ const scraper = {
headless: true,
type: 'paragraph',
maintainers: [maintainers.jbencina],
async scraper() {
const $ = await fetch.headless(this.url);
const $el = $('p:contains("Positive Cases")');
const matches = $el.html().match(/Positive Cases:.*?(\d+).*/);
return { cases: parse.number(matches[1]) };
scraper: {
'0': async function() {
const $ = await fetch.headless(this.url);
const $el = $('p:contains("Positive Cases")');
const matches = $el.html().match(/Positive Cases:.*?(\d+).*/);
return { cases: parse.number(matches[1]) };
},
'2020-04-15': async function() {
await fetch.headless(this.url);
throw new DeprecatedError('Sunsetting county level scrapers');
}
}
};

Expand Down
25 changes: 16 additions & 9 deletions src/shared/scrapers/US/CA/calaveras-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand All @@ -11,15 +12,21 @@ const scraper = {
country: 'iso1:US',
url: 'https://covid19.calaverasgov.us/',
maintainers: [maintainers.jbencina],
async scraper() {
const $ = await fetch.page(this.url);
const cases = parse.number(
$('h2:contains("in Calaveras County:")')
.first()
.text()
.match(/in Calaveras County: (\d+)/)[1]
);
return { cases };
scraper: {
'0': async function() {
const $ = await fetch.page(this.url);
const cases = parse.number(
$('h2:contains("in Calaveras County:")')
.first()
.text()
.match(/in Calaveras County: (\d+)/)[1]
);
return { cases };
},
'2020-04-15': async function() {
await fetch.page(this.url);
throw new DeprecatedError('Sunsetting county level scrapers');
}
}
};

Expand Down
5 changes: 5 additions & 0 deletions src/shared/scrapers/US/CA/del-norte-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand Down Expand Up @@ -62,6 +63,10 @@ const scraper = {
cases,
tested
};
},
'2020-04-15': async function() {
await fetch.page(this.url);
throw new DeprecatedError('Sunsetting county level scraper');
}
}
};
Expand Down
2 changes: 1 addition & 1 deletion src/shared/scrapers/US/CA/glenn-county.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ const scraper = {
$('span:contains("Glenn County COVID-19 Cases")')
.first()
.text()
.match(/Cases: (\d+)/)[1]
.match(/Cases:.*(\d+)/)[1]
);
return { cases };
}
Expand Down
3 changes: 2 additions & 1 deletion src/shared/scrapers/US/CA/kern-county.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand Down Expand Up @@ -42,7 +43,7 @@ const scraper = {
return { cases, tested };
},
'2020-03-23': async function() {
throw new Error('Kern County, CA now uses a PNG and PDF');
throw new DeprecatedError('Kern County, CA now uses a PNG and PDF');
}
}
};
Expand Down
16 changes: 11 additions & 5 deletions src/shared/scrapers/US/CA/madera-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand All @@ -11,11 +12,16 @@ const scraper = {
country: 'iso1:US',
maintainers: [maintainers.jbencina],
url: 'https://www.maderacounty.com/government/public-health/health-updates/corona-virus',
async scraper() {
const $ = await fetch.page(this.url);
const $el = $('*:contains("Confirmed cases")').first();
const matches = $el.text().match(/Confirmed cases:.*?(\d+)/);
return { cases: parse.number(matches[1]) };
scraper: {
'0': async function() {
const $ = await fetch.page(this.url);
const $el = $('*:contains("Confirmed cases")').first();
const matches = $el.text().match(/Confirmed cases:.*?(\d+)/);
return { cases: parse.number(matches[1]) };
},
'2020-04-04': async function() {
throw new DeprecatedError('Madera County, CA now uses a PNG');
}
}
};

Expand Down
17 changes: 12 additions & 5 deletions src/shared/scrapers/US/CA/marin-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand All @@ -11,11 +12,17 @@ const scraper = {
country: 'iso1:US',
maintainers: [maintainers.jbencina],
url: 'https://coronavirus.marinhhs.org/surveillance',
async scraper() {
const $ = await fetch.page(this.url);
const text = $('td:contains("confirmed cases of COVID-19")').text();
const cases = parse.number(text.match(/there have been (\d+) confirmed cases of/)[1]);
return { cases };
scraper: {
'0': async function() {
const $ = await fetch.page(this.url);
const text = $('td:contains("confirmed cases of COVID-19")').text();
const cases = parse.number(text.match(/there have been (\d+) confirmed cases of/)[1]);
return { cases };
},
'2020-04-15': async function() {
await fetch.page(this.url);
throw new DeprecatedError('Sunsetting county level scrapers');
}
}
};

Expand Down
5 changes: 5 additions & 0 deletions src/shared/scrapers/US/CA/mendocino-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand Down Expand Up @@ -33,6 +34,10 @@ const scraper = {
const $li = $outerLI.find('li:contains("Total Positives")');
const cases = parse.number($li.text().split(':')[1]);
return { cases };
},
'2020-04-15': async function() {
await fetch.page(this.url);
throw new DeprecatedError('Sunsetting county level scrapers');
}
}
};
Expand Down
5 changes: 5 additions & 0 deletions src/shared/scrapers/US/CA/merced-county.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import cheerioTableparser from 'cheerio-tableparser';
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand Down Expand Up @@ -69,6 +70,10 @@ const scraper = {
deaths: parse.number(data[1][3]),
recoveries: parse.number(data[1][4])
};
},
'2020-04-15': async function() {
await fetch.page(this.url);
throw new DeprecatedError('Sunsetting county level scrapers');
}
}
};
Expand Down
3 changes: 2 additions & 1 deletion src/shared/scrapers/US/CA/orange-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand Down Expand Up @@ -30,7 +31,7 @@ const scraper = {
'2020-03-18': async function scraper() {
this.url = 'https://occovid19.ochealthinfo.com/coronavirus-in-oc';
await fetch.page(this.url);
throw new Error('Need to scrape new page');
throw new DeprecatedError('Need to scrape new page');
}
}
};
Expand Down
17 changes: 12 additions & 5 deletions src/shared/scrapers/US/CA/riverside-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand All @@ -11,11 +12,17 @@ const scraper = {
country: 'iso1:US',
maintainers: [maintainers.jbencina],
url: 'https://www.rivcoph.org/coronavirus',
async scraper() {
const $ = await fetch.page(this.url);
const $el = $('p:contains("Confirmed cases:")').first();
const matches = $el.text().match(/Confirmed cases:.*?(\d+)/);
return { cases: parse.number(matches[1]) };
scraper: {
'0': async function() {
const $ = await fetch.page(this.url);
const $el = $('p:contains("Confirmed cases:")').first();
const matches = $el.text().match(/Confirmed cases:.*?(\d+)/);
return { cases: parse.number(matches[1]) };
},
'2020-04-15': async function() {
await fetch.page(this.url);
throw new DeprecatedError('Sunsetting county scraper');
}
}
};

Expand Down
23 changes: 15 additions & 8 deletions src/shared/scrapers/US/CA/sacramento-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand All @@ -11,14 +12,20 @@ const scraper = {
country: 'iso1:US',
maintainers: [maintainers.jbencina],
url: 'https://www.saccounty.net/COVID-19/Pages/default.aspx',
async scraper() {
const $ = await fetch.page(this.url);
const $table = $('th:contains("Confirmed")').closest('table');
const $tds = $table.find('tr:nth-child(2) > td');
return {
cases: parse.number($tds.first().text()),
deaths: parse.number($tds.last().text())
};
scraper: {
'0': async function() {
const $ = await fetch.page(this.url);
const $table = $('th:contains("Confirmed")').closest('table');
const $tds = $table.find('tr:nth-child(2) > td');
return {
cases: parse.number($tds.first().text()),
deaths: parse.number($tds.last().text())
};
},
'2020-04-15': async function() {
await fetch.page(this.url);
throw new DeprecatedError('Sunsetting county scraper');
}
}
};

Expand Down
3 changes: 2 additions & 1 deletion src/shared/scrapers/US/CA/san-benito-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand Down Expand Up @@ -39,7 +40,7 @@ const scraper = {
};
},
'2020-03-19': async function scraper() {
throw new Error('Need to scrape new arcgis');
throw new DeprecatedError('Need to scrape new arcgis');
}
}
};
Expand Down
23 changes: 15 additions & 8 deletions src/shared/scrapers/US/CA/san-bernardino-county.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand All @@ -11,14 +12,20 @@ const scraper = {
country: 'iso1:US',
maintainers: [maintainers.jbencina],
url: 'http://wp.sbcounty.gov/dph/coronavirus/',
async scraper() {
const $ = await fetch.page(this.url);
const cases = parse.number(
$('h3:contains("COVID-19 CASES")')
.parent()
.attr('data-number-value')
);
return { cases };
scraper: {
'0': async function() {
const $ = await fetch.page(this.url);
const cases = parse.number(
$('h3:contains("COVID-19 CASES")')
.parent()
.attr('data-number-value')
);
return { cases };
},
'2020-04-15': async function() {
await fetch.page(this.url);
throw new DeprecatedError('Sunsetting county scraper');
}
}
};

Expand Down
5 changes: 5 additions & 0 deletions src/shared/scrapers/US/CA/santa-barbara-county.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import cheerioTableparser from 'cheerio-tableparser';
import * as fetch from '../../../lib/fetch/index.js';
import * as parse from '../../../lib/parse.js';
import maintainers from '../../../lib/maintainers.js';
import { DeprecatedError } from '../../../lib/errors.js';

// Set county to this if you only have state data, but this isn't the entire state
// const UNASSIGNED = '(unassigned)';
Expand Down Expand Up @@ -56,6 +57,10 @@ const scraper = {
const tested = parse.number(data[1][lastRow]);

return { cases, tested };
},
'2020-04-15': async function() {
await fetch.page(this.url);
throw new DeprecatedError('Sunsetting county scraper');
}
}
};
Expand Down
Loading

0 comments on commit 3563a4b

Please sign in to comment.