forked from get-set-fetch/scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
UpsertResourcePlugin.ts
72 lines (62 loc) · 2.25 KB
/
UpsertResourcePlugin.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/* eslint-disable no-await-in-loop */
/* eslint-disable no-param-reassign */
import Plugin from '../Plugin';
import Project from '../../storage/base/Project';
import Resource from '../../storage/base/Resource';
import { SchemaType } from '../../schema/SchemaHelper';
import { getLogger } from '../../logger/Logger';
/**
 * Persists a resource once its scraping has finished.
 * Resources that already carry an id are updated, resources without one are saved.
 */
export default class UpsertResourcePlugin extends Plugin {
  /** Schema describing the options accepted by this plugin. */
  static get schema() {
    return {
      type: 'object',
      title: 'Upsert Resource Plugin',
      description: 'updates a static resource or inserts a dynamic one after scraping it.',
      properties: {
        keepHtmlData: {
          type: 'boolean',
          default: false,
          title: 'Keep Html Data',
          description: 'Whether or not to save html buffer response (if present) under resource.data',
        },
      },
    } as const;
  }

  logger = getLogger('UpsertResourcePlugin');

  opts: SchemaType<typeof UpsertResourcePlugin.schema>;

  /**
   * @param opts - plugin options matching the plugin schema, an empty object when omitted
   */
  constructor(opts:SchemaType<typeof UpsertResourcePlugin.schema> = {}) {
    super(opts);
  }

  /**
   * The plugin is applicable whenever a (truthy) resource is provided.
   *
   * @param project - not used by this check
   * @param resource - the resource being scraped
   * @returns true when a resource is present, false otherwise
   */
  test(project: Project, resource: Resource) {
    return Boolean(resource);
  }

  /**
   * Saves the scraped resource.
   *
   * @param project - not used by this method
   * @param resource - the resource whose scraping just completed
   * @returns an object with `actions` set to null; per the original author's note,
   * clearing the dynamic actions of an updated resource allows other dynamic plugins to be triggered
   */
  async apply(project: Project, resource: Resource) {
    // scraping is done at this point, persist the outcome
    await this.saveResource(resource);

    return { actions: null };
  }

  /**
   * Marks the resource as scraped, optionally discards its html payload and writes it to storage.
   *
   * @param resource - the resource to persist; it is modified in place
   */
  async saveResource(resource: Resource) {
    // the scrape has ended: clear the in-progress marker and record the scrape timestamp
    resource.scrapeInProgress = false;
    resource.scrapedAt = new Date(Date.now());

    // html content is kept under resource.data only when the keepHtmlData option is enabled
    const discardHtmlData = !this.opts.keepHtmlData
      && (/html/i).test(resource.contentType)
      && resource.data;
    if (discardHtmlData) {
      resource.data = null;
    }

    /*
    a resource with an id is a static one, already inserted in db during a previous scrape step
    (via plugins like InsertResourcesPlugin), so it only needs an update;
    a resource without an id is a dynamic one, found and scraped on the fly starting from
    an already scraped static resource, so it has to be saved
    */
    await (resource.id ? resource.update() : resource.save());
  }
}