From 53331e82ee66274316add7cadb4afec1ce2d4bcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Mon, 20 Jan 2025 10:59:19 +0100 Subject: [PATCH] fix: graceful `BasicCrawler` tidy-up on `CriticalError` (#2817) Runs all the `BasicCrawler` clean-up actions even when the `AutoscaledPool.run()` throws. Closes #2807 --- .../src/internals/basic-crawler.ts | 79 ++++++++++--------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts index b562ff4f6f5a..168282c8fe6c 100644 --- a/packages/basic-crawler/src/internals/basic-crawler.ts +++ b/packages/basic-crawler/src/internals/basic-crawler.ts @@ -918,6 +918,8 @@ export class BasicCrawler `${count}x: ${info.at(-1)!.trim()} (${info[0]})`; + const finalStats = this.stats.calculate(); + stats = { + requestsFinished: this.stats.state.requestsFinished, + requestsFailed: this.stats.state.requestsFailed, + retryHistogram: this.stats.requestRetryHistogram, + ...finalStats, + }; + this.log.info('Final request statistics:', stats); + + if (this.stats.errorTracker.total !== 0) { + const prettify = ([count, info]: [number, string[]]) => + `${count}x: ${info.at(-1)!.trim()} (${info[0]})`; + + this.log.info(`Error analysis:`, { + totalErrors: this.stats.errorTracker.total, + uniqueErrors: this.stats.errorTracker.getUniqueErrorCount(), + mostCommonErrors: this.stats.errorTracker.getMostPopularErrors(3).map(prettify), + }); + } - this.log.info(`Error analysis:`, { - totalErrors: this.stats.errorTracker.total, - uniqueErrors: this.stats.errorTracker.getUniqueErrorCount(), - mostCommonErrors: this.stats.errorTracker.getMostPopularErrors(3).map(prettify), - }); - } + const client = this.config.getStorageClient(); - const client = this.config.getStorageClient(); + if (client.teardown) { + let finished = false; + setTimeout(() => { + if (!finished) { + this.log.info('Waiting for the storage to write its state to file system.'); + } + }, 1000); + await client.teardown(); + finished = true; + } - if (client.teardown) { - let finished = false; - setTimeout(() => { - if (!finished) { - this.log.info('Waiting for the storage to write its state to file system.'); - } - }, 1000); - await client.teardown(); - finished = true; + periodicLogger.stop(); + await this.setStatusMessage( + `Finished! Total ${this.stats.state.requestsFinished + this.stats.state.requestsFailed} requests: ${ + this.stats.state.requestsFinished + } succeeded, ${this.stats.state.requestsFailed} failed.`, + { isStatusMessageTerminal: true, level: 'INFO' }, + ); + this.running = false; + this.hasFinishedBefore = true; } - periodicLogger.stop(); - await this.setStatusMessage( - `Finished! Total ${this.stats.state.requestsFinished + this.stats.state.requestsFailed} requests: ${ - this.stats.state.requestsFinished - } succeeded, ${this.stats.state.requestsFailed} failed.`, - { isStatusMessageTerminal: true, level: 'INFO' }, - ); - this.running = false; - this.hasFinishedBefore = true; - return stats; }