Skip to content

Commit

Permalink
fix: graceful BasicCrawler tidy-up on CriticalError (#2817)
Browse files Browse the repository at this point in the history
Runs all the `BasicCrawler` clean-up actions even when
`AutoscaledPool.run()` throws.

Closes #2807
  • Loading branch information
barjin authored Jan 20, 2025
1 parent 455f9c7 commit 53331e8
Showing 1 changed file with 41 additions and 38 deletions.
79 changes: 41 additions & 38 deletions packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,8 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
this.events.on(EventType.MIGRATING, boundPauseOnMigration);
this.events.on(EventType.ABORTING, boundPauseOnMigration);

let stats = {} as FinalStatistics;

try {
await this.autoscaledPool!.run();
} finally {
Expand All @@ -927,50 +929,51 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
process.off('SIGINT', sigintHandler);
this.events.off(EventType.MIGRATING, boundPauseOnMigration);
this.events.off(EventType.ABORTING, boundPauseOnMigration);
}

const finalStats = this.stats.calculate();
const stats = {
requestsFinished: this.stats.state.requestsFinished,
requestsFailed: this.stats.state.requestsFailed,
retryHistogram: this.stats.requestRetryHistogram,
...finalStats,
};
this.log.info('Final request statistics:', stats);

if (this.stats.errorTracker.total !== 0) {
const prettify = ([count, info]: [number, string[]]) => `${count}x: ${info.at(-1)!.trim()} (${info[0]})`;
const finalStats = this.stats.calculate();
stats = {
requestsFinished: this.stats.state.requestsFinished,
requestsFailed: this.stats.state.requestsFailed,
retryHistogram: this.stats.requestRetryHistogram,
...finalStats,
};
this.log.info('Final request statistics:', stats);

if (this.stats.errorTracker.total !== 0) {
const prettify = ([count, info]: [number, string[]]) =>
`${count}x: ${info.at(-1)!.trim()} (${info[0]})`;

this.log.info(`Error analysis:`, {
totalErrors: this.stats.errorTracker.total,
uniqueErrors: this.stats.errorTracker.getUniqueErrorCount(),
mostCommonErrors: this.stats.errorTracker.getMostPopularErrors(3).map(prettify),
});
}

this.log.info(`Error analysis:`, {
totalErrors: this.stats.errorTracker.total,
uniqueErrors: this.stats.errorTracker.getUniqueErrorCount(),
mostCommonErrors: this.stats.errorTracker.getMostPopularErrors(3).map(prettify),
});
}
const client = this.config.getStorageClient();

const client = this.config.getStorageClient();
if (client.teardown) {
let finished = false;
setTimeout(() => {
if (!finished) {
this.log.info('Waiting for the storage to write its state to file system.');
}
}, 1000);
await client.teardown();
finished = true;
}

if (client.teardown) {
let finished = false;
setTimeout(() => {
if (!finished) {
this.log.info('Waiting for the storage to write its state to file system.');
}
}, 1000);
await client.teardown();
finished = true;
periodicLogger.stop();
await this.setStatusMessage(
`Finished! Total ${this.stats.state.requestsFinished + this.stats.state.requestsFailed} requests: ${
this.stats.state.requestsFinished
} succeeded, ${this.stats.state.requestsFailed} failed.`,
{ isStatusMessageTerminal: true, level: 'INFO' },
);
this.running = false;
this.hasFinishedBefore = true;
}

periodicLogger.stop();
await this.setStatusMessage(
`Finished! Total ${this.stats.state.requestsFinished + this.stats.state.requestsFailed} requests: ${
this.stats.state.requestsFinished
} succeeded, ${this.stats.state.requestsFailed} failed.`,
{ isStatusMessageTerminal: true, level: 'INFO' },
);
this.running = false;
this.hasFinishedBefore = true;

return stats;
}

Expand Down

0 comments on commit 53331e8

Please sign in to comment.