From baba0b4da58af8302c1cc4ec1d43c1f22d3b8b48 Mon Sep 17 00:00:00 2001
From: undergroundwires
Date: Wed, 13 Mar 2024 11:34:19 +0100
Subject: [PATCH] ci/cd: trigger URL checks more, and limit amount

Fix all URL checks failing in GitHub runner due to:

- Missing Happy Eyeballs in Node.js: nodejs/undici#1531, nodejs/node#41625
- Or is it due to too many max connections? Test this.
  Mentioned in a comment in nodejs/node#41625.

Key changes:

- Run URL checks more frequently: every time a collection file or the
  test code is changed.
- Introduce an environment variable to randomly select and limit the URLs
  tested, so the tests provide quicker feedback on code changes.

Other supporting changes:

- Log more information about the test before running it to enable easier
  troubleshooting.
- Move the array shuffle function for reusability and add its missing tests.
---
 .github/workflows/checks.external-urls.yaml   |  8 ++
 src/application/Common/Shuffle.ts             | 12 +++
 .../DocumentationUrlExtractor.ts              | 69 ++++++++++++++
 .../TestExecutionDetailsLogger.ts             | 26 ++++++
 tests/checks/external-urls/main.spec.ts       | 92 +++++++++++--------
 tests/unit/application/Common/Shuffle.spec.ts | 52 +++++++++++
 6 files changed, 221 insertions(+), 38 deletions(-)
 create mode 100644 src/application/Common/Shuffle.ts
 create mode 100644 tests/checks/external-urls/DocumentationUrlExtractor.ts
 create mode 100644 tests/checks/external-urls/TestExecutionDetailsLogger.ts
 create mode 100644 tests/unit/application/Common/Shuffle.spec.ts

diff --git a/.github/workflows/checks.external-urls.yaml b/.github/workflows/checks.external-urls.yaml
index 8d8b46c85..81baa0ad4 100644
--- a/.github/workflows/checks.external-urls.yaml
+++ b/.github/workflows/checks.external-urls.yaml
@@ -3,6 +3,10 @@ name: checks.external-urls
 on:
   schedule:
     - cron: '0 0 * * 0' # at 00:00 on every Sunday
+  push:
+    paths:
+      - tests/checks/external-urls/**
+      - src/application/collections/**
 
 jobs:
   run-check:
@@ -20,3 +24,7 @@ jobs:
 
       - name: Test
         run: npm run check:external-urls
+        env:
+          RANDOMIZED_URL_CHECK_LIMIT: "${{ github.event_name == 'push' && '100' || '' }}"
+          # - Scheduled checks have no limit, ensuring thorough testing.
+          # - For push events, triggered by code changes, the number of URLs is limited to provide quick feedback.
diff --git a/src/application/Common/Shuffle.ts b/src/application/Common/Shuffle.ts
new file mode 100644
index 000000000..94df09bd0
--- /dev/null
+++ b/src/application/Common/Shuffle.ts
@@ -0,0 +1,12 @@
+/*
+  Shuffle an array, returning a new array with its elements in random order.
+  Uses the Fisher-Yates (or Durstenfeld) algorithm.
+*/
+export function shuffle<T>(array: readonly T[]): T[] {
+  const shuffledArray = [...array];
+  for (let i = array.length - 1; i > 0; i--) {
+    const j = Math.floor(Math.random() * (i + 1));
+    [shuffledArray[i], shuffledArray[j]] = [shuffledArray[j], shuffledArray[i]];
+  }
+  return shuffledArray;
+}
diff --git a/tests/checks/external-urls/DocumentationUrlExtractor.ts b/tests/checks/external-urls/DocumentationUrlExtractor.ts
new file mode 100644
index 000000000..7b53b0d12
--- /dev/null
+++ b/tests/checks/external-urls/DocumentationUrlExtractor.ts
@@ -0,0 +1,69 @@
+import type { IApplication } from '@/domain/IApplication';
+import type { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';
+
+interface UrlExtractionContext {
+  readonly logger: TestExecutionDetailsLogger;
+  readonly application: IApplication;
+  readonly urlExclusionPatterns: readonly RegExp[];
+}
+
+export function extractDocumentationUrls(
+  context: UrlExtractionContext,
+): string[] {
+  const urlsInApplication = extractUrlsFromApplication(context.application);
+  context.logger.logLabeledInformation(
+    'Extracted URLs from application',
+    urlsInApplication.length.toString(),
+  );
+  const uniqueUrls = filterDuplicateUrls(urlsInApplication);
+  context.logger.logLabeledInformation(
+    'Unique URLs after deduplication',
+    `${uniqueUrls.length} (duplicates removed)`,
+  );
+  context.logger.logLabeledInformation(
+    'Exclusion patterns for URLs',
+    context.urlExclusionPatterns.length === 0
+      ? 'None (all URLs included)'
+      : context.urlExclusionPatterns.map((pattern, index) => `${index + 1}) ${pattern.toString()}`).join('\n'),
+  );
+  const includedUrls = filterUrlsExcludingPatterns(uniqueUrls, context.urlExclusionPatterns);
+  context.logger.logLabeledInformation(
+    'URLs extracted for testing',
+    `${includedUrls.length} (after applying exclusion patterns; ${uniqueUrls.length - includedUrls.length} URLs ignored)`,
+  );
+  return includedUrls;
+}
+
+function extractUrlsFromApplication(application: IApplication): string[] {
+  return [ // Get all executables
+    ...application.collections.flatMap((c) => c.getAllCategories()),
+    ...application.collections.flatMap((c) => c.getAllScripts()),
+  ]
+    // Get all docs
+    .flatMap((documentable) => documentable.docs)
+    // Parse all URLs
+    .flatMap((docString) => extractUrlsExcludingCodeBlocks(docString));
+}
+
+function filterDuplicateUrls(urls: readonly string[]): string[] {
+  return urls.filter((url, index, array) => array.indexOf(url) === index);
+}
+
+function filterUrlsExcludingPatterns(
+  urls: readonly string[],
+  patterns: readonly RegExp[],
+): string[] {
+  return urls.filter((url) => !patterns.some((pattern) => pattern.test(url)));
+}
+
+function extractUrlsExcludingCodeBlocks(textWithInlineCode: string): string[] {
+  /*
+    Matches URLs:
+    - Excludes inline code blocks as they may contain URLs not intended for user interaction
+      and not guaranteed to support expected HTTP methods, leading to false-negatives.
+    - Supports URLs containing parentheses, avoiding matches within code that might not represent
+      actual links.
+  */
+  const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
+  return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
+}
diff --git a/tests/checks/external-urls/TestExecutionDetailsLogger.ts b/tests/checks/external-urls/TestExecutionDetailsLogger.ts
new file mode 100644
index 000000000..a8a05c33b
--- /dev/null
+++ b/tests/checks/external-urls/TestExecutionDetailsLogger.ts
@@ -0,0 +1,26 @@
+import { indentText } from '@tests/shared/Text';
+
+export class TestExecutionDetailsLogger {
+  public logTestSectionStartDelimiter(): void {
+    this.logSectionDelimiterLine();
+  }
+
+  public logTestSectionEndDelimiter(): void {
+    this.logSectionDelimiterLine();
+  }
+
+  public logLabeledInformation(
+    label: string,
+    detailedInformation: string,
+  ): void {
+    console.log([
+      `${label}:`,
+      indentText(detailedInformation),
+    ].join('\n'));
+  }
+
+  private logSectionDelimiterLine(): void {
+    const horizontalLine = '─'.repeat(40);
+    console.log(horizontalLine);
+  }
+}
diff --git a/tests/checks/external-urls/main.spec.ts b/tests/checks/external-urls/main.spec.ts
index 8fac7c71d..47538424b 100644
--- a/tests/checks/external-urls/main.spec.ts
+++ b/tests/checks/external-urls/main.spec.ts
@@ -1,19 +1,26 @@
 import { test, expect } from 'vitest';
 import { parseApplication } from '@/application/Parser/ApplicationParser';
-import type { IApplication } from '@/domain/IApplication';
 import { indentText } from '@tests/shared/Text';
 import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage';
+import { shuffle } from '@/application/Common/Shuffle';
 import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus';
 import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker';
+import { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';
+import { extractDocumentationUrls } from './DocumentationUrlExtractor';
 
 // arrange
+const logger = new TestExecutionDetailsLogger();
+logger.logTestSectionStartDelimiter();
 const app = parseApplication();
-const urls = collectUniqueUrls({
-  application: app,
-  excludePatterns: [
+let urls = extractDocumentationUrls({
+  logger,
+  urlExclusionPatterns: [
     /^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
   ],
+  application: app,
 });
+urls = filterUrlsToEnvironmentCheckLimit(urls);
+logger.logLabeledInformation('URLs submitted for testing', urls.length.toString());
 const requestOptions: BatchRequestOptions = {
   domainOptions: {
     sameDomainParallelize: false, // be nice to our third-party servers
@@ -30,55 +37,64 @@ const requestOptions: BatchRequestOptions = {
     enableCookies: true,
   },
 };
+logger.logLabeledInformation('HTTP request options', JSON.stringify(requestOptions, null, 2));
 const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000;
+logger.logLabeledInformation('Scheduled test duration', convertMillisecondsToHumanReadableFormat(testTimeoutInMs));
+logger.logTestSectionEndDelimiter();
 
 test(`all URLs (${urls.length}) should be alive`, async () => {
   // act
   const results = await getUrlStatusesInParallel(urls, requestOptions);
   // assert
   const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code));
-  expect(deadUrls).to.have.lengthOf(0, formatAssertionMessage([formatUrlStatusReport(deadUrls)]));
+  expect(deadUrls).to.have.lengthOf(
+    0,
+    formatAssertionMessage([createReportForDeadUrlStatuses(deadUrls)]),
+  );
 }, testTimeoutInMs);
 
 function isOkStatusCode(statusCode: number): boolean {
   return statusCode >= 200 && statusCode < 300;
 }
 
-function collectUniqueUrls(
-  options: {
-    readonly application: IApplication,
-    readonly excludePatterns?: readonly RegExp[],
-  },
-): string[] {
-  return [ // Get all nodes
-    ...options.application.collections.flatMap((c) => c.getAllCategories()),
-    ...options.application.collections.flatMap((c) => c.getAllScripts()),
-  ]
-    // Get all docs
-    .flatMap((documentable) => documentable.docs)
-    // Parse all URLs
-    .flatMap((docString) => extractUrls(docString))
-    // Remove duplicates
-    .filter((url, index, array) => array.indexOf(url) === index)
-    // Exclude certain URLs based on patterns
-    .filter((url) => !shouldExcludeUrl(url, options.excludePatterns ?? []));
+function createReportForDeadUrlStatuses(deadUrlStatuses: readonly UrlStatus[]): string {
+  return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
 }
 
-function shouldExcludeUrl(url: string, patterns: readonly RegExp[]): boolean {
-  return patterns.some((pattern) => pattern.test(url));
+function filterUrlsToEnvironmentCheckLimit(originalUrls: string[]): string[] {
+  const { RANDOMIZED_URL_CHECK_LIMIT } = process.env;
+  logger.logLabeledInformation('URL check limit', RANDOMIZED_URL_CHECK_LIMIT || 'Unlimited');
+  if (RANDOMIZED_URL_CHECK_LIMIT !== undefined && RANDOMIZED_URL_CHECK_LIMIT !== '') {
+    const maxUrlsInTest = parseInt(RANDOMIZED_URL_CHECK_LIMIT, 10);
+    if (Number.isNaN(maxUrlsInTest)) {
+      throw new Error(`Invalid URL limit: ${RANDOMIZED_URL_CHECK_LIMIT}`);
+    }
+    if (maxUrlsInTest < originalUrls.length) {
+      return shuffle(originalUrls).slice(0, maxUrlsInTest);
+    }
+  }
+  return originalUrls;
 }
 
-function formatUrlStatusReport(deadUrlStatuses: readonly UrlStatus[]): string {
-  return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
-}
+function convertMillisecondsToHumanReadableFormat(milliseconds: number): string {
+  const timeParts: string[] = [];
+  const addTimePart = (amount: number, label: string) => {
+    if (amount === 0) {
+      return;
+    }
+    timeParts.push(`${amount} ${label}`);
+  };
+
+  const hours = milliseconds / (1000 * 60 * 60);
+  const absoluteHours = Math.floor(hours);
+  addTimePart(absoluteHours, 'hours');
+
+  const minutes = (hours - absoluteHours) * 60;
+  const absoluteMinutes = Math.floor(minutes);
+  addTimePart(absoluteMinutes, 'minutes');
+
+  const seconds = (minutes - absoluteMinutes) * 60;
+  const absoluteSeconds = Math.floor(seconds);
+  addTimePart(absoluteSeconds, 'seconds');
 
-function extractUrls(textWithInlineCode: string): string[] {
-  /*
-    Matches URLs:
-    - Excludes inline code blocks as they may contain URLs not intended for user interaction
-      and not guaranteed to support expected HTTP methods, leading to false-negatives.
-    - Supports URLs containing parentheses, avoiding matches within code that might not represent
-      actual links.
- */ - const nonCodeBlockUrlRegex = /(?()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g; - return textWithInlineCode.match(nonCodeBlockUrlRegex) || []; + return timeParts.join(', '); } diff --git a/tests/unit/application/Common/Shuffle.spec.ts b/tests/unit/application/Common/Shuffle.spec.ts new file mode 100644 index 000000000..ebbb0aa98 --- /dev/null +++ b/tests/unit/application/Common/Shuffle.spec.ts @@ -0,0 +1,52 @@ +import { describe, it, expect } from 'vitest'; +import { shuffle } from '@/application/Common/Shuffle'; + +describe('Shuffle', () => { + describe('shuffle', () => { + it('returns a new array', () => { + // arrange + const inputArray = ['a', 'b', 'c', 'd']; + // act + const result = shuffle(inputArray); + // assert + expect(result).not.to.equal(inputArray); + }); + + it('returns an array of the same length', () => { + // arrange + const inputArray = ['a', 'b', 'c', 'd']; + // act + const result = shuffle(inputArray); + // assert + expect(result.length).toBe(inputArray.length); + }); + + it('contains the same elements', () => { + // arrange + const inputArray = ['a', 'b', 'c', 'd']; + // act + const result = shuffle(inputArray); + // assert + expect(result).to.have.members(inputArray); + }); + + it('does not modify the input array', () => { + // arrange + const inputArray = ['a', 'b', 'c', 'd']; + const inputArrayCopy = [...inputArray]; + // act + shuffle(inputArray); + // assert + expect(inputArray).to.deep.equal(inputArrayCopy); + }); + + it('handles an empty array correctly', () => { + // arrange + const inputArray: string[] = []; + // act + const result = shuffle(inputArray); + // assert + expect(result).have.lengthOf(0); + }); + }); +});