ci/cd: trigger URL checks more, and limit amount
Fix all URL checks failing in the GitHub runner due to:

- Missing Happy Eyeballs support in Node.js:
  nodejs/undici#1531
  nodejs/node#41625
- Missing IPv6 support in GitHub runners:
  actions/runner#3138
  actions/runner-images#668

Tried (did not work):

1)

```
import dns from 'dns';
dns.setDefaultResultOrder('ipv4first');
```

2) Bumping node to v20.

3) TODO: Try autoSelectFamily (see the sketch after this list).

- Or is it due to too many max connections? Test this.
  Mentioned in a comment on nodejs/node#41625.
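
For reference, a rough sketch of what the autoSelectFamily TODO could look like. It is untested and not part of this commit; it assumes Node.js >= 18.13 for the net option and assumes undici's Agent forwards its connect options to net.connect:

```
// Hypothetical sketch only: two ways autoSelectFamily might be enabled
// to restore Happy Eyeballs behavior. Untested against the runner issue.
import net from 'node:net';
import { Agent, setGlobalDispatcher } from 'undici';

// Option A: flip the process-wide default so outgoing connections try
// IPv4 and IPv6 concurrently (requires Node.js >= 19.4).
net.setDefaultAutoSelectFamily(true);

// Option B: scope it to fetch() via a global dispatcher whose connect
// options are assumed to be passed through to net.connect()/tls.connect().
setGlobalDispatcher(new Agent({
  connect: {
    autoSelectFamily: true,
    autoSelectFamilyAttemptTimeout: 500, // ms per address-family attempt
  },
}));
```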

Key changes:

- Run URL checks on every push, in addition to the weekly schedule.
- Introduce an environment variable to randomly select and limit the URLs
  tested, so the tests provide quicker feedback on code changes
  (a condensed sketch follows these lists).

Other supporting changes:

- Log more information about each test before running it to enable
  easier troubleshooting.
- Move the array shuffle function to a shared module for reusability and
  add its missing tests.
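
The randomized limit boils down to the condensed sketch below; the authoritative implementation is filterUrlsToEnvironmentCheckLimit in tests/checks/external-urls/main.spec.ts in the diff, and the function name here is illustrative only:

```
// Condensed sketch of the randomized URL limit; see main.spec.ts below
// for the actual implementation used by the checks.
import { shuffle } from '@/application/Common/Shuffle';

function limitUrlsForQuickFeedback(urls: readonly string[]): string[] {
  // '10' on push events, empty/unset on scheduled runs (no limit).
  const limitValue = process.env.RANDOMIZED_URL_CHECK_LIMIT;
  if (!limitValue) {
    return [...urls]; // unlimited: scheduled runs test every URL
  }
  const maxUrls = parseInt(limitValue, 10);
  if (Number.isNaN(maxUrls)) {
    throw new Error(`Invalid URL limit: ${limitValue}`);
  }
  // Shuffle first so each push exercises a different random sample.
  return shuffle(urls).slice(0, maxUrls);
}
```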
undergroundwires committed Mar 19, 2024
1 parent 287b8e6 commit 3341de4
Showing 6 changed files with 218 additions and 38 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/checks.external-urls.yaml
@@ -1,6 +1,7 @@
name: checks.external-urls

on:
push:
schedule:
- cron: '0 0 * * 0' # at 00:00 on every Sunday

@@ -20,3 +21,7 @@ jobs:
-
name: Test
run: npm run check:external-urls
env:
RANDOMIZED_URL_CHECK_LIMIT: "${{ github.event_name == 'push' && '10' || '' }}"
# - Scheduled checks have no limits, ensuring thorough testing.
# - For push events, triggered by code changes, the number of URLs is limited to provide quick feedback.
12 changes: 12 additions & 0 deletions src/application/Common/Shuffle.ts
@@ -0,0 +1,12 @@
/*
Shuffle an array, returning a new array with elements in random order.
Uses the Fisher-Yates (or Durstenfeld) algorithm.
*/
export function shuffle<T>(array: readonly T[]): T[] {
const shuffledArray = [...array];
for (let i = array.length - 1; i > 0; i--) {
const j = Math.floor(Math.random() * (i + 1));
[shuffledArray[i], shuffledArray[j]] = [shuffledArray[j], shuffledArray[i]];
}
return shuffledArray;
}
69 changes: 69 additions & 0 deletions tests/checks/external-urls/DocumentationUrlExtractor.ts
@@ -0,0 +1,69 @@
import type { IApplication } from '@/domain/IApplication';
import type { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';

interface UrlExtractionContext {
readonly logger: TestExecutionDetailsLogger;
readonly application: IApplication;
readonly urlExclusionPatterns: readonly RegExp[];
}

export function extractDocumentationUrls(
context: UrlExtractionContext,
): string[] {
const urlsInApplication = extractUrlsFromApplication(context.application);
context.logger.logLabeledInformation(
'Extracted URLs from application',
urlsInApplication.length.toString(),
);
const uniqueUrls = filterDuplicateUrls(urlsInApplication);
context.logger.logLabeledInformation(
'Unique URLs after deduplication',
`${uniqueUrls.length} (duplicates removed)`,
);
context.logger.logLabeledInformation(
'Exclusion patterns for URLs',
context.urlExclusionPatterns.length === 0
? 'None (all URLs included)'
: context.urlExclusionPatterns.map((pattern, index) => `${index + 1}) ${pattern.toString()}`).join('\n'),
);
const includedUrls = filterUrlsExcludingPatterns(uniqueUrls, context.urlExclusionPatterns);
context.logger.logLabeledInformation(
'URLs extracted for testing',
`${includedUrls.length} (after applying exclusion patterns; ${uniqueUrls.length - includedUrls.length} URLs ignored)`,
);
return includedUrls;
}

function extractUrlsFromApplication(application: IApplication): string[] {
return [ // Get all executables
...application.collections.flatMap((c) => c.getAllCategories()),
...application.collections.flatMap((c) => c.getAllScripts()),
]
// Get all docs
.flatMap((documentable) => documentable.docs)
// Parse all URLs
.flatMap((docString) => extractUrlsExcludingCodeBlocks(docString));
}

function filterDuplicateUrls(urls: readonly string[]): string[] {
return urls.filter((url, index, array) => array.indexOf(url) === index);
}

function filterUrlsExcludingPatterns(
urls: readonly string[],
patterns: readonly RegExp[],
): string[] {
return urls.filter((url) => !patterns.some((pattern) => pattern.test(url)));
}

function extractUrlsExcludingCodeBlocks(textWithInlineCode: string): string[] {
/*
Matches URLs:
- Excludes inline code blocks as they may contain URLs not intended for user interaction
and not guaranteed to support expected HTTP methods, leading to false-negatives.
- Supports URLs containing parentheses, avoiding matches within code that might not represent
actual links.
*/
const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
}
26 changes: 26 additions & 0 deletions tests/checks/external-urls/TestExecutionDetailsLogger.ts
@@ -0,0 +1,26 @@
import { indentText } from '@tests/shared/Text';

export class TestExecutionDetailsLogger {
public logTestSectionStartDelimiter(): void {
this.logSectionDelimiterLine();
}

public logTestSectionEndDelimiter(): void {
this.logSectionDelimiterLine();
}

public logLabeledInformation(
label: string,
detailedInformation: string,
): void {
console.log([
`${label}:`,
indentText(detailedInformation),
].join('\n'));
}

private logSectionDelimiterLine(): void {
const horizontalLine = '─'.repeat(40);
console.log(horizontalLine);
}
}
92 changes: 54 additions & 38 deletions tests/checks/external-urls/main.spec.ts
@@ -1,19 +1,26 @@
import { test, expect } from 'vitest';
import { parseApplication } from '@/application/Parser/ApplicationParser';
import type { IApplication } from '@/domain/IApplication';
import { indentText } from '@tests/shared/Text';
import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage';
import { shuffle } from '@/application/Common/Shuffle';
import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus';
import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker';
import { TestExecutionDetailsLogger } from './TestExecutionDetailsLogger';
import { extractDocumentationUrls } from './DocumentationUrlExtractor';

// arrange
const logger = new TestExecutionDetailsLogger();
logger.logTestSectionStartDelimiter();
const app = parseApplication();
const urls = collectUniqueUrls({
application: app,
excludePatterns: [
let urls = extractDocumentationUrls({
logger,
urlExclusionPatterns: [
/^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
],
application: app,
});
urls = filterUrlsToEnvironmentCheckLimit(urls);
logger.logLabeledInformation('URLs submitted for testing', urls.length.toString());
const requestOptions: BatchRequestOptions = {
domainOptions: {
sameDomainParallelize: false, // be nice to our third-party servers
@@ -30,55 +37,64 @@ const requestOptions: BatchRequestOptions = {
enableCookies: true,
},
};
logger.logLabeledInformation('HTTP request options', JSON.stringify(requestOptions, null, 2));
const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000;
logger.logLabeledInformation('Scheduled test duration', convertMillisecondsToHumanReadableFormat(testTimeoutInMs));
logger.logTestSectionEndDelimiter();
test(`all URLs (${urls.length}) should be alive`, async () => {
// act
const results = await getUrlStatusesInParallel(urls, requestOptions);
// assert
const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code));
expect(deadUrls).to.have.lengthOf(0, formatAssertionMessage([formatUrlStatusReport(deadUrls)]));
expect(deadUrls).to.have.lengthOf(

0,
formatAssertionMessage([createReportForDeadUrlStatuses(deadUrls)]),
);
}, testTimeoutInMs);

function isOkStatusCode(statusCode: number): boolean {
return statusCode >= 200 && statusCode < 300;
}

function collectUniqueUrls(
options: {
readonly application: IApplication,
readonly excludePatterns?: readonly RegExp[],
},
): string[] {
return [ // Get all nodes
...options.application.collections.flatMap((c) => c.getAllCategories()),
...options.application.collections.flatMap((c) => c.getAllScripts()),
]
// Get all docs
.flatMap((documentable) => documentable.docs)
// Parse all URLs
.flatMap((docString) => extractUrls(docString))
// Remove duplicates
.filter((url, index, array) => array.indexOf(url) === index)
// Exclude certain URLs based on patterns
.filter((url) => !shouldExcludeUrl(url, options.excludePatterns ?? []));
function createReportForDeadUrlStatuses(deadUrlStatuses: readonly UrlStatus[]): string {
return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
}

function shouldExcludeUrl(url: string, patterns: readonly RegExp[]): boolean {
return patterns.some((pattern) => pattern.test(url));
function filterUrlsToEnvironmentCheckLimit(originalUrls: string[]): string[] {
const { RANDOMIZED_URL_CHECK_LIMIT } = process.env;
logger.logLabeledInformation('URL check limit', RANDOMIZED_URL_CHECK_LIMIT || 'Unlimited');
if (RANDOMIZED_URL_CHECK_LIMIT !== undefined && RANDOMIZED_URL_CHECK_LIMIT !== '') {
const maxUrlsInTest = parseInt(RANDOMIZED_URL_CHECK_LIMIT, 10);
if (Number.isNaN(maxUrlsInTest)) {
throw new Error(`Invalid URL limit: ${RANDOMIZED_URL_CHECK_LIMIT}`);
}
if (maxUrlsInTest < originalUrls.length) {
return shuffle(originalUrls).slice(0, maxUrlsInTest);
}
}
return originalUrls;
}

function formatUrlStatusReport(deadUrlStatuses: readonly UrlStatus[]): string {
return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`;
}
function convertMillisecondsToHumanReadableFormat(milliseconds: number): string {
const timeParts: string[] = [];
const addTimePart = (amount: number, label: string) => {
if (amount === 0) {
return;
}
timeParts.push(`${amount} ${label}`);
};

const hours = milliseconds / (1000 * 60 * 60);
const absoluteHours = Math.floor(hours);
addTimePart(absoluteHours, 'hours');

const minutes = (hours - absoluteHours) * 60;
const absoluteMinutes = Math.floor(minutes);
addTimePart(absoluteMinutes, 'minutes');

const seconds = (minutes - absoluteMinutes) * 60;
const absoluteSeconds = Math.floor(seconds);
addTimePart(absoluteSeconds, 'seconds');

function extractUrls(textWithInlineCode: string): string[] {
/*
Matches URLs:
- Excludes inline code blocks as they may contain URLs not intended for user interaction
and not guaranteed to support expected HTTP methods, leading to false-negatives.
- Supports URLs containing parentheses, avoiding matches within code that might not represent
actual links.
*/
const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+(?:\([^\s`"<>()]*\))?[^\s`"<>()]*)/g;
return textWithInlineCode.match(nonCodeBlockUrlRegex) || [];
return timeParts.join(', ');
}
52 changes: 52 additions & 0 deletions tests/unit/application/Common/Shuffle.spec.ts
@@ -0,0 +1,52 @@
import { describe, it, expect } from 'vitest';
import { shuffle } from '@/application/Common/Shuffle';

describe('Shuffle', () => {
describe('shuffle', () => {
it('returns a new array', () => {
// arrange
const inputArray = ['a', 'b', 'c', 'd'];
// act
const result = shuffle(inputArray);
// assert
expect(result).not.to.equal(inputArray);
});

it('returns an array of the same length', () => {
// arrange
const inputArray = ['a', 'b', 'c', 'd'];
// act
const result = shuffle(inputArray);
// assert
expect(result.length).toBe(inputArray.length);
});

it('contains the same elements', () => {
// arrange
const inputArray = ['a', 'b', 'c', 'd'];
// act
const result = shuffle(inputArray);
// assert
expect(result).to.have.members(inputArray);
});

it('does not modify the input array', () => {
// arrange
const inputArray = ['a', 'b', 'c', 'd'];
const inputArrayCopy = [...inputArray];
// act
shuffle(inputArray);
// assert
expect(inputArray).to.deep.equal(inputArrayCopy);
});

it('handles an empty array correctly', () => {
// arrange
const inputArray: string[] = [];
// act
const result = shuffle(inputArray);
// assert
expect(result).have.lengthOf(0);
});
});
});
