diff --git a/.github/workflows/checks.external-urls.yaml b/.github/workflows/checks.external-urls.yaml index 8d8b46c85..de1ef9ce7 100644 --- a/.github/workflows/checks.external-urls.yaml +++ b/.github/workflows/checks.external-urls.yaml @@ -3,6 +3,9 @@ name: checks.external-urls on: schedule: - cron: '0 0 * * 0' # at 00:00 on every Sunday + push: + paths: + - tests/checks/external-urls/** jobs: run-check: diff --git a/src/infrastructure/Threading/AsyncSleep.ts b/src/infrastructure/Threading/AsyncSleep.ts index c827dc345..924216265 100644 --- a/src/infrastructure/Threading/AsyncSleep.ts +++ b/src/infrastructure/Threading/AsyncSleep.ts @@ -1,7 +1,10 @@ export type SchedulerCallbackType = (...args: unknown[]) => void; export type SchedulerType = (callback: SchedulerCallbackType, ms: number) => void; -export function sleep(time: number, scheduler: SchedulerType = setTimeout) { +export function sleep( + time: number, + scheduler: SchedulerType = setTimeout, +): Promise { return new Promise((resolve) => { scheduler(() => resolve(undefined), time); }); diff --git a/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/app/check-for-errors.ts b/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/app/check-for-errors.ts index 786aa2cf4..052d6d285 100644 --- a/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/app/check-for-errors.ts +++ b/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/app/check-for-errors.ts @@ -1,4 +1,4 @@ -import { splitTextIntoLines, indentText } from '../utils/text'; +import { indentText, splitTextIntoLines } from '@tests/shared/Text'; import { log, die } from '../utils/log'; import { readAppLogFile } from './app-logs'; import { STDERR_IGNORE_PATTERNS } from './error-ignore-patterns'; diff --git a/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/app/system-capture/window-title-capture.ts 
b/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/app/system-capture/window-title-capture.ts index 668da8c56..88e8bde8a 100644 --- a/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/app/system-capture/window-title-capture.ts +++ b/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/app/system-capture/window-title-capture.ts @@ -1,7 +1,7 @@ +import { filterEmpty } from '@tests/shared/Text'; import { runCommand } from '../../utils/run-command'; import { log, LogLevel } from '../../utils/log'; import { SupportedPlatform, CURRENT_PLATFORM } from '../../utils/platform'; -import { filterEmpty } from '../../utils/text'; export async function captureWindowTitles(processId: number) { if (!processId) { throw new Error('Missing process ID.'); } diff --git a/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/main.ts b/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/main.ts index a24a9cee7..c3ad95de1 100644 --- a/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/main.ts +++ b/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/main.ts @@ -1,3 +1,4 @@ +import { indentText } from '@tests/shared/Text'; import { logCurrentArgs, CommandLineFlag, hasCommandLineFlag } from './cli-args'; import { log, die } from './utils/log'; import { ensureNpmProjectDir, npmInstall, npmBuild } from './utils/npm'; @@ -15,7 +16,6 @@ import { APP_EXECUTION_DURATION_IN_SECONDS, SCREENSHOT_PATH, } from './config'; -import { indentText } from './utils/text'; import type { ExtractionResult } from './app/extractors/common/extraction-result'; export async function main(): Promise { diff --git a/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/utils/run-command.ts b/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/utils/run-command.ts index fe3c96f11..d73e6eda0 100644 --- a/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/utils/run-command.ts +++ 
b/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/utils/run-command.ts @@ -1,5 +1,6 @@ -import { exec, type ExecOptions, type ExecException } from 'node:child_process'; -import { indentText } from './text'; +import { exec } from 'child_process'; +import { indentText } from '@tests/shared/Text'; +import type { ExecOptions, ExecException } from 'child_process'; const TIMEOUT_IN_SECONDS = 180; const MAX_OUTPUT_BUFFER_SIZE = 1024 * 1024; // 1 MB diff --git a/tests/checks/external-urls/StatusChecker/BatchStatusChecker.ts b/tests/checks/external-urls/StatusChecker/BatchStatusChecker.ts index 8932a1f72..4918ccf5b 100644 --- a/tests/checks/external-urls/StatusChecker/BatchStatusChecker.ts +++ b/tests/checks/external-urls/StatusChecker/BatchStatusChecker.ts @@ -1,64 +1,62 @@ import { sleep } from '@/infrastructure/Threading/AsyncSleep'; -import { getUrlStatus, type IRequestOptions } from './Requestor'; -import { groupUrlsByDomain } from './UrlPerDomainGrouper'; -import type { IUrlStatus } from './IUrlStatus'; +import { getUrlStatus, type RequestOptions } from './Requestor'; +import { groupUrlsByDomain } from './UrlDomainProcessing'; +import type { FollowOptions } from './FetchFollow'; +import type { UrlStatus } from './UrlStatus'; export async function getUrlStatusesInParallel( urls: string[], - options?: IBatchRequestOptions, -): Promise { - // urls = [ 'https://privacy.sexy' ]; // Here to comment out when testing + options?: BatchRequestOptions, +): Promise { + urls = ['https://archive.ph/2023.10.07-112359/https://apps.microsoft.com/detail/9NCBCSZSJRSB?hl=en-us&gl=US']; const uniqueUrls = Array.from(new Set(urls)); - const defaultedOptions = { ...DefaultOptions, ...options }; - console.log('Options: ', defaultedOptions); - const results = await request(uniqueUrls, defaultedOptions); + const defaultedDomainOptions = { ...DefaultDomainOptions, ...options?.domainOptions }; + console.log('Batch request options applied:', defaultedDomainOptions); + const 
results = await request(uniqueUrls, defaultedDomainOptions, options); return results; } -export interface IBatchRequestOptions { - domainOptions?: IDomainOptions; - requestOptions?: IRequestOptions; +export interface BatchRequestOptions { + readonly domainOptions?: Partial; + readonly requestOptions?: Partial; + readonly followOptions?: Partial; } -interface IDomainOptions { - sameDomainParallelize?: boolean; - sameDomainDelayInMs?: number; +interface DomainOptions { + readonly sameDomainParallelize?: boolean; + readonly sameDomainDelayInMs?: number; } -const DefaultOptions: Required = { - domainOptions: { - sameDomainParallelize: false, - sameDomainDelayInMs: 3 /* sec */ * 1000, - }, - requestOptions: { - retryExponentialBaseInMs: 5 /* sec */ * 1000, - requestTimeoutInMs: 60 /* sec */ * 1000, - additionalHeaders: {}, - }, +const DefaultDomainOptions: Required = { + sameDomainParallelize: false, + sameDomainDelayInMs: 3 /* sec */ * 1000, }; function request( urls: string[], - options: Required, -): Promise { - if (!options.domainOptions.sameDomainParallelize) { + domainOptions: Required, + options?: BatchRequestOptions, +): Promise { + if (!domainOptions.sameDomainParallelize) { return runOnEachDomainWithDelay( urls, - (url) => getUrlStatus(url, options.requestOptions), - options.domainOptions.sameDomainDelayInMs, + (url) => getUrlStatus(url, options?.requestOptions, options?.followOptions), + domainOptions.sameDomainDelayInMs, ); } - return Promise.all(urls.map((url) => getUrlStatus(url, options.requestOptions))); + return Promise.all( + urls.map((url) => getUrlStatus(url, options?.requestOptions, options?.followOptions)), + ); } async function runOnEachDomainWithDelay( urls: string[], - action: (url: string) => Promise, + action: (url: string) => Promise, delayInMs: number | undefined, -): Promise { +): Promise { const grouped = groupUrlsByDomain(urls); const tasks = grouped.map(async (group) => { - const results = new Array(); + const results = new Array(); /* 
eslint-disable no-await-in-loop */ for (const url of group) { const status = await action(url); diff --git a/tests/checks/external-urls/StatusChecker/ExponentialBackOffRetryHandler.ts b/tests/checks/external-urls/StatusChecker/ExponentialBackOffRetryHandler.ts index 6db907332..74cd76d3e 100644 --- a/tests/checks/external-urls/StatusChecker/ExponentialBackOffRetryHandler.ts +++ b/tests/checks/external-urls/StatusChecker/ExponentialBackOffRetryHandler.ts @@ -1,27 +1,33 @@ import { sleep } from '@/infrastructure/Threading/AsyncSleep'; -import type { IUrlStatus } from './IUrlStatus'; +import { indentText } from '@tests/shared/Text'; +import { type UrlStatus, formatUrlStatus } from './UrlStatus'; const DefaultBaseRetryIntervalInMs = 5 /* sec */ * 1000; export async function retryWithExponentialBackOff( - action: () => Promise, + action: () => Promise, baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs, currentRetry = 1, -): Promise { +): Promise { const maxTries = 3; const status = await action(); if (shouldRetry(status)) { if (currentRetry <= maxTries) { const exponentialBackOffInMs = getRetryTimeoutInMs(currentRetry, baseRetryIntervalInMs); - console.log(`Retrying (${currentRetry}) in ${exponentialBackOffInMs / 1000} seconds`, status); + console.log([ + `Attempt ${currentRetry}: Retrying in ${exponentialBackOffInMs / 1000} seconds.`, + 'Details:', + indentText(formatUrlStatus(status)), + ].join('\n')); await sleep(exponentialBackOffInMs); return retryWithExponentialBackOff(action, baseRetryIntervalInMs, currentRetry + 1); } + console.warn('💀 All retry attempts failed. 
Final failure to retrieve URL:', indentText(formatUrlStatus(status))); } return status; } -function shouldRetry(status: IUrlStatus) { +function shouldRetry(status: UrlStatus): boolean { if (status.error) { return true; } @@ -32,14 +38,14 @@ function shouldRetry(status: IUrlStatus) { || status.code === 429; // Too Many Requests } -function isTransientError(statusCode: number) { +function isTransientError(statusCode: number): boolean { return statusCode >= 500 && statusCode <= 599; } function getRetryTimeoutInMs( currentRetry: number, baseRetryIntervalInMs: number = DefaultBaseRetryIntervalInMs, -) { +): number { const retryRandomFactor = 0.5; // Retry intervals are between 50% and 150% // of the exponentially increasing base amount const minRandom = 1 - retryRandomFactor; diff --git a/tests/checks/external-urls/StatusChecker/FetchFollow.ts b/tests/checks/external-urls/StatusChecker/FetchFollow.ts index 4414a478b..f25c7598e 100644 --- a/tests/checks/external-urls/StatusChecker/FetchFollow.ts +++ b/tests/checks/external-urls/StatusChecker/FetchFollow.ts @@ -1,19 +1,17 @@ import { fetchWithTimeout } from './FetchWithTimeout'; +import { getDomainFromUrl } from './UrlDomainProcessing'; export function fetchFollow( url: string, timeoutInMs: number, - fetchOptions: RequestInit, - followOptions: IFollowOptions | undefined, + fetchOptions?: Partial, + followOptions?: Partial, ): Promise { - const defaultedFollowOptions = { - ...DefaultFollowOptions, - ...followOptions, - }; + const defaultedFollowOptions = { ...DefaultFollowOptions, ...followOptions }; if (followRedirects(defaultedFollowOptions)) { return fetchWithTimeout(url, timeoutInMs, fetchOptions); } - fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */ }; + fetchOptions = { ...fetchOptions, redirect: 'manual' /* handled manually */, mode: 'cors' }; const cookies = new CookieStorage(defaultedFollowOptions.enableCookies); return followRecursivelyWithCookies( url, @@ -24,13 +22,15 @@ export 
function fetchFollow( ); } -export interface IFollowOptions { - followRedirects?: boolean; - maximumRedirectFollowDepth?: number; - enableCookies?: boolean; +// "cors" | "navigate" | "no-cors" | "same-origin"; + +export interface FollowOptions { + readonly followRedirects?: boolean; + readonly maximumRedirectFollowDepth?: number; + readonly enableCookies?: boolean; } -export const DefaultFollowOptions: Required = { +const DefaultFollowOptions: Required = { followRedirects: true, maximumRedirectFollowDepth: 20, enableCookies: true, @@ -64,6 +64,10 @@ async function followRecursivelyWithCookies( if (cookieHeader) { cookies.addHeader(cookieHeader); } + options.headers = { + ...options.headers, + Host: getDomainFromUrl(nextUrl), + }; return followRecursivelyWithCookies(nextUrl, timeoutInMs, options, newFollowDepth, cookies); } @@ -77,7 +81,7 @@ class CookieStorage { constructor(private readonly enabled: boolean) { } - public hasAny() { + public hasAny(): boolean { return this.enabled && this.cookies.length > 0; } @@ -88,12 +92,12 @@ class CookieStorage { this.cookies.push(header); } - public getHeader() { + public getHeader(): string { return this.cookies.join(' ; '); } } -function followRedirects(options: IFollowOptions) { +function followRedirects(options: FollowOptions): boolean { if (!options.followRedirects) { return false; } diff --git a/tests/checks/external-urls/StatusChecker/FetchWithTimeout.ts b/tests/checks/external-urls/StatusChecker/FetchWithTimeout.ts index f5144df94..ac03199d0 100644 --- a/tests/checks/external-urls/StatusChecker/FetchWithTimeout.ts +++ b/tests/checks/external-urls/StatusChecker/FetchWithTimeout.ts @@ -8,7 +8,10 @@ export async function fetchWithTimeout( ...(init ?? 
{}), signal: controller.signal, }; - const promise = fetch(url, options); + const promise = fetch( + url, + options, + ); const timeout = setTimeout(() => controller.abort(), timeoutInMs); return promise.finally(() => clearTimeout(timeout)); } diff --git a/tests/checks/external-urls/StatusChecker/IUrlStatus.ts b/tests/checks/external-urls/StatusChecker/IUrlStatus.ts deleted file mode 100644 index 2b2b60c7e..000000000 --- a/tests/checks/external-urls/StatusChecker/IUrlStatus.ts +++ /dev/null @@ -1,5 +0,0 @@ -export interface IUrlStatus { - url: string; - error?: string; - code?: number; -} diff --git a/tests/checks/external-urls/StatusChecker/README.md b/tests/checks/external-urls/StatusChecker/README.md index 8b30465f5..425bbfae4 100644 --- a/tests/checks/external-urls/StatusChecker/README.md +++ b/tests/checks/external-urls/StatusChecker/README.md @@ -13,7 +13,10 @@ A CLI and SDK for checking the availability of external URLs. - 😇 **Rate Limiting**: Queues requests by domain to be polite. - 🔁 **Retries**: Implements retry pattern with exponential back-off. - ⌚ **Timeouts**: Configurable timeout for each request. -- 🎭️ **User-Agent Rotation**: Change user agents for each request. +- 🎭️ **Impersonation**: Impersonate different browsers for each request. + - **🌐 User-Agent Rotation**: Change user agents. + - **🔑 TLS Handshakes**: Perform TLS and HTTP handshakes that are identical to those of a real browser. +- 🫙 **Cookie jar**: Preserve cookies during redirects to mimic a real browser. ## CLI @@ -54,6 +57,7 @@ const statuses = await getUrlStatusesInParallel([ 'https://privacy.sexy', /* ... - **`sameDomainDelayInMs`** (*number*), default: `3000` (3 seconds) - Sets the delay between requests to the same domain. - `requestOptions` (*object*): See [request options](#request-options). +- `followOptions` (*object*): See [follow options](#follow-options). 
### `getUrlStatus` @@ -72,7 +76,6 @@ console.log(`Status code: ${status.code}`); - The longer the base time, the greater the intervals between retries. - **`additionalHeaders`** (*object*), default: `false` - Additional HTTP headers to send along with the default headers. Overrides default headers if specified. -- **`followOptions`** (*object*): See [follow options](#follow-options). - **`requestTimeoutInMs`** (*number*), default: `60000` (60 seconds) - Time limit to abort the request if no response is received within the specified time frame. @@ -83,19 +86,7 @@ Follows `3XX` redirects while preserving cookies. Same fetch API except third parameter that specifies [follow options](#follow-options), `redirect: 'follow' | 'manual' | 'error'` is discarded in favor of the third parameter. ```js -const status = await fetchFollow('https://privacy.sexy', { - // First argument is same options as fetch API, except `redirect` options - // that's discarded in favor of next argument follow options - headers: { - 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0' - }, - }, { - // Second argument sets the redirect behavior - followRedirects: true, - maximumRedirectFollowDepth: 20, - enableCookies: true, - } -); +const status = await fetchFollow('https://privacy.sexy', 1000 /* timeout in milliseconds */); console.log(`Status code: ${status.code}`); ``` diff --git a/tests/checks/external-urls/StatusChecker/Requestor.ts b/tests/checks/external-urls/StatusChecker/Requestor.ts index e89f3a3f0..22d0e1593 100644 --- a/tests/checks/external-urls/StatusChecker/Requestor.ts +++ b/tests/checks/external-urls/StatusChecker/Requestor.ts @@ -1,70 +1,108 @@ +import { indentText } from '@tests/shared/Text'; import { retryWithExponentialBackOff } from './ExponentialBackOffRetryHandler'; -import { fetchFollow, type IFollowOptions, DefaultFollowOptions } from './FetchFollow'; +import { fetchFollow, type FollowOptions } from './FetchFollow'; import { 
getRandomUserAgent } from './UserAgents'; -import type { IUrlStatus } from './IUrlStatus'; +import { getDomainFromUrl } from './UrlDomainProcessing'; +import { randomizeTlsFingerprint, getTlsContextInfo } from './TlsFingerprintRandomizer'; +import type { UrlStatus } from './UrlStatus'; export function getUrlStatus( url: string, - options: IRequestOptions = DefaultOptions, -): Promise { - const defaultedOptions = { ...DefaultOptions, ...options }; - const fetchOptions = getFetchOptions(url, defaultedOptions); - return retryWithExponentialBackOff(async () => { - console.log('Requesting', url); - let result: IUrlStatus; - try { - const response = await fetchFollow( - url, - defaultedOptions.requestTimeoutInMs, - fetchOptions, - defaultedOptions.followOptions, - ); - result = { url, code: response.status }; - } catch (err) { - result = { url, error: JSON.stringify(err, null, '\t') }; - } - return result; - }, defaultedOptions.retryExponentialBaseInMs); + requestOptions?: Partial, + followOptions?: Partial, +): Promise { + const defaultedOptions = getDefaultedRequestOptions(requestOptions); + if (defaultedOptions.randomizeTlsFingerprint) { + randomizeTlsFingerprint(); + } + return fetchUrlStatusWithRetry(url, defaultedOptions, followOptions); } -export interface IRequestOptions { +export interface RequestOptions { readonly retryExponentialBaseInMs?: number; readonly additionalHeaders?: Record; readonly additionalHeadersUrlIgnore?: string[]; - readonly followOptions?: IFollowOptions; readonly requestTimeoutInMs: number; + readonly randomizeTlsFingerprint: boolean; } -const DefaultOptions: Required = { - retryExponentialBaseInMs: 5000, +const DefaultOptions: Required = { + retryExponentialBaseInMs: 5 /* sec */ * 1000, additionalHeaders: {}, additionalHeadersUrlIgnore: [], requestTimeoutInMs: 60 /* seconds */ * 1000, - followOptions: DefaultFollowOptions, + randomizeTlsFingerprint: true, }; -function getFetchOptions(url: string, options: Required): RequestInit { +function 
fetchUrlStatusWithRetry( + url: string, + requestOptions: Required, + followOptions?: Partial, +): Promise { + const fetchOptions = getFetchOptions(url, requestOptions); + return retryWithExponentialBackOff(async () => { + console.log(`Initiating request for URL: ${url}`); + let result: UrlStatus; + try { + const response = await fetchFollow( + url, + requestOptions.requestTimeoutInMs, + fetchOptions, + followOptions, + ); + result = { url, code: response.status }; + } catch (err) { + result = { + url, + error: [ + 'Error:', indentText(JSON.stringify(err, null, '\t') || err.toString()), + 'Options:', indentText(JSON.stringify(fetchOptions, null, '\t')), + 'TLS:', indentText(getTlsContextInfo()), + ].join('\n'), + }; + } + return result; + }, requestOptions.retryExponentialBaseInMs); +} + +function getFetchOptions(url: string, options: Required): RequestInit { const additionalHeaders = options.additionalHeadersUrlIgnore .some((ignorePattern) => url.startsWith(ignorePattern)) ? {} : options.additionalHeaders; return { - method: 'HEAD', + method: 'GET', // Fetch only headers without the full response body for better speed headers: { - ...getDefaultHeaders(), + ...getDefaultHeaders(url), ...additionalHeaders, }, + redirect: 'manual', // Redirects are handled manually, automatic redirects do not work with Host header + }; +} + +function getDefaultHeaders(url: string): Record { + return { + // Needed for websites that filter out non-browser user agents. + 'User-Agent': getRandomUserAgent(), + + // Required for some websites, especially those behind proxies, to correctly handle the request. + Host: getDomainFromUrl(url), + + // The following mimic a real browser request to improve compatibility with most web servers. 
+ 'Upgrade-Insecure-Requests': '1', + Connection: 'keep-alive', + Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', + 'Accept-Encoding': 'gzip, deflate, br', + 'Cache-Control': 'max-age=0', + 'Accept-Language': 'en-US,en;q=0.9', }; } -function getDefaultHeaders(): Record { +function getDefaultedRequestOptions( + options?: Partial, +): Required { return { - 'user-agent': getRandomUserAgent(), - 'upgrade-insecure-requests': '1', - connection: 'keep-alive', - accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', - 'accept-encoding': 'gzip, deflate, br', - 'cache-control': 'max-age=0', - 'accept-language': 'en-US,en;q=0.9', + ...DefaultOptions, + ...options, }; } diff --git a/tests/checks/external-urls/StatusChecker/TlsFingerprintRandomizer.ts b/tests/checks/external-urls/StatusChecker/TlsFingerprintRandomizer.ts new file mode 100644 index 000000000..abdd54de5 --- /dev/null +++ b/tests/checks/external-urls/StatusChecker/TlsFingerprintRandomizer.ts @@ -0,0 +1,69 @@ +/** + * Modifies the TLS fingerprint of Node.js HTTP client to circumvent TLS fingerprinting blocks. + * TLS fingerprinting is a technique used to identify clients based on the unencrypted data sent + * during the TLS handshake, used for blocking or identifying non-browser clients like debugging + * proxies or automated scripts. + * + * However, Node.js's HTTP client does not fully support all methods required for impersonating a + * browser's TLS fingerprint, as reported in https://github.com/nodejs/undici/issues/1983. + * While this implementation can alter the TLS fingerprint by randomizing the cipher suite order, + * it may not perfectly mimic specific browser fingerprints due to limitations in the TLS + * implementation of Node.js. 
+ * + * For more detailed information, visit: + * - https://archive.today/2024.03.13-102042/https://httptoolkit.com/blog/tls-fingerprinting-node-js/ + * - https://check.ja3.zone/ (To check your tool's or browser's fingerprint) + * - https://github.com/lwthiker/curl-impersonate (A solution for curl) + * - https://github.com/depicts/got-tls (Cipher manipulation support for Node.js) + */ + +import { constants } from 'crypto'; +import tls from 'tls'; +import { indentText } from '@tests/shared/Text'; + +export function randomizeTlsFingerprint() { + tls.DEFAULT_CIPHERS = getShuffledCiphers().join(':'); + console.log( + [ + 'Original ciphers:', indentText(constants.defaultCipherList), + 'Current context', indentText(getTlsContextInfo()), + ].join('\n'), + ); +} + +export function getTlsContextInfo(): string { + return [ + `Ciphers: ${tls.DEFAULT_CIPHERS}`, + `Minimum TLS protocol version: ${tls.DEFAULT_MIN_VERSION}`, + `Node fingerprint: ${constants.defaultCoreCipherList === tls.DEFAULT_CIPHERS ? 'Visible' : 'Masked'}`, + ].join('\n'); +} + +/** + * Shuffles the order of TLS ciphers, excluding the top 3 most important ciphers to maintain + * security preferences. This approach modifies the default cipher list of Node.js to create a + * unique TLS fingerprint, thus helping to circumvent detection mechanisms based on static + * fingerprinting. It leverages randomness in the cipher order as a simple method to generate a + * new, unique TLS fingerprint which is not easily identifiable. The technique is based on altering + * parameters used in the TLS handshake process, particularly the cipher suite order, to avoid + * matching known fingerprints that could identify the client as a Node.js application. 
+ * + * For more details, refer to: + * - https://archive.today/2024.03.13-102234/https://getsetfetch.org/blog/tls-fingerprint.html + */ +export function getShuffledCiphers(): readonly string[] { + const nodeOrderedCipherList = constants.defaultCoreCipherList.split(':'); + const totalTopCiphersToKeep = 3; + // Keep the most important ciphers in the same order + const fixedCiphers = nodeOrderedCipherList.slice(0, totalTopCiphersToKeep); + // Shuffle the rest + const shuffledCiphers = nodeOrderedCipherList.slice(totalTopCiphersToKeep) + .map((cipher) => ({ cipher, sort: Math.random() })) + .sort((a, b) => a.sort - b.sort) + .map(({ cipher }) => cipher); + const ciphers = [ + ...fixedCiphers, + ...shuffledCiphers, + ]; + return ciphers; +} diff --git a/tests/checks/external-urls/StatusChecker/UrlPerDomainGrouper.ts b/tests/checks/external-urls/StatusChecker/UrlDomainProcessing.ts similarity index 64% rename from tests/checks/external-urls/StatusChecker/UrlPerDomainGrouper.ts rename to tests/checks/external-urls/StatusChecker/UrlDomainProcessing.ts index 8537936b5..4038fc103 100644 --- a/tests/checks/external-urls/StatusChecker/UrlPerDomainGrouper.ts +++ b/tests/checks/external-urls/StatusChecker/UrlDomainProcessing.ts @@ -2,18 +2,18 @@ export function groupUrlsByDomain(urls: string[]): string[][] { const domains = new Set(); const urlsWithDomain = urls.map((url) => ({ url, - domain: extractDomain(url), + domain: getDomainFromUrl(url), })); for (const url of urlsWithDomain) { domains.add(url.domain); } return Array.from(domains).map((domain) => { return urlsWithDomain - .filter((url) => url.domain === domain) + .filter((url) => url.domain.toLowerCase() === domain.toLowerCase()) .map((url) => url.url); }); } -function extractDomain(url: string): string { - return url.split('://')[1].split('/')[0].toLowerCase(); +export function getDomainFromUrl(url: string): string { + return new URL(url).host; } diff --git a/tests/checks/external-urls/StatusChecker/UrlStatus.ts 
b/tests/checks/external-urls/StatusChecker/UrlStatus.ts new file mode 100644 index 000000000..8471b4dbf --- /dev/null +++ b/tests/checks/external-urls/StatusChecker/UrlStatus.ts @@ -0,0 +1,19 @@ +import { indentText } from '@tests/shared/Text'; + +export interface UrlStatus { + readonly url: string; + readonly error?: string; + readonly code?: number; +} + +export function formatUrlStatus(status: UrlStatus): string { + return [ + `URL: ${status.url}`, + ...status.code !== undefined ? [ + `Response code: ${status.code}`, + ] : [], + ...status.error ? [ + `Error:\n${indentText(status.error)}`, + ] : [], + ].join('\n'); +} diff --git a/tests/checks/external-urls/StatusChecker/UserAgents.ts b/tests/checks/external-urls/StatusChecker/UserAgents.ts index 1c389b777..f8350ec43 100644 --- a/tests/checks/external-urls/StatusChecker/UserAgents.ts +++ b/tests/checks/external-urls/StatusChecker/UserAgents.ts @@ -3,73 +3,28 @@ export function getRandomUserAgent(): string { } const UserAgents = [ - // Chrome - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537', - - // Firefox - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Safari/605.1.15', - - // Safari - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/604.1', - - // Internet Explorer - 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko', - - // Edge - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3 Edge/15.0', - - // Opera - 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14', - - // iOS Devices - 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like Mac OS 
X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/18.2b11866 Mobile/16B91 Safari/605.1.15', - 'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1', - - // Android Devices - 'Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.3', - - // Other Devices/Browsers - 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.3', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15', - 'Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.3 Edge/15.0', - 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', - 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0', - 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0', - 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.3', - 'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.3', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.3', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15', - 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1', - 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.3 OPR/53.0.2907.99', - 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2)', - 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20120121 Firefox/46.0', - 'Mozilla/5.0 (Windows NT 6.1; 
WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; Tablet PC 2.0)', - 'Mozilla/5.0 (Windows NT 5.1; rv:36.0) Gecko/20100101 Firefox/36.0', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0', - 'Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:28.0) Gecko/20100101 Firefox/28.0', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/20161202 Firefox/21.0.1', - 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0', - 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0', - 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0', - 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0', - 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.3', - 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.3', - 'Mozilla/5.0 (X11; Linux x86_64) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3', - 'Mozilla/5.0 (X11; CrOS x86_64 4319.74.0) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3', - 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.3 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.3', + // Safari 17.1 - macOS and iPad + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15', + // Safari - iOS 17 - iPhone + 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1', + // Safari - iOS 17 - iPad mini + 'Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1', + // Edge - macOS + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51', + // Edge - Windows + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.58', + // Edge - Android + 'Mozilla/5.0 (Linux; Android 10; HD1913) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.43 Mobile Safari/537.36 EdgA/119.0.2151.92', + // Chrome - macOS + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', + // Chrome - Windows + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 
Safari/537.36', + // Chrome - Android (Phone) + 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36', + // Firefox - macOS + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/114.0', + // Firefox - Windows + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0', + // Firefox - Android (Phone) + 'Mozilla/5.0 (Android 14; Mobile; rv:109.0) Gecko/120.0 Firefox/120.0', ]; diff --git a/tests/checks/external-urls/main.spec.ts b/tests/checks/external-urls/main.spec.ts index ec592c4ca..91b157bca 100644 --- a/tests/checks/external-urls/main.spec.ts +++ b/tests/checks/external-urls/main.spec.ts @@ -1,50 +1,82 @@ import { test, expect } from 'vitest'; import { parseApplication } from '@/application/Parser/ApplicationParser'; import type { IApplication } from '@/domain/IApplication'; -import { getUrlStatusesInParallel, type IBatchRequestOptions } from './StatusChecker/BatchStatusChecker'; -import type { IUrlStatus } from './StatusChecker/IUrlStatus'; +import { indentText } from '@tests/shared/Text'; +import { formatAssertionMessage } from '@tests/shared/FormatAssertionMessage'; +import { type UrlStatus, formatUrlStatus } from './StatusChecker/UrlStatus'; +import { getUrlStatusesInParallel, type BatchRequestOptions } from './StatusChecker/BatchStatusChecker'; +// arrange const app = parseApplication(); -const urls = collectUniqueUrls(app); -const requestOptions: IBatchRequestOptions = { +const urls = collectUniqueUrls({ + application: app, + excludePatterns: [ + /^https:\/\/archive\.ph/, // Drops HEAD/GET requests via fetch/curl, responding to Postman/Chromium.
+ ], +}); +const requestOptions: BatchRequestOptions = { domainOptions: { - sameDomainParallelize: false, // be nice to our external servers + sameDomainParallelize: false, // be nice to our third-party servers sameDomainDelayInMs: 5 /* sec */ * 1000, }, requestOptions: { retryExponentialBaseInMs: 3 /* sec */ * 1000, requestTimeoutInMs: 60 /* sec */ * 1000, additionalHeaders: { referer: app.projectDetails.homepage }, + randomizeTlsFingerprint: true, + }, + followOptions: { + followRedirects: true, + enableCookies: true, }, }; const testTimeoutInMs = urls.length * 60 /* seconds */ * 1000; - test(`all URLs (${urls.length}) should be alive`, async () => { + // act const results = await getUrlStatusesInParallel(urls, requestOptions); - const deadUrls = results.filter((r) => r.code !== 200); - expect(deadUrls).to.have.lengthOf(0, printUrls(deadUrls)); + // assert + const deadUrls = results.filter((r) => r.code === undefined || !isOkStatusCode(r.code)); + expect(deadUrls).to.have.lengthOf(0, formatAssertionMessage([formatUrlStatusReport(deadUrls)])); }, testTimeoutInMs); -function collectUniqueUrls(application: IApplication): string[] { +function isOkStatusCode(statusCode: number): boolean { + return statusCode >= 200 && statusCode < 300; +} + +function collectUniqueUrls( + options: { + readonly application: IApplication, + readonly excludePatterns?: readonly RegExp[], + }, +): string[] { return [ // Get all nodes - ...application.collections.flatMap((c) => c.getAllCategories()), - ...application.collections.flatMap((c) => c.getAllScripts()), + ...options.application.collections.flatMap((c) => c.getAllCategories()), + ...options.application.collections.flatMap((c) => c.getAllScripts()), ] // Get all docs .flatMap((documentable) => documentable.docs) // Parse all URLs - .flatMap((docString) => docString.match(/(https?:\/\/[^\s`"<>()]+)/g) || []) + .flatMap((docString) => extractUrls(docString)) // Remove duplicates - .filter((url, index, array) => array.indexOf(url) === 
index); + .filter((url, index, array) => array.indexOf(url) === index) + // Exclude certain URLs based on patterns + .filter((url) => !shouldExcludeUrl(url, options.excludePatterns ?? [])); +} + +function shouldExcludeUrl(url: string, patterns: readonly RegExp[]): boolean { + return patterns.some((pattern) => pattern.test(url)); +} + +function formatUrlStatusReport(deadUrlStatuses: readonly UrlStatus[]): string { + return `\n${deadUrlStatuses.map((status) => indentText(formatUrlStatus(status))).join('\n---\n')}\n`; } -function printUrls(statuses: IUrlStatus[]): string { - /* eslint-disable prefer-template */ - return '\n' - + statuses.map((status) => `- ${status.url}\n` - + (status.code ? `\tResponse code: ${status.code}` : '') - + (status.error ? `\tError: ${status.error}` : '')) - .join('\n') - + '\n'; - /* eslint-enable prefer-template */ +function extractUrls(textWithInlineCode: string): string[] { + /* + Matches all URLs. + Inline code blocks contain URLs not intended for user interaction and not + guaranteed to support expected HTTP methods, leading to false-negatives. + */ + const nonCodeBlockUrlRegex = /(?<!`)(https?:\/\/[^\s`"<>()]+)/g; + return textWithInlineCode.match(nonCodeBlockUrlRegex) || []; } diff --git a/tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/utils/text.ts b/tests/shared/Text.ts similarity index 100% rename from tests/checks/desktop-runtime-errors/check-desktop-runtime-errors/utils/text.ts rename to tests/shared/Text.ts