Skip to content

Commit

Permalink
INSTA-21561 queryTrackedDomainList Api (#131)
Browse files Browse the repository at this point in the history
* support queryTrackedDomainList api

* update url of xhr,pl,res

* add unit test + handle redacted query

* add redact query ut

* removed and modified comments
  • Loading branch information
Gouri Hariharan authored and GitHub Enterprise committed Jan 2, 2025
1 parent e27f292 commit 5c3b101
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 1 deletion.
6 changes: 6 additions & 0 deletions lib/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ export function processCommand(command: any[]): any {
}
vars.ignoreUserTimings = command[1];
break;
case 'queryTrackedDomainList':
if (DEBUG) {
validateRegExpArray('queryTrackedDomainList', command[1]);
}
vars.queryTrackedDomainList = command[1];
break;
case 'xhrTransmissionTimeout':
vars.xhrTransmissionTimeout = command[1];
break;
Expand Down
54 changes: 54 additions & 0 deletions lib/queryTrackedDomainList.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//import { isTransmitionRequest } from './transmission/util';
import {matchesAny} from './util';
import vars from './vars';

//const dataUrlPrefix = 'data:';
//const ignorePingsRegex = /.*\/ping(\/?$|\?.*)/i;

/**
 * Decides whether the query parameters and fragment of `url` may be reported unchanged.
 *
 * @param url - The URL to check. Non-string values are converted with `String()` before matching.
 * @returns `true` when `url` is falsy, when `vars.queryTrackedDomainList` is empty, or when `url`
 *   matches at least one pattern in `vars.queryTrackedDomainList`; `false` otherwise.
 */
export function isQueryTracked(url?: string | number | null): boolean {
  // A single falsy check covers undefined, null, '' and 0. String() of any remaining (truthy)
  // string or number can never be empty, so no further emptiness checks are needed.
  if (!url) {
    return true;
  }

  // Track entire url including parameters if queryTrackedDomainList is empty
  const trackedPatterns = vars.queryTrackedDomainList;
  if (trackedPatterns.length === 0) {
    return true;
  }

  // queryTrackedDomainList contains the patterns of urls whose query parameters and fragment string
  // should not be excluded from tracking
  return matchesAny(trackedPatterns, String(url));
}

/**
 * Returns `url` with its query string and fragment removed.
 *
 * Absolute URLs are parsed with the `URL` constructor and re-serialized, so the result is the
 * parser's serialization of the URL. Input that `URL` cannot parse (for example a relative
 * path) is handled with a plain string cut at the first `?` or `#` instead of throwing.
 *
 * @param url - The URL to strip. Non-string values are converted with `String()`.
 * @returns The URL without query and fragment, or `''` when `url` is falsy.
 */
export function removeQueryAndFragmentFromUrl(url?: string | number | null): string {
  // A single falsy check covers undefined, null, '' and 0.
  if (!url) {
    return '';
  }

  // Force string conversion. During runtime we have seen that some URLs passed into this code path aren't actually
  // strings. Reason currently unknown.
  const rawUrl = String(url);

  try {
    const parsedUrl = new URL(rawUrl);
    parsedUrl.search = '';
    parsedUrl.hash = '';
    return parsedUrl.toString();
  } catch (_err) {
    // `new URL` throws for relative or otherwise unparseable input (and when the constructor itself
    // is unavailable). Callers do not guard this call, so fall back to cutting the string at the
    // first query or fragment delimiter rather than letting the error abort them.
    const cutIndex = rawUrl.search(/[?#]/);
    return cutIndex === -1 ? rawUrl : rawUrl.slice(0, cutIndex);
  }
}

9 changes: 8 additions & 1 deletion lib/resources/resources.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { isQueryTracked, removeQueryAndFragmentFromUrl } from '../queryTrackedDomainList';
import {performance, isResourceTimingAvailable} from '../performance';
import { isTransmitionRequest } from '../transmission/util';
import type {BeaconWithResourceTiming} from '../types';
Expand All @@ -22,7 +23,6 @@ export function addResourceTimings(beacon: Partial<BeaconWithResourceTiming>, mi
}
}


function getEntriesTransferFormat(performanceEntries: PerformanceEntryList, minStartTime?: number) {
const trie = createTrie();

Expand All @@ -45,6 +45,13 @@ function getEntriesTransferFormat(performanceEntries: PerformanceEntryList, minS
continue;
}

if (!isQueryTracked(url)) {
url = removeQueryAndFragmentFromUrl(url);
if (DEBUG) {
info('Tracking url excluding query parameters and fragment strings', url);
}
}

const lowerCaseUrl = url.toLowerCase();
const initiatorType = entry['initiatorType'];
if (lowerCaseUrl === 'about:blank' || lowerCaseUrl.indexOf('javascript:') === 0 || // some iframe cases
Expand Down
17 changes: 17 additions & 0 deletions lib/transmission/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { sendBeacon as sendBatchedBeacon, isEnabled as isBatchingEnabled } from './batched';
import { isQueryTracked, removeQueryAndFragmentFromUrl } from '../queryTrackedDomainList';
import {createExcessiveUsageIdentifier} from '../excessiveUsageIdentification';
import { sendBeacon as sendFormEncodedBeacon } from './formEncoded';
import {isUrlIgnored} from '../ignoreRules';
Expand All @@ -22,6 +23,22 @@ export function sendBeacon(data: Partial<Beacon>) {
return;
}

if (!isQueryTracked(data['l'])) {
// data['l'] is a standardized property across all beacons to ensure that we do not accidentally transmit data
// about a page such as this.
data['l'] = removeQueryAndFragmentFromUrl(data['l']);
if (DEBUG) {
info('Tracking location url excluding query parameters and fragment strings ', data['l']);
}
}

if (!isQueryTracked(data['u'])) {
data['u'] = removeQueryAndFragmentFromUrl(data['u']);
if (DEBUG) {
info('Tracking request url excluding query parameters and fragment strings', data['u']);
}
}

if (DEBUG) {
info('Transmitting beacon', data);
}
Expand Down
3 changes: 3 additions & 0 deletions lib/vars.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ const defaultVars: {
// eum('ignoreUrls', [/example.com/]);
ignoreUrls: RegExp[];

queryTrackedDomainList: RegExp[];

// Whether or not ping like requests should be ignored from AJAX
// collection. This is separate from ignoreUrls to handle cases
// where users start monitoring using weasel and generate tons
Expand Down Expand Up @@ -345,6 +347,7 @@ const defaultVars: {
ignoreUrls: [],
ignorePings: true,
ignoreErrorMessages: [],
queryTrackedDomainList: [],
xhrTransmissionTimeout: 20000,
allowedOrigins: [],
page: undefined,
Expand Down
112 changes: 112 additions & 0 deletions test/unit/queryTrackedDomainList.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import {expect} from 'chai';

import {isUrlIgnored, isErrorMessageIgnored} from '../../lib/ignoreRules';
import {isQueryTracked, removeQueryAndFragmentFromUrl} from '../../lib/queryTrackedDomainList';
import vars from '../../lib/vars';
import { stripSecrets } from '@lib/stripSecrets';
import {normalizeUrl} from '@lib/hooks/normalizeUrl';

// Unit tests for the queryTrackedDomainList feature: isQueryTracked decides whether a URL keeps its
// query/fragment, removeQueryAndFragmentFromUrl strips them.
describe('queryTrackedDomainList api', () => {
  // Some tests overwrite vars.secrets; remember the value seen before each test so it can be
  // restored afterwards and does not leak into other tests.
  let originalSecrets;

  beforeEach(() => {
    originalSecrets = vars.secrets;
  });

  afterEach(() => {
    vars.queryTrackedDomainList = [];
    vars.secrets = originalSecrets;
    vars.reportingUrl = 'https://ingress.example.com';
    vars.reportingBackends = [{reportingUrl: 'https://ingress.example.com', key: 'key'}];
  });

  describe('isQueryTracked', () => {
    it('must return true if url is undefined', () => {
      expect(isQueryTracked(undefined)).to.equal(true);
    });

    it('must track entire url including parameters if queryTrackedDomainList is empty', () => {
      vars.queryTrackedDomainList = [];

      expect(isQueryTracked('http://example.com')).to.equal(true);
      expect(isQueryTracked('http://shop.example.com')).to.equal(true);
      expect(isQueryTracked('http://example.com/123/@!#$**(?sdajd=sadas')).to.equal(true);
    });

    it('must track entire url including parameters of url in queryTrackedDomainList', () => {
      // The pattern is anchored at the end, so only urls that END with "example.com" match.
      vars.queryTrackedDomainList = [/example.com$/];

      expect(isQueryTracked('http://example.com')).to.equal(true);
      expect(isQueryTracked('http://shop.example.com')).to.equal(true);
      expect(isQueryTracked('http://example.com/123/@!#$**(?sdajd=sadas')).to.equal(false);
      expect(isQueryTracked('http://example.comm')).to.equal(false);
      expect(isQueryTracked('http://google.com')).to.equal(false);
    });

    it('must track entire url including parameters of all urls in queryTrackedDomainList', () => {
      // First pattern is unanchored (matches anywhere), second one is anchored at the end.
      vars.queryTrackedDomainList = [/example1.com/, /example2.com$/];

      expect(isQueryTracked('http://example1.com')).to.equal(true);
      expect(isQueryTracked('http://shop.example1.com')).to.equal(true);
      expect(isQueryTracked('http://example1.com/123/@!#$**(?sdajd=sadas')).to.equal(true);
      expect(isQueryTracked('http://example2.com')).to.equal(true);
      expect(isQueryTracked('http://example2.comm')).to.equal(false);
      expect(isQueryTracked('http://example2.com/123/@!#$**(?sdajd=sadas')).to.equal(false);
      expect(isQueryTracked('http://google.com')).to.equal(false);
    });
  });

  describe('removeQueryAndFragmentFromUrl', () => {
    it('should return an empty string if the input is undefined', () => {
      expect(removeQueryAndFragmentFromUrl(undefined)).to.equal('');
    });

    it('should track original url if no query parameter or fragment', () => {
      const url = 'https://example.com/path';

      expect(removeQueryAndFragmentFromUrl(url)).to.equal(url);
    });

    it('should exclude query parameter and fragment if present', () => {
      const url1 = 'https://example.com/path?abcd#123';
      const parsedurl1 = 'https://example.com/path';

      // Everything from the first '#' on is the fragment, including the later '?'.
      const url2 = 'http://something.com/123/@!#$**(?sdajd=sadas';
      const parsedurl2 = 'http://something.com/123/@!';

      const url3 = 'http://something.com/123/@!#$**(.jpg';
      const parsedurl3 = 'http://something.com/123/@!';

      expect(removeQueryAndFragmentFromUrl(url1)).to.equal(parsedurl1);
      expect(removeQueryAndFragmentFromUrl(url2)).to.equal(parsedurl2);
      expect(removeQueryAndFragmentFromUrl(url3)).to.equal(parsedurl3);
    });

    it('test queryTrackedDomainList with redact query set should be tracked as redacted', () => {
      vars.secrets = [/account/i, /pass/i];
      vars.queryTrackedDomainList = [/example.com/i];
      const url = 'http://example.com/search?accountno=user01&pass=password&phoneno=999';
      const expectedUrl = 'http://example.com/search?accountno=<redacted>&pass=<redacted>&phoneno=999';

      expect(isQueryTracked(url)).to.equal(true);

      let reportedUrl = stripSecrets(url);
      expect(reportedUrl).to.equal(expectedUrl);

      // Same flow as the production code: strip query/fragment only when the url is not tracked.
      // The url is tracked here, so the redacted query must survive untouched.
      if (!isQueryTracked(reportedUrl)) {
        reportedUrl = removeQueryAndFragmentFromUrl(reportedUrl);
      }
      expect(reportedUrl).to.equal(expectedUrl);
    });

    it('test url not in queryTrackedDomainList with redact query set', () => {
      vars.secrets = [/account/i, /pass/i];
      vars.queryTrackedDomainList = [/something.com/i];
      const url = 'http://example.com/search?accountno=user01&pass=password&phoneno=999';
      const redactedurl = 'http://example.com/search?accountno=<redacted>&pass=<redacted>&phoneno=999';
      const parsedurl = 'http://example.com/search';

      expect(isQueryTracked(url)).to.equal(false);
      expect(stripSecrets(url)).to.equal(redactedurl);
      expect(removeQueryAndFragmentFromUrl(stripSecrets(url))).to.equal(parsedurl);
    });
  });
});




0 comments on commit 5c3b101

Please sign in to comment.