From a065eafbe22cc1dba8d6e866092c468204dd940e Mon Sep 17 00:00:00 2001 From: Eoghan Murray Date: Wed, 18 Dec 2024 09:31:06 +0000 Subject: [PATCH 01/25] Fix bug where the right split point was not being picked for the 3rd section onwards --- packages/rrweb-snapshot/src/utils.ts | 3 ++- packages/rrweb-snapshot/test/css.test.ts | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/rrweb-snapshot/src/utils.ts b/packages/rrweb-snapshot/src/utils.ts index 42aba4d649..f696bd6cab 100644 --- a/packages/rrweb-snapshot/src/utils.ts +++ b/packages/rrweb-snapshot/src/utils.ts @@ -466,7 +466,7 @@ export function splitCssText( const childNodes = Array.from(style.childNodes); const splits: string[] = []; if (childNodes.length > 1 && cssText && typeof cssText === 'string') { - const cssTextNorm = normalizeCssString(cssText); + let cssTextNorm = normalizeCssString(cssText); for (let i = 1; i < childNodes.length; i++) { if ( childNodes[i].textContent && @@ -485,6 +485,7 @@ export function splitCssText( ) { splits.push(cssText.substring(0, k)); cssText = cssText.substring(k); + cssTextNorm = cssTextNorm.substring(splitNorm); break; } } diff --git a/packages/rrweb-snapshot/test/css.test.ts b/packages/rrweb-snapshot/test/css.test.ts index 99a5b362e9..d2940a0af5 100644 --- a/packages/rrweb-snapshot/test/css.test.ts +++ b/packages/rrweb-snapshot/test/css.test.ts @@ -105,10 +105,16 @@ describe('css splitter', () => { // as authored, e.g. 
no spaces style.append('.a{background-color:black;}'); + // test how normalization finds the right sections + style.append('.b {background-color:black;}'); + style.append('.c{ background-color: black}'); + // how it is currently stringified (spaces present) const expected = [ '.a { background-color: red; }', '.a { background-color: black; }', + '.b { background-color: black; }', + '.c { background-color: black; }', ]; const browserSheet = expected.join(''); expect(stringifyStylesheet(style.sheet!)).toEqual(browserSheet); From e2fe6608207fba4b4bdb4925799877869c17b4eb Mon Sep 17 00:00:00 2001 From: Eoghan Murray Date: Tue, 17 Dec 2024 23:39:39 +0000 Subject: [PATCH 02/25] Add test to put splitCssText through its paces with a large file --- packages/rrweb-snapshot/test/css.test.ts | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/packages/rrweb-snapshot/test/css.test.ts b/packages/rrweb-snapshot/test/css.test.ts index d2940a0af5..30877942ec 100644 --- a/packages/rrweb-snapshot/test/css.test.ts +++ b/packages/rrweb-snapshot/test/css.test.ts @@ -7,6 +7,8 @@ import postcss, { type AcceptedPlugin } from 'postcss'; import { JSDOM } from 'jsdom'; import { splitCssText, stringifyStylesheet } from './../src/utils'; import { applyCssSplits } from './../src/rebuild'; +import * as fs from 'fs'; +import * as path from 'path'; import type { serializedElementNodeWithId, BuildCache, @@ -175,6 +177,34 @@ describe('css splitter', () => { expect(splitCssText(browserSheet, style)).toEqual(expected); } }); + + it('efficiently finds split points in large files', () => { + const cssText = fs.readFileSync( + path.resolve(__dirname, './css/benchmark.css'), + 'utf8', + ); + + const parts = cssText.split('}'); + const sections = []; + for (let i = 0; i < parts.length - 1; i++) { + if (i % 100 === 0) { + sections.push(parts[i] + '}'); + } else { + sections[sections.length - 1] += parts[i] + '}'; + } + } + sections[sections.length - 1] += parts[parts.length - 1]; + +
expect(cssText.length).toEqual(sections.join('').length); + + const style = JSDOM.fragment(``).querySelector('style'); + if (style) { + sections.forEach((section) => { + style.appendChild(JSDOM.fragment(section)); + }); + } + expect(splitCssText(cssText, style)).toEqual(sections); + }); }); describe('applyCssSplits css rejoiner', function () { From 2f663d403054103012088a573a15de5d1d91fea2 Mon Sep 17 00:00:00 2001 From: Eoghan Murray Date: Wed, 18 Dec 2024 10:41:53 +0000 Subject: [PATCH 03/25] Introduce a limit which causes the 'efficiently' test to fail --- packages/rrweb-snapshot/src/utils.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/rrweb-snapshot/src/utils.ts b/packages/rrweb-snapshot/src/utils.ts index f696bd6cab..5788dd8d90 100644 --- a/packages/rrweb-snapshot/src/utils.ts +++ b/packages/rrweb-snapshot/src/utils.ts @@ -465,6 +465,7 @@ export function splitCssText( ): string[] { const childNodes = Array.from(style.childNodes); const splits: string[] = []; + let iter_limit = 0; if (childNodes.length > 1 && cssText && typeof cssText === 'string') { let cssTextNorm = normalizeCssString(cssText); for (let i = 1; i < childNodes.length; i++) { @@ -480,6 +481,12 @@ export function splitCssText( const splitNorm = cssTextNorm.indexOf(bit); // find the split point in the original text for (let k = splitNorm; k < cssText.length; k++) { + iter_limit += 1; + if (iter_limit > 300 * childNodes.length) { + // quit for performance purposes + splits.push(cssText); + return splits; + } if ( normalizeCssString(cssText.substring(0, k)).length === splitNorm ) { From 8ba3b5433657e5975a9ad1f1449812f10c5e2fc3 Mon Sep 17 00:00:00 2001 From: Eoghan Murray Date: Tue, 17 Dec 2024 16:30:09 +0000 Subject: [PATCH 04/25] Fix that it wasn't able to find a split when both halves were identical --- packages/rrweb-snapshot/src/utils.ts | 29 ++++++++++++++++++++---- packages/rrweb-snapshot/test/css.test.ts | 22 ++++++++++++++++++ 2 files changed, 47 insertions(+), 4 
deletions(-) diff --git a/packages/rrweb-snapshot/src/utils.ts b/packages/rrweb-snapshot/src/utils.ts index 5788dd8d90..78255b37cd 100644 --- a/packages/rrweb-snapshot/src/utils.ts +++ b/packages/rrweb-snapshot/src/utils.ts @@ -474,11 +474,32 @@ export function splitCssText( typeof childNodes[i].textContent === 'string' ) { const textContentNorm = normalizeCssString(childNodes[i].textContent!); - for (let j = 3; j < textContentNorm.length; j++) { - // find a substring that appears only once + let j = 3; + for (; j < textContentNorm.length; j++) { + if ( + textContentNorm.indexOf(textContentNorm.substring(0, j), 1) !== -1 + ) { + // substring needs to be unique to this section + continue; + } + break; + } + for (; j < textContentNorm.length; j++) { const bit = textContentNorm.substring(0, j); - if (cssTextNorm.split(bit).length === 2) { - const splitNorm = cssTextNorm.indexOf(bit); + // this substring should appears only once in overall text too + const bits = cssTextNorm.split(bit); + let splitNorm = -1; + if (bits.length === 2) { + splitNorm = cssTextNorm.indexOf(bit); + } else if ( + bits.length > 2 && + bits[0] === '' && + childNodes[i - 1].textContent !== '' + ) { + // this childNode has same starting content as previous + splitNorm = cssTextNorm.indexOf(bit, 1); + } + if (splitNorm !== -1) { // find the split point in the original text for (let k = splitNorm; k < cssText.length; k++) { iter_limit += 1; diff --git a/packages/rrweb-snapshot/test/css.test.ts b/packages/rrweb-snapshot/test/css.test.ts index 30877942ec..a26e0005fd 100644 --- a/packages/rrweb-snapshot/test/css.test.ts +++ b/packages/rrweb-snapshot/test/css.test.ts @@ -145,6 +145,28 @@ describe('css splitter', () => { } }); + it('finds css textElement splits correctly with two identical text nodes', () => { + const window = new Window({ url: 'https://localhost:8080' }); + const document = window.document; + // as authored, with comment, missing semicolons + const textContent = '.a { color:red; } .b { 
color:blue; }'; + document.head.innerHTML = ''; + const style = document.querySelector('style'); + if (style) { + style.append(textContent); + style.append(textContent); + + const expected = [textContent, textContent]; + const browserSheet = expected.join(''); + expect(splitCssText(browserSheet, style)).toEqual(expected); + + style.append(textContent); + const expected3 = [textContent, textContent, textContent]; + const browserSheet3 = expected3.join(''); + expect(splitCssText(browserSheet3, style)).toEqual(expected3); + } + }); + it('finds css textElement splits correctly when vendor prefixed rules have been removed', () => { const style = JSDOM.fragment(``).querySelector('style'); if (style) { From 61e8b5fba2fe3b499a634c50e957666835b435d7 Mon Sep 17 00:00:00 2001 From: Eoghan Murray Date: Tue, 17 Dec 2024 18:03:34 +0000 Subject: [PATCH 05/25] Fix poor 'crawling' performance in this part of the algorithm for large css texts - e.g. for a (doubled) benchmark.css, we were running normalizeCssString 9480 times before k got to the right place --- packages/rrweb-snapshot/src/utils.ts | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/packages/rrweb-snapshot/src/utils.ts b/packages/rrweb-snapshot/src/utils.ts index 78255b37cd..dfc77b0cc9 100644 --- a/packages/rrweb-snapshot/src/utils.ts +++ b/packages/rrweb-snapshot/src/utils.ts @@ -501,20 +501,29 @@ export function splitCssText( } if (splitNorm !== -1) { // find the split point in the original text - for (let k = splitNorm; k < cssText.length; k++) { + let k = Math.floor( + (cssText.length * splitNorm) / cssTextNorm.length, + ); + let dir = 0; + for (; k > 0 && k < cssText.length; k += dir) { iter_limit += 1; if (iter_limit > 300 * childNodes.length) { // quit for performance purposes splits.push(cssText); return splits; } - if ( - normalizeCssString(cssText.substring(0, k)).length === splitNorm - ) { + let normPart = normalizeCssString(cssText.substring(0, k)); + if (normPart.length ===
splitNorm) { splits.push(cssText.substring(0, k)); cssText = cssText.substring(k); cssTextNorm = cssTextNorm.substring(splitNorm); break; + } else if (dir === 0) { + if (normPart.length < splitNorm) { + dir = 1; + } else { + dir = -1; + } } } break; From 3474cb27f9449e9a6f784882fe54f07efb129608 Mon Sep 17 00:00:00 2001 From: Eoghan Murray Date: Tue, 17 Dec 2024 23:03:13 +0000 Subject: [PATCH 06/25] Need to take larger jumps to be efficient; use the scaling factor to make better guess at how big a jump to make - can reduce iter_limit from 300 to 50 to prove that this approach is better --- packages/rrweb-snapshot/src/utils.ts | 31 +++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/packages/rrweb-snapshot/src/utils.ts b/packages/rrweb-snapshot/src/utils.ts index dfc77b0cc9..0de5e4daab 100644 --- a/packages/rrweb-snapshot/src/utils.ts +++ b/packages/rrweb-snapshot/src/utils.ts @@ -456,8 +456,9 @@ export function normalizeCssString(cssText: string): string { /** * Maps the output of stringifyStylesheet to individual text nodes of a