From bc60ae41a60bd65c21b72e7b22c5f401c4fcba2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20Thei=C3=9Fen?= Date: Fri, 1 Dec 2023 09:11:57 +0100 Subject: [PATCH 1/5] Analyse with puppeteer --- lib/selector.mjs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 lib/selector.mjs diff --git a/lib/selector.mjs b/lib/selector.mjs new file mode 100644 index 000000000..d9df6d94e --- /dev/null +++ b/lib/selector.mjs @@ -0,0 +1,28 @@ +import puppeteer from "puppeteer"; +import iterator from "./iterator.js"; + +var docs = []; +iterator(function (srcname, name, variant) { + docs.push(name); +}); +var browser = await puppeteer.launch({ headless: "new" }); +for (var name of docs) { + console.group(name); + var page = await browser.newPage(); + await page.goto(`${import.meta.dirname}/../docs/${name}/${name}.html`, { + waitUntil: "networkidle2", + }); + var elements = await page.$$(process.argv[2]); + for (var elem of elements) { + var elems = process.argv[3] + ? await elem.$$("xpath/" + process.argv[3]) + : [elem]; + for (elem of elems) + console.log( + await elem.evaluate((e) => + e.nodeType === Node.ELEMENT_NODE ? e.outerHTML : e.nodeValue, + ), + ); + } + console.groupEnd(); +} From ddd4ba76c5ea14b5e7888bdf63788a9c2aad7559 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20Thei=C3=9Fen?= Date: Fri, 1 Dec 2023 10:13:57 +0100 Subject: [PATCH 2/5] Better parallelization --- lib/selector.mjs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/lib/selector.mjs b/lib/selector.mjs index d9df6d94e..78fa44bfd 100644 --- a/lib/selector.mjs +++ b/lib/selector.mjs @@ -12,17 +12,23 @@ for (var name of docs) { await page.goto(`${import.meta.dirname}/../docs/${name}/${name}.html`, { waitUntil: "networkidle2", }); - var elements = await page.$$(process.argv[2]); - for (var elem of elements) { - var elems = process.argv[3] - ? 
await elem.$$("xpath/" + process.argv[3]) - : [elem]; - for (elem of elems) - console.log( - await elem.evaluate((e) => - e.nodeType === Node.ELEMENT_NODE ? e.outerHTML : e.nodeValue, - ), - ); - } + for (var r of await Promise.all( + ( + await Promise.all( + (await page.$$(process.argv[2])).map(async function (elem) { + var elems = process.argv[3] + ? await elem.$$("xpath/" + process.argv[3]) + : [elem]; + return elems.map((elem) => + elem.evaluate((e) => + e.nodeType === Node.ELEMENT_NODE ? e.outerHTML : e.nodeValue, + ), + ); + }), + ) + ).flat(), + )) + console.log(r); console.groupEnd(); } +process.exit(0); From 867941f84b4fa90953f5e4238f5078221b6ad746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20Thei=C3=9Fen?= Date: Fri, 1 Dec 2023 10:39:00 +0100 Subject: [PATCH 3/5] documentation --- lib/README.md | 4 ++++ package.json | 1 + 2 files changed, 5 insertions(+) diff --git a/lib/README.md b/lib/README.md index 40483f2b7..ebca07a42 100644 --- a/lib/README.md +++ b/lib/README.md @@ -96,6 +96,10 @@ The following scripts can be executed manually or as part of a GitHub Action: - a copy of the common [`styles`](../styles) folder - a copy of the document-specific `*/images` folder, if this exists. - [`npm run pdf`](build-pdf.mjs) runs the PDF conversion and writes the PDF document into the [`docs/*`](../docs) folder. +- [`npm run select <selector> [<xpath>]`](selector.mjs) selects parts of the generated HTML documents by executing a CSS selector and optionally an XPath expression relative to each match. For example, syntax errors in JSON code snippets can be detected with + ```sh + npm run select ".json .er" "self::*[.!='…']/text()" + ``` - [`npm test`](../test) runs a test suite. 
## A note on diagrams diff --git a/package.json b/package.json index 3b3813213..2f71ce2cf 100644 --- a/package.json +++ b/package.json @@ -7,6 +7,7 @@ "build": "node lib/build.js", "pdf": "node lib/build-pdf.js", "start": "node lib/server", + "select": "node lib/selector.mjs", "test": "c8 -r html -r text mocha", "clean-xxx": "node lib/clean.mjs odata-xxx/temp odata-xxx-v4.0" }, From 47986f6033e731fff775b61319495a23d928e0f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20Thei=C3=9Fen?= Date: Fri, 1 Dec 2023 13:41:13 +0100 Subject: [PATCH 4/5] Group results by headings --- lib/selector.mjs | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/lib/selector.mjs b/lib/selector.mjs index 78fa44bfd..2f08df316 100644 --- a/lib/selector.mjs +++ b/lib/selector.mjs @@ -7,6 +7,7 @@ iterator(function (srcname, name, variant) { }); var browser = await puppeteer.launch({ headless: "new" }); for (var name of docs) { + var heading = undefined; console.group(name); var page = await browser.newPage(); await page.goto(`${import.meta.dirname}/../docs/${name}/${name}.html`, { @@ -20,15 +21,31 @@ for (var name of docs) { ? await elem.$$("xpath/" + process.argv[3]) : [elem]; return elems.map((elem) => - elem.evaluate((e) => - e.nodeType === Node.ELEMENT_NODE ? e.outerHTML : e.nodeValue, - ), + elem.evaluate(function (e) { + return { + heading: document.evaluate( + "preceding::*[self::h1|self::h2|self::h3|self::h4|self::h5|self::h6][1]", + e, + () => {}, + XPathResult.FIRST_ORDERED_NODE_TYPE, + ).singleNodeValue.textContent, + match: + e.nodeType === Node.ELEMENT_NODE ? 
e.outerHTML : e.nodeValue, + }; + }), ); }), ) ).flat(), - )) - console.log(r); + )) { + if (r.heading !== heading) { + if (heading) console.groupEnd(); + console.group(r.heading); + heading = r.heading; + } + console.log(r.match); + } + if (heading) console.groupEnd(); console.groupEnd(); } process.exit(0); From 3ce0d8afbd4d9063bce409ff5ff922672abef55c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heiko=20Thei=C3=9Fen?= Date: Mon, 4 Dec 2023 11:01:09 +0100 Subject: [PATCH 5/5] Exit code --- lib/selector.mjs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/selector.mjs b/lib/selector.mjs index 2f08df316..30233f131 100644 --- a/lib/selector.mjs +++ b/lib/selector.mjs @@ -6,6 +6,7 @@ iterator(function (srcname, name, variant) { docs.push(name); }); var browser = await puppeteer.launch({ headless: "new" }); +var exit_code = 0; for (var name of docs) { var heading = undefined; console.group(name); @@ -20,6 +21,7 @@ for (var name of docs) { var elems = process.argv[3] ? await elem.$$("xpath/" + process.argv[3]) : [elem]; + if (elems.length > 0) exit_code = 1; return elems.map((elem) => elem.evaluate(function (e) { return { @@ -48,4 +50,4 @@ for (var name of docs) { if (heading) console.groupEnd(); console.groupEnd(); } -process.exit(0); +process.exit(exit_code);