Skip to content

Commit

Permalink
Fix testing URLs with HTML entities in them
Browse files (browse the repository at this point in the history)
  • Loading branch information
fulldecent committed Dec 20, 2024
1 parent d04a8ea commit f42831b
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 56 deletions.
34 changes: 17 additions & 17 deletions test/fixtures-html-validate-should-fail.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,20 @@ const requiredResults = {
severity: 2,
message:
"external link is broken with status 500: https://freehorses.example.com/free-horses-on-1998-04-01-only.html",
offset: 196,
offset: 271,
line: 9,
column: 6,
column: 81,
size: 1,
selector: "html > body > a:nth-child(1)",
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
},
{
ruleId: "pacific-medical-training/external-links",
severity: 2,
message: "external link is broken with status 500: https://----.example.com",
offset: 303,
message: "external link is broken with status 500: https://----.example.com?a=b&c=d",
offset: 348,
line: 10,
column: 6,
column: 51,
size: 1,
selector: "html > body > a:nth-child(2)",
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
Expand All @@ -49,9 +49,9 @@ const requiredResults = {
ruleId: "pacific-medical-training/external-links",
severity: 2,
message: "external link is broken with status 500: https://-..-..-.-.-",
offset: 373,
offset: 413,
line: 11,
column: 6,
column: 34,
size: 1,
selector: "html > body > a:nth-child(3)",
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
Expand All @@ -61,9 +61,9 @@ const requiredResults = {
severity: 2,
message:
"external link https://httpbin.org/redirect-to?url=https://example.com&status_code=301 redirects to: https://example.com",
offset: 563,
offset: 655,
line: 14,
column: 6,
column: 86,
size: 1,
selector: "html > body > a:nth-child(6)",
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
Expand All @@ -85,26 +85,26 @@ const requiredResults = {
],
"test/fixtures/ensure-https.html": [
{
ruleId: "pacific-medical-training/external-links",
ruleId: "pacific-medical-training/https-links",
severity: 2,
message: "external link http://en.wikipedia.org/wiki/Horse redirects to: https://en.wikipedia.org/wiki/Horse",
message: "external link is insecure and accessible via HTTPS: http://en.wikipedia.org/wiki/Horse",
offset: 196,
line: 9,
column: 6,
size: 1,
selector: "html > body > a",
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
ruleUrl: "https://github.com/fulldecent/github-pages-template/#https-links",
},
{
ruleId: "pacific-medical-training/https-links",
ruleId: "pacific-medical-training/external-links",
severity: 2,
message: "external link is insecure and accessible via HTTPS: http://en.wikipedia.org/wiki/Horse",
offset: 196,
message: "external link http://en.wikipedia.org/wiki/Horse redirects to: https://en.wikipedia.org/wiki/Horse",
offset: 239,
line: 9,
column: 6,
column: 49,
size: 1,
selector: "html > body > a",
ruleUrl: "https://github.com/fulldecent/github-pages-template/#https-links",
ruleUrl: "https://github.com/fulldecent/github-pages-template/#external-links",
},
],
"test/fixtures/using-jquery.html": [
Expand Down
2 changes: 1 addition & 1 deletion test/fixtures/external-link-broken.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
</head>
<body>
<a href="https://freehorses.example.com/free-horses-on-1998-04-01-only.html">Get free horses here!</a>
<a href="https://----.example.com">This server does not exist</a>
<a href="https://----.example.com?a=b&amp;c=d">This server does not exist</a>
<a href="https://-..-..-.-.-">This URL is bad</a>
<a href="tel:12345678999">12345678999</a>
<a href="https://dont-check-this.example.com">https://dont-check-this.example.com</a>
Expand Down
82 changes: 44 additions & 38 deletions test/plugin.html-validate.external-links.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ export default class ExternalLinksRule extends Rule {
setup() {
this.db = this.setupDatabase();
this.skipUrlsRegex = this.loadSkipUrls();
this.on("dom:ready", this.domReady.bind(this));
this.on("tag:ready", this.tagReady.bind(this));
}

setupDatabase() {
Expand Down Expand Up @@ -149,48 +149,54 @@ export default class ExternalLinksRule extends Rule {
}
}

domReady({ document }) {
const aElements = document.getElementsByTagName("a");
for (const aElement of aElements) {
if (!aElement.hasAttribute("href")) {
continue;
}
// Check for href external links
tagReady({ target }) {
// TODO: also check image.src, link.href, script.src
if (target.tagName !== "a") {
return;
}

const href = aElement.getAttribute("href").value;
if (!href || !/^https?:\/\//i.test(href)) {
continue;
}
if (!target.hasAttribute("href")) {
return;
}

// Skip URLs that match the skip URLs regex
const url = href;
if (this.skipUrlsRegex.some((regex) => regex.test(url))) {
continue;
}
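// Decode the URL from the href attribute, see https://gitlab.com/html-validate/html-validate/-/issues/218
// Only the entities &amp;, &gt; and &lt; are decoded here, via plain string replacement.
// NOTE: &amp; is replaced first, so a doubly-escaped sequence such as &amp;lt; is decoded twice (to "<").
// TODO: use a real HTML entity decoder instead of this quick replacement.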
const url = target.getAttribute("href").value.replace(/&amp;/g, "&").replace(/&gt;/g, ">").replace(/&lt;/g, "<");

// Use cache if the URL is in there
const row = this.db.prepare("SELECT * FROM urls WHERE url = ?").get(url);
if (row) {
if (row.redirect_to) {
this.report({
node: aElement,
message: `external link ${url} redirects to: ${row.redirect_to}`,
});
continue;
}
if (row.status < 200 || row.status >= 300) {
this.report({
node: aElement,
message: `external link is broken with status ${row.status}: ${url}`,
});
continue;
}
}
if (/^https?:\/\//i.test(url) === false) {
return;
}

if (PROXY_URL !== null) {
this.checkWithProxy(url, aElement);
} else {
this.check(url, aElement);
if (this.skipUrlsRegex.some((regex) => regex.test(url))) {
return;
}

console.log(`Checking external link: ${url}`);

// Use cache if the URL is in there
const row = this.db.prepare("SELECT * FROM urls WHERE url = ?").get(url);
if (row) {
if (row.redirect_to) {
this.report({
node: target,
message: `external link ${url} redirects to: ${row.redirect_to}`,
});
return;
}
if (row.status < 200 || row.status >= 300) {
this.report({
node: target,
message: `external link is broken with status ${row.status}: ${url}`,
});
return;
}
}

if (PROXY_URL !== null) {
this.checkWithProxy(url, target);
} else {
this.check(url, target);
}
}
}

0 comments on commit f42831b

Please sign in to comment.