Skip to content

Commit

Permalink
Refactor zimit processing and add srcset support (#1181)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jaifroid authored Dec 10, 2023
1 parent ed4f47c commit 4403292
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 70 deletions.
6 changes: 6 additions & 0 deletions tests/e2e/spec/gutenberg_ro.e2e.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ function runTests (driver, modes) {
// Run tests twice, once in serviceworker mode and once in jquery mode
it('Load Kiwix JS and check title', async function () {
await driver.get('http://localhost:' + port + '/dist/www/index.html?noPrompts=true');
// Pause for 1.3 seconds to allow the app to load
await driver.sleep(1300);
// Issue a reload to ensure that the app is in the correct mode
await driver.navigate().refresh();
// Pause for 800 milliseconds to allow the app to reload
await driver.sleep(800);
const title = await driver.getTitle();
assert.equal('Kiwix', title);
});
Expand Down
11 changes: 8 additions & 3 deletions tests/e2e/spec/legacy-ray_charles.e2e.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ function runTests (driver, modes) {
}
if (mode === 'jquery' || serviceWorkerAPI) {
// Wait until the mode has switched
await driver.sleep(500);
await driver.sleep(800);
let serviceWorkerStatus = await driver.findElement(By.id('serviceWorkerStatus')).getText();
try {
if (mode === 'serviceworker') {
Expand Down Expand Up @@ -238,8 +238,13 @@ function runTests (driver, modes) {
const contentAvailable = await driver.executeScript('return document.getElementById("mw-content-text");');
return contentAvailable;
}, 6000);
const articleLink = await driver.wait(until.elementLocated(By.xpath('/html/body/div/div/ul/li[77]/a[2]')));
const text = await articleLink.getText();
// const articleLink = await driver.wait(until.elementLocated(By.xpath('/html/body/div/div/ul/li[77]/a[2]')));
// const text = await articleLink.getText();
let articleLink;
const text = await driver.wait(async function () {
articleLink = await driver.findElement(By.xpath('/html/body/div/div/ul/li[77]/a[2]'));
return await articleLink.getText();
}, 6000);
// const articleLink = await driver.findElement(By.linkText('This Little Girl of Mine'));
assert.equal('This Little Girl of Mine', text);
// Scroll the element into view and navigate to it
Expand Down
99 changes: 71 additions & 28 deletions www/js/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ function initServiceWorkerMessaging () {
// Turn off failsafe, as this is a controlled reboot
settingsStore.setItem('lastPageLoad', 'rebooting', Infinity);
if (!appstate.preventAutoReboot) window.location.reload();
} else if (navigator && navigator.serviceWorker && !navigator.serviceWorker.controller) {
} else if (/^https/.test(window.location.protocol) && navigator && navigator.serviceWorker && !navigator.serviceWorker.controller) {
if (!params.noPrompts) {
uiUtil.systemAlert('<p>No Service Worker is registered, meaning this app will not currently work offline!</p><p>Would you like to switch to ServiceWorker mode?</p>',
'Offline use is disabled!', true).then(function (response) {
Expand Down Expand Up @@ -1586,21 +1586,17 @@ function setLocalArchiveFromFileList (files) {
*/
function archiveReadyCallback (archive) {
selectedArchive = archive;

// A css cache significantly speeds up the loading of CSS files (used by default in jQuery mode)
selectedArchive.cssCache = new Map();

if (selectedArchive.zimType !== 'zimit') {
if (params.originalContentInjectionMode) {
params.contentInjectionMode = params.originalContentInjectionMode;
params.originalContentInjectionMode = null;
}
}

// When a new ZIM is loaded, we turn this flag off, so that we don't get false positive attempts to use the Worker
// It will be turned on again when the first article is loaded
appstate.isReplayWorkerAvailable = false;

// When a new ZIM is loaded, we turn this flag to null, so that we don't get false positive attempts to use the Worker
// It will be defined as false or true when the first article is loaded
appstate.isReplayWorkerAvailable = null;
// Initialize the Service Worker
if (params.contentInjectionMode === 'serviceworker') {
initServiceWorkerMessaging();
Expand Down Expand Up @@ -1852,7 +1848,7 @@ function readArticle (dirEntry) {
return;
}

if (selectedArchive.zimType === 'zimit' && params.isLandingPage) {
if (selectedArchive.zimType === 'zimit' && !appstate.isReplayWorkerAvailable) {
if (window.location.protocol === 'chrome-extension:') {
// Zimit archives contain content that is blocked in a local Chromium extension (on every page), so we must fall back to jQuery mode
return handleUnsupportedReplayWorker(dirEntry);
Expand Down Expand Up @@ -1908,6 +1904,7 @@ function readArticle (dirEntry) {
selectedArchive.readUtf8File(dirEntry, function (fileDirEntry, content) {
// Because a Zimit landing page will change the dirEntry, we have to check again for a redirect
if (fileDirEntry.zimitRedirect) {
params.isLandingPage = false;
return selectedArchive.getDirEntryByPath(fileDirEntry.zimitRedirect).then(readArticle);
} else {
displayArticleContentInIframe(fileDirEntry, content);
Expand Down Expand Up @@ -2026,7 +2023,7 @@ function articleLoadedSW (iframeArticleContent) {

// Handles a click on a Zimit link that has been processed by Wombat
function handleClickOnReplayLink (ev, anchor) {
var pseudoNamespace = selectedArchive.zimitPrefix.replace(/^(.*\/)[^/]{2,}\/$/, '$1');
var pseudoNamespace = selectedArchive.zimitPseudoContentNamespace;
var pseudoDomainPath = anchor.hostname + anchor.pathname;
var containingDocDomainPath = anchor.ownerDocument.location.hostname + anchor.ownerDocument.location.pathname;
// If it's for a different protocol (e.g. javascript:) we should let Replay handle that, or if the paths are identical, then we are dealing
Expand Down Expand Up @@ -2202,7 +2199,7 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
// Try to get the Zimit prefix from any canonical URL in the article
var zimitPrefix = htmlArticle.match(regexpGetZimitPrefix);
// If we couldn't get it, reconstruct it from the archive's zimitPrefix
zimitPrefix = zimitPrefix ? zimitPrefix[1] : selectedArchive.zimitPrefix.replace(/^[CA]\/(?:A\/)?([^/]+).*/, '$1');
zimitPrefix = zimitPrefix ? zimitPrefix[1] : selectedArchive.zimitPrefix.replace(/^\w\/([^/]+).*/, '$1');
zimitPrefix = (dirEntry.namespace === 'C' ? 'A/' : '') + zimitPrefix;
htmlArticle = htmlArticle.replace(regexpZimitHtmlLinks, function (match, blockStart, equals, quote, relAssetUrl, blockClose) {
var newBlock = match;
Expand All @@ -2225,12 +2222,13 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
var srcsetArr = srcset.split(',');
for (var i = 0; i < srcsetArr.length; i++) {
// For root-relative links, we need to add the zimitPrefix
srcsetArr[i] = srcsetArr[i].replace(/^\s?\/(?!\/)/, dirEntry.namespace + '/' + zimitPrefix + '/');
srcsetArr[i] = srcsetArr[i].replace(/^\s*\/(?!\/)/, dirEntry.namespace + '/' + zimitPrefix + '/');
// Zimit prefix is in the URL for absolute URLs
srcsetArr[i] = srcsetArr[i].replace(/^(?:\s?https?:)?\/\//i, dirEntry.namespace + '/' + (dirEntry.namespace === 'C' ? 'A/' : ''));
srcsetArr[i] = srcsetArr[i].replace(/^(?:\s*https?:)?\/\//i, dirEntry.namespace + '/' + (dirEntry.namespace === 'C' ? 'A/' : ''));
if (rootDirectory) srcsetArr[i] = srcsetArr[i].replace(/^(\.\.\/?)+/, dirEntry.namespace + '/' + zimitPrefix + '/');
}
match = match.replace(srcset, srcsetArr.join(', '));
match = match.replace(/srcset/i, 'data-kiwixsrcset');
return match;
});
}
Expand All @@ -2241,17 +2239,18 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
htmlArticle = htmlArticle.replace(regexpTagsWithZimUrl, function (match, blockStart, equals, quote, relAssetUrl, querystring) {
// We need to save the query string if any for Zimit-style archives
querystring = querystring || '';
if (selectedArchive.zimType !== 'zimit') {
var assetZIMUrl = uiUtil.deriveZimUrlFromRelativeUrl(relAssetUrl, baseUrl);
var assetZIMUrl = relAssetUrl + querystring;
if (!/^[CA]\//.test(relAssetUrl)) {
// DEV: Note that deriveZimUrlFromRelativeUrl produces a *decoded* URL (and incidentally would remove any URI component)
// We therefore re-encode the URI with encodeURI (which does not encode forward slashes) instead
// of encodeURIComponent
assetZIMUrl = uiUtil.deriveZimUrlFromRelativeUrl(relAssetUrl, baseUrl);
// Re-encode the URI with encodeURI (which does not encode forward slashes) instead of encodeURIComponent
assetZIMUrl = encodeURI(assetZIMUrl);
} else {
// For Zimit-style ZIMs, we have to remove any root path for jQuery mode to detect the asset
// var rootPathToAsset = document.location.pathname.replace(/\/index.html.*/, '/') + selectedArchive.file.name + '/';
// relAssetUrl = relAssetUrl.replace(rootPathToAsset, '');
assetZIMUrl = relAssetUrl + querystring;
if (selectedArchive.zimType === 'zimit') {
// For Zimit-style ZIMs, we have to remove any root path for jQuery mode to detect the asset
// var rootPathToAsset = document.location.pathname.replace(/\/index.html.*/, '/') + selectedArchive.file.name + '/';
// relAssetUrl = relAssetUrl.replace(rootPathToAsset, '');
assetZIMUrl = assetZIMUrl + querystring;
}
}
return blockStart + 'data-kiwixurl' + equals + assetZIMUrl;
});
Expand Down Expand Up @@ -2380,7 +2379,7 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
var newHref = href;
if (selectedArchive.zimType === 'zimit') {
// We need to check that the link isn't from a domain contained in the Zimit archive
var zimitDomain = selectedArchive.zimitPrefix.replace(/^[CA/]+([^/]+).*/, '$1');
var zimitDomain = selectedArchive.zimitPrefix.replace(/^\w\/([^/]+).*/, '$1');
newHref = href.replace(anchor.protocol + '//' + zimitDomain + '/', '');
}
if (newHref === href) {
Expand All @@ -2392,7 +2391,7 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
});
return;
} else {
href = selectedArchive.zimitPrefix + newHref;
href = dirEntry.namespace + '/' + selectedArchive.zimitPrefix + newHref;
}
}
// It's a link to an article or file in the ZIM
Expand All @@ -2417,7 +2416,7 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
anchorParameter = href.match(/#([^#;]+)$/);
anchorParameter = anchorParameter ? anchorParameter[1] : '';
var zimUrl;
if (selectedArchive.zimitPrefix && ~href.indexOf(selectedArchive.zimitPrefix)) {
if (selectedArchive.zimitPrefix && ~href.indexOf(dirEntry.namespace + '/' + selectedArchive.zimitPrefix)) {
// It's already a full ZIM URL, so we can use it after stripping any anchor
zimUrl = decodeURIComponent(href.replace(/#.*/, ''));
} else {
Expand All @@ -2443,6 +2442,14 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
images.busy = true;
// Extract the image at the top of the images array and remove it from the array
var image = images.shift();
// Get any data-kiwixsrcset
var srcset = image.getAttribute('data-kiwixsrcset');
var srcsetArr = [];
if (srcset) {
// We need to get the array of images in the srcset
srcsetArr = srcset.split(',');
}
// Get the image URL
var imageUrl = image.getAttribute('data-kiwixurl');
// Decode any WebP images that are encoded as dataURIs
if (/^data:image\/webp/i.test(imageUrl)) {
Expand All @@ -2457,7 +2464,43 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
var mimetype = dirEntry.getMimetype();
uiUtil.feedNodeWithDataURI(image, 'src', content, mimetype, function () {
images.busy = false;
extractImage();
if (srcsetArr.length) {
// We need to process each image in the srcset
// Empty or make a new srcset
image.srcset = '';
var srcsetCount = srcsetArr.length;
srcsetArr.forEach(function (imgAndResolutionUrl) {
srcsetCount--;
images.busy = true;
// Get the url and the resolution from the srcset entry
var urlMatch = imgAndResolutionUrl.match(/^\s*([^\s]+)\s+([0-9.]+\w+)\s*$/);
var url = urlMatch ? urlMatch[1] : '';
var resolution = urlMatch ? urlMatch[2]: '';
selectedArchive.getDirEntryByPath(url).then(function (srcEntry) {
selectedArchive.readBinaryFile(srcEntry, function (fileDirEntry, content) {
var mimetype = srcEntry.getMimetype();
uiUtil.getDataUriFromUint8Array(content, mimetype).then(function (dataUri) {
// Add the dataUri to the srcset
image.srcset += (image.srcset ? ', ' : '') + dataUri + ' ' + resolution;
images.busy = false;
if (srcsetCount === 0) {
extractImage();
}
}).catch(function (e) {
console.error('Could not get dataUri for image:' + url, e);
images.busy = false;
if (srcsetCount === 0) extractImage();
});
});
}).catch(function (e) {
console.error('Could not find DirEntry for image:' + url, e);
images.busy = false;
if (srcsetCount === 0) extractImage();
});
});
} else {
extractImage();
}
});
});
}).catch(function (e) {
Expand Down Expand Up @@ -2509,8 +2552,8 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
throw new Error('DirEntry ' + typeof dirEntry);
}
var mimetype = dirEntry.getMimetype();
var readFile = /^text\//i.test(mimetype) ? selectedArchive.readUtf8File : selectedArchive.readBinaryFile;
return readFile(dirEntry, function (fileDirEntry, content) {
var readFile = /^text\//i.test(mimetype) ? 'readUtf8File' : 'readBinaryFile';
return selectedArchive[readFile](dirEntry, function (fileDirEntry, content) {
var fullUrl = fileDirEntry.namespace + '/' + fileDirEntry.url;
if (params.assetsCache) selectedArchive.cssCache.set(fullUrl, content);
if (/text\/css/i.test(mimetype)) uiUtil.replaceCSSLinkWithInlineCSS(link, content);
Expand Down
32 changes: 26 additions & 6 deletions www/js/lib/uiUtil.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* uiUtil.js : Utility functions for the User Interface
*
* Copyright 2013-2020 Mossroy and contributors
* Copyright 2013-2024 Mossroy, Jaifroid and contributors
* Licence GPL v3:
*
* This file is part of Kiwix.
Expand Down Expand Up @@ -322,16 +322,35 @@ function feedNodeWithDataURI (node, nodeAttribute, content, mimeType, callback)
} else {
if (callback) callback(); // Calling back as soon as possible speeds up extraction
// In browsers that support WebP natively, or for non-WebP images, we can simply convert the Uint8Array to a data URI
// DEV: we use FileReader method because btoa fails on utf8 strings (in SVGs, for example)
// See https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem
// This native browser method is very fast: see https://stackoverflow.com/a/66046176/9727685
getDataUriFromUint8Array(content, mimeType).then(function (dataUri) {
node.setAttribute(nodeAttribute, dataUri);
}).catch(function (err) {
console.error('There was an error converting Uint8Array to data URI', err);
});
}
}

/**
 * Creates a data: URI from the given content
 *
 * This helper only produces the URI: it does not touch the DOM. Callers are responsible for assigning
 * the resolved value to a node attribute (or appending it to a srcset) themselves.
 *
 * @param {Uint8Array} content The binary content to convert to a URI
 * @param {String} mimeType The MIME type of the content
 * @returns {Promise<String>} A promise that resolves to the data URI, or rejects with the reader's error event
 */
function getDataUriFromUint8Array (content, mimeType) {
    // Use FileReader method because btoa fails on utf8 strings (in SVGs, for example)
    // See https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem
    // This native browser method is very fast: see https://stackoverflow.com/a/66046176/9727685
    return new Promise((resolve, reject) => {
        var myReader = new FileReader();
        // DEV: loadend fires after both success and failure. On failure the error handler below has already
        // rejected the promise, so the resolve() call here is then a no-op
        myReader.onloadend = function () {
            resolve(myReader.result);
        };
        myReader.onerror = function (err) {
            reject(err);
        };
        // Wrapping the content in a Blob lets the reader attach the MIME type to the resulting data: URI
        myReader.readAsDataURL(new Blob([content], { type: mimeType }));
    });
}

/**
Expand Down Expand Up @@ -1010,6 +1029,7 @@ export default {
scroller: scroller,
systemAlert: systemAlert,
feedNodeWithDataURI: feedNodeWithDataURI,
getDataUriFromUint8Array: getDataUriFromUint8Array,
determineCanvasElementsWorkaround: determineCanvasElementsWorkaround,
replaceCSSLinkWithInlineCSS: replaceCSSLinkWithInlineCSS,
deriveZimUrlFromRelativeUrl: deriveZimUrlFromRelativeUrl,
Expand Down
Loading

0 comments on commit 4403292

Please sign in to comment.