Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor zimit processing #1181

Merged
merged 10 commits into from
Dec 10, 2023
Merged
6 changes: 6 additions & 0 deletions tests/e2e/spec/gutenberg_ro.e2e.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ function runTests (driver, modes) {
// Run tests twice, once in serviceworker mode and once in jquery mode
it('Load Kiwix JS and check title', async function () {
await driver.get('http://localhost:' + port + '/dist/www/index.html?noPrompts=true');
// Pause for 1.3 seconds to allow the app to load
await driver.sleep(1300);
// Issue a reload to ensure that the app is in the correct mode
await driver.navigate().refresh();
// Pause for 800 milliseconds to allow the app to reload
await driver.sleep(800);
const title = await driver.getTitle();
assert.equal('Kiwix', title);
});
Expand Down
11 changes: 8 additions & 3 deletions tests/e2e/spec/legacy-ray_charles.e2e.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ function runTests (driver, modes) {
}
if (mode === 'jquery' || serviceWorkerAPI) {
// Wait until the mode has switched
await driver.sleep(500);
await driver.sleep(800);
let serviceWorkerStatus = await driver.findElement(By.id('serviceWorkerStatus')).getText();
try {
if (mode === 'serviceworker') {
Expand Down Expand Up @@ -238,8 +238,13 @@ function runTests (driver, modes) {
const contentAvailable = await driver.executeScript('return document.getElementById("mw-content-text");');
return contentAvailable;
}, 6000);
const articleLink = await driver.wait(until.elementLocated(By.xpath('/html/body/div/div/ul/li[77]/a[2]')));
const text = await articleLink.getText();
// const articleLink = await driver.wait(until.elementLocated(By.xpath('/html/body/div/div/ul/li[77]/a[2]')));
// const text = await articleLink.getText();
let articleLink;
const text = await driver.wait(async function () {
articleLink = await driver.findElement(By.xpath('/html/body/div/div/ul/li[77]/a[2]'));
return await articleLink.getText();
}, 6000);
// const articleLink = await driver.findElement(By.linkText('This Little Girl of Mine'));
assert.equal('This Little Girl of Mine', text);
// Scroll the element into view and navigate to it
Expand Down
99 changes: 71 additions & 28 deletions www/js/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ function initServiceWorkerMessaging () {
// Turn off failsafe, as this is a controlled reboot
settingsStore.setItem('lastPageLoad', 'rebooting', Infinity);
if (!appstate.preventAutoReboot) window.location.reload();
} else if (navigator && navigator.serviceWorker && !navigator.serviceWorker.controller) {
} else if (/^https/.test(window.location.protocol) && navigator && navigator.serviceWorker && !navigator.serviceWorker.controller) {
if (!params.noPrompts) {
uiUtil.systemAlert('<p>No Service Worker is registered, meaning this app will not currently work offline!</p><p>Would you like to switch to ServiceWorker mode?</p>',
'Offline use is disabled!', true).then(function (response) {
Expand Down Expand Up @@ -1586,21 +1586,17 @@ function setLocalArchiveFromFileList (files) {
*/
function archiveReadyCallback (archive) {
selectedArchive = archive;

// A css cache significantly speeds up the loading of CSS files (used by default in jQuery mode)
selectedArchive.cssCache = new Map();

if (selectedArchive.zimType !== 'zimit') {
if (params.originalContentInjectionMode) {
params.contentInjectionMode = params.originalContentInjectionMode;
params.originalContentInjectionMode = null;
}
}

// When a new ZIM is loaded, we turn this flag off, so that we don't get false positive attempts to use the Worker
// It will be turned on again when the first article is loaded
appstate.isReplayWorkerAvailable = false;

// When a new ZIM is loaded, we turn this flag to null, so that we don't get false positive attempts to use the Worker
// It will be defined as false or true when the first article is loaded
appstate.isReplayWorkerAvailable = null;
// Initialize the Service Worker
if (params.contentInjectionMode === 'serviceworker') {
initServiceWorkerMessaging();
Expand Down Expand Up @@ -1852,7 +1848,7 @@ function readArticle (dirEntry) {
return;
}

if (selectedArchive.zimType === 'zimit' && params.isLandingPage) {
if (selectedArchive.zimType === 'zimit' && !appstate.isReplayWorkerAvailable) {
if (window.location.protocol === 'chrome-extension:') {
// Zimit archives contain content that is blocked in a local Chromium extension (on every page), so we must fall back to jQuery mode
return handleUnsupportedReplayWorker(dirEntry);
Expand Down Expand Up @@ -1908,6 +1904,7 @@ function readArticle (dirEntry) {
selectedArchive.readUtf8File(dirEntry, function (fileDirEntry, content) {
// Because a Zimit landing page will change the dirEntry, we have to check again for a redirect
if (fileDirEntry.zimitRedirect) {
params.isLandingPage = false;
return selectedArchive.getDirEntryByPath(fileDirEntry.zimitRedirect).then(readArticle);
} else {
displayArticleContentInIframe(fileDirEntry, content);
Expand Down Expand Up @@ -2026,7 +2023,7 @@ function articleLoadedSW (iframeArticleContent) {

// Handles a click on a Zimit link that has been processed by Wombat
function handleClickOnReplayLink (ev, anchor) {
var pseudoNamespace = selectedArchive.zimitPrefix.replace(/^(.*\/)[^/]{2,}\/$/, '$1');
var pseudoNamespace = selectedArchive.zimitPseudoContentNamespace;
var pseudoDomainPath = anchor.hostname + anchor.pathname;
var containingDocDomainPath = anchor.ownerDocument.location.hostname + anchor.ownerDocument.location.pathname;
// If it's for a different protocol (e.g. javascript:) we should let Replay handle that, or if the paths are identical, then we are dealing
Expand Down Expand Up @@ -2202,7 +2199,7 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
// Try to get the Zimit prefix from any canonical URL in the article
var zimitPrefix = htmlArticle.match(regexpGetZimitPrefix);
// If we couldn't get it, reconstruct it from the archive's zimitPrefix
zimitPrefix = zimitPrefix ? zimitPrefix[1] : selectedArchive.zimitPrefix.replace(/^[CA]\/(?:A\/)?([^/]+).*/, '$1');
zimitPrefix = zimitPrefix ? zimitPrefix[1] : selectedArchive.zimitPrefix.replace(/^\w\/([^/]+).*/, '$1');
zimitPrefix = (dirEntry.namespace === 'C' ? 'A/' : '') + zimitPrefix;
htmlArticle = htmlArticle.replace(regexpZimitHtmlLinks, function (match, blockStart, equals, quote, relAssetUrl, blockClose) {
var newBlock = match;
Expand All @@ -2225,12 +2222,13 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
var srcsetArr = srcset.split(',');
for (var i = 0; i < srcsetArr.length; i++) {
// For root-relative links, we need to add the zimitPrefix
srcsetArr[i] = srcsetArr[i].replace(/^\s?\/(?!\/)/, dirEntry.namespace + '/' + zimitPrefix + '/');
srcsetArr[i] = srcsetArr[i].replace(/^\s*\/(?!\/)/, dirEntry.namespace + '/' + zimitPrefix + '/');
// Zimit prefix is in the URL for absolute URLs
srcsetArr[i] = srcsetArr[i].replace(/^(?:\s?https?:)?\/\//i, dirEntry.namespace + '/' + (dirEntry.namespace === 'C' ? 'A/' : ''));
srcsetArr[i] = srcsetArr[i].replace(/^(?:\s*https?:)?\/\//i, dirEntry.namespace + '/' + (dirEntry.namespace === 'C' ? 'A/' : ''));
if (rootDirectory) srcsetArr[i] = srcsetArr[i].replace(/^(\.\.\/?)+/, dirEntry.namespace + '/' + zimitPrefix + '/');
}
match = match.replace(srcset, srcsetArr.join(', '));
match = match.replace(/srcset/i, 'data-kiwixsrcset');
return match;
});
}
Expand All @@ -2241,17 +2239,18 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
htmlArticle = htmlArticle.replace(regexpTagsWithZimUrl, function (match, blockStart, equals, quote, relAssetUrl, querystring) {
// We need to save the query string if any for Zimit-style archives
querystring = querystring || '';
if (selectedArchive.zimType !== 'zimit') {
var assetZIMUrl = uiUtil.deriveZimUrlFromRelativeUrl(relAssetUrl, baseUrl);
var assetZIMUrl = relAssetUrl + querystring;
if (!/^[CA]\//.test(relAssetUrl)) {
// DEV: Note that deriveZimUrlFromRelativeUrl produces a *decoded* URL (and incidentally would remove any URI component)
// We therefore re-encode the URI with encodeURI (which does not encode forward slashes) instead
// of encodeURIComponent
assetZIMUrl = uiUtil.deriveZimUrlFromRelativeUrl(relAssetUrl, baseUrl);
// Re-encode the URI with encodeURI (which does not encode forward slashes) instead of encodeURIComponent
assetZIMUrl = encodeURI(assetZIMUrl);
} else {
// For Zimit-style ZIMs, we have to remove any root path for jQuery mode to detect the asset
// var rootPathToAsset = document.location.pathname.replace(/\/index.html.*/, '/') + selectedArchive.file.name + '/';
// relAssetUrl = relAssetUrl.replace(rootPathToAsset, '');
assetZIMUrl = relAssetUrl + querystring;
if (selectedArchive.zimType === 'zimit') {
// For Zimit-style ZIMs, we have to remove any root path for jQuery mode to detect the asset
// var rootPathToAsset = document.location.pathname.replace(/\/index.html.*/, '/') + selectedArchive.file.name + '/';
// relAssetUrl = relAssetUrl.replace(rootPathToAsset, '');
assetZIMUrl = assetZIMUrl + querystring;
}
}
return blockStart + 'data-kiwixurl' + equals + assetZIMUrl;
});
Expand Down Expand Up @@ -2380,7 +2379,7 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
var newHref = href;
if (selectedArchive.zimType === 'zimit') {
// We need to check that the link isn't from a domain contained in the Zimit archive
var zimitDomain = selectedArchive.zimitPrefix.replace(/^[CA/]+([^/]+).*/, '$1');
var zimitDomain = selectedArchive.zimitPrefix.replace(/^\w\/([^/]+).*/, '$1');
newHref = href.replace(anchor.protocol + '//' + zimitDomain + '/', '');
}
if (newHref === href) {
Expand All @@ -2392,7 +2391,7 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
});
return;
} else {
href = selectedArchive.zimitPrefix + newHref;
href = dirEntry.namespace + '/' + selectedArchive.zimitPrefix + newHref;
}
}
// It's a link to an article or file in the ZIM
Expand All @@ -2417,7 +2416,7 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
anchorParameter = href.match(/#([^#;]+)$/);
anchorParameter = anchorParameter ? anchorParameter[1] : '';
var zimUrl;
if (selectedArchive.zimitPrefix && ~href.indexOf(selectedArchive.zimitPrefix)) {
if (selectedArchive.zimitPrefix && ~href.indexOf(dirEntry.namespace + '/' + selectedArchive.zimitPrefix)) {
// It's already a full ZIM URL, so we can use it after stripping any anchor
zimUrl = decodeURIComponent(href.replace(/#.*/, ''));
} else {
Expand All @@ -2443,6 +2442,14 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
images.busy = true;
// Extract the image at the top of the images array and remove it from the array
var image = images.shift();
// Get any data-kiwixsrcset
var srcset = image.getAttribute('data-kiwixsrcset');
var srcsetArr = [];
if (srcset) {
// We need to get the array of images in the srcset
srcsetArr = srcset.split(',');
}
// Get the image URL
var imageUrl = image.getAttribute('data-kiwixurl');
// Decode any WebP images that are encoded as dataURIs
if (/^data:image\/webp/i.test(imageUrl)) {
Expand All @@ -2457,7 +2464,43 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
var mimetype = dirEntry.getMimetype();
uiUtil.feedNodeWithDataURI(image, 'src', content, mimetype, function () {
images.busy = false;
extractImage();
if (srcsetArr.length) {
// We need to process each image in the srcset
// Empty or make a new srcset
image.srcset = '';
var srcsetCount = srcsetArr.length;
srcsetArr.forEach(function (imgAndResolutionUrl) {
srcsetCount--;
images.busy = true;
// Get the url and the resolution from the srcset entry
var urlMatch = imgAndResolutionUrl.match(/^\s*([^\s]+)\s+([0-9.]+\w+)\s*$/);
var url = urlMatch ? urlMatch[1] : '';
var resolution = urlMatch ? urlMatch[2]: '';
selectedArchive.getDirEntryByPath(url).then(function (srcEntry) {
selectedArchive.readBinaryFile(srcEntry, function (fileDirEntry, content) {
var mimetype = srcEntry.getMimetype();
uiUtil.getDataUriFromUint8Array(content, mimetype).then(function (dataUri) {
// Add the dataUri to the srcset
image.srcset += (image.srcset ? ', ' : '') + dataUri + ' ' + resolution;
images.busy = false;
if (srcsetCount === 0) {
extractImage();
}
}).catch(function (e) {
console.error('Could not get dataUri for image:' + url, e);
images.busy = false;
if (srcsetCount === 0) extractImage();
});
});
}).catch(function (e) {
console.error('Could not find DirEntry for image:' + url, e);
images.busy = false;
if (srcsetCount === 0) extractImage();
});
});
} else {
extractImage();
}
});
});
}).catch(function (e) {
Expand Down Expand Up @@ -2509,8 +2552,8 @@ function displayArticleContentInIframe (dirEntry, htmlArticle) {
throw new Error('DirEntry ' + typeof dirEntry);
}
var mimetype = dirEntry.getMimetype();
var readFile = /^text\//i.test(mimetype) ? selectedArchive.readUtf8File : selectedArchive.readBinaryFile;
return readFile(dirEntry, function (fileDirEntry, content) {
var readFile = /^text\//i.test(mimetype) ? 'readUtf8File' : 'readBinaryFile';
return selectedArchive[readFile](dirEntry, function (fileDirEntry, content) {
var fullUrl = fileDirEntry.namespace + '/' + fileDirEntry.url;
if (params.assetsCache) selectedArchive.cssCache.set(fullUrl, content);
if (/text\/css/i.test(mimetype)) uiUtil.replaceCSSLinkWithInlineCSS(link, content);
Expand Down
32 changes: 26 additions & 6 deletions www/js/lib/uiUtil.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* uiUtil.js : Utility functions for the User Interface
*
* Copyright 2013-2020 Mossroy and contributors
* Copyright 2013-2024 Mossroy, Jaifroid and contributors
* Licence GPL v3:
*
* This file is part of Kiwix.
Expand Down Expand Up @@ -322,16 +322,35 @@ function feedNodeWithDataURI (node, nodeAttribute, content, mimeType, callback)
} else {
if (callback) callback(); // Calling back as soon as possible speeds up extraction
// In browsers that support WebP natively, or for non-WebP images, we can simply convert the Uint8Array to a data URI
// DEV: we use FileReader method because btoa fails on utf8 strings (in SVGs, for example)
// See https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem
// This native browser method is very fast: see https://stackoverflow.com/a/66046176/9727685
getDataUriFromUint8Array(content, mimeType).then(function (dataUri) {
node.setAttribute(nodeAttribute, dataUri);
}).catch(function (err) {
console.error('There was an error converting Uint8Array to data URI', err);
});
}
}

/**
 * Creates a data: URI from the given content
 *
 * This helper only produces the URI string: it does not touch the DOM. Callers that want to
 * attach the URI to an element must do so themselves once the returned promise resolves.
 *
 * @param {Uint8Array} content The binary content to convert to a URI
 * @param {String} mimeType The MIME type of the content
 * @returns {Promise<String>} A promise that resolves to the data URI, or rejects with the
 *     value passed to the FileReader's onerror handler if the content could not be read
 */
function getDataUriFromUint8Array (content, mimeType) {
    // Use FileReader method because btoa fails on utf8 strings (in SVGs, for example)
    // See https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem
    // This native browser method is very fast: see https://stackoverflow.com/a/66046176/9727685
    return new Promise((resolve, reject) => {
        var myReader = new FileReader();
        myReader.onloadend = function () {
            // If onerror has already rejected the promise, this resolve() is a harmless no-op
            var url = myReader.result;
            resolve(url);
        };
        myReader.onerror = function (err) {
            reject(err);
        };
        // Wrapping the bytes in a Blob lets the reader attach the MIME type to the data URI
        myReader.readAsDataURL(new Blob([content], { type: mimeType }));
    });
}

/**
Expand Down Expand Up @@ -1010,6 +1029,7 @@ export default {
scroller: scroller,
systemAlert: systemAlert,
feedNodeWithDataURI: feedNodeWithDataURI,
getDataUriFromUint8Array: getDataUriFromUint8Array,
determineCanvasElementsWorkaround: determineCanvasElementsWorkaround,
replaceCSSLinkWithInlineCSS: replaceCSSLinkWithInlineCSS,
deriveZimUrlFromRelativeUrl: deriveZimUrlFromRelativeUrl,
Expand Down
Loading