diff --git a/tests/init.js b/tests/init.js index eef8c2483..169d35ccb 100644 --- a/tests/init.js +++ b/tests/init.js @@ -22,6 +22,9 @@ */ 'use strict'; +// Define global params needed for tests to run on existing app code +var params = {}; + require.config({ baseUrl: 'www/js/lib', paths: { diff --git a/tests/tests.js b/tests/tests.js index 2d7cf4551..b0eb18a20 100644 --- a/tests/tests.js +++ b/tests/tests.js @@ -24,13 +24,12 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'], var localZimArchive; - /** * Make an HTTP request for a Blob and return a Promise * * @param {String} url URL to download from * @param {String} name Name to give to the Blob instance - * @returns {Promise} + * @returns {Promise} A Promise for the Blob */ function makeBlobRequest(url, name) { return new Promise(function (resolve, reject) { @@ -104,15 +103,19 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'], var float = util.readFloatFrom4Bytes(byteArray, 0); assert.equal(float, -118.625, "the IEEE_754 float should be converted as -118.625"); }); - QUnit.test("check upper/lower case variations", function(assert) { + QUnit.test("check upper/lower case variations", function (assert) { var testString1 = "téléphone"; var testString2 = "Paris"; var testString3 = "le Couvre-chef Est sur le porte-manteaux"; var testString4 = "épée"; - assert.equal(util.ucFirstLetter(testString1), "Téléphone", "The first letter should be upper-case"); - assert.equal(util.lcFirstLetter(testString2), "paris", "The first letter should be lower-case"); - assert.equal(util.ucEveryFirstLetter(testString3), "Le Couvre-Chef Est Sur Le Porte-Manteaux", "The first letter of every word should be upper-case"); - assert.equal(util.ucFirstLetter(testString4), "Épée", "The first letter should be upper-case (with accent)"); + var testString5 = '$¥€“«xριστός» †¡Ἀνέστη!”'; + var testString6 = "Καλά Νερά Μαγνησίας žižek"; + assert.equal(util.allCaseFirstLetters(testString1).indexOf("Téléphone") >= 0, true, "The first letter should be uppercase"); + assert.equal(util.allCaseFirstLetters(testString2).indexOf("paris") >= 0, true, "The first letter should be lowercase"); + assert.equal(util.allCaseFirstLetters(testString3).indexOf("Le Couvre-Chef Est Sur Le Porte-Manteaux") >= 0, true, "The first letter of every word should be uppercase"); + assert.equal(util.allCaseFirstLetters(testString4).indexOf("Épée") >= 0, true, "The first letter should be uppercase (with accent)"); + assert.equal(util.allCaseFirstLetters(testString5).indexOf('$¥€“«Xριστός» †¡ἀνέστη!”') >= 0, true, "First non-punctuation/non-currency Unicode letter should be uppercase, second (with breath mark) lowercase"); + assert.equal(util.allCaseFirstLetters(testString6, "full").indexOf("ΚΑΛΆ ΝΕΡΆ ΜΑΓΝΗΣΊΑΣ ŽIŽEK") >= 0, true, "All Unicode letters should be uppercase"); }); QUnit.test("check removal of parameters in URL", function(assert) { var testUrl1 = "A/question.html"; @@ -174,7 +177,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'], assert.equal(firstDirEntry.getTitleOrUrl() , 'A Fool for You', 'First result should be "A Fool for You"'); done(); }; - localZimArchive.findDirEntriesWithPrefix('A', 5, callbackFunction); + localZimArchive.findDirEntriesWithPrefix({prefix: 'A'}, 5, callbackFunction, true); }); QUnit.test("check findDirEntriesWithPrefix 'a'", function(assert) { var done = assert.async(); @@ -185,7 +188,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'], assert.equal(firstDirEntry.getTitleOrUrl() , 'A Fool for You', 'First result should be "A Fool for You"'); done(); }; - localZimArchive.findDirEntriesWithPrefix('a', 5, callbackFunction); + localZimArchive.findDirEntriesWithPrefix({prefix: 'a'}, 5, callbackFunction, true); }); QUnit.test("check findDirEntriesWithPrefix 'blues brothers'", function(assert) { var done = assert.async(); @@ -196,7 +199,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'], assert.equal(firstDirEntry.getTitleOrUrl() , 'Blues Brothers (film)', 'First result should be "Blues Brothers (film)"'); done(); }; - localZimArchive.findDirEntriesWithPrefix('blues brothers', 5, callbackFunction); + localZimArchive.findDirEntriesWithPrefix({prefix: 'blues brothers'}, 5, callbackFunction, true); }); QUnit.test("article '(The Night Time Is) The Right Time' correctly redirects to 'Night Time Is the Right Time'", function(assert) { var done = assert.async(); diff --git a/www/index.html b/www/index.html index 130ab4695..8f34c47d4 100644 --- a/www/index.html +++ b/www/index.html @@ -267,7 +267,7 @@

Display settings


Performance settings

-
+
Speed up archive access
@@ -305,6 +305,20 @@

Performance settings

+
+
+
+

Select max number of search results:

+
+
+
+ +
+
+
diff --git a/www/js/app.js b/www/js/app.js index 6fcd5d900..0d5f11c0d 100644 --- a/www/js/app.js +++ b/www/js/app.js @@ -29,12 +29,6 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesystemAccess','q'], function($, zimArchiveLoader, uiUtil, settingsStore, abstractFilesystemAccess, Q) { - /** - * Maximum number of articles to display in a search - * @type Integer - */ - const MAX_SEARCH_RESULT_SIZE = 50; - /** * The delay (in milliseconds) between two "keepalive" messages sent to the ServiceWorker (so that it is not stopped * by the browser, and keeps the MessageChannel to communicate with the application) @@ -70,6 +64,10 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys params['showUIAnimations'] = settingsStore.getItem('showUIAnimations') ? settingsStore.getItem('showUIAnimations') === 'true' : true; document.getElementById('hideActiveContentWarningCheck').checked = params.hideActiveContentWarning; document.getElementById('showUIAnimationsCheck').checked = params.showUIAnimations; + // Maximum number of article titles to return (range is 5 - 50, default 25) + params['maxSearchResultsSize'] = settingsStore.getItem('maxSearchResultsSize') || 25; + document.getElementById('titleSearchRange').value = params.maxSearchResultsSize; + document.getElementById('titleSearchRangeVal').innerHTML = params.maxSearchResultsSize; // A global parameter that turns caching on or off and deletes the cache (it defaults to true unless explicitly turned off in UI) params['useCache'] = settingsStore.getItem('useCache') !== 'false'; // A parameter to set the app theme and, if necessary, the CSS theme for article content (defaults to 'light') @@ -77,6 +75,14 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys document.getElementById('appThemeSelect').value = params.appTheme; uiUtil.applyAppTheme(params.appTheme); + // Define global state (declared in init.js) + // An object to hold the current search and its state (allows cancellation of search across modules) + globalstate['search'] = { + 'prefix': '', // A field to hold the original search string + 'status': '', // The status of the search: ''|'init'|'interim'|'cancelled'|'complete' + 'type': '' // The type of the search: 'basic'|'full' (set automatically in search algorithm) + }; + // Define globalDropZone (universal drop area) and configDropZone (highlighting area on Config page) var globalDropZone = document.getElementById('search-article'); var configDropZone = document.getElementById('configuration'); @@ -111,11 +117,15 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys // Define behavior of HTML elements var searchArticlesFocused = false; $('#searchArticles').on('click', function() { + var prefix = document.getElementById('prefix').value; + // Do not initiate the same search if it is already in progress + if (globalstate.search.prefix === prefix && !/^(cancelled|complete)$/.test(globalstate.search.status)) return; $("#welcomeText").hide(); $('.alert').hide(); $("#searchingArticles").show(); - pushBrowserHistoryState(null, $('#prefix').val()); - searchDirEntriesFromPrefix($('#prefix').val()); + pushBrowserHistoryState(null, prefix); + // Initiate the search + searchDirEntriesFromPrefix(prefix); $('.navbar-collapse').collapse('hide'); document.getElementById('prefix').focus(); // This flag is set to true in the mousedown event below @@ -198,7 +208,11 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys }); // Hide the search results if user moves out of prefix field $('#prefix').on('blur', function() { - if (!searchArticlesFocused) $('#articleListWithHeader').hide(); + if (!searchArticlesFocused) { + globalstate.search.status = 'cancelled'; + $("#searchingArticles").hide(); + $('#articleListWithHeader').hide(); + } }); $("#btnRandomArticle").on("click", function(e) { $('#prefix').val(""); @@ -349,6 +363,13 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys refreshCacheStatus(); } }); + document.getElementById('titleSearchRange').addEventListener('change', function(e) { + settingsStore.setItem('maxSearchResultsSize', e.target.value, Infinity); + params.maxSearchResultsSize = e.target.value; + }); + document.getElementById('titleSearchRange').addEventListener('input', function(e) { + document.getElementById('titleSearchRangeVal').innerHTML = e.target.value; + }); /** * Displays or refreshes the API status shown to the user @@ -441,7 +462,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys getCacheAttributes().then(function (cache) { document.getElementById('cacheUsed').innerHTML = cache.description; document.getElementById('assetsCount').innerHTML = cache.count; - var cacheSettings = document.getElementById('cacheSettingsDiv'); + var cacheSettings = document.getElementById('performanceSettingsDiv'); var cacheStatusPanel = document.getElementById('cacheStatusPanel'); [cacheSettings, cacheStatusPanel].forEach(function (card) { // IE11 cannot remove more than one class from a list at a time @@ -688,9 +709,13 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys if (title && !(""===title)) { goToArticle(title); } - else if (titleSearch && !(""===titleSearch)) { + else if (titleSearch && titleSearch !== '') { $('#prefix').val(titleSearch); - searchDirEntriesFromPrefix($('#prefix').val()); + if (titleSearch !== globalstate.search.prefix) { + searchDirEntriesFromPrefix(titleSearch); + } else { + $('#prefix').focus(); + } } } }; @@ -926,33 +951,33 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys /** * Handle key input in the prefix input zone - * @param {Event} evt + * @param {Event} evt The event data to handle */ function onKeyUpPrefix(evt) { // Use a timeout, so that very quick typing does not cause a lot of overhead // It is also necessary for the words suggestions to work inside Firefox OS - if(window.timeoutKeyUpPrefix) { + if (window.timeoutKeyUpPrefix) { window.clearTimeout(window.timeoutKeyUpPrefix); } - window.timeoutKeyUpPrefix = window.setTimeout(function() { + window.timeoutKeyUpPrefix = window.setTimeout(function () { var prefix = $("#prefix").val(); - if (prefix && prefix.length>0) { + if (prefix && prefix.length > 0 && prefix !== globalstate.search.prefix) { $('#searchArticles').click(); } - } - ,500); + }, 500); } - /** * Search the index for DirEntries with title that start with the given prefix (implemented * with a binary search inside the index file) - * @param {String} prefix + * @param {String} prefix The string that must appear at the start of any title searched for */ function searchDirEntriesFromPrefix(prefix) { if (selectedArchive !== null && selectedArchive.isReady()) { + // Store the new search term in the globalstate.search object and initialize + globalstate.search = {'prefix': prefix, 'status': 'init', 'type': ''}; $('#activeContent').hide(); - selectedArchive.findDirEntriesWithPrefix(prefix.trim(), MAX_SEARCH_RESULT_SIZE, populateListOfArticles); + selectedArchive.findDirEntriesWithPrefix(globalstate.search, params.maxSearchResultsSize, populateListOfArticles); } else { $('#searchingArticles').hide(); // We have to remove the focus from the search field, @@ -963,30 +988,34 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys } } - /** * Display the list of articles with the given array of DirEntry * @param {Array} dirEntryArray The array of dirEntries returned from the binary search + * @param {Object} reportingSearchPrefix The prefix of the reporting search */ - function populateListOfArticles(dirEntryArray) { + function populateListOfArticles(dirEntryArray, reportingSearchPrefix) { + // Do not allow cancelled or changed searches to report + if (globalstate.search.status === 'cancelled' || globalstate.search.prefix !== reportingSearchPrefix) return; + var stillSearching = globalstate.search.status === 'interim'; var articleListHeaderMessageDiv = $('#articleListHeaderMessage'); var nbDirEntry = dirEntryArray ? dirEntryArray.length : 0; var message; - if (nbDirEntry >= MAX_SEARCH_RESULT_SIZE) { - message = 'First ' + MAX_SEARCH_RESULT_SIZE + ' articles below (refine your search).'; + if (stillSearching) { + message = 'Searching [' + globalstate.search.type + ']... found: ' + nbDirEntry; + } else if (nbDirEntry >= params.maxSearchResultsSize) { + message = 'First ' + params.maxSearchResultsSize + ' articles found (refine your search).'; } else { - message = nbDirEntry + ' articles found.'; - } - if (nbDirEntry === 0) { - message = 'No articles found.'; + message = 'Finished. ' + (nbDirEntry ? nbDirEntry : 'No') + ' articles found' + ( + globalstate.search.type === 'basic' ? ': try fewer words for full search.' : '.' + ); } articleListHeaderMessageDiv.html(message); var articleListDiv = $('#articleList'); var articleListDivHtml = ''; - var listLength = dirEntryArray.length < MAX_SEARCH_RESULT_SIZE ? dirEntryArray.length : MAX_SEARCH_RESULT_SIZE; + var listLength = dirEntryArray.length < params.maxSearchResultsSize ? dirEntryArray.length : params.maxSearchResultsSize; for (var i = 0; i < listLength; i++) { var dirEntry = dirEntryArray[i]; var dirEntryStringId = uiUtil.htmlEscapeChars(dirEntry.toStringId()); @@ -997,13 +1026,15 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys // We have to use mousedown below instead of click as otherwise the prefix blur event fires first // and prevents this event from firing; note that touch also triggers mousedown $('#articleList a').on('mousedown', function (e) { + // Cancel search immediately + globalstate.search.status = 'cancelled'; handleTitleClick(e); return false; }); - $('#searchingArticles').hide(); + if (!stillSearching) $('#searchingArticles').hide(); $('#articleListWithHeader').show(); } - + /** * Handles the click on the title of an article in search results * @param {Event} event @@ -1058,7 +1089,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys * @param {DirEntry} dirEntry The directory entry of the article to read */ function readArticle(dirEntry) { - // Only update for expectedArticleURLToBeDisplayed. + // Reset search prefix to allow users to search the same string again if they want to + globalstate.search.prefix = ''; + // Only update for expectedArticleURLToBeDisplayed. expectedArticleURLToBeDisplayed = dirEntry.namespace + "/" + dirEntry.url; // We must remove focus from UI elements in order to deselect whichever one was clicked (in both jQuery and SW modes), // but we should not do this when opening the landing page (or else one of the Unit Tests fails, at least on Chrome 58) diff --git a/www/js/init.js b/www/js/init.js index 497c63e02..89b737fc1 100644 --- a/www/js/init.js +++ b/www/js/init.js @@ -30,6 +30,13 @@ */ var params = {}; +/** + * A global object for storing app state + * + * @type Object + */ +var globalstate = {}; + require.config({ baseUrl: 'js/lib', paths: { diff --git a/www/js/lib/util.js b/www/js/lib/util.js index e8a69ddc9..b5e2dc6a6 100644 --- a/www/js/lib/util.js +++ b/www/js/lib/util.js @@ -23,57 +23,65 @@ define(['q'], function(Q) { /** - * Utility function : return true if the given string ends with the suffix - * @param {String} str - * @param {String} suffix - * @returns {Boolean} - */ - function endsWith(str, suffix) { - return str.indexOf(suffix, str.length - suffix.length) !== -1; - } - - /** - * Returns the same String with the first letter in upper-case - * @param {String} string - * @returns {String} + * A Regular Expression to match the first letter of a word even if preceded by Unicode punctuation + * Includes currency signs and mathematical symbols: see https://stackoverflow.com/a/21396529/9727685 + * DEV: To maintain the list below, see https://github.com/slevithan/xregexp/blob/master/tools/output/categories.js + * where all the different Unicode punctuation categories can be found (simplify double backspacing before using below) + * Note that the XRegExp punctuation categories begin at !-# in list below + * @type {RegExp} */ - function ucFirstLetter(string) { - if (string && string.length >= 1) { - return string[0].toLocaleUpperCase() + string.slice(1); - } else { - return string; - } - } + var regExpFindStringParts = /(?:^|.+?)(?:[\s$£€\uFFE5^+=`~<>{}[\]|\u3000-\u303F!-#%-\x2A,-/:;\x3F@\x5B-\x5D_\x7B}\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E3B\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]+|$)/g; /** - * Returns the same String with the first letter in lower-case - * @param {String} string - * @returns {String} + * Generates an array of strings with all possible combinations of first-letter or all-letter case transformations + * If caseMatchType is not 'full', then only combinations of first-letter cases for each word are calculated + * If caseMatchType is 'full', then all-uppercase combinations of each word are added to the variations array + * NB may produce duplicate strings if string begins with punctuation or if it is in a language with no case + * @param {String} string The string to be converted + * @param {String} caseMatchType ('basic'|'full') The type (complexity) of case variations to calculate + * @return {Array} An array containing strings with all possible combinations of case types */ - function lcFirstLetter(string) { + function allCaseFirstLetters(string, caseMatchType) { if (string) { - if (string.length >= 1) { - return string.charAt(0).toLocaleLowerCase() + string.slice(1); - } else { - return string; + var comboArray = []; + // Split string into parts beginning with first word letters + var strParts = string.match(regExpFindStringParts); + // Set the base (binary or ternary) according to the complexity of the search + var base = caseMatchType === 'full' ? 3 : 2; + // If n = strParts.length, then the number of possible case combinations (numCombos) is base ^ n + // For *basic* case calculation: think of numCombos as a binary number of n bits, with each bit representing lcase (0) or ucase (1) + // For *full* case calculation: think of numCombos as a tertiary base number, e.g. 000, 111, 222, + // with each bit representing all-lowercase (0), First-Letter-Uppercase (1) or ALL-UPPERCASE (2) + var numCombos = Math.pow(base, strParts.length); + var typeCase, mixedTypeCaseStr, bitmask, caseBit; + // Iterate through every possible combination, starting with (base ^ n) - 1 and decreasing; we go from high to low, + // because title case (e.g. binary 1111) is more common than all lowercase (0000) so will be found first + for (var i = numCombos; i--;) { + mixedTypeCaseStr = ''; + bitmask = 1; + for (var j = 0; j < strParts.length; j++) { + // Get modulus of division (this is equivalent to bitwise AND for different bases) + // caseBit will be 0, 1 or 2 (latter only for 'full' case calcualation) + caseBit = ~~(i / bitmask % base); + if (caseBit === 2) { + // All uppercase + typeCase = strParts[j].toLocaleUpperCase(); + } else { + // Modify only first letter + typeCase = strParts[j].replace(/^./, function (m) { + // 1 = uppercase, 0 = lowercase + return caseBit ? m.toLocaleUpperCase() : m.toLocaleLowerCase(); + }); + } + mixedTypeCaseStr += typeCase; + // Shift bitmask to the next higher bit + bitmask *= base; + } + comboArray.push(mixedTypeCaseStr); } + return comboArray; } else { - return string; - } - } - - /** - * Returns the same String with the first letter of every word in upper-case - * @param {String} string - * @returns {String} - */ - function ucEveryFirstLetter(string) { - if (string) { - return string.replace( /\b\w/g, function (m) { - return m.toLocaleUpperCase(); - }); - } else { - return string; + return [string]; } } @@ -82,7 +90,6 @@ define(['q'], function(Q) { * (without changing the order) * It is optimized for small arrays. * Source : http://codereview.stackexchange.com/questions/60128/removing-duplicates-from-an-array-quickly - * * @param {Array} array of String * @returns {Array} same array of Strings, without duplicates */ @@ -96,6 +103,16 @@ define(['q'], function(Q) { return unique; } + /** + * Utility function : return true if the given string ends with the suffix + * @param {String} str + * @param {String} suffix + * @returns {Boolean} + */ + function endsWith(str, suffix) { + return str.indexOf(suffix, str.length - suffix.length) !== -1; + } + /** * Read an integer encoded in 4 bytes, little endian * @param {Array} byteArray @@ -288,11 +305,9 @@ define(['q'], function(Q) { * Functions and classes exposed by this module */ return { - endsWith: endsWith, - ucFirstLetter: ucFirstLetter, - lcFirstLetter: lcFirstLetter, - ucEveryFirstLetter: ucEveryFirstLetter, + allCaseFirstLetters: allCaseFirstLetters, removeDuplicateStringsInSmallArray: removeDuplicateStringsInSmallArray, + endsWith: endsWith, readIntegerFrom4Bytes: readIntegerFrom4Bytes, readIntegerFrom2Bytes : readIntegerFrom2Bytes, readFloatFrom4Bytes : readFloatFrom4Bytes, diff --git a/www/js/lib/zimArchive.js b/www/js/lib/zimArchive.js index 614cef9a5..a30d88cfd 100644 --- a/www/js/lib/zimArchive.js +++ b/www/js/lib/zimArchive.js @@ -142,31 +142,72 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], */ /** - * Look for DirEntries with title starting with the given prefix. + * Look for DirEntries with title starting with the prefix of the current search object. * For now, ZIM titles are case sensitive. * So, as workaround, we try several variants of the prefix to find more results. * This should be enhanced when the ZIM format will be modified to store normalized titles * See https://phabricator.wikimedia.org/T108536 * - * @param {String} prefix - * @param {Integer} resultSize - * @param {callbackDirEntryList} callback + * @param {Object} search The current globalstate.search object + * @param {Integer} resultSize The number of dirEntries to find + * @param {callbackDirEntryList} callback The function to call with the result + * @param {Boolean} noInterim A flag to prevent callback until all results are ready (used in testing) */ - ZIMArchive.prototype.findDirEntriesWithPrefix = function(prefix, resultSize, callback) { + ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, resultSize, callback, noInterim) { + // Create a local invariable copy of the search prefix + const localPrefix = search.prefix; var that = this; - var prefixVariants = util.removeDuplicateStringsInSmallArray([prefix, util.ucFirstLetter(prefix), util.lcFirstLetter(prefix), util.ucEveryFirstLetter(prefix)]); + // Establish array of initial values that must be searched first. All of these patterns are generated by the full + // search type, and some by basic, but we need the most common patterns to be searched first, as it returns search + // results much more quickly if we do this (and the user can click on a result before the rarer patterns complete) + // NB duplicates are removed before processing search array + var startArray = []; + // Ensure a search is done on the string exactly as typed + startArray.push(localPrefix); + // Normalize any spacing and make string all lowercase + var prefix = localPrefix.replace(/\s+/g, ' ').toLocaleLowerCase(); + // Add lowercase string with initial uppercase (this is a very common pattern) + startArray.push(prefix.replace(/^./, function (m) { + return m.toLocaleUpperCase(); + })); + // Get the full array of combinations to check number of combinations + var fullCombos = util.removeDuplicateStringsInSmallArray(util.allCaseFirstLetters(prefix, 'full')); + // Put cap on exponential number of combinations (five words = 3^5 = 243 combinations) + search.type = fullCombos.length < 200 ? 'full' : 'basic'; + // We have to remove duplicate string combinations because util.allCaseFirstLetters() can return some combinations + // where uppercase and lowercase combinations are exactly the same, e.g. where prefix begins with punctuation + // or currency signs, for languages without case, or where user-entered case duplicates calculated case + var prefixVariants = util.removeDuplicateStringsInSmallArray( + startArray.concat( + // Get basic combinations first for speed of returning results + util.allCaseFirstLetters(prefix).concat( + search.type === 'full' ? fullCombos : [] + ) + ) + ); var dirEntries = []; + function searchNextVariant() { + // If user has initiated a new search, cancel this one + if (search.status === 'cancelled' || search.prefix !== localPrefix) return callback([], localPrefix); if (prefixVariants.length === 0 || dirEntries.length >= resultSize) { - callback(dirEntries); - return; + search.status = 'complete'; + return callback(dirEntries, localPrefix); } + // Dynamically populate list of articles + search.status = 'interim'; + if (!noInterim) callback(dirEntries, localPrefix); var prefix = prefixVariants[0]; prefixVariants = prefixVariants.slice(1); - that.findDirEntriesWithPrefixCaseSensitive(prefix, resultSize - dirEntries.length, function (newDirEntries) { - dirEntries.push.apply(dirEntries, newDirEntries); - searchNextVariant(); - }); + that.findDirEntriesWithPrefixCaseSensitive(prefix, resultSize - dirEntries.length, localPrefix, search, + function (newDirEntries, interim) { + if (search.status === 'cancelled' || search.prefix !== localPrefix) return callback([], localPrefix); + if (interim) {// Only push interim results (else results will be pushed again at end of variant loop) + [].push.apply(dirEntries, newDirEntries); + if (!noInterim && newDirEntries.length) callback(dirEntries, localPrefix); + } else searchNextVariant(); + } + ); } searchNextVariant(); }; @@ -176,26 +217,32 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'], * * @param {String} prefix The case-sensitive value against which dirEntry titles (or url) will be compared * @param {Integer} resultSize The maximum number of results to return + * @param {String} originalPrefix The original prefix typed by the user to initiate the local search + * @param {Object} search The globalstate.search object (for comparison, so that we can cancel long binary searches) * @param {callbackDirEntryList} callback The function to call with the array of dirEntries with titles that begin with prefix */ - ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, resultSize, callback) { + ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, resultSize, originalPrefix, search, callback) { var that = this; util.binarySearch(0, this._file.articleCount, function(i) { return that._file.dirEntryByTitleIndex(i).then(function(dirEntry) { - if (dirEntry.namespace < "A") return 1; - if (dirEntry.namespace > "A") return -1; + if (dirEntry.namespace < 'A') return 1; + if (dirEntry.namespace > 'A') return -1; // We should now be in namespace A return prefix <= dirEntry.getTitleOrUrl() ? -1 : 1; }); }, true).then(function(firstIndex) { var dirEntries = []; var addDirEntries = function(index) { - if (index >= firstIndex + resultSize || index >= that._file.articleCount) + if (search.status === 'cancelled' || search.prefix !== originalPrefix || index >= firstIndex + resultSize || index >= that._file.articleCount) return dirEntries; return that._file.dirEntryByTitleIndex(index).then(function(dirEntry) { var title = dirEntry.getTitleOrUrl(); - if (!title.indexOf(prefix) && dirEntry.namespace === "A") + // Only return dirEntries with titles that actually begin with prefix + if (dirEntry.namespace === 'A' && title.indexOf(prefix) === 0) { dirEntries.push(dirEntry); + // Report interim result + callback([dirEntry], true); + } return addDirEntries(index + 1); }); };