Skip to content

Commit

Permalink
Optimize title search by removing redundant cycles (#733)
Browse files Browse the repository at this point in the history
Fixes #696 (now gets up to 100 search results) and optimizes the code for #637.
  • Loading branch information
Jaifroid authored Jul 18, 2021
1 parent c82b439 commit 5270bcf
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 19 deletions.
6 changes: 3 additions & 3 deletions tests/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
assert.equal(firstDirEntry.getTitleOrUrl() , 'A Fool for You', 'First result should be "A Fool for You"');
done();
};
localZimArchive.findDirEntriesWithPrefix({prefix: 'A'}, 5, callbackFunction, true);
localZimArchive.findDirEntriesWithPrefix({prefix: 'A', size: 5}, callbackFunction, true);
});
QUnit.test("check findDirEntriesWithPrefix 'a'", function(assert) {
var done = assert.async();
Expand All @@ -188,7 +188,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
assert.equal(firstDirEntry.getTitleOrUrl() , 'A Fool for You', 'First result should be "A Fool for You"');
done();
};
localZimArchive.findDirEntriesWithPrefix({prefix: 'a'}, 5, callbackFunction, true);
localZimArchive.findDirEntriesWithPrefix({prefix: 'a', size: 5}, callbackFunction, true);
});
QUnit.test("check findDirEntriesWithPrefix 'blues brothers'", function(assert) {
var done = assert.async();
Expand All @@ -199,7 +199,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
assert.equal(firstDirEntry.getTitleOrUrl() , 'Blues Brothers (film)', 'First result should be "Blues Brothers (film)"');
done();
};
localZimArchive.findDirEntriesWithPrefix({prefix: 'blues brothers'}, 5, callbackFunction, true);
localZimArchive.findDirEntriesWithPrefix({prefix: 'blues brothers', size: 5}, callbackFunction, true);
});
QUnit.test("article '(The Night Time Is) The Right Time' correctly redirects to 'Night Time Is the Right Time'", function(assert) {
var done = assert.async();
Expand Down
22 changes: 21 additions & 1 deletion www/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ <h2 id="contents">
<ul>
<li><a href="#aboutApp">About this App</a></li>
<li><a href="#usage">Usage</a></li>
<li><a href="#searchSyntax">Title Search Usage</a></li>
<li><a href="#privacy">Privacy Policy</a></li>
<li><a href="#expert">Expert Settings and Technical Information</a>
<ul>
Expand Down Expand Up @@ -189,6 +190,25 @@ <h4>Step 3: Drag and drop the file into the app</h4>
<h4>Step 4: Enjoy your offline content!</h4>
<p style="text-align: right"><a href="#contents">↑ Back to Contents</a></p>

<h3 id="searchSyntax">Title search usage</h3>
<p>
Title search matches the <b>start</b> of an article title, so if you search for 'France' you will get 'France (country)', 'France (disambiguation)',
'France (film)', etc. This kind of search tries to be <b>case-insensitive</b>, but the number of case variants tried is necessarily limited: e.g. if you
search for 'unesco world heritiage', it will try 'Unesco world heritage', 'UNESCO world heritage', 'unesco World heritage', 'Unesco WORLD heritage'
(etc.), but it won't try 'uNesCO' and other non-obvious case combinations. If you want to search for 'uNesCO', then you must type it exactly like that.
</p>
<p>
If not enough results are returned, you can increase the maximum number of search results using the slider in Configuration. Please note that we are
currently unable to support full text search of the contents of articles and titles, though we have plans to do so in the future if it becomes technically
possible.
</p>
<p>
<b>Alphabetical search:</b> If you type a letter of the alphabet in the title search box (upper case may be most useful), it will show an alphabetical
list of articles starting with that letter (up to the maximum number of search results you selected in Configuration). This is useful for ZIM archives
that have descriptive rather than semantic titles (e.g. TED Talks), expecially in <a href="#modes">JQuery mode</a>.
</p>
<p style="text-align: right"><a href="#contents">↑ Back to Contents</a></p>

<h3 id="privacy">Privacy policy</h3>

<h4>Short version:</h4>
Expand Down Expand Up @@ -489,7 +509,7 @@ <h3>Performance settings</h3>
<div class="col-sm-8">
<div class="range">
<label>
<input type="range" class="form-control-range" min="5" max="50" id="titleSearchRange" value="25">
<input type="range" class="form-control-range" min="5" max="100" id="titleSearchRange" value="25">
<span>Value: <span id="titleSearchRangeVal"></span> (<i>default 25, higher values increase search time</i>)</span>
</label>
</div>
Expand Down
4 changes: 2 additions & 2 deletions www/js/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -1032,9 +1032,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
// DEV: Technical explanation: the appstate.search is a pointer to an underlying object assigned in memory, and we are here defining a new object
// in memory {'prefix': prefix, 'status': 'init', .....}, and pointing appstate.search to it; the old search object that was passed to selectedArchive
// (zimArchive.js) continues to exist in the scope of the functions initiated by the previous search until all Promises have returned
appstate.search = {'prefix': prefix, 'status': 'init', 'type': ''};
appstate.search = {'prefix': prefix, 'status': 'init', 'type': '', 'size': params.maxSearchResultsSize};
$('#activeContent').hide();
selectedArchive.findDirEntriesWithPrefix(appstate.search, params.maxSearchResultsSize, populateListOfArticles);
selectedArchive.findDirEntriesWithPrefix(appstate.search, populateListOfArticles);
} else {
$('#searchingArticles').hide();
// We have to remove the focus from the search field,
Expand Down
30 changes: 17 additions & 13 deletions www/js/lib/zimArchive.js
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,10 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
* See https://phabricator.wikimedia.org/T108536
*
* @param {Object} search The current appstate.search object
* @param {Integer} resultSize The number of dirEntries to find
* @param {callbackDirEntryList} callback The function to call with the result
* @param {Boolean} noInterim A flag to prevent callback until all results are ready (used in testing)
*/
ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, resultSize, callback, noInterim) {
ZIMArchive.prototype.findDirEntriesWithPrefix = function (search, callback, noInterim) {
var that = this;
// Establish array of initial values that must be searched first. All of these patterns are generated by the full
// search type, and some by basic, but we need the most common patterns to be searched first, as it returns search
Expand All @@ -193,7 +192,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
// Get the full array of combinations to check number of combinations
var fullCombos = util.removeDuplicateStringsInSmallArray(util.allCaseFirstLetters(prefix, 'full'));
// Put cap on exponential number of combinations (five words = 3^5 = 243 combinations)
search.type = fullCombos.length < 200 ? 'full' : 'basic';
search.type = fullCombos.length < 300 ? 'full' : 'basic';
// We have to remove duplicate string combinations because util.allCaseFirstLetters() can return some combinations
// where uppercase and lowercase combinations are exactly the same, e.g. where prefix begins with punctuation
// or currency signs, for languages without case, or where user-entered case duplicates calculated case
Expand All @@ -210,20 +209,22 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
function searchNextVariant() {
// If user has initiated a new search, cancel this one
if (search.status === 'cancelled') return callback([], search);
if (prefixVariants.length === 0 || dirEntries.length >= resultSize) {
if (prefixVariants.length === 0 || dirEntries.length >= search.size) {
search.status = 'complete';
return callback(dirEntries, search);
}
// Dynamically populate list of articles
search.status = 'interim';
if (!noInterim) callback(dirEntries, search);
search.found = dirEntries.length;
var prefix = prefixVariants[0];
prefixVariants = prefixVariants.slice(1);
that.findDirEntriesWithPrefixCaseSensitive(prefix, resultSize - dirEntries.length, search,
that.findDirEntriesWithPrefixCaseSensitive(prefix, search,
function (newDirEntries, interim) {
if (search.status === 'cancelled') return callback([], search);
if (interim) {// Only push interim results (else results will be pushed again at end of variant loop)
[].push.apply(dirEntries, newDirEntries);
search.found = dirEntries.length;
if (!noInterim && newDirEntries.length) callback(dirEntries, search);
} else searchNextVariant();
}
Expand Down Expand Up @@ -260,11 +261,10 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
* Look for dirEntries with title starting with the given prefix (case-sensitive)
*
* @param {String} prefix The case-sensitive value against which dirEntry titles (or url) will be compared
* @param {Integer} resultSize The maximum number of results to return
* @param {Object} search The appstate.search object (for comparison, so that we can cancel long binary searches)
* @param {callbackDirEntryList} callback The function to call with the array of dirEntries with titles that begin with prefix
*/
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, resultSize, search, callback) {
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, search, callback) {
var that = this;
var cns = this.getContentNamespace();
// Search v1 article listing if available, otherwise fallback to v0
Expand All @@ -281,20 +281,24 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
return prefix <= dirEntry.getTitleOrUrl() ? -1 : 1;
});
}, true).then(function(firstIndex) {
var dirEntries = [];
var addDirEntries = function(index) {
if (search.status === 'cancelled' || index >= firstIndex + resultSize || index >= articleCount) {
return dirEntries;
var vDirEntries = [];
var addDirEntries = function(index, lastTitle) {
if (search.status === 'cancelled' || search.found >= search.size || index >= articleCount
|| lastTitle && !~lastTitle.indexOf(prefix)) {
// DEV: Diagnostics to be removed before merge
if (vDirEntries.length) console.debug('Scanned ' + (index - firstIndex) + ' titles for "' + prefix +
'" (found ' + vDirEntries.length + ' match' + (vDirEntries.length === 1 ? ')' : 'es)'));
return vDirEntries;
}
return that._file.dirEntryByTitleIndex(index).then(function(dirEntry) {
var title = dirEntry.getTitleOrUrl();
// Only return dirEntries with titles that actually begin with prefix
if (dirEntry.namespace === cns && title.indexOf(prefix) === 0) {
dirEntries.push(dirEntry);
vDirEntries.push(dirEntry);
// Report interim result
callback([dirEntry], true);
}
return addDirEntries(index + 1);
return addDirEntries(index + 1, title);
});
};
return addDirEntries(firstIndex);
Expand Down

0 comments on commit 5270bcf

Please sign in to comment.