Skip to content

Commit

Permalink
Handle glossary link entries (#474)
Browse files Browse the repository at this point in the history
* Handle glossary link entries

* Minor changes

* Lint format issues

* Eliminate glossary reference in ScriptureViewSophria and general cleanup

* Check in isBibleBook

* Check in tests for isBibleBook

* Fix tests and commit changes to make tests run independently of installed application

* Fix lint check

* Fix test imports

* Added comments
  • Loading branch information
davidmoore1 authored Apr 26, 2024
1 parent e463432 commit 1e7e8f7
Show file tree
Hide file tree
Showing 18 changed files with 17,878 additions and 69 deletions.
1 change: 1 addition & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ data
src/config.js
static
example_data
test_data

# Ignore files for PNPM, NPM and YARN
pnpm-lock.yaml
Expand Down
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ src/lib/data/catalog.js
src/lib/data/firebase-config.js
static
example_data
test_data

# Ignore files for PNPM, NPM and YARN
pnpm-lock.yaml
Expand Down
33 changes: 30 additions & 3 deletions scripts/convertBooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@

import { ConfigTaskOutput } from './convertConfig';
import { TaskOutput, Task, Promisable } from './Task';
import { readFile, writeFile, writeFileSync, mkdirSync, existsSync } from 'fs';
import { readFile, readFileSync, writeFile, writeFileSync, mkdirSync, existsSync } from 'fs';
import path from 'path';
import { SABProskomma } from '../sab-proskomma';
import { queries, postQueries, freeze } from '../sab-proskomma-tools';
import { convertMarkdownsToMilestones } from './convertMarkdown';
import { verifyGlossaryEntries } from './verifyGlossaryEntries';

/**
* Loops through bookCollections property of configData.
Expand All @@ -24,7 +25,26 @@ function replaceVideoTags(text: string, _bcId: string, _bookId: string): string
/**
 * Rewrites every `\page <id>` marker in the text as a `\zpage |id="<id>"\*` milestone.
 * The id is everything after `\page ` up to the end of that line.
 *
 * @param text USFM text to transform.
 * @param _bcId Unused; present so all filters share one signature.
 * @param _bookId Unused; present so all filters share one signature.
 * @returns The text with all page markers converted.
 */
function replacePageTags(text: string, _bcId: string, _bookId: string): string {
    const pageMarker = /\\page (.*)/g;
    // `$1` is substituted with the captured page id by String.prototype.replace.
    const pageMilestone = '\\zpage |id="$1"\\*';
    return text.replace(pageMarker, pageMilestone);
}

/**
 * Collects the glossary keys of a book collection.
 *
 * Every book whose `type` is `'glossary'` is read from
 * `<dataDir>/books/<collection.id>/<book.file>` and the text of each
 * `\k ...\k*` span is extracted.
 *
 * @param collection Book collection; its `id` and `books` (each with `type` and `file`) are read.
 * @param configData Not read by this function; kept so existing callers keep working.
 * @param dataDir Root directory that contains the `books` folder.
 * @returns The glossary keys in file order, trimmed, with blank keys omitted.
 * @throws Whatever `readFileSync` throws when a glossary file cannot be read.
 */
function loadGlossary(collection: any, configData: ConfigTaskOutput, dataDir: string): string[] {
    // Group 1 is the text between `\k` and `\k*`. `[^\\]+` is greedy, so it also
    // swallows any whitespace in front of the closing marker; keys are trimmed below.
    const keyPattern = /\\k\s*([^\\]+)\s*\\k\*/g;
    const glossary: string[] = [];
    for (const book of collection.books) {
        if (book.type !== 'glossary') {
            continue;
        }
        const glossaryContent = readFileSync(
            path.join(dataDir, 'books', collection.id, book.file),
            'utf8'
        );
        for (const match of glossaryContent.matchAll(keyPattern)) {
            const key = match[1].trim();
            // A span holding only whitespace carries no usable key.
            if (key.length > 0) {
                glossary.push(key);
            }
        }
    }
    return glossary;
}
function removeStrongNumberReferences(text: string, _bcId: string, _bookId: string): string {
//remove strong number references
// \v 1 \w In|strong="H0430"\w* \w the|strong="H0853"\w* \w beginning|strong="H7225"\w*, (Gen 1:1 WEBBE)
Expand Down Expand Up @@ -102,6 +122,7 @@ export async function convertBooks(
for (const collection of collections!) {
const pk = new SABProskomma();
const lang = collection.languageCode;
let bcGlossary: string[] = [];
if (verbose && usedLangs.has(lang)) {
console.warn(`Language ${lang} already used in another collection. Proceeding anyway.`);
}
Expand All @@ -115,6 +136,10 @@ export async function convertBooks(
const docs: Promise<void>[] = [];
//loop through books in collection
const ignoredBooks = [];
// If the collection has a glossary, load it
if (configData.data.traits['has-glossary']) {
bcGlossary = loadGlossary(collection, configData, dataDir);
}
for (const book of collection.books) {
if (book.type && unsupportedBookTypes.includes(book.type)) {
// Ignore non-default books for now
Expand All @@ -132,7 +157,9 @@ export async function convertBooks(
if (err) throw err;
process.stdout.write(` ${book.id}`);
content = applyFilters(content, bcId, book.id);

if (configData.data.traits['has-glossary']) {
content = verifyGlossaryEntries(content, bcGlossary);
}
//query Proskomma with a mutation to add a document
//more efficient than original pk.addDocument call
//as it can be run asynchronously
Expand Down
1 change: 0 additions & 1 deletion scripts/convertConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,6 @@ function filterFeaturesNotReady(data: ConfigData) {
data.mainFeatures['share-apple-app-link'] = false;

// Some settings are not done
data.mainFeatures['settings-glossary-links'] = false;
data.mainFeatures['settings-verse-of-the-day'] = false;
data.mainFeatures['settings-verse-of-the-day-time'] = false;
data.mainFeatures['settings-verse-of-the-day-book-collection'] = false;
Expand Down
5 changes: 1 addition & 4 deletions scripts/convertMarkdown.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ import path from 'path';
import { convertMarkdownsToMilestones } from './convertMarkdown';

describe('convertMarkdown', () => {
const data = readFileSync(
path.join('example_data', 'books', 'C01', '01GENengWEBbd.usfm'),
'utf8'
);
const data = readFileSync(path.join('test_data', 'books', 'C01', '01GENengWEBbd.usfm'), 'utf8');
let modifiedContent: string;
beforeEach(() => {
modifiedContent = convertMarkdownsToMilestones(data, 'C01', 'GEN');
Expand Down
15 changes: 15 additions & 0 deletions scripts/stringUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,18 @@ export function padWithInitialZeros(input: string, length: number): string {

return result;
}
/**
 * Compares two strings with `localeCompare` using `sensitivity: 'accent'`,
 * i.e. letters that differ only by case compare equal while letters that
 * differ by accent do not.
 */
function ciEqualsInner(a: string, b: string): boolean {
    return a.localeCompare(b, undefined, { sensitivity: 'accent' }) === 0;
}

// Feature detection, evaluated once: true when this runtime's localeCompare
// honours the `sensitivity` option (then 'A' and 'a' compare equal).
const localeCompareIgnoresCase = ciEqualsInner('A', 'a');

/**
 * Case-insensitive equality.
 *
 * @param a First value.
 * @param b Second value.
 * @returns For two strings, whether they are equal ignoring case; when either
 *          argument is not a string, the result of strict equality (`===`).
 */
export function ciEquals(a: unknown, b: unknown): boolean {
    if (typeof a !== 'string' || typeof b !== 'string') {
        return a === b;
    }

    return localeCompareIgnoresCase
        ? ciEqualsInner(a, b)
        : // Fallback when localeCompare ignores the options argument.
          a.toUpperCase() === b.toUpperCase();
}
95 changes: 95 additions & 0 deletions scripts/verifyGlossaryEntries.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { describe, expect, beforeEach, it, test } from 'vitest';
import { readFile, readFileSync, writeFile, writeFileSync, mkdirSync, existsSync } from 'fs';
import path from 'path';
import { verifyGlossaryEntries } from './verifyGlossaryEntries';

describe('verifyGlossaryEntries', () => {
    // USFM fixture shared by every scenario; the assertions below look at its
    // serpent / trees-of-the-garden sentences.
    const data = readFileSync(path.join('test_data', 'books', 'C01', '01GENengWEBbd.usfm'), 'utf8');

    interface Expectation {
        name: string;
        expected: string;
    }

    // Markup that must survive when the word (or its lemma) is in the glossary.
    const keepsSimple: Expectation = {
        name: 'leaves in place simple entry',
        expected: 'Now the \\w serpent\\w* was more'
    };
    const keepsExtraSpace: Expectation = {
        name: 'leaves in place entry with an extra space',
        expected: 'more \\w subtle \\w*than any animal'
    };
    const keepsLemma: Expectation = {
        name: 'leaves in place entry using lemma',
        expected: 'We may eat fruit from the \\w trees|tree \\w* of the garden'
    };

    // Plain text that must remain once the markup has been stripped.
    const plainSerpent = 'Now the serpent was more';
    const plainSubtle = 'more subtle than any animal';
    const plainTrees = 'We may eat fruit from the trees of the garden';

    /**
     * Registers one `describe` group: the fixture is re-processed with the
     * given glossary before each test, and each expectation becomes one `it`
     * asserting that the processed content contains the expected substring.
     */
    function scenario(title: string, glossary: string[], expectations: Expectation[]): void {
        describe(title, () => {
            let processed: string;
            beforeEach(() => {
                // Hand over a fresh array on every run.
                processed = verifyGlossaryEntries(data, [...glossary]);
            });
            for (const { name, expected } of expectations) {
                it(name, () => {
                    expect(processed).toContain(expected);
                });
            }
        });
    }

    scenario(
        'with all entries in the glossary',
        ['excess', 'serpent', 'middle', 'subtle', 'tree', 'extra'],
        [keepsSimple, keepsExtraSpace, keepsLemma]
    );

    scenario(
        'with all entries case mismatch',
        ['Excess', 'Serpent', 'Middle', 'Subtle', 'Tree', 'Extra'],
        [keepsSimple, keepsExtraSpace, keepsLemma]
    );

    scenario(
        'with one mismatch',
        ['excess', 'serpent', 'middle', 'subtle', 'trees', 'extra'],
        [
            keepsSimple,
            keepsExtraSpace,
            { name: 'removes when matches first but not lemma', expected: plainTrees }
        ]
    );

    scenario(
        'with missing entries in the glossary',
        ['excess', 'serpent', 'more', 'middle', 'tree', 'extra'],
        [keepsSimple, { name: 'removes mismatched entry', expected: plainSubtle }, keepsLemma]
    );

    scenario(
        'with empty glossary',
        [],
        [
            { name: 'removes simple entry', expected: plainSerpent },
            { name: 'removes entry with an extra space', expected: plainSubtle },
            { name: 'removes entry using lemma', expected: plainTrees }
        ]
    );
});
52 changes: 52 additions & 0 deletions scripts/verifyGlossaryEntries.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { ciEquals } from './stringUtils';

/**
 * Removes `\w ...\w*` glossary-word markup from content for words that have
 * no entry in the glossary.
 *
 * For each `\w ...\w*` span, the word to look up is the part after `|` when
 * one is present, otherwise the marked text itself. If it equals a glossary
 * entry (ignoring case and surrounding whitespace) the span is kept
 * unchanged; otherwise the whole span is replaced by the text in front of
 * the `|`.
 *
 * @param content Text that may contain `\w ...\w*` spans.
 * @param glossary Known glossary keys.
 * @returns The content with unmatched spans reduced to their plain text.
 */
export function verifyGlossaryEntries(content: string, glossary: string[]): string {
    // Group 1 is everything between `\w` and `\w*` except leading whitespace:
    // the marked text plus an optional `|lemma` part.
    const glossaryWord = /\\w\s*([^\\]+)\s*\\w\*/g;
    // Trim the keys once rather than once per marked word.
    const keys = glossary.map((glossaryEntry) => glossaryEntry.trim());

    // A function replacer is used so the returned text is inserted literally
    // (no `$` pattern expansion).
    return content.replace(glossaryWord, (originalEntry: string, inner: string) => {
        const matchWord = entryToMatch(inner);
        const matchFound = keys.some((key) => ciEquals(key, matchWord));
        return matchFound ? originalEntry : textFromMatch(inner);
    });
}
/**
 * Picks the word to look up in the glossary from the inside of a `\w` span:
 * the second `|`-separated field when there is one, otherwise the only
 * field. Surrounding whitespace is stripped.
 */
function entryToMatch(match: string): string {
    // `lemma` falls back to the leading text when the span has no `|`.
    const [text, lemma = text] = match.split('|');
    return lemma.trim();
}
/**
 * Returns the part of the span content that precedes the first `|`
 * (untrimmed), or the whole string when it contains no `|`.
 */
function textFromMatch(match: string): string {
    const barIndex = match.indexOf('|');
    if (barIndex === -1) {
        return match;
    }
    return match.slice(0, barIndex);
}
Loading

0 comments on commit 1e7e8f7

Please sign in to comment.