From d16df6d73fe1e80a97c820cbe9af80bdecd2ccb4 Mon Sep 17 00:00:00 2001
From: kennsippell
Date: Wed, 6 Nov 2024 17:40:46 -0700
Subject: [PATCH] Testing this common library going to get weird

---
 .gitignore               |   1 +
 src/fn/merge-contacts.js | 211 +++++++++++++++++++++++++++++++++++++++
 src/fn/move-contacts.js  | 130 +++----------------------
 src/lib/mm-shared.js     | 153 ++++++++++++++++++++++++++++++
 4 files changed, 377 insertions(+), 118 deletions(-)
 create mode 100644 src/fn/merge-contacts.js
 create mode 100644 src/lib/mm-shared.js

diff --git a/.gitignore b/.gitignore
index 39c909fa0..e1a85a64d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ upload-docs.*.log.json
 /.vscode/
 /.idea/
 /.settings/
+/json_docs/
 *.swp
 coverage
 .nyc_output
diff --git a/src/fn/merge-contacts.js b/src/fn/merge-contacts.js
new file mode 100644
index 000000000..963ae6aab
--- /dev/null
+++ b/src/fn/merge-contacts.js
@@ -0,0 +1,211 @@
+const minimist = require('minimist');
+const path = require('path');
+
+const environment = require('../lib/environment');
+const lineageManipulation = require('../lib/lineage-manipulation');
+const lineageConstraints = require('../lib/lineage-constraints');
+const pouch = require('../lib/db');
+const { trace, info } = require('../lib/log');
+
+const {
+  BATCH_SIZE,
+  prepareDocumentDirectory,
+  prettyPrintDocument,
+  replaceLineageInAncestors,
+  bold,
+  writeDocumentToDisk,
+  fetch,
+} = require('../lib/mm-shared');
+
+module.exports = {
+  requiresInstance: true,
+  execute: () => {
+    const args = parseExtraArgs(environment.pathToProject, environment.extraArgs);
+    const db = pouch();
+    prepareDocumentDirectory(args);
+    return updateLineagesAndStage(args, db);
+  }
+};
+
+/*
+For each loser: stages a deletion stub for the loser, re-parents its descendants under the winner,
+updates affected ancestors, and re-assigns the loser's reports. All changes are written via writeDocumentToDisk.
+*/
+const updateLineagesAndStage = async (options, db) => {
+  trace(`Fetching contact details: ${options.winnerId}`);
+  const winnerDoc = await fetch.contact(db, options.winnerId);
+
+  const constraints = await lineageConstraints(db, winnerDoc);
+  const loserDocs = await fetch.contactList(db, options.loserIds);
+  await validateContacts(loserDocs, constraints);
+
+  let affectedContactCount = 0, affectedReportCount = 0;
+  const replacementLineage = lineageManipulation.createLineageFromDoc(winnerDoc);
+  for (const loserId of options.loserIds) {
+    const contactDoc = loserDocs[loserId];
+    const descendantsAndSelf = await fetch.descendantsOf(db, loserId);
+
+    const self = descendantsAndSelf.find(d => d._id === loserId);
+    writeDocumentToDisk(options, {
+      _id: self._id,
+      _rev: self._rev,
+      _deleted: true,
+    });
+
+    // Check that primary contact is not removed from areas where they are required
+    const invalidPrimaryContactDoc = await constraints.getPrimaryContactViolations(contactDoc, descendantsAndSelf);
+    if (invalidPrimaryContactDoc) {
+      throw Error(`Cannot remove contact ${prettyPrintDocument(invalidPrimaryContactDoc)} from the hierarchy for which they are a primary contact.`);
+    }
+
+    trace(`Considering lineage updates to ${descendantsAndSelf.length} descendant(s) of contact ${prettyPrintDocument(contactDoc)}.`);
+    const updatedDescendants = replaceLineageInContacts(descendantsAndSelf, replacementLineage, loserId);
+
+    const ancestors = await fetch.ancestorsOf(db, contactDoc);
+    trace(`Considering primary contact updates to ${ancestors.length} ancestor(s) of contact ${prettyPrintDocument(contactDoc)}.`);
+    const updatedAncestors = replaceLineageInAncestors(descendantsAndSelf, ancestors);
+
+    minifyLineageAndWriteToDisk([...updatedDescendants, ...updatedAncestors], options);
+
+    const movedReportsCount = await moveReports(db, descendantsAndSelf, options, options.winnerId, loserId);
+    trace(`${movedReportsCount} report(s) created by these affected contact(s) will be updated`);
+
+    affectedContactCount += updatedDescendants.length + updatedAncestors.length;
+    affectedReportCount += movedReportsCount;
+
+    info(`Staged updates to ${prettyPrintDocument(contactDoc)}. ${updatedDescendants.length} contact(s) and ${movedReportsCount} report(s).`);
+  }
+
+  info(`Staged changes to lineage information for ${affectedContactCount} contact(s) and ${affectedReportCount} report(s).`);
+};
+
+/*
+Checks for any errors which this will create in the hierarchy (hierarchy schema, circular hierarchies)
+Confirms the list of contacts are possible to move
+*/
+const validateContacts = async (contactDocs, constraints) => {
+  Object.values(contactDocs).forEach(doc => {
+    const hierarchyError = constraints.getHierarchyErrors(doc);
+    if (hierarchyError) {
+      throw Error(`Hierarchy Constraints: ${hierarchyError}`);
+    }
+  });
+
+  /*
+  It is nice that the tool can move lists of contacts as one operation, but strange things happen when two loserIds are in the same lineage.
+  For example, moving a district_hospital and moving a contact under that district_hospital to a new clinic causes multiple colliding writes to the same json file.
+  */
+  const loserIds = Object.keys(contactDocs);
+  Object.values(contactDocs)
+    .forEach(doc => {
+      const parentIdsOfDoc = (doc.parent && lineageManipulation.pluckIdsFromLineage(doc.parent)) || [];
+      const violatingParentId = parentIdsOfDoc.find(parentId => loserIds.includes(parentId));
+      if (violatingParentId) {
+        throw Error(`Unable to move two documents from the same lineage: '${doc._id}' and '${violatingParentId}'`);
+      }
+    });
+};
+
+// Parses extraArgs and asserts if required parameters are not present
+const parseExtraArgs = (projectDir, extraArgs = []) => {
+  const args = minimist(extraArgs, { boolean: true });
+
+  const loserIds = (args.losers || args.loser || '')
+    .split(',')
+    .filter(Boolean);
+
+  if (loserIds.length === 0) {
+    usage();
+    throw Error(`Action "merge-contacts" is missing required list of contacts ${bold('--losers')} to be merged into the winner`);
+  }
+
+  if (!args.winner) {
+    usage();
+    throw Error(`Action "merge-contacts" is missing required parameter ${bold('--winner')}`);
+  }
+
+  return {
+    winnerId: args.winner,
+    loserIds,
+    docDirectoryPath: path.resolve(projectDir, args.docDirectoryPath || 'json_docs'),
+    force: !!args.force,
+  };
+};
+
+const usage = () => {
+  info(`
+${bold('cht-conf\'s merge-contacts action')}
+When combined with 'upload-docs' this action merges multiple contacts and all their associated data into one.
+
+${bold('USAGE')}
+cht --local merge-contacts -- --winner=<winner_id> --losers=<loser_id1>,<loser_id2>
+
+${bold('OPTIONS')}
+--winner=<winner_id>
+  Specifies the ID of the contact that should have all other contact data merged into it.
+
+--losers=<loser_id1>,<loser_id2>
+  A comma delimited list of IDs of contacts which will be deleted and all of their data will be merged into the winner contact.
+
+--docDirectoryPath=<path to stage docs>
+  Specifies the folder used to store the documents representing the changes in hierarchy.
+`);
+};
+
+/*
+Points every report returned by fetch.reportsCreatedFor(loserId) at winnerId (top-level and fields.* subject ids)
+and stages each report on disk. Returns the number of reports processed.
+*/
+const moveReports = async (db, descendantsAndSelf, writeOptions, winnerId, loserId) => {
+  let skip = 0;
+  let reportDocsBatch;
+  do {
+    info(`Processing ${skip} to ${skip + BATCH_SIZE} report docs`);
+    reportDocsBatch = await fetch.reportsCreatedFor(db, loserId, skip);
+
+    reportDocsBatch.forEach(report => {
+      const subjectIds = ['patient_id', 'patient_uuid', 'place_id', 'place_uuid'];
+      for (const subjectId of subjectIds) {
+        if (report[subjectId]) {
+          report[subjectId] = winnerId;
+        }
+
+        // a report doc is not guaranteed to carry a fields object
+        if (report.fields && report.fields[subjectId]) {
+          report.fields[subjectId] = winnerId;
+        }
+      }
+
+      writeDocumentToDisk(writeOptions, report);
+    });
+
+    skip += reportDocsBatch.length;
+  } while (reportDocsBatch.length >= BATCH_SIZE);
+
+  return skip;
+};
+
+// Minifies the lineages within each doc and then stages the doc on disk
+const minifyLineageAndWriteToDisk = (docs, parsedArgs) => {
+  docs.forEach(doc => {
+    lineageManipulation.minifyLineagesInDoc(doc);
+    writeDocumentToDisk(parsedArgs, doc);
+  });
+};
+
+// Replaces the loser's lineage with replacementLineage in each descendant; returns only the docs which changed
+const replaceLineageInContacts = (descendantsAndSelf, replacementLineage, contactId) => descendantsAndSelf.reduce((agg, doc) => {
+  // skip top-level because it is now being deleted
+  if (doc._id === contactId) {
+    return agg;
+  }
+
+  const parentWasUpdated = lineageManipulation.replaceLineage(doc, 'parent', replacementLineage, contactId);
+
+  // TODO: seems wrong
+  const contactWasUpdated = lineageManipulation.replaceLineage(doc, 'contact', replacementLineage, contactId);
+  if (parentWasUpdated || contactWasUpdated) {
+    agg.push(doc);
+  }
+  return agg;
+}, []);
diff --git a/src/fn/move-contacts.js b/src/fn/move-contacts.js
index 6b29e3b03..e0c9ff24f 100644
--- a/src/fn/move-contacts.js
+++ b/src/fn/move-contacts.js
@@ -1,16 +1,22 @@
 const minimist = require('minimist');
 const path = require('path');
-const userPrompt = require('../lib/user-prompt');
 
 const environment = require('../lib/environment');
-const fs = require('../lib/sync-fs');
 const lineageManipulation = require('../lib/lineage-manipulation');
 const lineageConstraints = require('../lib/lineage-constraints');
 const pouch = require('../lib/db');
-const { warn, trace, info } = require('../lib/log');
-
-const HIERARCHY_ROOT = 'root';
-const BATCH_SIZE = 10000;
+const { trace, info } = require('../lib/log');
+
+const {
+  HIERARCHY_ROOT,
+  BATCH_SIZE,
+  prepareDocumentDirectory,
+  prettyPrintDocument,
+  replaceLineageInAncestors,
+  bold,
+  writeDocumentToDisk,
+  fetch,
+} = require('../lib/mm-shared');
 
 module.exports = {
   requiresInstance: true,
@@ -22,7 +28,6 @@ module.exports = {
   }
 };
 
-const prettyPrintDocument = doc => `'${doc.name}' (${doc._id})`;
 const updateLineagesAndStage = async (options, db) => {
   trace(`Fetching contact details for parent: ${options.parentId}`);
   const parentDoc = await fetch.contact(db, options.parentId);
@@ -117,21 +122,7 @@ const parseExtraArgs = (projectDir, extraArgs = []) => {
   };
 };
 
-const prepareDocumentDirectory = ({ docDirectoryPath, force }) => {
-  if (!fs.exists(docDirectoryPath)) {
-    fs.mkdir(docDirectoryPath);
-  } else if (!force && fs.recurseFiles(docDirectoryPath).length > 0) {
-    warn(`The document folder '${docDirectoryPath}' already contains files. It is recommended you start with a clean folder. Do you want to delete the contents of this folder and continue?`);
-    if(userPrompt.keyInYN()) {
-      fs.deleteFilesInFolder(docDirectoryPath);
-    } else {
-      throw new Error('User aborted execution.');
-    }
-  }
-};
-
 const usage = () => {
-  const bold = text => `\x1b[1m${text}\x1b[0m`;
   info(`
 ${bold('cht-conf\'s move-contacts action')}
 When combined with 'upload-docs' this action effectively moves a contact from one place in the hierarchy to another.
@@ -176,92 +167,6 @@ const minifyLineageAndWriteToDisk = (docs, parsedArgs) => {
-const writeDocumentToDisk = ({ docDirectoryPath }, doc) => {
-  const destinationPath = path.join(docDirectoryPath, `${doc._id}.doc.json`);
-  if (fs.exists(destinationPath)) {
-    warn(`File at ${destinationPath} already exists and is being overwritten.`);
-  }
-
-  trace(`Writing updated document to ${destinationPath}`);
-  fs.writeJson(destinationPath, doc);
-};
-
-const fetch = {
-  /*
-  Fetches all of the documents associated with the "contactIds" and confirms they exist.
-  */
-  contactList: async (db, ids) => {
-    const contactDocs = await db.allDocs({
-      keys: ids,
-      include_docs: true,
-    });
-
-    const missingContactErrors = contactDocs.rows.filter(row => !row.doc).map(row => `Contact with id '${row.key}' could not be found.`);
-    if (missingContactErrors.length > 0) {
-      throw Error(missingContactErrors);
-    }
-
-    return contactDocs.rows.reduce((agg, curr) => Object.assign(agg, { [curr.doc._id]: curr.doc }), {});
-  },
-
-  contact: async (db, id) => {
-    try {
-      if (id === HIERARCHY_ROOT) {
-        return undefined;
-      }
-
-      return await db.get(id);
-    } catch (err) {
-      if (err.name !== 'not_found') {
-        throw err;
-      }
-
-      throw Error(`Contact with id '${id}' could not be found`);
-    }
-  },
-
-  /*
-  Given a contact's id, obtain the documents of all descendant contacts
-  */
-  descendantsOf: async (db, contactId) => {
-    const descendantDocs = await db.query('medic/contacts_by_depth', {
-      key: [contactId],
-      include_docs: true,
-    });
-
-    return descendantDocs.rows
-      .map(row => row.doc)
-      /* We should not move or update tombstone documents */
-      .filter(doc => doc && doc.type !== 'tombstone');
-  },
-
-  reportsCreatedBy: async (db, contactIds, skip) => {
-    const reports = await db.query('medic-client/reports_by_freetext', {
-      keys: contactIds.map(id => [`contact:${id}`]),
-      include_docs: true,
-      limit: BATCH_SIZE,
-      skip: skip,
-    });
-
-    return reports.rows.map(row => row.doc);
-  },
-
-  ancestorsOf: async (db, contactDoc) => {
-    const ancestorIds = lineageManipulation.pluckIdsFromLineage(contactDoc.parent);
-    const ancestors = await db.allDocs({
-      keys: ancestorIds,
-      include_docs: true,
-    });
-
-    const ancestorIdsNotFound = ancestors.rows.filter(ancestor => !ancestor.doc).map(ancestor => ancestor.key);
-    if (ancestorIdsNotFound.length > 0) {
-      throw Error(`Contact '${prettyPrintDocument(contactDoc)} has parent id(s) '${ancestorIdsNotFound.join(',')}' which could not be found.`);
-    }
-
-    return ancestors.rows.map(ancestor => ancestor.doc);
-  },
-};
-
const replaceLineageInReports = (reportsCreatedByDescendants, replaceWith, startingFromIdInLineage) => reportsCreatedByDescendants.reduce((agg, doc) => { if (lineageManipulation.replaceLineage(doc, 'contact', replaceWith, startingFromIdInLineage)) { agg.push(doc); @@ -278,14 +182,3 @@ const replaceLineageInContacts = (descendantsAndSelf, replacementLineage, contac } return agg; }, []); - -const replaceLineageInAncestors = (descendantsAndSelf, ancestors) => ancestors.reduce((agg, ancestor) => { - let result = agg; - const primaryContact = descendantsAndSelf.find(descendant => ancestor.contact && descendant._id === ancestor.contact._id); - if (primaryContact) { - ancestor.contact = lineageManipulation.createLineageFromDoc(primaryContact); - result = [ancestor, ...result]; - } - - return result; -}, []); diff --git a/src/lib/mm-shared.js b/src/lib/mm-shared.js new file mode 100644 index 000000000..bd324a13f --- /dev/null +++ b/src/lib/mm-shared.js @@ -0,0 +1,153 @@ +const path = require('path'); + +const userPrompt = require('./user-prompt'); +const fs = require('./sync-fs'); +const { warn, trace } = require('./log'); +const lineageManipulation = require('./lineage-manipulation'); + +const HIERARCHY_ROOT = 'root'; +const BATCH_SIZE = 10000; + +const prettyPrintDocument = doc => `'${doc.name}' (${doc._id})`; + +const prepareDocumentDirectory = ({ docDirectoryPath, force }) => { + if (!fs.exists(docDirectoryPath)) { + fs.mkdir(docDirectoryPath); + } else if (!force && fs.recurseFiles(docDirectoryPath).length > 0) { + warn(`The document folder '${docDirectoryPath}' already contains files. It is recommended you start with a clean folder. 
Do you want to delete the contents of this folder and continue?`); + if(userPrompt.keyInYN()) { + fs.deleteFilesInFolder(docDirectoryPath); + } else { + throw new Error('User aborted execution.'); + } + } +}; + +const writeDocumentToDisk = ({ docDirectoryPath }, doc) => { + const destinationPath = path.join(docDirectoryPath, `${doc._id}.doc.json`); + if (fs.exists(destinationPath)) { + warn(`File at ${destinationPath} already exists and is being overwritten.`); + } + + trace(`Writing updated document to ${destinationPath}`); + fs.writeJson(destinationPath, doc); +}; + +const replaceLineageInAncestors = (descendantsAndSelf, ancestors) => ancestors.reduce((agg, ancestor) => { + let result = agg; + const primaryContact = descendantsAndSelf.find(descendant => ancestor.contact && descendant._id === ancestor.contact._id); + if (primaryContact) { + ancestor.contact = lineageManipulation.createLineageFromDoc(primaryContact); + result = [ancestor, ...result]; + } + + return result; +}, []); + + +const fetch = { + /* + Fetches all of the documents associated with the "contactIds" and confirms they exist. 
+ */ + contactList: async (db, ids) => { + const contactDocs = await db.allDocs({ + keys: ids, + include_docs: true, + }); + + const missingContactErrors = contactDocs.rows.filter(row => !row.doc).map(row => `Contact with id '${row.key}' could not be found.`); + if (missingContactErrors.length > 0) { + throw Error(missingContactErrors); + } + + return contactDocs.rows.reduce((agg, curr) => Object.assign(agg, { [curr.doc._id]: curr.doc }), {}); + }, + + contact: async (db, id) => { + try { + if (id === HIERARCHY_ROOT) { + return undefined; + } + + return await db.get(id); + } catch (err) { + if (err.name !== 'not_found') { + throw err; + } + + throw Error(`Contact with id '${id}' could not be found`); + } + }, + + /* + Given a contact's id, obtain the documents of all descendant contacts + */ + descendantsOf: async (db, contactId) => { + const descendantDocs = await db.query('medic/contacts_by_depth', { + key: [contactId], + include_docs: true, + }); + + return descendantDocs.rows + .map(row => row.doc) + /* We should not move or update tombstone documents */ + .filter(doc => doc && doc.type !== 'tombstone'); + }, + + reportsCreatedBy: async (db, contactIds, skip) => { + const reports = await db.query('medic-client/reports_by_freetext', { + keys: contactIds.map(id => [`contact:${id}`]), + include_docs: true, + limit: BATCH_SIZE, + skip, + }); + + return reports.rows.map(row => row.doc); + }, + + reportsCreatedFor: async (db, contactId, skip) => { + // TODO is this the right way? 
+ const reports = await db.query('medic-client/reports_by_freetext', { + keys: [ + [`patient_id:${contactId}`], + [`patient_uuid:${contactId}`], + [`place_id:${contactId}`], + [`place_uuid:${contactId}`], + ], + include_docs: true, + limit: BATCH_SIZE, + skip, + }); + + return reports.rows.map(row => row.doc); + }, + + ancestorsOf: async (db, contactDoc) => { + const ancestorIds = lineageManipulation.pluckIdsFromLineage(contactDoc.parent); + const ancestors = await db.allDocs({ + keys: ancestorIds, + include_docs: true, + }); + + const ancestorIdsNotFound = ancestors.rows.filter(ancestor => !ancestor.doc).map(ancestor => ancestor.key); + if (ancestorIdsNotFound.length > 0) { + throw Error(`Contact '${prettyPrintDocument(contactDoc)} has parent id(s) '${ancestorIdsNotFound.join(',')}' which could not be found.`); + } + + return ancestors.rows.map(ancestor => ancestor.doc); + }, +}; + +const bold = text => `\x1b[1m${text}\x1b[0m`; + +module.exports = { + HIERARCHY_ROOT, + BATCH_SIZE, + bold, + prepareDocumentDirectory, + prettyPrintDocument, + minifyLineageAndWriteToDisk, + replaceLineageInAncestors, + writeDocumentToDisk, + fetch, +};