From 21c57f41e8d6d4ee6fba34b1d74cac37f97d95cf Mon Sep 17 00:00:00 2001
From: Daniel Lupu
Date: Sat, 30 Apr 2022 22:36:00 +0300
Subject: [PATCH 1/3] parallelize update script

---
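Notes (everything between this '---' and the diffstat is dropped by git am):

Hash computation (keccak256, sha256, swarm and IPFS) moves into worker
processes driven by the workerpool package: the main script slices the
parsed file names into batches and submits each batch with
pool.exec('workerMain', [dir, batch, oldList]), and update-worker.js
registers the matching handler via workerpool.worker({ workerMain }).
A typical invocation with the new knobs might look like this (the values
are purely illustrative; both options fall back to the defaults set in
parseCommandLine when omitted):

    ./update --reuse-hashes --max-files-per-batch 10 --max-worker-processes 4

One serialization detail worth calling out: arguments are handed to the
worker processes in JSON form, and JSON has no undefined, so unmatched
regexp groups such as parsedFileName[3] come out of the round trip as
null. That appears to be why the prerelease checks below now compare
against null rather than undefined.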
 package.json     |   3 +-
 update           | 125 ++++++++++++++---------------------------------
 update-worker.js |  82 +++++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 88 deletions(-)
 create mode 100644 update-worker.js

diff --git a/package.json b/package.json
index 6500c92d3..70cac3757 100644
--- a/package.json
+++ b/package.json
@@ -12,7 +12,8 @@
     "ipld": "^0.30.2",
     "ipld-in-memory": "^8.0.0",
     "standard": "^16.0.4",
-    "swarmhash": "^0.1.1"
+    "swarmhash": "^0.1.1",
+    "workerpool": "^6.2.1"
   },
   "scripts": {
     "lint": "standard update",
diff --git a/update b/update
index 8b8c0cbb8..4a44167d1 100755
--- a/update
+++ b/update
@@ -3,28 +3,15 @@
 'use strict'
 
 const fs = require('fs')
-const util = require('util')
 const path = require('path')
+const util = require('util')
 const semver = require('semver')
-const ethUtil = require('ethereumjs-util')
-const ipfsImporter = require('ipfs-unixfs-importer')
-const IPLD = require('ipld')
-const inMemory = require('ipld-in-memory')
-const swarmhash = require('swarmhash')
+const workerpool = require('workerpool')
 
 // This script updates the index files list.js and list.txt in the directories containing binaries,
 // as well as the 'latest' and 'nightly' symlinks/files.
 
-const ipfsHash = async (content) => {
-  const iterator = ipfsImporter.importer([{ content }], await inMemory(IPLD), { onlyHash: true })
-  const { value, done } = await iterator.next()
-  if (done) {
-    throw new Error('Failed to calculate an IPFS hash.')
-  }
-
-  await iterator.return()
-  return value.cid.toString()
-}
+const readFile = util.promisify(fs.readFile)
 
 function generateLegacyListJS (builds, releases) {
   return `
@@ -104,67 +91,14 @@ function deleteIfExists (filePathRelativeToRoot) {
   })
 }
 
-function buildVersion (build) {
-  let version = build.version
-  if (build.prerelease && build.prerelease.length > 0) {
-    version += '-' + build.prerelease
-  }
-  if (build.build && build.build.length > 0) {
-    version += '+' + build.build
-  }
-  return version
-}
-
-async function makeEntry (dir, parsedFileName, oldList) {
-  const pathRelativeToRoot = path.join(dir, parsedFileName[0])
-  const absolutePath = path.join(__dirname, pathRelativeToRoot)
+async function batchedAsyncMap (values, batchSize, dir, oldList) {
+  const batchPromises = []
 
-  const build = {
-    path: parsedFileName[0],
-    version: parsedFileName[1],
-    prerelease: parsedFileName[3],
-    build: parsedFileName[5]
-  }
-  build.longVersion = buildVersion(build)
-
-  if (oldList) {
-    const entries = oldList.builds.filter(entry => (entry.path === parsedFileName[0]))
-    if (entries) {
-      if (entries.length >= 2) {
-        throw Error("Found multiple list.json entries for binary '" + pathRelativeToRoot + "'")
-      } else if (entries.length === 1) {
-        build.keccak256 = entries[0].keccak256
-        build.sha256 = entries[0].sha256
-        build.urls = entries[0].urls
-      }
-    }
-  }
-
-  if (!build.sha256 || !build.keccak256 || !build.urls || build.urls.length !== 2) {
-    const readFile = util.promisify(fs.readFile)
-    const fileContent = await readFile(absolutePath)
-    build.keccak256 = '0x' + ethUtil.keccak(fileContent).toString('hex')
-    console.log("Computing hashes of '" + pathRelativeToRoot + "'")
-    build.sha256 = '0x' + ethUtil.sha256(fileContent).toString('hex')
-    build.urls = [
-      'bzzr://' + swarmhash(fileContent).toString('hex'),
-      'dweb:/ipfs/' + await ipfsHash(fileContent)
-    ]
-  }
-
-  return build
-}
-
-async function batchedAsyncMap (values, batchSize, asyncMapFunction) {
-  if (batchSize === null) {
-    batchSize = values.length
-  }
-
-  let results = []
   for (let i = 0; i < values.length; i += batchSize) {
-    results = results.concat(await Promise.all(values.slice(i, i + batchSize).map(asyncMapFunction)))
+    batchPromises.push(pool.exec('workerMain', [dir, values.slice(i, i + batchSize), oldList]))
   }
-  return results
+
+  return (await Promise.all(batchPromises)).flat()
 }
 
 function processDir (dir, options, listCallback) {
@@ -176,7 +110,8 @@
     let oldList
     if (options.reuseHashes) {
       try {
-        oldList = JSON.parse(fs.readFileSync(path.join(__dirname, dir, '/list.json')))
+        const oldListFileContent = await readFile(path.join(__dirname, dir, '/list.json'))
+        oldList = JSON.parse(oldListFileContent.toString())
       } catch (err) {
         // Not being able to read the existing list is not a critical error.
         // We'll just recreate it from scratch.
@@ -212,9 +147,7 @@
       })
      .filter(function (matchResult) { return matchResult !== null })
 
-    const parsedList = (await batchedAsyncMap(parsedFileNames, options.maxFilesPerBatch, async function (matchResult) {
-      return await makeEntry(dir, matchResult, oldList)
-    }))
+    const parsedList = (await batchedAsyncMap(parsedFileNames, options.maxFilesPerBatch, dir, oldList))
       .sort(function (a, b) {
         if (a.longVersion === b.longVersion) {
           return 0
@@ -236,7 +169,7 @@
       .slice()
       .reverse()
       .reduce(function (prev, next) {
-        if (next.prerelease === undefined) {
+        if (next.prerelease === null) {
          prev[next.version] = next.path
        }
        return prev
@@ -251,12 +184,7 @@
     const latestRelease = parsedList
       .slice()
       .reverse()
-      .filter(function (listEntry) {
-        if (listEntry.prerelease === undefined) {
-          return listEntry
-        }
-        return undefined
-      })
+      .filter(function (listEntry) { return listEntry.prerelease === null })
       .map(function (listEntry) {
         return listEntry.version
       })[0]
@@ -331,6 +259,7 @@
 function parseCommandLine () {
   let reuseHashes
   let maxFilesPerBatch
+  let maxWorkerProcesses
 
   for (let i = 2; i < process.argv.length; ++i) {
     if (process.argv[i] === '--reuse-hashes') {
@@ -347,6 +276,18 @@
         process.exit(1)
       }
       ++i
+    } else if (process.argv[i] === '--max-worker-processes') {
+      if (i + 1 >= process.argv.length) {
+        console.error('Expected an integer argument after --max-worker-processes.')
+        process.exit(1)
+      }
+
+      maxWorkerProcesses = parseInt(process.argv[i + 1], 10)
+      if (isNaN(maxWorkerProcesses) || maxWorkerProcesses <= 0) {
+        console.error("Expected the argument of --max-worker-processes to be a positive integer, got '" + process.argv[i + 1] + "'.")
+        process.exit(1)
+      }
+      ++i
     } else {
       console.error("Invalid option: '" + process.argv[i] + "'.")
       process.exit(1)
@@ -358,12 +299,16 @@
     reuseHashes = false
   }
   if (maxFilesPerBatch === undefined) {
-    maxFilesPerBatch = null // no limit
+    maxFilesPerBatch = 10 // Sensible default so that a single worker does not get drowned in files
+  }
+  if (maxWorkerProcesses === undefined) {
+    maxWorkerProcesses = 1
   }
 
   return {
     reuseHashes: reuseHashes,
-    maxFilesPerBatch: maxFilesPerBatch
+    maxFilesPerBatch: maxFilesPerBatch,
+    maxWorkerProcesses: maxWorkerProcesses
   }
 }
@@ -376,6 +321,12 @@ const DIRS = [
 
 const options = parseCommandLine()
 
+const pool = workerpool.pool('./update-worker.js', {
+  maxWorkers: options.maxWorkerProcesses,
+  // Threads would be ideal, but do not work as expected due to an issue with the swarmhash module
+  workerType: 'process'
+})
+
 DIRS.forEach(function (dir) {
   if (dir !== '/bin') {
     processDir(dir, options)
diff --git a/update-worker.js b/update-worker.js
new file mode 100644
index 000000000..3d20f3c78
--- /dev/null
+++ b/update-worker.js
@@ -0,0 +1,82 @@
+#!/usr/bin/env node
+'use strict'
+const fs = require('fs')
+const path = require('path')
+const ethUtil = require('ethereumjs-util')
+const ipfsImporter = require('ipfs-unixfs-importer')
+const util = require('util')
+const inMemory = require('ipld-in-memory')
+const swarmhash = require('swarmhash')
+const IPLD = require('ipld')
+const workerpool = require('workerpool')
+
+const readFile = util.promisify(fs.readFile)
+
+async function workerMain (dir, batch, oldList) {
+  return await Promise.all(batch.map(item => makeEntry(dir, item, oldList)))
+}
+
+async function makeEntry (dir, parsedFileName, oldList) {
+  const pathRelativeToRoot = path.join(dir, parsedFileName[0])
+  const absolutePath = path.join(__dirname, pathRelativeToRoot)
+
+  const build = {
+    path: parsedFileName[0],
+    version: parsedFileName[1],
+    prerelease: parsedFileName[3],
+    build: parsedFileName[5]
+  }
+  build.longVersion = buildVersion(build)
+
+  if (oldList) {
+    const entries = oldList.builds.filter(entry => (entry.path === parsedFileName[0]))
+    if (entries) {
+      if (entries.length >= 2) {
+        throw Error("Found multiple list.json entries for binary '" + pathRelativeToRoot + "'")
+      } else if (entries.length === 1) {
+        build.keccak256 = entries[0].keccak256
+        build.sha256 = entries[0].sha256
+        build.urls = entries[0].urls
+      }
+    }
+  }
+
+  if (!build.sha256 || !build.keccak256 || !build.urls || build.urls.length !== 2) {
+    const fileContent = await readFile(absolutePath)
+    build.keccak256 = '0x' + ethUtil.keccak(fileContent).toString('hex')
+    console.log("Computing hashes of '" + pathRelativeToRoot + "'")
+    build.sha256 = '0x' + ethUtil.sha256(fileContent).toString('hex')
+    build.urls = [
+      'bzzr://' + swarmhash(fileContent).toString('hex'),
+      'dweb:/ipfs/' + await ipfsHash(fileContent)
+    ]
+  }
+
+  return build
+}
+
+function buildVersion (build) {
+  let version = build.version
+  if (build.prerelease && build.prerelease.length > 0) {
+    version += '-' + build.prerelease
+  }
+  if (build.build && build.build.length > 0) {
+    version += '+' + build.build
+  }
+  return version
+}
+
+async function ipfsHash (content) {
+  const iterator = ipfsImporter.importer([{ content }], await inMemory(IPLD), { onlyHash: true })
+  const { value, done } = await iterator.next()
+  if (done) {
+    throw new Error('Failed to calculate an IPFS hash.')
+  }
+
+  await iterator.return()
+  return value.cid.toString()
+}
+
+workerpool.worker({
+  workerMain
+})
\ No newline at end of file

From deccf03b6b9105260960d408bff811597d6c998b Mon Sep 17 00:00:00 2001
From: Daniel Lupu
Date: Sat, 30 Apr 2022 22:45:19 +0300
Subject: [PATCH 2/3] fixup! parallelize update script

---
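Notes: this fixup only restores the newline at the end of update-worker.js
(patch 1 ends with a "\ No newline at end of file" marker). As a fixup!
commit it is meant to be squashed into patch 1 before merging, e.g. with
(<base> being a placeholder for the commit the series is based on):

    git rebase --interactive --autosquash <base>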
 update-worker.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/update-worker.js b/update-worker.js
index 3d20f3c78..97b40fc30 100644
--- a/update-worker.js
+++ b/update-worker.js
@@ -79,4 +79,4 @@ async function ipfsHash (content) {
 
 workerpool.worker({
   workerMain
-})
\ No newline at end of file
+})

From 3701bf782368fe514a911e6653714439c8eb7a5a Mon Sep 17 00:00:00 2001
From: Daniel Lupu
Date: Sat, 30 Apr 2022 23:08:03 +0300
Subject: [PATCH 3/3] add missing pool.terminate call

---
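Notes: without the terminate call the script never exits once the work is
done, because with workerType: 'process' the pooled child processes keep
the Node.js event loop alive. To get a single place to await all
directories before tearing the pool down, processDir becomes async
(fs.readdir is swapped for a promisified readdir) and the top-level DIRS
loop moves into main(). The shape of the change, reduced to its essentials
(the real main() also special-cases bin/ so that new releases get linked
before the other directories are processed):

    async function main () {
      await Promise.all(DIRS.map(function (dir) { return processDir(dir, options) }))
      pool.terminate()
    }

    main()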
 update | 327 +++++++++++++++++++++++++++++----------------------------
 1 file changed, 165 insertions(+), 162 deletions(-)

diff --git a/update b/update
index 4a44167d1..95317ef81 100755
--- a/update
+++ b/update
@@ -12,6 +12,7 @@ const workerpool = require('workerpool')
 // as well as the 'latest' and 'nightly' symlinks/files.
 
 const readFile = util.promisify(fs.readFile)
+const readdir = util.promisify(fs.readdir)
 
 function generateLegacyListJS (builds, releases) {
   return `
@@ -101,159 +102,155 @@ async function batchedAsyncMap (values, batchSize, dir, oldList) {
   return (await Promise.all(batchPromises)).flat()
 }
 
-function processDir (dir, options, listCallback) {
-  fs.readdir(path.join(__dirname, dir), { withFileTypes: true }, async function (err, files) {
-    if (err) {
-      throw err
-    }
+async function processDir (dir, options, listCallback) {
+  const files = await readdir(path.join(__dirname, dir), { withFileTypes: true })
 
-    let oldList
-    if (options.reuseHashes) {
-      try {
-        const oldListFileContent = await readFile(path.join(__dirname, dir, '/list.json'))
-        oldList = JSON.parse(oldListFileContent.toString())
-      } catch (err) {
-        // Not being able to read the existing list is not a critical error.
-        // We'll just recreate it from scratch.
-      }
-    }
+  let oldList
+  if (options.reuseHashes) {
+    try {
+      const oldListFileContent = await readFile(path.join(__dirname, dir, '/list.json'))
+      oldList = JSON.parse(oldListFileContent.toString())
+    } catch (err) {
+      // Not being able to read the existing list is not a critical error.
+      // We'll just recreate it from scratch.
+    }
+  }
 
-    const binaryPrefix = (dir === '/bin' || dir === '/wasm' ? 'soljson' : 'solc-' + dir.slice(1))
-    const binaryExtensions = {
-      '/bin': ['.js'],
-      '/wasm': ['.js'],
-      '/emscripten-asmjs': ['.js'],
-      '/emscripten-wasm32': ['.js'],
-      '/windows-amd64': ['.zip', '.exe'],
-      '/linux-amd64': [''],
-      '/macosx-amd64': ['']
-    }[dir] || ''
+  const binaryPrefix = (dir === '/bin' || dir === '/wasm' ? 'soljson' : 'solc-' + dir.slice(1))
+  const binaryExtensions = {
+    '/bin': ['.js'],
+    '/wasm': ['.js'],
+    '/emscripten-asmjs': ['.js'],
+    '/emscripten-wasm32': ['.js'],
+    '/windows-amd64': ['.zip', '.exe'],
+    '/linux-amd64': [''],
+    '/macosx-amd64': ['']
+  }[dir] || ''
 
-    // ascending list (oldest version first)
-    const parsedFileNames = files
-      .filter(function (file) {
-        // Skip symbolic links with less then 8 characters in the commit hash.
-        // They exist only for backwards-compatibilty and should not be on the list.
-        return dir !== '/bin' ||
-          !file.isSymbolicLink() ||
-          file.name.match(/^.+\+commit\.[0-9a-f]{8,}\.js$/)
-      })
-      .map(function (file) { return file.name })
-      .map(function (binaryName) {
-        const escapedExtensions = binaryExtensions.map(function (binaryExtension) {
-          return binaryExtension.replace('.', '\\.')
-        })
-        return binaryName.match(new RegExp('^' + binaryPrefix + '-v([0-9.]*)(-([^+]*))?(\\+(.*))?(' + escapedExtensions.join('|') + ')$'))
-      })
-      .filter(function (matchResult) { return matchResult !== null })
+  // ascending list (oldest version first)
+  const parsedFileNames = files
+    .filter(function (file) {
+      // Skip symbolic links with less than 8 characters in the commit hash.
+      // They exist only for backwards-compatibility and should not be on the list.
+      return dir !== '/bin' ||
+        !file.isSymbolicLink() ||
+        file.name.match(/^.+\+commit\.[0-9a-f]{8,}\.js$/)
+    })
+    .map(function (file) { return file.name })
+    .map(function (binaryName) {
+      const escapedExtensions = binaryExtensions.map(function (binaryExtension) {
+        return binaryExtension.replace('.', '\\.')
+      })
+      return binaryName.match(new RegExp('^' + binaryPrefix + '-v([0-9.]*)(-([^+]*))?(\\+(.*))?(' + escapedExtensions.join('|') + ')$'))
+    })
+    .filter(function (matchResult) { return matchResult !== null })
 
-    const parsedList = (await batchedAsyncMap(parsedFileNames, options.maxFilesPerBatch, dir, oldList))
-      .sort(function (a, b) {
-        if (a.longVersion === b.longVersion) {
-          return 0
-        }
+  const parsedList = (await batchedAsyncMap(parsedFileNames, options.maxFilesPerBatch, dir, oldList))
+    .sort(function (a, b) {
+      if (a.longVersion === b.longVersion) {
+        return 0
+      }
 
-        // NOTE: a vs. b (the order is important), because we want oldest first on parsedList.
-        // NOTE: If semver considers two versions equal we don't have enough info to say which came earlier
-        // so we don't care about their relative order as long as it's deterministic.
-        return semver.compare(a.longVersion, b.longVersion) || (a.longVersion > b.longVersion ? -1 : 1)
-      })
+      // NOTE: a vs. b (the order is important), because we want oldest first on parsedList.
+      // NOTE: If semver considers two versions equal we don't have enough info to say which came earlier
+      // so we don't care about their relative order as long as it's deterministic.
+      return semver.compare(a.longVersion, b.longVersion) || (a.longVersion > b.longVersion ? -1 : 1)
+    })
 
-    // When the list is ready, let the callback process it
-    if (listCallback !== undefined) {
-      listCallback(parsedList)
-    }
+  // When the list is ready, let the callback process it
+  if (listCallback !== undefined) {
+    listCallback(parsedList)
+  }
 
-    // descending list
-    const releases = parsedList
-      .slice()
-      .reverse()
-      .reduce(function (prev, next) {
-        if (next.prerelease === null) {
-          prev[next.version] = next.path
-        }
-        return prev
-      }, {})
+  // descending list
+  const releases = parsedList
+    .slice()
+    .reverse()
+    .reduce(function (prev, next) {
+      if (next.prerelease === null) {
+        prev[next.version] = next.path
+      }
+      return prev
+    }, {})
 
-    // descending list
-    const buildNames = parsedList
-      .slice()
-      .reverse()
-      .map(function (listEntry) { return listEntry.path })
+  // descending list
+  const buildNames = parsedList
+    .slice()
+    .reverse()
+    .map(function (listEntry) { return listEntry.path })
 
-    const latestRelease = parsedList
-      .slice()
-      .reverse()
-      .filter(function (listEntry) { return listEntry.prerelease === null })
-      .map(function (listEntry) {
-        return listEntry.version
-      })[0]
+  const latestRelease = parsedList
+    .slice()
+    .reverse()
+    .filter(function (listEntry) { return listEntry.prerelease === null })
+    .map(function (listEntry) {
+      return listEntry.version
+    })[0]
 
-    // latest build (nightly)
-    const latestBuildFile = buildNames[0]
+  // latest build (nightly)
+  const latestBuildFile = buildNames[0]
 
-    // latest release
-    const latestReleaseFile = releases[latestRelease]
+  // latest release
+  const latestReleaseFile = releases[latestRelease]
 
-    // Write list.txt
-    // A descending list of file names.
-    fs.writeFile(path.join(__dirname, dir, '/list.txt'), buildNames.join('\n'), function (err) {
-      if (err) {
-        throw err
-      }
-      console.log('Updated ' + dir + '/list.txt')
-    })
+  // Write list.txt
+  // A descending list of file names.
+  fs.writeFile(path.join(__dirname, dir, '/list.txt'), buildNames.join('\n'), function (err) {
+    if (err) {
+      throw err
+    }
+    console.log('Updated ' + dir + '/list.txt')
+  })
 
-    // Write bin/list.json
-    // Ascending list of builds and descending map of releases.
-    fs.writeFile(path.join(__dirname, dir, '/list.json'), JSON.stringify({ builds: parsedList, releases: releases, latestRelease: latestRelease }, null, 2), function (err) {
-      if (err) {
-        throw err
-      }
-      console.log('Updated ' + dir + '/list.json')
-    })
+  // Write bin/list.json
+  // Ascending list of builds and descending map of releases.
+  fs.writeFile(path.join(__dirname, dir, '/list.json'), JSON.stringify({ builds: parsedList, releases: releases, latestRelease: latestRelease }, null, 2), function (err) {
+    if (err) {
+      throw err
+    }
+    console.log('Updated ' + dir + '/list.json')
+  })
 
-    // Write bin/list.js
-    // Descending list of build filenames and descending map of releases.
-    fs.writeFile(path.join(__dirname, dir, '/list.js'), generateLegacyListJS(buildNames, releases), function (err) {
-      if (err) {
-        throw err
-      }
-      console.log('Updated ' + dir + '/list.js')
-    })
+  // Write bin/list.js
+  // Descending list of build filenames and descending map of releases.
+  fs.writeFile(path.join(__dirname, dir, '/list.js'), generateLegacyListJS(buildNames, releases), function (err) {
+    if (err) {
+      throw err
+    }
+    console.log('Updated ' + dir + '/list.js')
+  })
 
-    // Update 'latest' symlink (except for wasm/ where the link is hard-coded to point at the one in bin/).
-    // bin/ is a special case because we need to keep a copy rather than a symlink. The reason is that
-    // some tools (in particular solc-js) have hard-coded github download URLs to it and can't handle symlinks.
-    if (dir !== '/wasm') {
-      const releaseExtension = binaryExtensions.find(function (extension) { return latestReleaseFile.endsWith(extension) })
+  // Update 'latest' symlink (except for wasm/ where the link is hard-coded to point at the one in bin/).
+  // bin/ is a special case because we need to keep a copy rather than a symlink. The reason is that
+  // some tools (in particular solc-js) have hard-coded github download URLs to it and can't handle symlinks.
+  if (dir !== '/wasm') {
+    const releaseExtension = binaryExtensions.find(function (extension) { return latestReleaseFile.endsWith(extension) })
 
-      binaryExtensions.forEach(function (extension) {
-        if (extension !== releaseExtension) {
-          deleteIfExists(path.join(dir, binaryPrefix + '-latest' + extension))
-        }
-      })
+    binaryExtensions.forEach(function (extension) {
+      if (extension !== releaseExtension) {
+        deleteIfExists(path.join(dir, binaryPrefix + '-latest' + extension))
+      }
+    })
 
-      if (dir === '/bin') {
-        updateCopy(path.join(dir, latestReleaseFile), path.join(dir, binaryPrefix + '-latest' + releaseExtension))
-      } else {
-        updateSymlinkSync(path.join(dir, binaryPrefix + '-latest' + releaseExtension), latestReleaseFile)
-      }
-    }
+    if (dir === '/bin') {
+      updateCopy(path.join(dir, latestReleaseFile), path.join(dir, binaryPrefix + '-latest' + releaseExtension))
+    } else {
+      updateSymlinkSync(path.join(dir, binaryPrefix + '-latest' + releaseExtension), latestReleaseFile)
+    }
+  }
 
-    // Update 'nightly' symlink in bin/ (we don't have nightlies for other platforms)
-    if (dir === '/bin') {
-      const nightlyExtension = binaryExtensions.find(function (extension) { return latestBuildFile.endsWith(extension) })
+  // Update 'nightly' symlink in bin/ (we don't have nightlies for other platforms)
+  if (dir === '/bin') {
+    const nightlyExtension = binaryExtensions.find(function (extension) { return latestBuildFile.endsWith(extension) })
 
-      binaryExtensions.forEach(function (extension) {
-        if (extension !== nightlyExtension) {
-          deleteIfExists(path.join(dir, binaryPrefix + '-latest' + extension))
-        }
-      })
+    binaryExtensions.forEach(function (extension) {
+      if (extension !== nightlyExtension) {
+        deleteIfExists(path.join(dir, binaryPrefix + '-latest' + extension))
+      }
+    })
 
-      updateSymlinkSync(path.join(dir, binaryPrefix + '-nightly' + nightlyExtension), latestBuildFile)
-    }
-  })
-}
+    updateSymlinkSync(path.join(dir, binaryPrefix + '-nightly' + nightlyExtension), latestBuildFile)
+  }
+}
 
 function parseCommandLine () {
   let reuseHashes
@@ -327,31 +324,33 @@ const pool = workerpool.pool('./update-worker.js', {
   workerType: 'process'
 })
 
-DIRS.forEach(function (dir) {
-  if (dir !== '/bin') {
-    processDir(dir, options)
-  } else {
-    processDir(dir, options, function (parsedList) {
-      // Any new releases added to bin/ need to be linked in other directories before we can start processing them.
-      parsedList.forEach(function (release) {
-        if (release.prerelease === undefined) {
-          // Starting with 0.6.2 we no longer build asm.js releases and the new builds added to bin/ are all wasm.
-          if (semver.gt(release.version, '0.6.1')) {
-            updateSymlinkSync(
-              path.join('/wasm', release.path),
-              path.join('..', 'bin', release.path)
-            )
-          } else {
-            updateSymlinkSync(
-              path.join('/emscripten-asmjs', 'solc-emscripten-asmjs-v' + release.longVersion + '.js'),
-              path.join('..', 'bin', release.path)
-            )
-          }
-        }
-      })
-
-      processDir('/emscripten-asmjs', options)
-      processDir('/wasm', options, function (parsedList) {
+async function main () {
+  await Promise.all(DIRS.map(async function (dir) {
+    if (dir !== '/bin') {
+      await processDir(dir, options)
+    } else {
+      await processDir(dir, options, function (parsedList) {
+        // Any new releases added to bin/ need to be linked in other directories before we can start processing them.
+        parsedList.forEach(function (release) {
+          if (release.prerelease === null) {
+            // Starting with 0.6.2 we no longer build asm.js releases and the new builds added to bin/ are all wasm.
+            if (semver.gt(release.version, '0.6.1')) {
+              updateSymlinkSync(
+                path.join('/wasm', release.path),
+                path.join('..', 'bin', release.path)
+              )
+            } else {
+              updateSymlinkSync(
+                path.join('/emscripten-asmjs', 'solc-emscripten-asmjs-v' + release.longVersion + '.js'),
+                path.join('..', 'bin', release.path)
+              )
+            }
+          }
+        })
+      })
+
+      await processDir('/emscripten-asmjs', options)
+      await processDir('/wasm', options, function (parsedList) {
         // Any new releases added to wasm/ need to be linked in emscripten-wasm32/ first.
         parsedList.forEach(function (release) {
-          if (release.prerelease === undefined) {
+          if (release.prerelease === null) {
             updateSymlinkSync(
               path.join('/emscripten-wasm32', 'solc-emscripten-wasm32-v' + release.longVersion + '.js'),
               path.join('..', 'wasm', release.path)
             )
           }
         })
-
-        processDir('/emscripten-wasm32', options)
       })
-    })
-  }
-})
+
+      await processDir('/emscripten-wasm32', options)
+    }
+  }))
+
+  pool.terminate()
+}
+
+main()
\ No newline at end of file