From 06f4e29480e991332d6d3ed08578df7247e3a8e3 Mon Sep 17 00:00:00 2001 From: Joxit Date: Wed, 20 Mar 2024 16:05:41 +0100 Subject: [PATCH] feat(import): add support to `.gz` data --- lib/parameters.js | 2 +- lib/streams/recordStream.js | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/parameters.js b/lib/parameters.js index 18136d42d..259f99de6 100644 --- a/lib/parameters.js +++ b/lib/parameters.js @@ -96,7 +96,7 @@ function getFullFileList(peliasConfig, args) { if (_.isEmpty(files)) { // no specific files listed, so return all .csv and .geojson files - return glob.sync( args.dirPath + '/**/*.{csv,geojson}' ); + return glob.sync( args.dirPath + '/**/*.{csv,geojson,geojson.gz,csv.gz}' ); } else { // otherwise return the requested files with full path return files.map(function(file) { diff --git a/lib/streams/recordStream.js b/lib/streams/recordStream.js index 552429418..6bd7a8359 100644 --- a/lib/streams/recordStream.js +++ b/lib/streams/recordStream.js @@ -5,6 +5,7 @@ const csvParse = require('csv-parse').parse; const combinedStream = require('combined-stream'); const through = require('through2'); const split = require('split2'); +const zlib = require('zlib'); const logger = require('pelias-logger').get('openaddresses'); const config = require('pelias-config').generate(); @@ -26,13 +27,13 @@ function getIdPrefix(filename, dirPath) { // of the directory tree to create the id if (filename.indexOf(dirPath) !== -1) { var subpath = _.replace(filename, dirPath, ''); - var prefix = _.replace(subpath, /\.(csv|geojson)/, ''); + var prefix = _.replace(_.replace(subpath, /\.(csv|geojson)/, ''), /\.gz/, ''); return _.trim(prefix, '/'); } } // if the dirPath doesn't contain this file, return the basename without extension - return path.basename(path.basename(filename, '.csv'), '.geojson'); + return path.basename(path.basename(path.basename(filename, '.gz'), '.csv'), '.geojson'); } /** @@ -98,7 +99,11 @@ function geojsonStream(stream) { } function fileStreamDispatcher(stream, filePath) { - if (filePath.endsWith('.geojson')) { + if (filePath.endsWith('.gz')) { + stream = stream.pipe(zlib.createGunzip()); + } + + if (/\.geojson(\.gz)?/.test(filePath)) { return geojsonStream(stream); }